Merge branch 'ClickHouse:master' into master

andrc1901 2021-10-26 21:07:33 +03:00 committed by GitHub
commit 5eacbac7a0
65 changed files with 1684 additions and 185 deletions

.github/workflows/cancel.yml

@ -0,0 +1,13 @@
name: Cancel
on: # yamllint disable-line rule:truthy
workflow_run:
workflows: ["CIGithubActions"]
types:
- requested
jobs:
cancel:
runs-on: [self-hosted, style-checker]
steps:
- uses: styfle/cancel-workflow-action@0.9.1
with:
workflow_id: ${{ github.event.workflow.id }}


@ -1,4 +1,4 @@
name: Ligthweight GithubActions
name: CIGithubActions
on: # yamllint disable-line rule:truthy
pull_request:
types:
@ -11,20 +11,25 @@ on: # yamllint disable-line rule:truthy
- master
jobs:
CheckLabels:
runs-on: [self-hosted]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Labels check
run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 run_check.py
DockerHubPush:
needs: CheckLabels
runs-on: [self-hosted]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
@ -32,7 +37,7 @@ jobs:
path: ${{ runner.temp }}/docker_images_check/changed_images.json
StyleCheck:
needs: DockerHubPush
runs-on: [self-hosted]
runs-on: [self-hosted, style-checker]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
@ -42,12 +47,82 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v2
- name: Style Check
run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 style_check.py
BuilderDebDebug:
needs: DockerHubPush
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/build_check
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 7
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
BuilderReport:
needs: [BuilderDebDebug]
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
FastTest:
needs: DockerHubPush
runs-on: [self-hosted, builder]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Fast Test
env:
TEMP_PATH: ${{runner.temp}}/fasttest
REPO_COPY: ${{runner.temp}}/fasttest/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 fast_test_check.py
FinishCheck:
needs: [StyleCheck, DockerHubPush, CheckLabels]
runs-on: [self-hosted]
needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label
run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 finish_check.py


@ -149,8 +149,6 @@ if (ENABLE_FUZZING)
set (ENABLE_JEMALLOC 0)
set (ENABLE_CHECK_HEAVY_BUILDS 1)
set (GLIBC_COMPATIBILITY OFF)
set (ENABLE_PROTOBUF ON)
set (USE_INTERNAL_PROTOBUF_LIBRARY ON)
endif()
# Global libraries


@ -17,6 +17,16 @@ endif ()
add_subdirectory("${protobuf_SOURCE_DIR}/cmake" "${protobuf_BINARY_DIR}")
if (ENABLE_FUZZING)
# `protoc` will be built with sanitizer and it could fail during ClickHouse build
# It easily reproduces in oss-fuzz building pipeline
# To avoid this we can try to build `protoc` without any sanitizer with option `-fno-sanitize=all`, but
# in this case we will face linker errors, because libcxx will still be built with sanitizer
# So, we can simply suppress all of these failures with a combination of this flag and an environment variable
# export MSAN_OPTIONS=exit_code=0
target_compile_options(protoc PRIVATE "-fsanitize-recover=all")
endif()
# We don't want to stop compilation on warnings in protobuf's headers.
# The following line overrides the value assigned by the command target_include_directories() in libprotobuf.cmake
set_property(TARGET libprotobuf PROPERTY INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${protobuf_SOURCE_DIR}/src")


@ -12,19 +12,19 @@ printenv
rm -f CMakeCache.txt
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
# Hope, that the most part of files will be in cache, so we just link new executables
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" -DENABLE_CLICKHOUSE_ODBC_BRIDGE=OFF \
-DENABLE_LIBRARIES=0 -DENABLE_SSL=1 -DUSE_INTERNAL_SSL_LIBRARY=1 -DUSE_UNWIND=ON -DENABLE_EMBEDDED_COMPILER=0 \
-DENABLE_EXAMPLES=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 "-DSANITIZE=$SANITIZER" \
-DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0 \
-DENABLE_CHECK_HEAVY_BUILDS=1 -DGLIBC_COMPATIBILITY=OFF "${CMAKE_FLAGS[@]}" ..
# Please, add or change flags directly in cmake
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" \
-DSANITIZE="$SANITIZER" -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_PROTOBUF=1 -DUSE_INTERNAL_PROTOBUF_LIBRARY=1 "${CMAKE_FLAGS[@]}" ..
FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ')
NUM_JOBS=$(($(nproc || grep -c ^processor /proc/cpuinfo)))
mkdir -p /output/fuzzers
for FUZZER_TARGET in $FUZZER_TARGETS
do
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS $FUZZER_TARGET
ninja $NINJA_FLAGS $FUZZER_TARGET -j $NUM_JOBS
# Find this binary in build directory and strip it
FUZZER_PATH=$(find ./src -name "$FUZZER_TARGET")
strip --strip-unneeded "$FUZZER_PATH"


@ -1 +0,0 @@
../../en/development/continuous-integration.md


@ -0,0 +1,155 @@
# Continuous Integration Checks {#continuous-integration-checks}
When you submit a pull request, the ClickHouse [continuous integration (CI) system](https://clickhouse.com/docs/en/development/tests/#test-automation) runs some automated checks on your code.
This happens after a repository maintainer (someone from the ClickHouse team) has screened your code and added the can-be-tested label to your pull request.
The results of the checks are listed on the GitHub pull request page, as described in the [GitHub checks documentation](https://docs.github.com/en/github/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks).
If a check fails, you may be asked to fix it. This page describes the checks you may encounter and how to fix them.
If a check failure looks unrelated to your changes, it may be a transient failure or an infrastructure problem. Push an empty commit to the pull request to restart the CI checks:
```
git reset
git commit --allow-empty
git push
```
If you are not sure what to do, ask a maintainer for help.
## Merge With Master {#merge-with-master}
Verifies that the PR can be merged into master. If not, the check fails with the message 'Cannot fetch mergecommit'. To fix it, resolve the conflict as described in the [GitHub documentation](https://docs.github.com/en/github/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github), or merge the master branch into your pull request branch with git, as in the sketch below.
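A minimal sketch of the git route, assuming `origin` points at the upstream repository:
```bash
git fetch origin master
git merge origin/master   # resolve any conflicts, then commit
git push
```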
## Docs Check {#docs-check}
Tries to build the ClickHouse documentation website. It can fail if you changed something in the documentation; the most likely reason is that some cross-link in the documentation is wrong. Go to the check report and look for `ERROR` and `WARNING` messages.
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check.html)
- `docs_output.txt` contains the build log. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check/docs_output.txt)
## Description Check {#description-check}
Checks that the description of the pull request conforms to the [PULL_REQUEST_TEMPLATE.md](https://github.com/ClickHouse/ClickHouse/blob/master/.github/PULL_REQUEST_TEMPLATE.md) template.
You must specify a changelog category for your change (e.g. Bug Fix) and write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/).
## Push To DockerHub {#push-to-dockerhub}
Builds the docker images used for builds and tests, then pushes them to DockerHub.
## Marker Check {#marker-check}
This check means that the CI system has started processing the pull request. A "pending" status means that not all checks have been started yet. After all checks have been started, the status changes to "success".
## Style Check {#style-check}
Performs some simple regex-based checks of the code style, using the `utils/check-style/check-style` binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](./style.md).
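A minimal sketch of running it locally, assuming the script resolves the repository root on its own:
```bash
# from inside a ClickHouse checkout; style violations, if any, are printed to stdout
./utils/check-style/check-style
```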
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the check errors (invalid tabulation, etc.); a blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)
## PVS Check {#pvs-check}
Checks the code with the static analysis tool [PVS-studio](https://www.viva64.com/en/pvs-studio/). Look at the report to see the exact errors. Fix them if you can; if not, ask a ClickHouse maintainer for help.
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/pvs_check.html)
- `test_run.txt.out.log` contains the build and analysis log file. It includes only parsing or not-found errors.
- `HTML report` contains the analysis results. For a description, visit the PVS [official site](https://www.viva64.com/en/m/0036/#ID14E9A2B2CD).
## Fast Test {#fast-test}
Normally this is the first check run for a PR. It builds ClickHouse and runs most of the stateless functional tests, omitting some of them. If it fails, further checks are not started until it is fixed. Look at the report to see which tests failed, then reproduce the failure locally as described [here](./tests.md#functional-test-locally).
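A minimal sketch of such a local reproduction, assuming a locally built `clickhouse-server` is already running and using a placeholder test name:
```bash
cd tests
# 01234_some_failing_test stands in for the test name taken from the report
./clickhouse-test 01234_some_failing_test
```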
### Report Details {#report-details}
[Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/fast_test.html)
#### Status Page Files {#status-page-files}
- `runlog.out.log` is the general log that includes all other logs.
- `test_log.txt`
- `submodule_log.txt` contains the messages about cloning and checking out the required submodules.
- `stderr.log`
- `stdout.log`
- `clickhouse-server.log`
- `clone_log.txt`
- `install_log.txt`
- `clickhouse-server.err.log`
- `build_log.txt`
- `cmake_log.txt` contains the messages about the C/C++ and Linux flags checks.
#### Status Page Columns {#status-page-columns}
- Test name -- contains the name of the test (without the path; e.g. all types of tests are stripped to the name).
- Test status -- one of Skipped, Success, or Fail.
- Test time, sec. -- empty for this check.
## Build Check {#build-check}
Builds ClickHouse in various configurations for use in further steps. You have to fix the builds that fail. Build logs often contain enough information to fix the error, but you might have to reproduce the failure locally. The `cmake` options can be found in the build log by grepping for `cmake`. Use these options and follow the [general build process](./build.md).
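For example, a rough way to pull the `cmake` invocation out of a downloaded build log (the file name here is hypothetical):
```bash
# build_log.log stands in for a locally downloaded build log
grep -m 1 cmake build_log.log
```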
### Report Details {#report-details}
[Status page example](https://clickhouse-builds.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/clickhouse_build_check/report.html)
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures, e.g. `clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Bundled**: `bundled` builds use libraries from `contrib`, while `unbundled` builds use system libraries.
- **Splitted**: `splitted` is a [split build](https://clickhouse.com/docs/en/development/build/#split-build)
- **Status**: `success` or `fail`.
- **Build log**: link to the building and files copying log, useful when the build failed.
- **Build time**.
- **Artifacts**: build result files (with `XXX` being the server version, e.g. `20.8.1.4344`).
  - `clickhouse-client_XXX_all.deb`
  - `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb`
  - `clickhouse-common-static_XXX_amd64.deb`
  - `clickhouse-server_XXX_all.deb`
  - `clickhouse-test_XXX_all.deb`
  - `clickhouse_XXX_amd64.buildinfo`
  - `clickhouse_XXX_amd64.changes`
  - `clickhouse`: Main built binary.
  - `clickhouse-odbc-bridge`
  - `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests.
  - `shared_build.tgz`: build with shared libraries.
  - `performance.tgz`: special package for performance tests.
## Special Build Check {#special-buildcheck}
Performs static analysis and code style checks using clang-tidy. The report is similar to the build check. Fix the errors found in the build log.
## Functional Stateless Tests {#functional-stateless-tests}
Runs [stateless functional tests](./tests.md#functional-tests) for ClickHouse binaries built in various configurations -- release, debug, with sanitizers, etc. Look at the report to see which tests failed, then reproduce the failure locally as described [here](./tests.md#functional-test-locally). Note that you have to use the correct build configuration to reproduce -- a test may fail under AddressSanitizer but pass in Debug. Download the binary from the [CI build checks page](./build.md#you-dont-have-to-build-clickhouse), or build it locally.
## Functional Stateful Tests {#functional-stateful-tests}
Runs the [stateful functional tests](./tests.md#functional-tests). Treat them in the same way as the stateless functional tests. The difference is that they require the `hits` and `visits` tables from the [Yandex.Metrica dataset](https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/) to run.
## Integration Tests {#integration-tests}
Runs the [integration tests](./tests.md#integration-tests).
## Testflows Check {#testflows-check}
Runs some tests using the Testflows test system. See [here](https://github.com/ClickHouse/ClickHouse/tree/master/tests/testflows#running-tests-locally) how to run them locally.
## Stress Test {#stress-test}
Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails:
```
* Fix all other test failures first;
* Look at the report to find the server logs and check them for possible causes
  of error.
```
## Split Build Smoke Test {#split-build-smoke-test}
Checks that the server build in the [split build](./build.md#split-build) configuration can start and run simple queries. If it fails:
```
* Fix other test errors first;
* Build the server in [split build](./build.md#split-build) configuration
  locally and check whether it can start and run `select 1`.
```
## Compatibility Check {#compatibility-check}
Checks that the `clickhouse` binary runs on distributions with old libc versions. If it fails, ask a maintainer for help.
## AST Fuzzer {#ast-fuzzer}
Runs randomly generated queries to catch program errors. If it fails, ask a maintainer for help.
## Performance Tests {#performance-tests}
Measures changes in query performance. This is the longest check: it takes just under 6 hours to run. The performance test report is described in detail [here](https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#how-to-read-the-report).
## QA {#qa}
What is a "Task (private network)" item on the status pages?
It is a link to Yandex's internal job system. Yandex employees can see the check's start time and its more detailed status.
Where are the tests run?
Somewhere in Yandex's internal infrastructure.


@ -1 +0,0 @@
../../../en/operations/external-authenticators/kerberos.md


@ -0,0 +1,105 @@
# Kerberos Authentication {#external-authenticators-kerberos}
Existing and properly configured ClickHouse users can be authenticated via the Kerberos authentication protocol.
Currently, Kerberos can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths.
Those users may only use HTTP requests and must be able to authenticate using the GSS-SPNEGO mechanism.
For this approach, Kerberos must be configured in the system and must be enabled in the ClickHouse config.
## Enabling Kerberos {#enabling-kerberos-in-clickHouse}
To enable Kerberos, include a `kerberos` section in `config.xml`. This section may contain additional parameters.
#### Parameters: {#parameters}
- `principal` - canonical service principal name that will be acquired and used when accepting security contexts.
    - This parameter is optional; if omitted, the default principal will be used.
- `realm` - a realm used to restrict authentication to only those requests whose initiator's realm matches it.
    - This parameter is optional; if omitted, no additional filtering by realm will be applied.
Example (goes into `config.xml`):
```xml
<yandex>
<!- ... -->
<kerberos />
</yandex>
```
Principal specification:
```xml
<yandex>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
```
Filtering by realm:
```xml
<yandex>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
```
!!! warning "Note"
    You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication.
!!! warning "Note"
    The `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` will force ClickHouse to disable Kerberos authentication.
## Kerberos as an External Authenticator for Existing Users {#kerberos-as-an-external-authenticator-for-existing-users}
Kerberos can be used as a method for verifying the identity of locally defined users (users defined in `users.xml` or in local access control paths). Currently, **only** requests over the HTTP interface can be authenticated (via the GSS-SPNEGO mechanism).
The Kerberos principal name format usually follows this pattern:
- *primary/instance@REALM*
The */instance* part may occur zero or more times. **For authentication to succeed, the *primary* part of the canonical principal name of the initiator must match the authenticated user name**.
### Enabling Kerberos in `users.xml` {#enabling-kerberos-in-users-xml}
To enable Kerberos authentication for a user, specify a `kerberos` section instead of `password` or similar sections in the user definition.
Parameters:
- `realm` - a realm used to restrict authentication to only those requests whose initiator's realm matches it.
    - This parameter is optional; if omitted, no additional filtering by realm will be applied.
Example (goes into `users.xml`):
```xml
<yandex>
<!- ... -->
<users>
<!- ... -->
<my_user>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</my_user>
</users>
</yandex>
```
!!! warning "Warning"
    Note that Kerberos authentication cannot be used together with any other authentication mechanism. The presence of any other section (like `password`) alongside `kerberos` will force ClickHouse to shut down.
!!! info "Reminder"
    Note that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously.
### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}
When [SQL-driven access control and account management](https://clickhouse.com/docs/en/operations/access-rights/#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
...or, without filtering by realm:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```


@ -5,4 +5,34 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
## system.asynchronous_metric_log {#system-tables-async-log}
Contains historical values of `system.asynchronous_log` (see [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics))
Contains the historical values of `system.asynchronous_metrics`, which are saved once per minute. Enabled by default.
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
**Example**
``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 10
```
``` text
┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │
└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘
```
**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains metrics that are calculated periodically in the background.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metric values from the tables `system.metrics` and `system.events`, periodically flushed to disk.


@ -18,7 +18,9 @@
#include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/ThreadStatus.h>
#include <Common/TLDListsHolder.h>
#include <Common/quoteString.h>
#include <Common/randomSeed.h>
#include <loggers/Loggers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
@ -35,7 +37,6 @@
#include <Formats/registerFormats.h>
#include <boost/program_options/options_description.hpp>
#include <base/argsToConfig.h>
#include <Common/randomSeed.h>
#include <filesystem>
namespace fs = std::filesystem;
@ -179,7 +180,6 @@ void LocalServer::initialize(Poco::Util::Application & self)
ConfigProcessor config_processor(config_path, false, true);
config_processor.setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config_processor.savePreprocessedConfig(loaded_config, loaded_config.configuration->getString("path", "."));
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
@ -284,6 +284,11 @@ void LocalServer::tryInitPath()
global_context->setFlagsPath(path + "flags");
global_context->setUserFilesPath(""); // user's files are everywhere
/// top_level_domains_lists
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
if (!top_level_domains_path.empty())
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
}
@ -380,7 +385,6 @@ void LocalServer::setupUsers()
const auto users_config_path = config().getString("users_config", config().getString("config-file", "config.xml"));
ConfigProcessor config_processor(users_config_path);
const auto loaded_config = config_processor.loadConfig();
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
users_config = loaded_config.configuration;
}
else
@ -673,6 +677,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
("no-system-tables", "do not attach system tables (better startup time)")
("path", po::value<std::string>(), "Storage path")
("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
;
}


@ -960,9 +960,14 @@ if (ThreadFuzzer::instance().isEffective())
global_context->setMMappedFileCache(mmap_cache_size);
#if USE_EMBEDDED_COMPILER
/// 128 MB
constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size);
constexpr size_t compiled_expression_cache_elements_size_default = 10000;
size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
#endif
/// Set path for format schema files


@ -351,9 +351,12 @@
-->
<mmap_cache_size>1000</mmap_cache_size>
<!-- Cache size for compiled expressions.-->
<!-- Cache size in bytes for compiled expressions.-->
<compiled_expression_cache_size>134217728</compiled_expression_cache_size>
<!-- Cache size in elements for compiled expressions.-->
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>
<!-- Path to data directory, with trailing slash. -->
<path>/var/lib/clickhouse/</path>


@ -279,9 +279,12 @@ mark_cache_size: 5368709120
# also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
mmap_cache_size: 1000
# Cache size for compiled expressions.
# Cache size in bytes for compiled expressions.
compiled_expression_cache_size: 134217728
# Cache size in elements for compiled expressions.
compiled_expression_cache_elements_size: 10000
# Path to data directory, with trailing slash.
path: /var/lib/clickhouse/


@ -36,12 +36,13 @@ public:
using Mapped = TMapped;
using MappedPtr = std::shared_ptr<Mapped>;
private:
using Clock = std::chrono::steady_clock;
public:
LRUCache(size_t max_size_)
: max_size(std::max(static_cast<size_t>(1), max_size_)) {}
/** Initialize LRUCache with max_size and max_elements_size.
* max_elements_size == 0 means no elements size restrictions.
*/
LRUCache(size_t max_size_, size_t max_elements_size_ = 0)
: max_size(std::max(static_cast<size_t>(1), max_size_))
, max_elements_size(max_elements_size_)
{}
MappedPtr get(const Key & key)
{
@ -252,6 +253,7 @@ private:
/// Total weight of values.
size_t current_size = 0;
const size_t max_size;
const size_t max_elements_size;
std::atomic<size_t> hits {0};
std::atomic<size_t> misses {0};
@ -311,7 +313,8 @@ private:
{
size_t current_weight_lost = 0;
size_t queue_size = cells.size();
while ((current_size > max_size) && (queue_size > 1))
while ((current_size > max_size || (max_elements_size != 0 && queue_size > max_elements_size)) && (queue_size > 1))
{
const Key & key = queue.front();


@ -227,7 +227,8 @@
M(CreatedHTTPConnections, "Total amount of created HTTP connections (counter increase every time connection is created).") \
\
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a query profiler signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerRuns, "Number of times QueryProfiler had been run.") \
\
M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \
M(NotCreatedLogEntryForMerge, "Log entry to merge parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \


@ -14,17 +14,19 @@
namespace
{
constexpr UInt64 ZERO = 0;
constexpr UInt64 ALL_THREADS = 0;
UInt64 calculateNewCoresNumber(DB::ThreadIdToTimeMap const & prev, DB::ThreadIdToTimeMap const& next)
{
if (next.find(ZERO) == next.end())
return ZERO;
auto accumulated = std::accumulate(next.cbegin(), next.cend(), ZERO,
[&prev](UInt64 acc, auto const & elem)
if (next.find(ALL_THREADS) == next.end())
return 0;
auto accumulated = std::accumulate(next.cbegin(), next.cend(), 0,
[&prev](UInt64 acc, const auto & elem)
{
if (elem.first == ZERO)
if (elem.first == ALL_THREADS)
return acc;
auto thread_time = elem.second.time();
auto it = prev.find(elem.first);
if (it != prev.end())
@ -32,9 +34,9 @@ namespace
return acc + thread_time;
});
auto elapsed = next.at(ZERO).time() - (prev.contains(ZERO) ? prev.at(ZERO).time() : ZERO);
if (elapsed == ZERO)
return ZERO;
auto elapsed = next.at(ALL_THREADS).time() - (prev.contains(ALL_THREADS) ? prev.at(ALL_THREADS).time() : 0);
if (elapsed == 0)
return 0;
return (accumulated + elapsed - 1) / elapsed;
}
}
@ -109,7 +111,7 @@ size_t ProgressIndication::getUsedThreadsCount() const
UInt64 ProgressIndication::getApproximateCoresNumber() const
{
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), ZERO,
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), 0,
[](UInt64 acc, auto const & elem)
{
return acc + elem.second;
@ -121,11 +123,12 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
return std::accumulate(thread_data.cbegin(), thread_data.cend(), MemoryUsage{},
[](MemoryUsage const & acc, auto const & host_data)
{
auto host_usage = std::accumulate(host_data.second.cbegin(), host_data.second.cend(), ZERO,
[](UInt64 memory, auto const & data)
{
return memory + data.second.memory_usage;
});
UInt64 host_usage = 0;
// In ProfileEvents packets thread id 0 specifies common profiling information
// for all threads executing current query on specific host. So instead of summing per thread
// memory consumption it's enough to look for data with thread id 0.
if (auto it = host_data.second.find(ALL_THREADS); it != host_data.second.end())
host_usage = it->second.memory_usage;
return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)};
});
}


@ -15,6 +15,7 @@
namespace ProfileEvents
{
extern const Event QueryProfilerSignalOverruns;
extern const Event QueryProfilerRuns;
}
namespace DB
@ -60,6 +61,7 @@ namespace
const StackTrace stack_trace(signal_context);
TraceCollector::collect(trace_type, stack_trace, 0);
ProfileEvents::increment(ProfileEvents::QueryProfilerRuns);
errno = saved_errno;
}


@ -12,7 +12,9 @@
/// Minimum revision with exactly the same set of aggregation methods and rules to select them.
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated).
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54431
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54456
#define DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 21
#define DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 4
#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410
#define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405


@ -43,13 +43,13 @@ private:
class FunctionBaseNow : public IFunctionBase
{
public:
explicit FunctionBaseNow(time_t time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {}
explicit FunctionBaseNow(time_t time_, DataTypes argument_types_, DataTypePtr return_type_)
: time_value(time_), argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}
String getName() const override { return "now"; }
const DataTypes & getArgumentTypes() const override
{
static const DataTypes argument_types;
return argument_types;
}
@ -69,6 +69,7 @@ public:
private:
time_t time_value;
DataTypes argument_types;
DataTypePtr return_type;
};
@ -117,8 +118,10 @@ public:
}
if (arguments.size() == 1)
return std::make_unique<FunctionBaseNow>(
time(nullptr), std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)));
return std::make_unique<FunctionBaseNow>(time(nullptr), std::make_shared<DataTypeDateTime>());
time(nullptr), DataTypes{arguments.front().type},
std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)));
return std::make_unique<FunctionBaseNow>(time(nullptr), DataTypes(), std::make_shared<DataTypeDateTime>());
}
};


@ -67,13 +67,13 @@ private:
class FunctionBaseNow64 : public IFunctionBase
{
public:
explicit FunctionBaseNow64(Field time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {}
explicit FunctionBaseNow64(Field time_, DataTypes argument_types_, DataTypePtr return_type_)
: time_value(time_), argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}
String getName() const override { return "now64"; }
const DataTypes & getArgumentTypes() const override
{
static const DataTypes argument_types;
return argument_types;
}
@ -93,6 +93,7 @@ public:
private:
Field time_value;
DataTypes argument_types;
DataTypePtr return_type;
};
@ -139,14 +140,19 @@ public:
return std::make_shared<DataTypeDateTime64>(scale, timezone_name);
}
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type) const override
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
UInt32 scale = DataTypeDateTime64::default_scale;
auto res_type = removeNullable(result_type);
if (const auto * type = typeid_cast<const DataTypeDateTime64 *>(res_type.get()))
scale = type->getScale();
return std::make_unique<FunctionBaseNow64>(nowSubsecond(scale), result_type);
DataTypes arg_types;
arg_types.reserve(arguments.size());
for (const auto & arg : arguments)
arg_types.push_back(arg.type);
return std::make_unique<FunctionBaseNow64>(nowSubsecond(scale), std::move(arg_types), std::move(result_type));
}
};


@ -407,14 +407,20 @@ try
}
StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
std::unique_ptr<ReadBuffer> buffer;
std::unique_ptr<ReadBuffer> last_buffer;
for (const auto & entry : data->entries)
{
buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
current_entry = entry;
total_rows += executor.execute(*buffer);
/// Keep buffer, because it still can be used
/// in destructor, while resetting buffer at next iteration.
last_buffer = std::move(buffer);
}
format->addBuffer(std::move(last_buffer));
auto chunk = Chunk(executor.getResultColumns(), total_rows);
size_t total_bytes = chunk.bytes();


@ -84,6 +84,14 @@ public:
UInt64 client_version_patch = 0;
unsigned client_tcp_protocol_version = 0;
/// In case of distributed query, client info for query is actually a client info of client.
/// In order to get a version of server-initiator, use connection_ values.
/// Also for tcp only.
UInt64 connection_client_version_major = 0;
UInt64 connection_client_version_minor = 0;
UInt64 connection_client_version_patch = 0;
unsigned connection_tcp_protocol_version = 0;
/// For http
HTTPMethod http_method = HTTPMethod::UNKNOWN;
String http_user_agent;


@ -69,6 +69,7 @@
#include <Functions/IFunction.h>
#include <Core/Field.h>
#include <Core/ProtocolDefines.h>
#include <base/types.h>
#include <Columns/Collator.h>
#include <Common/FieldVisitorsAccurateComparison.h>
@ -2556,6 +2557,19 @@ void InterpreterSelectQuery::initSettings()
auto & query = getSelectQuery();
if (query.settings())
InterpreterSetQuery(query.settings(), context).executeForCurrentContext();
auto & client_info = context->getClientInfo();
auto min_major = DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD;
auto min_minor = DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD;
if (client_info.query_kind == ClientInfo::QueryKind::SECONDARY_QUERY &&
std::forward_as_tuple(client_info.connection_client_version_major, client_info.connection_client_version_minor) < std::forward_as_tuple(min_major, min_minor))
{
/// Disable two-level aggregation due to version incompatibility.
context->setSetting("group_by_two_level_threshold", Field(0));
context->setSetting("group_by_two_level_threshold_bytes", Field(0));
}
}
}


@ -16,12 +16,12 @@ CompiledExpressionCacheFactory & CompiledExpressionCacheFactory::instance()
return factory;
}
void CompiledExpressionCacheFactory::init(size_t cache_size)
void CompiledExpressionCacheFactory::init(size_t cache_size_in_bytes, size_t cache_size_in_elements)
{
if (cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "CompiledExpressionCache was already initialized");
cache = std::make_unique<CompiledExpressionCache>(cache_size);
cache = std::make_unique<CompiledExpressionCache>(cache_size_in_bytes, cache_size_in_elements);
}
CompiledExpressionCache * CompiledExpressionCacheFactory::tryGetCache()


@ -52,7 +52,7 @@ private:
public:
static CompiledExpressionCacheFactory & instance();
void init(size_t cache_size);
void init(size_t cache_size_in_bytes, size_t cache_size_in_elements);
CompiledExpressionCache * tryGetCache();
};


@ -7,4 +7,3 @@ target_link_libraries(execute_query_fuzzer PRIVATE
clickhouse_dictionaries
clickhouse_dictionaries_embedded
${LIB_FUZZING_ENGINE})


@ -131,18 +131,22 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}
Pos before_values = pos;
String format_str;
/// VALUES or FROM INFILE or FORMAT or SELECT
if (!infile && s_values.ignore(pos, expected))
{
/// If VALUES is defined in query, everything except setting will be parsed as data
data = pos->begin;
format_str = "Values";
}
else if (s_format.ignore(pos, expected))
{
/// If FORMAT is defined, read format name
if (!name_p.parse(pos, format, expected))
return false;
tryGetIdentifierNameInto(format, format_str);
}
else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected))
{
@ -155,6 +159,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// FORMAT section is expected if we have input() in SELECT part
if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected))
return false;
tryGetIdentifierNameInto(format, format_str);
}
else if (s_watch.ignore(pos, expected))
{
@ -242,9 +248,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
tryGetIdentifierNameInto(table, query->table_id.table_name);
}
tryGetIdentifierNameInto(format, query->format);
query->columns = columns;
query->format = std::move(format_str);
query->select = select;
query->watch = watch;
query->settings_ast = settings_ast;


@ -45,4 +45,4 @@ set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src")
target_link_libraries(codegen_select_fuzzer PRIVATE protobuf-mutator dbms ${LIB_FUZZING_ENGINE})
target_link_libraries(codegen_select_fuzzer PRIVATE protobuf-mutator ${Protobuf_LIBRARY} ${Protobuf_PROTOC_LIBRARY} dbms ${LIB_FUZZING_ENGINE})


@ -55,6 +55,8 @@ public:
*/
virtual void resetParser();
virtual void setReadBuffer(ReadBuffer & in_);
virtual const BlockMissingValues & getMissingValues() const
{
static const BlockMissingValues none;
@ -70,7 +72,6 @@ public:
void setCurrentUnitNumber(size_t current_unit_number_) { current_unit_number = current_unit_number_; }
void addBuffer(std::unique_ptr<ReadBuffer> buffer) { owned_buffers.emplace_back(std::move(buffer)); }
void setReadBuffer(ReadBuffer & in_);
protected:
ColumnMappingPtr column_mapping{};


@ -32,13 +32,25 @@ namespace ErrorCodes
}
ValuesBlockInputFormat::ValuesBlockInputFormat(ReadBuffer & in_, const Block & header_, const RowInputFormatParams & params_,
const FormatSettings & format_settings_)
: IInputFormat(header_, buf), buf(in_), params(params_),
format_settings(format_settings_), num_columns(header_.columns()),
parser_type_for_column(num_columns, ParserType::Streaming),
attempts_to_deduce_template(num_columns), attempts_to_deduce_template_cached(num_columns),
rows_parsed_using_template(num_columns), templates(num_columns), types(header_.getDataTypes())
ValuesBlockInputFormat::ValuesBlockInputFormat(
ReadBuffer & in_,
const Block & header_,
const RowInputFormatParams & params_,
const FormatSettings & format_settings_)
: ValuesBlockInputFormat(std::make_unique<PeekableReadBuffer>(in_), header_, params_, format_settings_)
{
}
ValuesBlockInputFormat::ValuesBlockInputFormat(
std::unique_ptr<PeekableReadBuffer> buf_,
const Block & header_,
const RowInputFormatParams & params_,
const FormatSettings & format_settings_)
: IInputFormat(header_, *buf_), buf(std::move(buf_)),
params(params_), format_settings(format_settings_), num_columns(header_.columns()),
parser_type_for_column(num_columns, ParserType::Streaming),
attempts_to_deduce_template(num_columns), attempts_to_deduce_template_cached(num_columns),
rows_parsed_using_template(num_columns), templates(num_columns), types(header_.getDataTypes())
{
serializations.resize(types.size());
for (size_t i = 0; i < types.size(); ++i)
@ -58,8 +70,8 @@ Chunk ValuesBlockInputFormat::generate()
{
try
{
skipWhitespaceIfAny(buf);
if (buf.eof() || *buf.position() == ';')
skipWhitespaceIfAny(*buf);
if (buf->eof() || *buf->position() == ';')
break;
readRow(columns, rows_in_block);
}
@ -99,12 +111,12 @@ Chunk ValuesBlockInputFormat::generate()
void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num)
{
assertChar('(', buf);
assertChar('(', *buf);
for (size_t column_idx = 0; column_idx < num_columns; ++column_idx)
{
skipWhitespaceIfAny(buf);
PeekableReadBufferCheckpoint checkpoint{buf};
skipWhitespaceIfAny(*buf);
PeekableReadBufferCheckpoint checkpoint{*buf};
bool read;
/// Parse value using fast streaming parser for literals and slow SQL parser for expressions.
@ -123,9 +135,9 @@ void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num)
/// If read is true, value still may be missing. Bit mask for these values will be copied from ConstantExpressionTemplate later.
}
skipWhitespaceIfAny(buf);
if (!buf.eof() && *buf.position() == ',')
++buf.position();
skipWhitespaceIfAny(*buf);
if (!buf->eof() && *buf->position() == ',')
++buf->position();
++total_rows;
}
@ -134,7 +146,7 @@ bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr &
{
/// Try to parse expression using template if one was successfully deduced while parsing the first row
auto settings = context->getSettingsRef();
if (templates[column_idx]->parseExpression(buf, format_settings, settings))
if (templates[column_idx]->parseExpression(*buf, format_settings, settings))
{
++rows_parsed_using_template[column_idx];
return true;
@ -154,7 +166,7 @@ bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr &
}
/// Do not use this template anymore
templates[column_idx].reset();
buf.rollbackToCheckpoint();
buf->rollbackToCheckpoint();
/// It will deduce new template or fallback to slow SQL parser
return parseExpression(*column, column_idx);
@ -169,13 +181,13 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx)
const auto & type = types[column_idx];
const auto & serialization = serializations[column_idx];
if (format_settings.null_as_default && !type->isNullable())
read = SerializationNullable::deserializeTextQuotedImpl(column, buf, format_settings, serialization);
read = SerializationNullable::deserializeTextQuotedImpl(column, *buf, format_settings, serialization);
else
serialization->deserializeTextQuoted(column, buf, format_settings);
serialization->deserializeTextQuoted(column, *buf, format_settings);
rollback_on_exception = true;
skipWhitespaceIfAny(buf);
skipWhitespaceIfAny(*buf);
assertDelimiterAfterValue(column_idx);
return read;
}
@ -190,7 +202,7 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx)
/// Switch to SQL parser and don't try to use streaming parser for complex expressions
/// Note: Throwing exceptions for each expression may be very slow because of stacktraces
buf.rollbackToCheckpoint();
buf->rollbackToCheckpoint();
return parseExpression(column, column_idx);
}
}
@ -284,11 +296,11 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
/// We need continuous memory containing the expression to use Lexer
skipToNextRow(0, 1);
buf.makeContinuousMemoryFromCheckpointToPos();
buf.rollbackToCheckpoint();
buf->makeContinuousMemoryFromCheckpointToPos();
buf->rollbackToCheckpoint();
Expected expected;
Tokens tokens(buf.position(), buf.buffer().end());
Tokens tokens(buf->position(), buf->buffer().end());
IParser::Pos token_iterator(tokens, settings.max_parser_depth);
ASTPtr ast;
@ -302,7 +314,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
if (!parsed)
throw Exception("Cannot parse expression of type " + type.getName() + " here: "
+ String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())),
+ String(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position())),
ErrorCodes::SYNTAX_ERROR);
++token_iterator;
@ -316,9 +328,9 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
try
{
const auto & serialization = serializations[column_idx];
serialization->deserializeTextQuoted(column, buf, format_settings);
serialization->deserializeTextQuoted(column, *buf, format_settings);
rollback_on_exception = true;
skipWhitespaceIfAny(buf);
skipWhitespaceIfAny(*buf);
if (checkDelimiterAfterValue(column_idx))
ok = true;
}
@ -366,8 +378,8 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
else
++attempts_to_deduce_template[column_idx];
buf.rollbackToCheckpoint();
if (templates[column_idx]->parseExpression(buf, format_settings, settings))
buf->rollbackToCheckpoint();
if (templates[column_idx]->parseExpression(*buf, format_settings, settings))
{
++rows_parsed_using_template[column_idx];
parser_type_for_column[column_idx] = ParserType::BatchTemplate;
@ -384,9 +396,9 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
std::rethrow_exception(exception);
else
{
buf.rollbackToCheckpoint();
size_t len = const_cast<char *>(token_iterator->begin) - buf.position();
throw Exception("Cannot deduce template of expression: " + std::string(buf.position(), len), ErrorCodes::SYNTAX_ERROR);
buf->rollbackToCheckpoint();
size_t len = const_cast<char *>(token_iterator->begin) - buf->position();
throw Exception("Cannot deduce template of expression: " + std::string(buf->position(), len), ErrorCodes::SYNTAX_ERROR);
}
}
/// Continue parsing without template
@ -397,7 +409,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
throw Exception("Interpreting expressions is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
/// Try to evaluate single expression if other parsers don't work
buf.position() = const_cast<char *>(token_iterator->begin);
buf->position() = const_cast<char *>(token_iterator->begin);
std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(ast, context);
@ -416,10 +428,10 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
type.insertDefaultInto(column);
return false;
}
buf.rollbackToCheckpoint();
buf->rollbackToCheckpoint();
throw Exception{"Cannot insert NULL value into a column of type '" + type.getName() + "'"
+ " at: " +
String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())),
String(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position())),
ErrorCodes::TYPE_MISMATCH};
}
@ -430,61 +442,61 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
/// Can be used in fileSegmentationEngine for parallel parsing of Values
bool ValuesBlockInputFormat::skipToNextRow(size_t min_chunk_bytes, int balance)
{
skipWhitespaceIfAny(buf);
if (buf.eof() || *buf.position() == ';')
skipWhitespaceIfAny(*buf);
if (buf->eof() || *buf->position() == ';')
return false;
bool quoted = false;
size_t chunk_begin_buf_count = buf.count();
while (!buf.eof() && (balance || buf.count() - chunk_begin_buf_count < min_chunk_bytes))
size_t chunk_begin_buf_count = buf->count();
while (!buf->eof() && (balance || buf->count() - chunk_begin_buf_count < min_chunk_bytes))
{
buf.position() = find_first_symbols<'\\', '\'', ')', '('>(buf.position(), buf.buffer().end());
if (buf.position() == buf.buffer().end())
buf->position() = find_first_symbols<'\\', '\'', ')', '('>(buf->position(), buf->buffer().end());
if (buf->position() == buf->buffer().end())
continue;
if (*buf.position() == '\\')
if (*buf->position() == '\\')
{
++buf.position();
if (!buf.eof())
++buf.position();
++buf->position();
if (!buf->eof())
++buf->position();
}
else if (*buf.position() == '\'')
else if (*buf->position() == '\'')
{
quoted ^= true;
++buf.position();
++buf->position();
}
else if (*buf.position() == ')')
else if (*buf->position() == ')')
{
++buf.position();
++buf->position();
if (!quoted)
--balance;
}
else if (*buf.position() == '(')
else if (*buf->position() == '(')
{
++buf.position();
++buf->position();
if (!quoted)
++balance;
}
}
if (!buf.eof() && *buf.position() == ',')
++buf.position();
if (!buf->eof() && *buf->position() == ',')
++buf->position();
return true;
}
void ValuesBlockInputFormat::assertDelimiterAfterValue(size_t column_idx)
{
if (unlikely(!checkDelimiterAfterValue(column_idx)))
throwAtAssertionFailed((column_idx + 1 == num_columns) ? ")" : ",", buf);
throwAtAssertionFailed((column_idx + 1 == num_columns) ? ")" : ",", *buf);
}
bool ValuesBlockInputFormat::checkDelimiterAfterValue(size_t column_idx)
{
skipWhitespaceIfAny(buf);
skipWhitespaceIfAny(*buf);
if (likely(column_idx + 1 != num_columns))
return checkChar(',', buf);
return checkChar(',', *buf);
else
return checkChar(')', buf);
return checkChar(')', *buf);
}
bool ValuesBlockInputFormat::shouldDeduceNewTemplate(size_t column_idx)
@ -516,21 +528,21 @@ bool ValuesBlockInputFormat::shouldDeduceNewTemplate(size_t column_idx)
void ValuesBlockInputFormat::readPrefix()
{
/// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it.
skipBOMIfExists(buf);
skipBOMIfExists(*buf);
}
void ValuesBlockInputFormat::readSuffix()
{
if (!buf.eof() && *buf.position() == ';')
if (!buf->eof() && *buf->position() == ';')
{
++buf.position();
skipWhitespaceIfAny(buf);
if (buf.hasUnreadData())
++buf->position();
skipWhitespaceIfAny(*buf);
if (buf->hasUnreadData())
throw Exception("Cannot read data after semicolon", ErrorCodes::CANNOT_READ_ALL_DATA);
return;
}
if (buf.hasUnreadData())
if (buf->hasUnreadData())
throw Exception("Unread data in PeekableReadBuffer will be lost. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR);
}
@ -539,10 +551,16 @@ void ValuesBlockInputFormat::resetParser()
IInputFormat::resetParser();
// I'm not resetting parser modes here.
// There is a good chance that all messages have the same format.
buf.reset();
buf->reset();
total_rows = 0;
}
void ValuesBlockInputFormat::setReadBuffer(ReadBuffer & in_)
{
buf = std::make_unique<PeekableReadBuffer>(in_);
IInputFormat::setReadBuffer(*buf);
}
void registerInputFormatValues(FormatFactory & factory)
{
factory.registerInputFormat("Values", [](


@ -32,6 +32,7 @@ public:
String getName() const override { return "ValuesBlockInputFormat"; }
void resetParser() override;
void setReadBuffer(ReadBuffer & in_) override;
/// TODO: remove context somehow.
void setContext(ContextPtr context_) { context = Context::createCopy(context_); }
@ -39,6 +40,9 @@ public:
const BlockMissingValues & getMissingValues() const override { return block_missing_values; }
private:
ValuesBlockInputFormat(std::unique_ptr<PeekableReadBuffer> buf_, const Block & header_, const RowInputFormatParams & params_,
const FormatSettings & format_settings_);
enum class ParserType
{
Streaming,
@ -66,7 +70,7 @@ private:
bool skipToNextRow(size_t min_chunk_bytes = 0, int balance = 0);
PeekableReadBuffer buf;
std::unique_ptr<PeekableReadBuffer> buf;
const RowInputFormatParams params;


@ -42,12 +42,12 @@ InputFormatPtr getInputFormatFromASTInsertQuery(
if (ast_insert_query->infile && context->getApplicationType() == Context::ApplicationType::SERVER)
throw Exception("Query has infile and was send directly to server", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
String format = ast_insert_query->format;
if (format.empty())
if (ast_insert_query->format.empty())
{
if (input_function)
throw Exception("FORMAT must be specified for function input()", ErrorCodes::INVALID_USAGE_OF_INPUT);
format = "Values";
else
throw Exception("Logical error: INSERT query requires format to be set", ErrorCodes::LOGICAL_ERROR);
}
/// Data could be in parsed (ast_insert_query.data) and in not parsed yet (input_buffer_tail_part) part of query.
@ -59,7 +59,7 @@ InputFormatPtr getInputFormatFromASTInsertQuery(
: std::make_unique<EmptyReadBuffer>();
/// Create a source from input buffer using format from query
auto source = context->getInputFormat(format, *input_buffer, header, context->getSettings().max_insert_block_size);
auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettings().max_insert_block_size);
source->addBuffer(std::move(input_buffer));
return source;
}


@ -1126,6 +1126,11 @@ void TCPHandler::receiveHello()
client_info.client_version_patch = client_version_patch;
client_info.client_tcp_protocol_version = client_tcp_protocol_version;
client_info.connection_client_version_major = client_version_major;
client_info.connection_client_version_minor = client_version_minor;
client_info.connection_client_version_patch = client_version_patch;
client_info.connection_tcp_protocol_version = client_tcp_protocol_version;
is_interserver_mode = (user == USER_INTERSERVER_MARKER);
if (is_interserver_mode)
{


@ -1044,10 +1044,15 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r
/// Filter in WHERE instead
else
{
auto type = getSampleBlock().getByName(prewhere_info->prewhere_column_name).type;
ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared<DataTypeUInt8>(), "");
result.columns[prewhere_column_pos] = castColumn(col, type);
result.clearFilter(); // Acting as a flag to not filter in PREWHERE
if (prewhere_info->remove_prewhere_column)
result.columns.erase(result.columns.begin() + prewhere_column_pos);
else
{
auto type = getSampleBlock().getByName(prewhere_info->prewhere_column_name).type;
ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared<DataTypeUInt8>(), "");
result.columns[prewhere_column_pos] = castColumn(col, type);
result.clearFilter(); // Acting as a flag to not filter in PREWHERE
}
}
}


@ -446,7 +446,8 @@ static void appendBlock(const Block & from, Block & to)
if (!to)
throw Exception("Cannot append to empty block", ErrorCodes::LOGICAL_ERROR);
assertBlocksHaveEqualStructure(from, to, "Buffer");
if (to.rows())
assertBlocksHaveEqualStructure(from, to, "Buffer");
from.checkNumberOfRows();
to.checkNumberOfRows();
@ -464,14 +465,21 @@ static void appendBlock(const Block & from, Block & to)
{
MemoryTracker::BlockerInThread temporarily_disable_memory_tracker;
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
if (to.rows() == 0)
{
const IColumn & col_from = *from.getByPosition(column_no).column.get();
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
to = from;
}
else
{
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
{
const IColumn & col_from = *from.getByPosition(column_no).column.get();
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
last_col->insertRangeFrom(col_from, 0, rows);
last_col->insertRangeFrom(col_from, 0, rows);
to.getByPosition(column_no).column = std::move(last_col);
to.getByPosition(column_no).column = std::move(last_col);
}
}
}
catch (...)

tests/ci/build_check.py

@ -0,0 +1,209 @@
#!/usr/bin/env python3
import json
import logging
import os
import shutil
import subprocess
import sys
import time
from github import Github
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
from s3_helper import S3Helper
from version_helper import get_version_from_repo, update_version_local
def get_build_config(build_check_name, build_number, repo_path):
if build_check_name == 'ClickHouse build check (actions)':
build_config_name = 'build_config'
elif build_check_name == 'ClickHouse special build check (actions)':
build_config_name = 'special_build_config'
else:
raise Exception(f"Unknown build check name {build_check_name}")
ci_config_path = os.path.join(repo_path, "tests/ci/ci_config.json")
with open(ci_config_path, 'r') as ci_config:
config_dict = json.load(ci_config)
return config_dict[build_config_name][build_number]
def _can_export_binaries(build_config):
if build_config['package-type'] != 'deb':
return False
if build_config['bundled'] != "bundled":
return False
if build_config['splitted'] == 'splitted':
return False
if build_config['sanitizer'] != '':
return True
if build_config['build-type'] != '':
return True
return False
def get_packager_cmd(build_config, packager_path, output_path, build_version, image_version, ccache_path):
package_type = build_config['package-type']
comp = build_config['compiler']
cmd = f"cd {packager_path} && ./packager --output-dir={output_path} --package-type={package_type} --compiler={comp}"
if build_config['build-type']:
cmd += ' --build-type={}'.format(build_config['build-type'])
if build_config['sanitizer']:
cmd += ' --sanitizer={}'.format(build_config['sanitizer'])
if build_config['bundled'] == 'unbundled':
cmd += ' --unbundled'
if build_config['splitted'] == 'splitted':
cmd += ' --split-binary'
if build_config['tidy'] == 'enable':
cmd += ' --clang-tidy'
cmd += ' --cache=ccache'
cmd += ' --ccache_dir={}'.format(ccache_path)
if 'alien_pkgs' in build_config and build_config['alien_pkgs']:
cmd += ' --alien-pkgs'
cmd += ' --docker-image-version={}'.format(image_version)
cmd += ' --version={}'.format(build_version)
if _can_export_binaries(build_config):
cmd += ' --with-binaries=tests'
return cmd
def get_image_name(build_config):
if build_config['bundled'] != 'bundled':
return 'clickhouse/unbundled-builder'
elif build_config['package-type'] != 'deb':
return 'clickhouse/binary-builder'
else:
return 'clickhouse/deb-builder'
def build_clickhouse(packager_cmd, logs_path):
build_log_path = os.path.join(logs_path, 'build_log.log')
with open(build_log_path, 'w') as log_file:
retcode = subprocess.Popen(packager_cmd, shell=True, stderr=log_file, stdout=log_file).wait()
if retcode == 0:
logging.info("Built successfully")
else:
logging.info("Build failed")
return build_log_path, retcode == 0
def build_config_to_string(build_config):
if build_config["package-type"] == "performance":
return "performance"
return "_".join([
build_config['compiler'],
build_config['build-type'] if build_config['build-type'] else "relwithdebuginfo",
build_config['sanitizer'] if build_config['sanitizer'] else "none",
build_config['bundled'],
build_config['splitted'],
"tidy" if build_config['tidy'] == "enable" else "notidy",
"with_coverage" if build_config['with_coverage'] else "without_coverage",
build_config['package-type'],
])
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
repo_path = os.getenv("REPO_COPY", os.path.abspath("../../"))
temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
caches_path = os.getenv("CACHES_PATH", temp_path)
build_check_name = sys.argv[1]
build_number = int(sys.argv[2])
build_config = get_build_config(build_check_name, build_number, repo_path)
if not os.path.exists(temp_path):
os.makedirs(temp_path)
with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
event = json.load(event_file)
pr_info = PRInfo(event)
logging.info("Repo copy path %s", repo_path)
gh = Github(get_best_robot_token())
images_path = os.path.join(temp_path, 'changed_images.json')
image_name = get_image_name(build_config)
image_version = 'latest'
if os.path.exists(images_path):
logging.info("Images file exists")
with open(images_path, 'r') as images_fd:
images = json.load(images_fd)
logging.info("Got images %s", images)
if image_name in images:
image_version = images[image_name]
for i in range(10):
try:
logging.info(f"Pulling image {image_name}:{image_version}")
subprocess.check_output(f"docker pull {image_name}:{image_version}", stderr=subprocess.STDOUT, shell=True)
break
except Exception as ex:
time.sleep(i * 3)
logging.info("Got execption pulling docker %s", ex)
else:
raise Exception(f"Cannot pull dockerhub for image docker pull {image_name}:{image_version}")
version = get_version_from_repo(repo_path)
version.tweak_update()
update_version_local(repo_path, pr_info.sha, version)
build_name = build_config_to_string(build_config)
logging.info(f"Build short name {build_name}")
subprocess.check_call(f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True)
build_output_path = os.path.join(temp_path, build_name)
if not os.path.exists(build_output_path):
os.makedirs(build_output_path)
ccache_path = os.path.join(caches_path, build_name + '_ccache')
if not os.path.exists(ccache_path):
os.makedirs(ccache_path)
packager_cmd = get_packager_cmd(build_config, os.path.join(repo_path, "docker/packager"), build_output_path, version.get_version_string(), image_version, ccache_path)
logging.info("Going to run packager with %s", packager_cmd)
build_clickhouse_log = os.path.join(temp_path, "build_log")
if not os.path.exists(build_clickhouse_log):
os.makedirs(build_clickhouse_log)
start = time.time()
log_path, success = build_clickhouse(packager_cmd, build_clickhouse_log)
elapsed = int(time.time() - start)
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True)
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {ccache_path}", shell=True)
logging.info("Build finished with %s, log path %s", success, log_path)
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + build_check_name.lower().replace(' ', '_') + "/" + build_name
if os.path.exists(log_path):
log_url = s3_helper.upload_build_file_to_s3(log_path, s3_path_prefix + "/" + os.path.basename(log_path))
logging.info("Log url %s", log_url)
else:
log_url = ''
logging.info("Build log doesn't exist")
build_urls = s3_helper.upload_build_folder_to_s3(build_output_path, s3_path_prefix, keep_dirs_in_s3_path=False, upload_symlinks=False)
logging.info("Got build URLs %s", build_urls)
print("::notice ::Build URLs: {}".format('\n'.join(build_urls)))
result = {
"log_url": log_url,
"build_urls": build_urls,
"build_config": build_config,
"elapsed_seconds": elapsed,
"status": success,
}
print("::notice ::Log URL: {}".format(log_url))
with open(os.path.join(temp_path, "build_urls_" + build_name + '.json'), 'w') as build_links:
json.dump(result, build_links)

View File

@ -0,0 +1,164 @@
#!/usr/bin/env python3
import json
from github import Github
from report import create_build_html_report
from s3_helper import S3Helper
import logging
import os
from get_robot_token import get_best_robot_token
import sys
from pr_info import PRInfo
class BuildResult(object):
def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage):
self.compiler = compiler
self.build_type = build_type
self.sanitizer = sanitizer
self.bundled = bundled
self.splitted = splitted
self.status = status
self.elapsed_seconds = elapsed_seconds
self.with_coverage = with_coverage
def group_by_artifacts(build_urls):
groups = {'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'performance': []}
for url in build_urls:
if url.endswith('performance.tgz'):
groups['performance'].append(url)
elif url.endswith('.deb') or url.endswith('.buildinfo') or url.endswith('.changes') or url.endswith('.tar.gz'):
groups['deb'].append(url)
elif url.endswith('.rpm'):
groups['rpm'].append(url)
elif url.endswith('.tgz'):
groups['tgz'].append(url)
else:
groups['binary'].append(url)
return groups
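# Example (file names are made up): ".../clickhouse-common-static_21.12.1.1_amd64.deb" lands in 'deb',
# ".../clickhouse-common-static-21.12.1.1.x86_64.rpm" in 'rpm', and anything without a known suffix in 'binary'.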
def get_commit(gh, commit_sha):
repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
commit = repo.get_commit(commit_sha)
return commit
def process_report(build_report):
build_config = build_report['build_config']
build_result = BuildResult(
compiler=build_config['compiler'],
build_type=build_config['build-type'],
sanitizer=build_config['sanitizer'],
bundled=build_config['bundled'],
splitted=build_config['splitted'],
status="success" if build_report['status'] else "failure",
elapsed_seconds=build_report['elapsed_seconds'],
with_coverage=False
)
build_results = []
build_urls = []
build_logs_urls = []
urls_groups = group_by_artifacts(build_report['build_urls'])
found_group = False
for _, group_urls in urls_groups.items():
if group_urls:
build_results.append(build_result)
build_urls.append(group_urls)
build_logs_urls.append(build_report['log_url'])
found_group = True
if not found_group:
build_results.append(build_result)
build_urls.append([""])
build_logs_urls.append(build_report['log_url'])
return build_results, build_urls, build_logs_urls
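# The three returned lists stay parallel: a report whose build produced, say, only deb packages
# contributes exactly one BuildResult, one URL group and one log URL.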
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
reports_path = os.getenv("REPORTS_PATH", "./reports")
temp_path = os.path.join(os.getenv("TEMP_PATH", "."))
logging.info("Reports path %s", reports_path)
if not os.path.exists(temp_path):
os.makedirs(temp_path)
build_check_name = sys.argv[1]
build_reports = []
for root, dirs, files in os.walk(reports_path):
print(files)
for f in files:
if f.startswith("build_urls_") and f.endswith('.json'):
logging.info("Found build report json %s", f)
with open(os.path.join(root, f), 'r') as file_handler:
build_report = json.load(file_handler)
build_reports.append(build_report)
build_results = []
build_artifacts = []
build_logs = []
for build_report in build_reports:
build_result, build_artifacts_url, build_logs_url = process_report(build_report)
logging.info("Got %s result for report", len(build_result))
build_results += build_result
build_artifacts += build_artifacts_url
build_logs += build_logs_url
logging.info("Totally got %s results", len(build_results))
gh = Github(get_best_robot_token())
s3_helper = S3Helper('https://s3.amazonaws.com')
with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
event = json.load(event_file)
pr_info = PRInfo(event)
branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
branch_name = "master"
if pr_info.number != 0:
branch_name = "PR #{}".format(pr_info.number)
branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_info.number)
commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{pr_info.sha}"
task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID', 0)}"
report = create_build_html_report(
build_check_name,
build_results,
build_logs,
build_artifacts,
task_url,
branch_url,
branch_name,
commit_url
)
report_path = os.path.join(temp_path, 'report.html')
with open(report_path, 'w') as f:
f.write(report)
logging.info("Going to upload prepared report")
context_name_for_path = build_check_name.lower().replace(' ', '_')
s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
url = s3_helper.upload_build_file_to_s3(report_path, s3_path_prefix + "/report.html")
logging.info("Report url %s", url)
total_builds = len(build_results)
ok_builds = 0
summary_status = "success"
for build_result in build_results:
if build_result.status == "failure" and summary_status != "error":
summary_status = "failure"
if build_result.status == "error" or not build_result.status:
summary_status = "error"
if build_result.status == "success":
ok_builds += 1
description = "{}/{} builds are OK".format(ok_builds, total_builds)
print("::notice ::Report url: {}".format(url))
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=build_check_name, description=description, state=summary_status, target_url=url)

195
tests/ci/fast_test_check.py Normal file
View File

@ -0,0 +1,195 @@
#!/usr/bin/env python3
import logging
import subprocess
import os
import json
import time
from pr_info import PRInfo
from report import create_test_html_report
from s3_helper import S3Helper
from github import Github
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
import csv
NAME = 'Fast test (actions)'
def get_fasttest_cmd(workspace, output_path, ccache_path, repo_path, pr_number, commit_sha, image):
return f"docker run --cap-add=SYS_PTRACE " \
f"-e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output " \
f"-e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE " \
f"-e PULL_REQUEST_NUMBER={pr_number} -e COMMIT_SHA={commit_sha} -e COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 " \
f"--volume={workspace}:/fasttest-workspace --volume={repo_path}:/ClickHouse --volume={output_path}:/test_output "\
f"--volume={ccache_path}:/fasttest-workspace/ccache {image}"
def process_results(result_folder):
test_results = []
additional_files = []
# Just upload all files from result_folder.
# If the task provides processed results, it is responsible for the content of result_folder.
if os.path.exists(result_folder):
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
additional_files = [os.path.join(result_folder, f) for f in test_files]
status_path = os.path.join(result_folder, "check_status.tsv")
logging.info("Found test_results.tsv")
status = list(csv.reader(open(status_path, 'r'), delimiter='\t'))
if len(status) != 1 or len(status[0]) != 2:
return "error", "Invalid check_status.tsv", test_results, additional_files
state, description = status[0][0], status[0][1]
results_path = os.path.join(result_folder, "test_results.tsv")
test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t'))
if len(test_results) == 0:
raise Exception("Empty results")
return state, description, test_results, additional_files
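# check_status.tsv is expected to hold a single tab-separated row such as "success<TAB>All tests passed",
# and test_results.tsv one row per test, e.g. "00001_select<TAB>OK" (example values, not from a real run).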
def process_logs(s3_client, additional_logs, s3_path_prefix):
additional_urls = []
for log_path in additional_logs:
if log_path:
additional_urls.append(
s3_client.upload_test_report_to_s3(
log_path,
s3_path_prefix + "/" + os.path.basename(log_path)))
return additional_urls
def upload_results(s3_client, pr_number, commit_sha, test_results, raw_log, additional_files):
additional_files = [raw_log] + additional_files
s3_path_prefix = f"{pr_number}/{commit_sha}/fasttest"
additional_urls = process_logs(s3_client, additional_files, s3_path_prefix)
branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
branch_name = "master"
if pr_number != 0:
branch_name = "PR #{}".format(pr_number)
branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number)
commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}"
task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
raw_log_url = additional_urls[0]
additional_urls.pop(0)
html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls, True)
with open('report.html', 'w') as f:
f.write(html_report)
url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html")
logging.info("Search result in url %s", url)
return url
def get_commit(gh, commit_sha):
repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
commit = repo.get_commit(commit_sha)
return commit
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
caches_path = os.getenv("CACHES_PATH", temp_path)
if not os.path.exists(temp_path):
os.makedirs(temp_path)
with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
event = json.load(event_file)
pr_info = PRInfo(event)
gh = Github(get_best_robot_token())
images_path = os.path.join(temp_path, 'changed_images.json')
docker_image = 'clickhouse/fasttest'
if os.path.exists(images_path):
logging.info("Images file exists")
with open(images_path, 'r') as images_fd:
images = json.load(images_fd)
logging.info("Got images %s", images)
if 'clickhouse/fasttest' in images:
docker_image += ':' + images['clickhouse/fasttest']
logging.info("Got docker image %s", docker_image)
for i in range(10):
try:
subprocess.check_output(f"docker pull {docker_image}", shell=True)
break
except Exception as ex:
time.sleep(i * 3)
logging.info("Got execption pulling docker %s", ex)
else:
raise Exception(f"Cannot pull dockerhub for image {docker_image}")
s3_helper = S3Helper('https://s3.amazonaws.com')
workspace = os.path.join(temp_path, "fasttest-workspace")
if not os.path.exists(workspace):
os.makedirs(workspace)
output_path = os.path.join(temp_path, "fasttest-output")
if not os.path.exists(output_path):
os.makedirs(output_path)
cache_path = os.path.join(caches_path, "fasttest")
if not os.path.exists(cache_path):
os.makedirs(cache_path)
repo_path = os.path.join(temp_path, "fasttest-repo")
if not os.path.exists(repo_path):
os.makedirs(repo_path)
run_cmd = get_fasttest_cmd(workspace, output_path, cache_path, repo_path, pr_info.number, pr_info.sha, docker_image)
logging.info("Going to run fasttest with cmd %s", run_cmd)
logs_path = os.path.join(temp_path, "fasttest-logs")
if not os.path.exists(logs_path):
os.makedirs(logs_path)
run_log_path = os.path.join(logs_path, 'runlog.log')
with open(run_log_path, 'w') as log:
retcode = subprocess.Popen(run_cmd, shell=True, stderr=log, stdout=log).wait()
if retcode == 0:
logging.info("Run successfully")
else:
logging.info("Run failed")
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {cache_path}", shell=True)
test_output_files = os.listdir(output_path)
additional_logs = []
for f in test_output_files:
additional_logs.append(os.path.join(output_path, f))
test_log_exists = 'test_log.txt' in test_output_files or 'test_result.txt' in test_output_files
test_result_exists = 'test_results.tsv' in test_output_files
test_results = []
if 'submodule_log.txt' not in test_output_files:
description = "Cannot clone repository"
state = "failure"
elif 'cmake_log.txt' not in test_output_files:
description = "Cannot fetch submodules"
state = "failure"
elif 'build_log.txt' not in test_output_files:
description = "Cannot finish cmake"
state = "failure"
elif 'install_log.txt' not in test_output_files:
description = "Cannot build ClickHouse"
state = "failure"
elif not test_log_exists and not test_result_exists:
description = "Cannot install or start ClickHouse"
state = "failure"
else:
state, description, test_results, additional_logs = process_results(output_path)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, run_log_path, additional_logs)
print("::notice ::Report url: {}".format(report_url))
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=NAME, description=description, state=state, target_url=report_url)

View File

@ -67,6 +67,24 @@ def list_runners(access_token):
result.append(desc)
return result
def group_runners_by_tag(listed_runners):
result = {}
RUNNER_TYPE_LABELS = ['style-checker', 'builder']
for runner in listed_runners:
for tag in runner.tags:
if tag in RUNNER_TYPE_LABELS:
if tag not in result:
result[tag] = []
result[tag].append(runner)
break
else:
if 'unlabeled' not in result:
result['unlabeled'] = []
result['unlabeled'].append(runner)
return result
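# Example: a runner tagged ['self-hosted', 'builder'] ends up in result['builder'],
# while a runner with none of the known type labels goes to result['unlabeled'].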
def push_metrics_to_cloudwatch(listed_runners, namespace):
import boto3
client = boto3.client('cloudwatch')
@ -100,7 +118,7 @@ def push_metrics_to_cloudwatch(listed_runners, namespace):
'Unit': 'Percent',
})
client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data)
client.put_metric_data(Namespace=namespace, MetricData=metrics_data)
def main(github_secret_key, github_app_id, push_to_cloudwatch):
payload = {
@ -113,10 +131,12 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch):
installation_id = get_installation_id(encoded_jwt)
access_token = get_access_token(encoded_jwt, installation_id)
runners = list_runners(access_token)
if push_to_cloudwatch:
push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
else:
print(runners)
grouped_runners = group_runners_by_tag(runners)
for group, group_runners in grouped_runners.items():
if push_to_cloudwatch:
push_metrics_to_cloudwatch(group_runners, 'RunnersMetrics/' + group)
else:
print(group, group_runners)
if __name__ == "__main__":

View File

@ -39,3 +39,9 @@ class PRInfo:
'user_login': self.user_login,
'user_orgs': self.user_orgs,
}
class FakePRInfo:
def __init__(self):
self.number = 11111
self.sha = "xxxxxxxxxxxxxxxxxx"

View File

@ -9,7 +9,7 @@ from s3_helper import S3Helper
from pr_info import PRInfo
import shutil
import sys
from get_robot_token import get_best_robot_token
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
NAME = 'PVS Studio (actions)'
LICENCE_NAME = 'Free license: ClickHouse, Yandex'
@ -97,7 +97,7 @@ if __name__ == "__main__":
s3_helper = S3Helper('https://s3.amazonaws.com')
licence_key = os.getenv('PVS_STUDIO_KEY')
licence_key = get_parameter_from_ssm('pvs_studio_key')
cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}"
commit = get_commit(gh, pr_info.sha)

View File

@ -32,6 +32,9 @@ table {{ border: 0; }}
.main {{ margin-left: 10%; }}
p.links a {{ padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
th {{ cursor: pointer; }}
.failed {{ cursor: pointer; }}
.failed-content.open {{}}
.failed-content {{ display: none; }}
</style>
<title>{title}</title>
@ -51,7 +54,13 @@ th {{ cursor: pointer; }}
<script type="text/javascript">
/// Straight from https://stackoverflow.com/questions/14267781/sorting-html-table-with-javascript
const getCellValue = (tr, idx) => tr.children[idx].innerText || tr.children[idx].textContent;
const getCellValue = (tr, idx) => {{
var classes = tr.classList;
var elem = tr;
if (classes.contains("failed-content") || classes.contains("failed-content.open"))
elem = tr.previousElementSibling;
return elem.children[idx].innerText || elem.children[idx].textContent;
}}
const comparer = (idx, asc) => (a, b) => ((v1, v2) =>
v1 !== '' && v2 !== '' && !isNaN(v1) && !isNaN(v2) ? v1 - v2 : v1.toString().localeCompare(v2)
@ -64,6 +73,12 @@ th {{ cursor: pointer; }}
.sort(comparer(Array.from(th.parentNode.children).indexOf(th), this.asc = !this.asc))
.forEach(tr => table.appendChild(tr) );
}})));
Array.from(document.getElementsByClassName("failed")).forEach(tr => tr.addEventListener('click', function() {{
var content = this.nextElementSibling;
content.classList.toggle("failed-content.open");
content.classList.toggle("failed-content");
}}));
</script>
</html>
"""
@ -107,13 +122,13 @@ def _get_status_style(status):
def _get_html_url(url):
if isinstance(url, str):
return '<a href="{url}">{name}</a>'.format(url=url, name=os.path.basename(url))
return '<a href="{url}">{name}</a>'.format(url=url, name=os.path.basename(url).replace('%2B', '+').replace('%20', ' '))
if isinstance(url, tuple):
return '<a href="{url}">{name}</a>'.format(url=url[0], name=url[1])
return '<a href="{url}">{name}</a>'.format(url=url[0], name=url[1].replace('%2B', '+').replace('%20', ' '))
return ''
def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=[]):
def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=[], with_raw_logs=False):
if test_result:
rows_part = ""
num_fails = 0
@ -134,11 +149,13 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
has_test_logs = True
row = "<tr>"
is_fail = test_status == "FAIL" or test_status == 'FLAKY'
if is_fail and with_raw_logs and test_logs is not None:
row = "<tr class=\"failed\">"
row += "<td>" + test_name + "</td>"
style = _get_status_style(test_status)
# Allow quickly scrolling to the first failure.
is_fail = test_status == "FAIL" or test_status == 'FLAKY'
is_fail_id = ""
if is_fail:
num_fails = num_fails + 1
@ -149,17 +166,23 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
if test_time is not None:
row += "<td>" + test_time + "</td>"
if test_logs is not None:
if test_logs is not None and not with_raw_logs:
test_logs_html = "<br>".join([_get_html_url(url) for url in test_logs])
row += "<td>" + test_logs_html + "</td>"
row += "</tr>"
rows_part += row
if test_logs is not None and with_raw_logs:
row = "<tr class=\"failed-content\">"
# TODO: compute colspan too
row += "<td colspan=\"3\"><pre>" + test_logs + "</pre></td>"
row += "</tr>"
rows_part += row
headers = BASE_HEADERS
if has_test_time:
headers.append('Test time, sec.')
if has_test_logs:
if has_test_logs and not with_raw_logs:
headers.append('Logs')
headers = ''.join(['<th>' + h + '</th>' for h in headers])
@ -235,7 +258,7 @@ tr:hover td {{filter: brightness(95%);}}
</table>
<p class="links">
<a href="{commit_url}">Commit</a>
<a href="{task_url}">Task (private network)</a>
<a href="{task_url}">Task (github actions)</a>
</p>
</body>
</html>
@ -281,7 +304,7 @@ def create_build_html_report(header, build_results, build_logs_urls, artifact_ur
link_separator = "<br/>"
if artifact_urls:
for artifact_url in artifact_urls:
links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url), url=artifact_url)
links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url.replace('%2B', '+').replace('%20', ' ')), url=artifact_url)
links += link_separator
if links:
links = links[:-len(link_separator)]

View File

@ -122,5 +122,9 @@ if __name__ == "__main__":
commit.create_status(context=NAME, description=description, state="failure", target_url=url)
sys.exit(1)
else:
print("::notice ::Can run")
commit.create_status(context=NAME, description=description, state="pending", target_url=url)
if 'pr-documentation' in pr_info.labels or 'pr-doc-fix' in pr_info.labels:
commit.create_status(context=NAME, description="Skipping checks for documentation", state="success", target_url=url)
print("::notice ::Can run, but it's documentation PR, skipping")
else:
print("::notice ::Can run")
commit.create_status(context=NAME, description=description, state="pending", target_url=url)

View File

@ -56,7 +56,8 @@ class S3Helper(object):
self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata)
logging.info("Upload {} to {}. Meta: {}".format(file_path, s3_path, metadata))
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path)
# the last two replacements are specific to AWS URLs: https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path).replace('+', '%2B').replace(' ', '%20')
def upload_test_report_to_s3(self, file_path, s3_path):
return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path)

View File

@ -85,15 +85,6 @@ def get_commit(gh, commit_sha):
commit = repo.get_commit(commit_sha)
return commit
def update_check_with_curl(check_id):
cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} "
"--header 'authorization: Bearer {}' "
"--header 'Accept: application/vnd.github.v3+json' "
"--header 'content-type: application/json' "
"-d '{{\"name\" : \"hello-world-name\"}}'")
cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN"))
subprocess.check_call(cmd, shell=True)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")))

139
tests/ci/version_helper.py Normal file
View File

@ -0,0 +1,139 @@
#!/usr/bin/env python3
import os
import subprocess
import datetime
FILE_WITH_VERSION_PATH = "cmake/autogenerated_versions.txt"
CHANGELOG_IN_PATH = "debian/changelog.in"
CHANGELOG_PATH = "debian/changelog"
CONTRIBUTORS_SCRIPT_DIR = "src/Storages/System/"
class ClickHouseVersion(object):
def __init__(self, major, minor, patch, tweak, revision):
self.major = major
self.minor = minor
self.patch = patch
self.tweak = tweak
self.revision = revision
def minor_update(self):
return ClickHouseVersion(
self.major,
self.minor + 1,
1,
1,
self.revision + 1)
def patch_update(self):
return ClickHouseVersion(
self.major,
self.minor,
self.patch + 1,
1,
self.revision)
def tweak_update(self):
return ClickHouseVersion(
self.major,
self.minor,
self.patch,
self.tweak + 1,
self.revision)
def get_version_string(self):
return '.'.join([
str(self.major),
str(self.minor),
str(self.patch),
str(self.tweak)
])
def as_tuple(self):
return (self.major, self.minor, self.patch, self.tweak)
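# Example (illustrative numbers): ClickHouseVersion(21, 11, 1, 1, 54456).tweak_update().get_version_string()
# returns "21.11.1.2".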
class VersionType(object):
STABLE = "stable"
TESTING = "testing"
def build_version_description(version, version_type):
return "v" + version.get_version_string() + "-" + version_type
def _get_version_from_line(line):
_, ver_with_bracket = line.strip().split(' ')
return ver_with_bracket[:-1]
def get_version_from_repo(repo_path):
path_to_file = os.path.join(repo_path, FILE_WITH_VERSION_PATH)
major = 0
minor = 0
patch = 0
tweak = 0
version_revision = 0
with open(path_to_file, 'r') as ver_file:
for line in ver_file:
if "VERSION_MAJOR" in line and "math" not in line and "SET" in line:
major = _get_version_from_line(line)
elif "VERSION_MINOR" in line and "math" not in line and "SET" in line:
minor = _get_version_from_line(line)
elif "VERSION_PATCH" in line and "math" not in line and "SET" in line:
patch = _get_version_from_line(line)
elif "VERSION_REVISION" in line and "math" not in line:
version_revision = _get_version_from_line(line)
return ClickHouseVersion(major, minor, patch, tweak, version_revision)
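# The version file is assumed to contain lines like "SET(VERSION_MAJOR 21)";
# _get_version_from_line() drops the trailing ")" and returns "21".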
def _update_cmake_version(repo_path, version, sha, version_type):
cmd = """sed -i --follow-symlinks -e "s/SET(VERSION_REVISION [^) ]*/SET(VERSION_REVISION {revision}/g;" \
-e "s/SET(VERSION_DESCRIBE [^) ]*/SET(VERSION_DESCRIBE {version_desc}/g;" \
-e "s/SET(VERSION_GITHASH [^) ]*/SET(VERSION_GITHASH {sha}/g;" \
-e "s/SET(VERSION_MAJOR [^) ]*/SET(VERSION_MAJOR {major}/g;" \
-e "s/SET(VERSION_MINOR [^) ]*/SET(VERSION_MINOR {minor}/g;" \
-e "s/SET(VERSION_PATCH [^) ]*/SET(VERSION_PATCH {patch}/g;" \
-e "s/SET(VERSION_STRING [^) ]*/SET(VERSION_STRING {version_string}/g;" \
{path}""".format(
revision=version.revision,
version_desc=build_version_description(version, version_type),
sha=sha,
major=version.major,
minor=version.minor,
patch=version.patch,
version_string=version.get_version_string(),
path=os.path.join(repo_path, FILE_WITH_VERSION_PATH),
)
subprocess.check_call(cmd, shell=True)
def _update_changelog(repo_path, version):
cmd = """sed \
-e "s/[@]VERSION_STRING[@]/{version_str}/g" \
-e "s/[@]DATE[@]/{date}/g" \
-e "s/[@]AUTHOR[@]/clickhouse-release/g" \
-e "s/[@]EMAIL[@]/clickhouse-release@yandex-team.ru/g" \
< {in_path} > {changelog_path}
""".format(
version_str=version.get_version_string(),
date=datetime.datetime.now().strftime("%a, %d %b %Y %H:%M:%S") + " +0300",
in_path=os.path.join(repo_path, CHANGELOG_IN_PATH),
changelog_path=os.path.join(repo_path, CHANGELOG_PATH)
)
subprocess.check_call(cmd, shell=True)
def _update_contributors(repo_path):
cmd = "cd {} && ./StorageSystemContributors.sh".format(os.path.join(repo_path, CONTRIBUTORS_SCRIPT_DIR))
subprocess.check_call(cmd, shell=True)
def _update_dockerfile(repo_path, version):
version_str_for_docker = '.'.join([str(version.major), str(version.minor), str(version.patch), '*'])
cmd = "ls -1 {path}/docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='{ver}'/'".format(path=repo_path, ver=version_str_for_docker)
subprocess.check_call(cmd, shell=True)
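# e.g. with version 21.11.1.1 (illustrative) this rewrites "ARG version=..." in every Dockerfile
# under docker/ to "ARG version=21.11.1.*".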
def update_version_local(repo_path, sha, version, version_type="testing"):
_update_contributors(repo_path)
_update_cmake_version(repo_path, version, sha, version_type)
_update_changelog(repo_path, version)
_update_dockerfile(repo_path, version)

View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
set -euo pipefail
echo "Running init script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
echo "Receiving token"
export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value`
export RUNNER_URL="https://github.com/ClickHouse"
# Fun fact: the metadata service has a fixed IP
export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
cd $RUNNER_HOME
echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,builder' --work _work
echo "Run"
sudo -u ubuntu ./run.sh

View File

@ -14,7 +14,7 @@ export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
cd $RUNNER_HOME
echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,style-checker' --work _work
echo "Run"
sudo -u ubuntu ./run.sh

View File

@ -27,6 +27,16 @@ apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd
usermod -aG docker ubuntu
# enable IPv6 in containers (fixed-cidr-v6 is an arbitrary documentation prefix)
cat <<EOT > /etc/docker/daemon.json
{
"ipv6": true,
"fixed-cidr-v6": "2001:db8:1::/64"
}
EOT
systemctl restart docker
pip install boto3 pygithub requests urllib3 unidiff
mkdir -p $RUNNER_HOME && cd $RUNNER_HOME

View File

@ -0,0 +1,61 @@
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='21.3', with_installed_binary=True)
node2 = cluster.add_instance('node2', with_zookeeper=True, image='yandex/clickhouse-server')
node3 = cluster.add_instance('node3', with_zookeeper=True, image='yandex/clickhouse-server')
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_two_level_merge(start_cluster):
for node in start_cluster.instances.values():
node.query(
"""
CREATE TABLE IF NOT EXISTS test_two_level_merge(date Date, zone UInt32, number UInt32)
ENGINE = MergeTree() PARTITION BY toUInt64(number / 1000) ORDER BY tuple();
INSERT INTO
test_two_level_merge
SELECT
toDate('2021-09-28') - number / 1000,
249081628,
number
FROM
numbers(15000);
"""
)
# covers only the keys64 method
for node in start_cluster.instances.values():
print(node.query(
"""
SELECT
throwIf(uniqExact(date) != count(), 'group by is borked')
FROM (
SELECT
date
FROM
remote('node{1,2}', default.test_two_level_merge)
WHERE
date BETWEEN toDate('2021-09-20') AND toDate('2021-09-28')
AND zone = 249081628
GROUP by date, zone
)
SETTINGS
group_by_two_level_threshold = 1,
group_by_two_level_threshold_bytes = 1,
max_threads = 2,
prefer_localhost_replica = 0
"""
))

View File

@ -103,6 +103,11 @@
FORMAT Null;
</query>
<query>
SELECT * FROM simple_key_hashed_array_dictionary
FORMAT Null;
</query>
<query>
WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
SELECT dictGet('default.complex_key_hashed_array_dictionary', {column_name}, key)
@ -119,6 +124,11 @@
FORMAT Null;
</query>
<query>
SELECT * FROM complex_key_hashed_array_dictionary
FORMAT Null;
</query>
<drop_query>DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table;</drop_query>
<drop_query>DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table;</drop_query>

View File

@ -103,6 +103,11 @@
FORMAT Null;
</query>
<query>
SELECT * FROM simple_key_hashed_dictionary
FORMAT Null;
</query>
<query>
WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
SELECT dictGet('default.complex_key_hashed_dictionary', {column_name}, key)
@ -119,6 +124,11 @@
FORMAT Null;
</query>
<query>
SELECT * FROM complex_key_hashed_dictionary
FORMAT Null;
</query>
<drop_query>DROP TABLE IF EXISTS simple_key_hashed_dictionary_source_table;</drop_query>
<drop_query>DROP TABLE IF EXISTS complex_key_hashed_dictionary_source_table;</drop_query>

View File

@ -0,0 +1,3 @@
2020-01-01 00:05:00
2020-01-01 00:05:00
2020-01-01 00:06:00 hello

View File

@ -0,0 +1,25 @@
DROP TABLE IF EXISTS buf_dest;
DROP TABLE IF EXISTS buf;
CREATE TABLE buf_dest (timestamp DateTime)
ENGINE = MergeTree PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (timestamp);
CREATE TABLE buf (timestamp DateTime) Engine = Buffer(currentDatabase(), buf_dest, 16, 0.1, 0.1, 2000000, 20000000, 100000000, 300000000);
INSERT INTO buf (timestamp) VALUES (toDateTime('2020-01-01 00:05:00'));
--- wait for buffer to flush
SELECT sleep(1) from numbers(1) settings max_block_size=1 format Null;
ALTER TABLE buf_dest ADD COLUMN s String;
ALTER TABLE buf ADD COLUMN s String;
SELECT * FROM buf;
INSERT INTO buf (timestamp, s) VALUES (toDateTime('2020-01-01 00:06:00'), 'hello');
SELECT * FROM buf ORDER BY timestamp;
DROP TABLE IF EXISTS buf;
DROP TABLE IF EXISTS buf_dest;

View File

@ -12,7 +12,7 @@ echo "Starting clickhouse-server"
$PORT
$CLICKHOUSE_BINARY-server -- --tcp_port "$CLICKHOUSE_PORT_TCP" > server.log 2>&1 &
$CLICKHOUSE_BINARY server -- --tcp_port "$CLICKHOUSE_PORT_TCP" --path /tmp/ > server.log 2>&1 &
PID=$!
function finish {

View File

@ -13,12 +13,25 @@ import urllib.request
import subprocess
import lzma
def get_local_port(host):
with socket.socket() as fd:
def is_ipv6(host):
try:
socket.inet_aton(host)
return False
except socket.error:
return True
def get_local_port(host, ipv6):
if ipv6:
family = socket.AF_INET6
else:
family = socket.AF_INET
with socket.socket(family) as fd:
fd.bind((host, 0))
return fd.getsockname()[1]
CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1')
CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost')
CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
#####################################################################################
@ -30,11 +43,15 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
# IP-address of this host accessible from the outside world. Get the first one
HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0]
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST)
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)
# IP address and port of the HTTP server started from this script.
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
HTTP_SERVER_URL_STR = 'http://' + ':'.join(str(s) for s in HTTP_SERVER_ADDRESS) + "/"
if IS_IPV6:
HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
else:
HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
# Because we need to check the content of file.csv, we create this content here and avoid reading the csv from disk
CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555\n"
@ -48,6 +65,10 @@ ENDINGS = ['.gz', '.xz']
SEND_ENCODING = True
def get_ch_answer(query):
host = CLICKHOUSE_HOST
if IS_IPV6:
host = f'[{host}]'
url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=host, port=CLICKHOUSE_PORT_HTTP))
return urllib.request.urlopen(url, data=query.encode()).read().decode()
@ -98,8 +119,14 @@ class HttpProcessor(SimpleHTTPRequestHandler):
def log_message(self, format, *args):
return
class HTTPServerV6(HTTPServer):
address_family = socket.AF_INET6
def start_server(requests_amount):
httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor)
if IS_IPV6:
httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
else:
httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor)
def real_func():
for i in range(requests_amount):
@ -127,7 +154,7 @@ def test_select(dict_name="", schema="word String, counter UInt32", requests=[],
PRIMARY KEY word
SOURCE(HTTP(url '{}' format 'CSV'))
LAYOUT(complex_key_hashed())
LIFETIME(0)'''.format(dict_name, schema, HTTP_SERVER_URL_STR+'/test.csv' + ADDING_ENDING))
LIFETIME(0)'''.format(dict_name, schema, HTTP_SERVER_URL_STR + '/test.csv' + ADDING_ENDING))
COMPRESS_METHOD = requests[i]
print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING)

View File

@ -0,0 +1,6 @@
1 a
2 b
3 c
4 d
5 e
6 f

View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1"
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts"
${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = Memory"
${CLICKHOUSE_CURL} -sS $url -d "INSERT INTO async_inserts VALUES (1, 'a') (2, 'b')" &
${CLICKHOUSE_CURL} -sS $url -d "INSERT INTO async_inserts VALUES (3, 'c'), (4, 'd')" &
${CLICKHOUSE_CURL} -sS $url -d "INSERT INTO async_inserts VALUES (5, 'e'), (6, 'f'), " &
wait
${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts ORDER BY id"
${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts"

View File

@ -0,0 +1,5 @@
drop table if exists data_02021;
create table data_02021 (key Int) engine=MergeTree() order by key;
insert into data_02021 values (1);
select count() from data_02021 prewhere 1 or ignore(key) where ignore(key)=0;
drop table data_02021;

View File

@ -0,0 +1,3 @@
2
1
0

View File

@ -0,0 +1,8 @@
SELECT x
FROM
(
SELECT if((number % NULL) = -2147483648, NULL, if(toInt64(toInt64(now64(if((number % NULL) = -2147483648, NULL, if(toInt64(now64(toInt64(9223372036854775807, now64(plus(NULL, NULL))), plus(NULL, NULL))) = (number % NULL), nan, toFloat64(number))), toInt64(9223372036854775807, toInt64(9223372036854775807, now64(plus(NULL, NULL))), now64(plus(NULL, NULL))), plus(NULL, NULL))), now64(toInt64(9223372036854775807, toInt64(0, now64(plus(NULL, NULL))), now64(plus(NULL, NULL))), plus(NULL, NULL))) = (number % NULL), nan, toFloat64(number))) AS x
FROM system.numbers
LIMIT 3
)
ORDER BY x DESC NULLS LAST

View File

@ -0,0 +1,22 @@
<clickhouse>
<profiles>
<default></default>
</profiles>
<users>
<default>
<password></password>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
</users>
<quotas>
<default></default>
</quotas>
<top_level_domains_lists>
<public_suffix_list>02110_clickhouse_local_custom_tld.tld.dat</public_suffix_list>
</top_level_domains_lists>
</clickhouse>

View File

@ -0,0 +1 @@
biz.ss kernel.biz.ss

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
function clickhouse_local()
{
local opts=(
--config "$CURDIR/$(basename "${BASH_SOURCE[0]}" .sh).config.xml"
--top_level_domains_path "$CURDIR"
)
$CLICKHOUSE_LOCAL "${opts[@]}" "$@"
}
# -- biz.ss is not in the default TLD list, hence:
clickhouse_local -q "
select
cutToFirstSignificantSubdomain('foo.kernel.biz.ss'),
cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list')
" |& grep -v -e 'Processing configuration file'

View File

@ -0,0 +1 @@
biz.ss