mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00

Merge remote-tracking branch 'upstream/master' into fix-valid-until

This commit is contained in commit 13d4b7bd3e

@ -13,3 +13,6 @@
# dbms/ → src/
# (though it is unlikely that you will see it in blame)
06446b4f08a142d6f1bc30664c47ded88ab51782

# Applied Black formatter for Python code
e6f5a3f98b21ba99cf274a9833797889e020a2b3

1 .github/actionlint.yml (vendored)
@ -7,3 +7,4 @@ self-hosted-runner:
  - stress-tester
  - style-checker
  - style-checker-aarch64
  - release-maker

151 .github/workflows/create_release.yml (vendored)
@ -6,8 +6,8 @@ concurrency:
|
||||
'on':
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
sha:
|
||||
description: 'The SHA hash of the commit from which to create the release'
|
||||
ref:
|
||||
description: 'Git reference (branch or commit sha) from which to create the release'
|
||||
required: true
|
||||
type: string
|
||||
type:
|
||||
@ -15,15 +15,152 @@ concurrency:
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- new
|
||||
- patch
|
||||
- new
|
||||
dry-run:
|
||||
description: 'Dry run'
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
|
||||
jobs:
|
||||
Release:
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
CreateRelease:
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
|
||||
runs-on: [self-hosted, release-maker]
|
||||
steps:
|
||||
- name: DebugInfo
|
||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
||||
- name: Set envs
|
||||
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
|
||||
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
|
||||
RCSK
|
||||
RELEASE_INFO_FILE=${{ runner.temp }}/release_info.json
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
- name: Print greeting
|
||||
with:
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
fetch-depth: 0
|
||||
- name: Prepare Release Info
|
||||
run: |
|
||||
python3 ./tests/ci/release.py --commit ${{ inputs.sha }} --type ${{ inputs.type }} --dry-run
|
||||
python3 ./tests/ci/create_release.py --prepare-release-info \
|
||||
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
|
||||
--outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
echo "::group::Release Info"
|
||||
python3 -m json.tool "$RELEASE_INFO_FILE"
|
||||
echo "::endgroup::"
|
||||
release_tag=$(jq -r '.release_tag' "$RELEASE_INFO_FILE")
|
||||
commit_sha=$(jq -r '.commit_sha' "$RELEASE_INFO_FILE")
|
||||
echo "Release Tag: $release_tag"
|
||||
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
|
||||
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
|
||||
- name: Download All Release Artifacts
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Push Git Tag for the Release
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Push New Release Branch
|
||||
if: ${{ inputs.type == 'new' }}
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Bump CH Version and Update Contributors' List
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Checkout master
|
||||
run: |
|
||||
git checkout master
|
||||
- name: Bump Docker versions, Changelog, Security
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
[ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1
|
||||
echo "List versions"
|
||||
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
|
||||
echo "Update docker version"
|
||||
./utils/list-versions/update-docker-version.sh
|
||||
echo "Generate ChangeLog"
|
||||
export CI=1
|
||||
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
|
||||
--volume=".:/ClickHouse" clickhouse/style-test \
|
||||
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
|
||||
--gh-user-or-token="$GH_TOKEN" --jobs=5 \
|
||||
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
|
||||
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
|
||||
echo "Generate Security"
|
||||
python3 ./utils/security-generator/generate_security.py > SECURITY.md
|
||||
git diff HEAD
|
||||
- name: Generate ChangeLog
|
||||
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
|
||||
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
|
||||
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
|
||||
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
|
||||
branch: auto/${{ env.RELEASE_TAG }}
|
||||
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
|
||||
delete-branch: true
|
||||
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
|
||||
labels: do not test
|
||||
body: |
|
||||
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
|
||||
### Changelog category (leave one):
|
||||
- Not for changelog (changelog entry is not required)
|
||||
- name: Reset changes if Dry-run
|
||||
if: ${{ inputs.dry-run }}
|
||||
run: |
|
||||
git reset --hard HEAD
|
||||
- name: Checkout back to GITHUB_REF
|
||||
run: |
|
||||
git checkout "$GITHUB_REF_NAME"
|
||||
- name: Create GH Release
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/create_release.py --create-gh-release \
|
||||
--infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
|
||||
- name: Export TGZ Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test TGZ Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Export RPM Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test RPM Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Export Debian Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Test Debian Packages
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
- name: Docker clickhouse/clickhouse-server building
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
cd "./tests/ci"
|
||||
export CHECK_NAME="Docker server image"
|
||||
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
|
||||
- name: Docker clickhouse/clickhouse-keeper building
|
||||
if: ${{ inputs.type == 'patch' }}
|
||||
run: |
|
||||
cd "./tests/ci"
|
||||
export CHECK_NAME="Docker keeper image"
|
||||
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
|
||||
- name: Post Slack Message
|
||||
if: always()
|
||||
run: |
|
||||
echo Slack Message
|
||||
|
2 .github/workflows/pull_request.yml (vendored)
@ -172,7 +172,7 @@ jobs:
  ################################# Stage Final #################################
  #
  FinishCheck:
    if: ${{ !failure() }}
    if: ${{ !failure() && !cancelled() }}
    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
    runs-on: [self-hosted, style-checker-aarch64]
    steps:
@ -42,9 +42,19 @@ endif ()
# But use 2 parallel jobs, since:
# - this is what llvm does
# - and I've verified that lld-11 does not use all available CPU time (in peak) while linking one binary
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2)
    message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
    set (PARALLEL_LINK_JOBS 2)
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO)
    if (ARCH_AARCH64)
        # aarch64 builds often start to fail with OOMs (reason not yet clear), for now let's limit the concurrency
        message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.")
        set (PARALLEL_LINK_JOBS 1)
        if (LINKER_NAME MATCHES "lld")
            math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4)
            set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}")
        endif()
    elseif (PARALLEL_LINK_JOBS GREATER 2)
        message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
        set (PARALLEL_LINK_JOBS 2)
    endif ()
endif()

message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).")
2 contrib/openssl (vendored)
@ -1 +1 @@
Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08
Subproject commit 66deddc1e53cda8706604a019777259372d1bd62
@ -27,19 +27,19 @@ def run_fuzzer(fuzzer: str):
|
||||
parser.read(path)
|
||||
|
||||
if parser.has_section("asan"):
|
||||
os.environ[
|
||||
"ASAN_OPTIONS"
|
||||
] = f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}"
|
||||
os.environ["ASAN_OPTIONS"] = (
|
||||
f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}"
|
||||
)
|
||||
|
||||
if parser.has_section("msan"):
|
||||
os.environ[
|
||||
"MSAN_OPTIONS"
|
||||
] = f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}"
|
||||
os.environ["MSAN_OPTIONS"] = (
|
||||
f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}"
|
||||
)
|
||||
|
||||
if parser.has_section("ubsan"):
|
||||
os.environ[
|
||||
"UBSAN_OPTIONS"
|
||||
] = f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}"
|
||||
os.environ["UBSAN_OPTIONS"] = (
|
||||
f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}"
|
||||
)
|
||||
|
||||
if parser.has_section("libfuzzer"):
|
||||
custom_libfuzzer_options = " ".join(
|
||||
|
@ -23,7 +23,10 @@ source /utils.lib
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence &
|
||||
|
||||
./setup_minio.sh stateful
|
||||
./mc admin trace clickminio > /test_output/rubbish.log &
|
||||
MC_ADMIN_PID=$!
|
||||
|
||||
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
|
||||
|
||||
@ -254,6 +257,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
|
||||
sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||:
|
||||
fi
|
||||
|
||||
# Kill minio admin client to stop collecting logs
|
||||
kill $MC_ADMIN_PID
|
||||
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
|
||||
|
||||
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst ||:
|
||||
|
@ -86,6 +86,7 @@ RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoo
|
||||
ENV MINIO_ROOT_USER="clickhouse"
|
||||
ENV MINIO_ROOT_PASSWORD="clickhouse"
|
||||
ENV EXPORT_S3_STORAGE_POLICIES=1
|
||||
ENV CLICKHOUSE_GRPC_CLIENT="/usr/share/clickhouse-utils/grpc-client/clickhouse-grpc-client.py"
|
||||
|
||||
RUN npm install -g azurite@3.30.0 \
|
||||
&& npm install -g tslib && npm install -g node
|
||||
|
@ -8,6 +8,7 @@ cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
distro==1.7.0
|
||||
docutils==0.17.1
|
||||
grpcio==1.47.0
|
||||
gyp==0.1
|
||||
httplib2==0.20.2
|
||||
idna==3.3
|
||||
@ -28,6 +29,7 @@ packaging==24.1
|
||||
pandas==1.5.3
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
protobuf==4.25.3
|
||||
pyarrow==15.0.0
|
||||
pyasn1==0.4.8
|
||||
PyJWT==2.3.0
|
||||
|
@ -6,8 +6,8 @@ source /setup_export_logs.sh
|
||||
# fail on errors, verbose and export all env variables
|
||||
set -e -x -a
|
||||
|
||||
MAX_RUN_TIME=${MAX_RUN_TIME:-10800}
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME))
|
||||
MAX_RUN_TIME=${MAX_RUN_TIME:-7200}
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 7200 : MAX_RUN_TIME))
|
||||
|
||||
USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
|
||||
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
|
||||
@ -54,6 +54,9 @@ source /utils.lib
/usr/share/clickhouse-test/config/install.sh

./setup_minio.sh stateless
./mc admin trace clickminio > /test_output/rubbish.log &
MC_ADMIN_PID=$!

./setup_hdfs_minicluster.sh

config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
@ -320,7 +323,7 @@ export -f run_tests
|
||||
|
||||
|
||||
# This should be enough to setup job and collect artifacts
|
||||
TIMEOUT=$((MAX_RUN_TIME - 600))
|
||||
TIMEOUT=$((MAX_RUN_TIME - 700))
|
||||
if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
# We don't run tests with Ordinary database in PRs, only in master.
|
||||
# So run new/changed tests with Ordinary at least once in flaky check.
|
||||
@ -383,6 +386,9 @@ if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
|
||||
sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||:
|
||||
fi
|
||||
|
||||
# Kill minio admin client to stop collecting logs
|
||||
kill $MC_ADMIN_PID
|
||||
|
||||
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
|
||||
rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
|
||||
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
|
||||
|
@ -3,7 +3,7 @@ aiosignal==1.3.1
|
||||
astroid==3.1.0
|
||||
async-timeout==4.0.3
|
||||
attrs==23.2.0
|
||||
black==23.12.0
|
||||
black==24.4.2
|
||||
boto3==1.34.131
|
||||
botocore==1.34.131
|
||||
certifi==2024.6.2
|
||||
|
@ -11,6 +11,7 @@ TIMEOUT_SIGN = "[ Timeout! "
|
||||
UNKNOWN_SIGN = "[ UNKNOWN "
|
||||
SKIPPED_SIGN = "[ SKIPPED "
|
||||
HUNG_SIGN = "Found hung queries in processlist"
|
||||
SERVER_DIED_SIGN = "Server died, terminating all processes"
|
||||
DATABASE_SIGN = "Database: "
|
||||
|
||||
SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]
|
||||
@ -25,6 +26,7 @@ def process_test_log(log_path, broken_tests):
|
||||
failed = 0
|
||||
success = 0
|
||||
hung = False
|
||||
server_died = False
|
||||
retries = False
|
||||
success_finish = False
|
||||
test_results = []
|
||||
@ -41,6 +43,8 @@ def process_test_log(log_path, broken_tests):
|
||||
if HUNG_SIGN in line:
|
||||
hung = True
|
||||
break
|
||||
if SERVER_DIED_SIGN in line:
|
||||
server_died = True
|
||||
if RETRIES_SIGN in line:
|
||||
retries = True
|
||||
if any(
|
||||
@ -123,6 +127,7 @@ def process_test_log(log_path, broken_tests):
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
server_died,
|
||||
success_finish,
|
||||
retries,
|
||||
test_results,
|
||||
@ -150,6 +155,7 @@ def process_result(result_path, broken_tests):
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
server_died,
|
||||
success_finish,
|
||||
retries,
|
||||
test_results,
|
||||
@ -165,6 +171,10 @@ def process_result(result_path, broken_tests):
|
||||
description = "Some queries hung, "
|
||||
state = "failure"
|
||||
test_results.append(("Some queries hung", "FAIL", "0", ""))
|
||||
elif server_died:
|
||||
description = "Server died, "
|
||||
state = "failure"
|
||||
test_results.append(("Server died", "FAIL", "0", ""))
|
||||
elif not success_finish:
|
||||
description = "Tests are not finished, "
|
||||
state = "failure"
|
||||
@ -218,5 +228,20 @@ if __name__ == "__main__":
|
||||
state, description, test_results = process_result(args.in_results_dir, broken_tests)
|
||||
logging.info("Result parsed")
|
||||
status = (state, description)
|
||||
|
||||
def test_result_comparator(item):
|
||||
# sort by status then by check name
|
||||
order = {
|
||||
"FAIL": 0,
|
||||
"Timeout": 1,
|
||||
"NOT_FAILED": 2,
|
||||
"BROKEN": 3,
|
||||
"OK": 4,
|
||||
"SKIPPED": 5,
|
||||
}
|
||||
return order.get(item[1], 10), str(item[0]), item[1]
|
||||
|
||||
test_results.sort(key=test_result_comparator)
|
||||
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
@ -226,15 +226,59 @@ Other IDEs you can use are [Sublime Text](https://www.sublimetext.com/), [Visual
|
||||
|
||||
## Writing Code {#writing-code}
|
||||
|
||||
The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/
|
||||
Below you can find some quick links which may be useful when writing code for ClickHouse:
|
||||
|
||||
The Code Style Guide: https://clickhouse.com/docs/en/development/style/
|
||||
- [ClickHouse architecture description](https://clickhouse.com/docs/en/development/architecture/).
|
||||
- [The code style guide](https://clickhouse.com/docs/en/development/style/).
|
||||
- [Adding third-party libraries](https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries)
|
||||
- [Writing tests](https://clickhouse.com/docs/en/development/tests/)
|
||||
- [List of open issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest)
|
||||
|
||||
Adding third-party libraries: https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries
|
||||
## Writing Documentation {#writing-documentation}
|
||||
|
||||
Writing tests: https://clickhouse.com/docs/en/development/tests/
|
||||
As part of every pull request which adds a new feature, it is necessary to write documentation for it. If you'd like to preview your documentation changes the instructions for how to build the documentation page locally are available in the README.md file [here](https://github.com/ClickHouse/clickhouse-docs). When adding a new function to ClickHouse you can use the template below as a guide:
|
||||
|
||||
List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest
|
||||
```markdown
|
||||
# newFunctionName
|
||||
|
||||
A short description of the function goes here. It should describe briefly what it does and a typical usage case.
|
||||
|
||||
**Syntax**
|
||||
|
||||
\```sql
|
||||
newFunctionName(arg1, arg2[, arg3])
|
||||
\```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `arg1` — Description of the argument. [DataType](../data-types/float.md)
|
||||
- `arg2` — Description of the argument. [DataType](../data-types/float.md)
|
||||
- `arg3` — Description of optional argument (optional). [DataType](../data-types/float.md)
|
||||
|
||||
**Implementation Details**
|
||||
|
||||
A description of implementation details if relevant.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns {insert what the function returns here}. [DataType](../data-types/float.md)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
\```sql
|
||||
SELECT 'write your example query here';
|
||||
\```
|
||||
|
||||
Response:
|
||||
|
||||
\```response
|
||||
┌───────────────────────────────────┐
|
||||
│ the result of the query │
|
||||
└───────────────────────────────────┘
|
||||
\```
|
||||
```
|
||||
|
||||
## Test Data {#test-data}
|
||||
|
||||
|
@ -15,7 +15,7 @@ You have four options for getting up and running with ClickHouse:
|
||||
|
||||
- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, built by, maintained, and supported by the creators of ClickHouse
|
||||
- **[Quick Install](#quick-install):** an easy-to-download binary for testing and developing with ClickHouse
|
||||
- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture
|
||||
- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, modern ARM (ARMv8.2-A up), or PowerPC64LE CPU architecture
|
||||
- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** use the official Docker image in Docker Hub
|
||||
|
||||
## ClickHouse Cloud
|
||||
|
@ -1030,7 +1030,7 @@ A table with no primary key represents the extreme case of a single equivalence

The fewer and the larger the equivalence classes are, the higher the degree of freedom when re-shuffling rows.

The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemir, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns.
The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemire, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns.
It performs three steps:
1. Find all equivalence classes based on the row values in primary key columns.
2. For each equivalence class, calculate (usually estimate) the cardinalities of the non-primary-key columns.
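To make step 2 concrete, here is a minimal SQL sketch (the table `t`, its primary key columns `k1`, `k2`, and the non-primary-key columns `c1`, `c2` are hypothetical) of the per-class cardinality estimate that the heuristic sorts by:

```sql
-- Illustrative only: one row per equivalence class of the primary key (k1, k2),
-- with the cardinality of each non-primary-key column inside that class.
-- Columns with lower per-class cardinality are ordered first by the heuristic.
SELECT
    k1,
    k2,
    uniqExact(c1) AS cardinality_c1,
    uniqExact(c2) AS cardinality_c2
FROM t
GROUP BY k1, k2;
```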
@ -16,7 +16,7 @@ sidebar_label: clickhouse-local
|
||||
|
||||
While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high-performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`.
|
||||
|
||||
Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3).
|
||||
Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local file](#query_data_in_file) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3).
|
||||
|
||||
## Download clickhouse-local
|
||||
|
||||
|
@ -18,7 +18,7 @@ ClickHouse also supports:
|
||||
|
||||
During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL.
|
||||
|
||||
There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`.
|
||||
There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases (`any` and `anyLast` respectively) when followed by the modifier `RESPECT NULLS`. For example, `FIRST_VALUE(b) RESPECT NULLS`.
|
||||
|
||||
**Examples:**
|
||||
|
||||
|
@ -5,12 +5,12 @@ sidebar_position: 102
|
||||
|
||||
# any
|
||||
|
||||
Selects the first encountered value of a column.
|
||||
Selects the first encountered value of a column, ignoring any `NULL` values.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
any(column)
|
||||
any(column) [RESPECT NULLS]
|
||||
```
|
||||
|
||||
Aliases: `any_value`, [`first_value`](../reference/first_value.md).
|
||||
@ -20,7 +20,9 @@ Aliases: `any_value`, [`first_value`](../reference/first_value.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) it supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not.
|
||||
:::note
|
||||
Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not.
|
||||
:::
|
||||
|
||||
:::note
|
||||
The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) to modify this behaviour.
|
||||
|
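To illustrate the notes above, here is a minimal sketch (the table name and values are illustrative) of `any` with and without `RESPECT NULLS`, plus the `-OrNull` combinator:

```sql
-- Default: NULL values are skipped, so the first non-NULL value is returned.
-- With RESPECT NULLS: the first value passed is returned even if it is NULL.
CREATE TABLE any_demo (x Nullable(String)) ENGINE = Memory;
INSERT INTO any_demo VALUES (NULL), ('a'), ('b');

SELECT
    any(x),               -- typically 'a'
    any(x) RESPECT NULLS, -- typically NULL, the first value passed
    anyOrNull(x)          -- like any(x), but returns NULL instead of the type default when there are no rows
FROM any_demo;
```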
@ -1,44 +0,0 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/any_respect_nulls
|
||||
sidebar_position: 103
|
||||
---
|
||||
|
||||
# any_respect_nulls
|
||||
|
||||
Selects the first encountered value of a column, irregardless of whether it is a `NULL` value or not.
|
||||
|
||||
Alias: `any_value_respect_nulls`, `first_value_repect_nulls`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
any_respect_nulls(column)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
- `column`: The column name.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The last value encountered, irregardless of whether it is a `NULL` value or not.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log;
|
||||
|
||||
INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL);
|
||||
|
||||
SELECT any(city), any_respect_nulls(city) FROM any_nulls;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─any(city)─┬─any_respect_nulls(city)─┐
|
||||
│ Amsterdam │ ᴺᵁᴸᴸ │
|
||||
└───────────┴─────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
- [any](../reference/any.md)
|
@ -5,17 +5,21 @@ sidebar_position: 105
|
||||
|
||||
# anyLast
|
||||
|
||||
Selects the last value encountered. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function.
|
||||
Selects the last value encountered, ignoring any `NULL` values by default. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
anyLast(column)
|
||||
anyLast(column) [RESPECT NULLS]
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
- `column`: The column name.
|
||||
|
||||
:::note
|
||||
Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the last value passed, regardless of whether it is `NULL` or not.
|
||||
:::
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The last value encountered.
|
||||
|
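A minimal sketch of the difference (the table name and values are illustrative):

```sql
-- anyLast skips NULL values by default; RESPECT NULLS keeps them.
CREATE TABLE any_last_demo (x Nullable(String)) ENGINE = Memory;
INSERT INTO any_last_demo VALUES ('a'), ('b'), (NULL);

SELECT
    anyLast(x),               -- typically 'b', the last non-NULL value
    anyLast(x) RESPECT NULLS  -- typically NULL, the last value passed
FROM any_last_demo;
```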
@ -1,39 +0,0 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls
|
||||
sidebar_position: 106
|
||||
---
|
||||
|
||||
# anyLast_respect_nulls
|
||||
|
||||
Selects the last value encountered, irregardless of whether it is `NULL` or not.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
anyLast_respect_nulls(column)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
- `column`: The column name.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The last value encountered, irregardless of whether it is `NULL` or not.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log;
|
||||
|
||||
INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL);
|
||||
|
||||
SELECT anyLast(city), anyLast_respect_nulls(city) FROM any_last_nulls;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─anyLast(city)─┬─anyLast_respect_nulls(city)─┐
|
||||
│ Valencia │ ᴺᵁᴸᴸ │
|
||||
└───────────────┴─────────────────────────────┘
|
||||
```
|
@ -45,10 +45,9 @@ ClickHouse-specific aggregate functions:
|
||||
|
||||
- [aggThrow](../reference/aggthrow.md)
|
||||
- [analysisOfVariance](../reference/analysis_of_variance.md)
|
||||
- [any](../reference/any_respect_nulls.md)
|
||||
- [any](../reference/any.md)
|
||||
- [anyHeavy](../reference/anyheavy.md)
|
||||
- [anyLast](../reference/anylast.md)
|
||||
- [anyLast](../reference/anylast_respect_nulls.md)
|
||||
- [boundingRatio](../reference/boundrat.md)
|
||||
- [first_value](../reference/first_value.md)
|
||||
- [last_value](../reference/last_value.md)
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/sql-reference/data-types/dynamic
|
||||
sidebar_position: 56
|
||||
sidebar_position: 62
|
||||
sidebar_label: Dynamic
|
||||
---
|
||||
|
||||
@ -494,13 +494,43 @@ SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) O
|
||||
|
||||
As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`.
|
||||
|
||||
## JSONExtract functions with Dynamic
|
||||
|
||||
All `JSONExtract*` functions support `Dynamic` type:
|
||||
|
||||
```sql
|
||||
SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(dynamic) AS dynamic_type;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─dynamic─┬─dynamic_type───────────┐
|
||||
│ [1,2,3] │ Array(Nullable(Int64)) │
|
||||
└─────────┴────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types
```
|
||||
|
||||
```text
|
||||
┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐
|
||||
│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │
|
||||
└──────────────────────────────────┴─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types
```
|
||||
|
||||
```text
|
||||
┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐
|
||||
│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │
|
||||
└────────────────────────────────────────┴───────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Binary output format
|
||||
|
||||
In [RowBinary](../../interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format:
|
||||
In RowBinary format values of `Dynamic` type are serialized in the following format:
|
||||
|
||||
```text
|
||||
<binary_encoded_data_type><value_in_binary_format_according_to_the_data_type>
|
||||
```
|
||||
|
||||
See the [data types binary encoding specification](../../sql-reference/data-types/data-types-binary-encoding.md)
|
||||
|
@ -3080,4 +3080,4 @@ Result:
|
||||
|
||||
## Distance functions
|
||||
|
||||
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).
|
||||
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).
|
@ -2698,6 +2698,204 @@ Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../data-types
|
||||
|
||||
Accepts an additional, optional `precision` parameter after the `timezone` parameter.
|
||||
|
||||
## changeYear
|
||||
|
||||
Changes the year component of a date or date time.
|
||||
|
||||
**Syntax**
|
||||
``` sql
|
||||
|
||||
changeYear(date_or_datetime, value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The same type as `date_or_datetime`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐
|
||||
│ 2000-01-01 │ 2000-01-01 00:00:00.000 │
|
||||
└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## changeMonth
|
||||
|
||||
Changes the month component of a date or date time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
changeMonth(date_or_datetime, value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a value of same type as `date_or_datetime`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐
|
||||
│ 1999-02-01 │ 1999-02-01 00:00:00.000 │
|
||||
└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## changeDay
|
||||
|
||||
Changes the day component of a date or date time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
changeDay(date_or_datetime, value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a value of same type as `date_or_datetime`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐
|
||||
│ 1999-01-05 │ 1999-01-05 00:00:00.000 │
|
||||
└────────────────────────────────────┴──────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## changeHour
|
||||
|
||||
Changes the hour component of a date or date time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
changeHour(date_or_datetime, value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐
|
||||
│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │
|
||||
└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## changeMinute
|
||||
|
||||
Changes the minute component of a date or date time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
changeMinute(date_or_datetime, value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐
|
||||
│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │
|
||||
└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## changeSecond
|
||||
|
||||
Changes the second component of a date or date time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
changeSecond(date_or_datetime, value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐
|
||||
│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │
|
||||
└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## addYears
|
||||
|
||||
Adds a specified number of years to a date, a date with time or a string-encoded date / date with time.
|
||||
@ -2714,6 +2912,7 @@ addYears(date, num)
|
||||
- `num`: Number of years to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` plus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -2751,6 +2950,7 @@ addQuarters(date, num)
|
||||
- `num`: Number of quarters to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` plus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -2788,6 +2988,7 @@ addMonths(date, num)
|
||||
- `num`: Number of months to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` plus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -2825,6 +3026,7 @@ addWeeks(date, num)
|
||||
- `num`: Number of weeks to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` plus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -2862,6 +3064,7 @@ addDays(date, num)
|
||||
- `num`: Number of days to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` plus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -2899,6 +3102,7 @@ addHours(date, num)
|
||||
- `num`: Number of hours to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` plus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -2936,6 +3140,7 @@ addMinutes(date, num)
|
||||
- `num`: Number of minutes to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` plus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -2973,6 +3178,7 @@ addSeconds(date, num)
|
||||
- `num`: Number of seconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` plus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3010,6 +3216,7 @@ addMilliseconds(date_time, num)
|
||||
- `num`: Number of milliseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date_time` plus `num` milliseconds. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3045,6 +3252,7 @@ addMicroseconds(date_time, num)
|
||||
- `num`: Number of microseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date_time` plus `num` microseconds. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3080,6 +3288,7 @@ addNanoseconds(date_time, num)
|
||||
- `num`: Number of nanoseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date_time` plus `num` nanoseconds. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3115,6 +3324,7 @@ addInterval(interval_1, interval_2)
|
||||
- `interval_2`: Second interval to be added. [interval](../data-types/special-data-types/interval.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)).
|
||||
|
||||
:::note
|
||||
@ -3161,6 +3371,7 @@ addTupleOfIntervals(interval_1, interval_2)
|
||||
- `intervals`: Tuple of intervals to add to `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` with added `intervals`. [date](../data-types/date.md)/[date32](../data-types/date32.md)/[datetime](../data-types/datetime.md)/[datetime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3195,6 +3406,7 @@ subtractYears(date, num)
|
||||
- `num`: Number of years to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` minus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3232,6 +3444,7 @@ subtractQuarters(date, num)
|
||||
- `num`: Number of quarters to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` minus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3269,6 +3482,7 @@ subtractMonths(date, num)
|
||||
- `num`: Number of months to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` minus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3306,6 +3520,7 @@ subtractWeeks(date, num)
|
||||
- `num`: Number of weeks to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` minus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3343,6 +3558,7 @@ subtractDays(date, num)
|
||||
- `num`: Number of days to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` minus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3380,6 +3596,7 @@ subtractHours(date, num)
|
||||
- `num`: Number of hours to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` minus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[Datetime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3417,6 +3634,7 @@ subtractMinutes(date, num)
|
||||
- `num`: Number of minutes to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` minus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3454,6 +3672,7 @@ subtractSeconds(date, num)
|
||||
- `num`: Number of seconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` minus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3491,6 +3710,7 @@ subtractMilliseconds(date_time, num)
|
||||
- `num`: Number of milliseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date_time` minus `num` milliseconds. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3526,6 +3746,7 @@ subtractMicroseconds(date_time, num)
|
||||
- `num`: Number of microseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date_time` minus `num` microseconds. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3561,6 +3782,7 @@ subtractNanoseconds(date_time, num)
|
||||
- `num`: Number of nanoseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date_time` minus `num` nanoseconds. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
@ -3596,6 +3818,7 @@ subtractInterval(interval_1, interval_2)
|
||||
- `interval_2`: Second interval to be negated. [interval](../data-types/special-data-types/interval.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)).
|
||||
|
||||
:::note
|
||||
@ -3642,6 +3865,7 @@ subtractTupleOfIntervals(interval_1, interval_2)
|
||||
- `intervals`: Tuple of intervals to subtract from `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `date` with subtracted `intervals`. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
|
@ -314,10 +314,71 @@ SELECT groupBitXor(cityHash64(*)) FROM table
|
||||
Calculates a 32-bit hash code from any type of integer.
|
||||
This is a relatively fast non-cryptographic hash function of average quality for numbers.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
intHash32(int)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- 32-bit hash code. [UInt32](../data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT intHash32(42);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─intHash32(42)─┐
|
||||
│ 1228623923 │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
## intHash64
|
||||
|
||||
Calculates a 64-bit hash code from any type of integer.
|
||||
It works faster than intHash32. Average quality.
|
||||
This is a relatively fast non-cryptographic hash function of average quality for numbers.
|
||||
It works faster than [intHash32](#inthash32).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
intHash64(int)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- 64-bit hash code. [UInt64](../data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT intHash64(42);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌────────intHash64(42)─┐
|
||||
│ 11490350930367293593 │
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
## SHA1, SHA224, SHA256, SHA512, SHA512_256
|
||||
|
||||
|
73
docs/en/sql-reference/window-functions/dense_rank.md
Normal file
73
docs/en/sql-reference/window-functions/dense_rank.md
Normal file
@ -0,0 +1,73 @@
|
||||
---
|
||||
slug: /en/sql-reference/window-functions/dense_rank
|
||||
sidebar_label: dense_rank
|
||||
sidebar_position: 7
|
||||
---
|
||||
|
||||
# dense_rank
|
||||
|
||||
Ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking.
|
||||
|
||||
The [rank](./rank.md) function provides the same behaviour, but with gaps in ranking.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
dense_rank (column_name)
|
||||
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
|
||||
[ROWS or RANGE expression_to_bound_rows_within_the_group]] | [window_name])
|
||||
FROM table_name
|
||||
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
|
||||
```
|
||||
|
||||
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A number for the current row within its partition, without gaps in ranking. [UInt64](../data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA).
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE salaries
|
||||
(
|
||||
`team` String,
|
||||
`player` String,
|
||||
`salary` UInt32,
|
||||
`position` String
|
||||
)
|
||||
Engine = Memory;
|
||||
|
||||
INSERT INTO salaries FORMAT Values
|
||||
('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
|
||||
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
|
||||
('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
|
||||
('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
|
||||
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
|
||||
('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
|
||||
('South Hampton Seagulls', 'James Henderson', 140000, 'M');
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT player, salary,
|
||||
dense_rank() OVER (ORDER BY salary DESC) AS dense_rank
|
||||
FROM salaries;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─player──────────┬─salary─┬─dense_rank─┐
|
||||
1. │ Gary Chen │ 195000 │ 1 │
|
||||
2. │ Robert George │ 195000 │ 1 │
|
||||
3. │ Charles Juarez │ 190000 │ 2 │
|
||||
4. │ Michael Stanley │ 150000 │ 3 │
|
||||
5. │ Douglas Benson │ 150000 │ 3 │
|
||||
6. │ Scott Harrison │ 150000 │ 3 │
|
||||
7. │ James Henderson │ 140000 │ 4 │
|
||||
└─────────────────┴────────┴────────────┘
|
||||
```
|
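The contrast with [rank](./rank.md) mentioned above can be seen directly by running both functions over the same data; a minimal sketch reusing the `salaries` table from the example:

```sql
-- rank() leaves gaps in the ranking after ties, dense_rank() does not.
SELECT player, salary,
       rank()       OVER (ORDER BY salary DESC) AS rank_with_gaps,
       dense_rank() OVER (ORDER BY salary DESC) AS dense_rank
FROM salaries;
```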
79
docs/en/sql-reference/window-functions/first_value.md
Normal file
79
docs/en/sql-reference/window-functions/first_value.md
Normal file
@ -0,0 +1,79 @@
|
||||
---
|
||||
slug: /en/sql-reference/window-functions/first_value
|
||||
sidebar_label: first_value
|
||||
sidebar_position: 3
|
||||
---
|
||||
|
||||
# first_value
|
||||
|
||||
Returns the first value evaluated within its ordered frame. By default, NULL arguments are skipped; however, the `RESPECT NULLS` modifier can be used to override this behaviour.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
first_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]]
|
||||
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
|
||||
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
|
||||
FROM table_name
|
||||
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
|
||||
```
|
||||
|
||||
Alias: `any`.
|
||||
|
||||
:::note
|
||||
Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped.
|
||||
See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information.
|
||||
:::
|
||||
|
||||
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The first value evaluated within its ordered frame.
|
||||
|
||||
**Example**
|
||||
|
||||
In this example the `first_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS salaries;
|
||||
CREATE TABLE salaries
|
||||
(
|
||||
`team` String,
|
||||
`player` String,
|
||||
`salary` UInt32,
|
||||
`position` String
|
||||
)
|
||||
Engine = Memory;
|
||||
|
||||
INSERT INTO salaries FORMAT Values
|
||||
('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'),
|
||||
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
|
||||
('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'),
|
||||
('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'),
|
||||
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
|
||||
('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
|
||||
('South Hampton Seagulls', 'James Henderson', 140000, 'M');
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT player, salary,
|
||||
first_value(player) OVER (ORDER BY salary DESC) AS highest_paid_player
|
||||
FROM salaries;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─player──────────┬─salary─┬─highest_paid_player─┐
|
||||
1. │ Gary Chen │ 196000 │ Gary Chen │
|
||||
2. │ Robert George │ 195000 │ Gary Chen │
|
||||
3. │ Charles Juarez │ 190000 │ Gary Chen │
|
||||
4. │ Scott Harrison │ 180000 │ Gary Chen │
|
||||
5. │ Douglas Benson │ 150000 │ Gary Chen │
|
||||
6. │ James Henderson │ 140000 │ Gary Chen │
|
||||
7. │ Michael Stanley │ 100000 │ Gary Chen │
|
||||
└─────────────────┴────────┴─────────────────────┘
|
||||
```
|
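As a minimal sketch of the `RESPECT NULLS` behaviour described in the note above (the `bonuses` table and its values are assumptions for illustration only, not part of the original example), the default and the modified variants can be compared side by side:

```sql
-- Hypothetical data: one player has a NULL bonus.
CREATE TABLE bonuses (player String, bonus Nullable(UInt32)) Engine = Memory;
INSERT INTO bonuses VALUES ('Charles Juarez', NULL), ('Gary Chen', 5000), ('Robert George', 3000);

SELECT player, bonus,
       first_value(bonus) OVER (ORDER BY player) AS default_behaviour,              -- NULL arguments are skipped
       first_value(bonus) RESPECT NULLS OVER (ORDER BY player) AS respect_nulls     -- NULL arguments are not skipped
FROM bonuses;
```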
@ -1,10 +1,11 @@
---
slug: /en/sql-reference/window-functions/
sidebar_position: 62
sidebar_label: Window Functions
title: Window Functions
sidebar_position: 1
---

# Window Functions

Window functions let you perform calculations across a set of rows that are related to the current row.
Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned.

@ -12,8 +13,8 @@ Some of the calculations that you can do are similar to those that can be done w

ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported.

| Feature                                                                             | Supported? |
|-------------------------------------------------------------------------------------|------------|
| Feature                                                                  | Supported? |
|--------------------------------------------------------------------------|------------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ |
@ -75,14 +76,14 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]

These functions can be used only as window functions.

- `row_number()` - Number the current row within its partition starting from 1.
- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame.
- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame.
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- `rank()` - Rank the current row within its partition with gaps.
- `dense_rank()` - Rank the current row within its partition without gaps.
- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned.
- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used.
- [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1.
- [`first_value(x)`](./first_value.md) - Return the first value evaluated within its ordered frame.
- [`last_value(x)`](./last_value.md) - Return the last value evaluated within its ordered frame.
- [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- [`rank()`](./rank.md) - Rank the current row within its partition with gaps.
- [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps.
- [`lagInFrame(x)`](./lagInFrame.md) - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
- [`leadInFrame(x)`](./leadInFrame.md) - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.

## Examples
79
docs/en/sql-reference/window-functions/lagInFrame.md
Normal file
@ -0,0 +1,79 @@
---
slug: /en/sql-reference/window-functions/lagInFrame
sidebar_label: lagInFrame
sidebar_position: 8
---

# lagInFrame

Returns a value evaluated at the row that is at a specified physical offset before the current row within the ordered frame.

**Syntax**

```sql
lagInFrame(x[, offset[, default]])
  OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
        [ROWS or RANGE expression_to_bound_rows_within_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]])
```

For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).

**Parameters**
- `x` — Column name.
- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default).
- `default` — Value to return if the calculated row exceeds the boundaries of the window frame. (Optional - `null` by default).

**Returned value**

- Value evaluated at the row that is at a specified physical offset before the current row within the ordered frame.

**Example**

This example looks at historical data for a specific stock and uses the `lagInFrame` function to calculate a day-to-day delta and percentage change in the closing price of the stock.

Query:

```sql
CREATE TABLE stock_prices
(
    `date`   Date,
    `open`   Float32, -- opening price
    `high`   Float32, -- daily high
    `low`    Float32, -- daily low
    `close`  Float32, -- closing price
    `volume` UInt32   -- trade volume
)
Engine = Memory;

INSERT INTO stock_prices FORMAT Values
    ('2024-06-03', 113.62, 115.00, 112.00, 115.00, 438392000),
    ('2024-06-04', 115.72, 116.60, 114.04, 116.44, 403324000),
    ('2024-06-05', 118.37, 122.45, 117.47, 122.44, 528402000),
    ('2024-06-06', 124.05, 125.59, 118.32, 121.00, 664696000),
    ('2024-06-07', 119.77, 121.69, 118.02, 120.89, 412386000);
```

```sql
SELECT
    date,
    close,
    lagInFrame(close, 1, close) OVER (ORDER BY date ASC) AS previous_day_close,
    COALESCE(ROUND(close - previous_day_close, 2)) AS delta,
    COALESCE(ROUND((delta / previous_day_close) * 100, 2)) AS percent_change
FROM stock_prices
ORDER BY date DESC;
```

Result:

```response
   ┌───────date─┬──close─┬─previous_day_close─┬─delta─┬─percent_change─┐
1. │ 2024-06-07 │ 120.89 │                121 │ -0.11 │          -0.09 │
2. │ 2024-06-06 │    121 │             122.44 │ -1.44 │          -1.18 │
3. │ 2024-06-05 │ 122.44 │             116.44 │     6 │           5.15 │
4. │ 2024-06-04 │ 116.44 │                115 │  1.44 │           1.25 │
5. │ 2024-06-03 │    115 │                115 │     0 │              0 │
   └────────────┴────────┴────────────────────┴───────┴────────────────┘
```
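As a minimal sketch of the optional parameters (reusing the `stock_prices` table above; the column aliases are only illustrative), the same query can be written with or without explicit `offset` and `default` arguments; when the calculated row falls outside the frame, the documented default is returned:

```sql
SELECT
    date,
    close,
    lagInFrame(close) OVER (ORDER BY date ASC)           AS lag_with_defaults,   -- offset = 1, default = null
    lagInFrame(close, 2, close) OVER (ORDER BY date ASC) AS close_two_days_back  -- explicit offset and default
FROM stock_prices
ORDER BY date ASC;
```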
79
docs/en/sql-reference/window-functions/last_value.md
Normal file
@ -0,0 +1,79 @@
---
slug: /en/sql-reference/window-functions/last_value
sidebar_label: last_value
sidebar_position: 4
---

# last_value

Returns the last value evaluated within its ordered frame. By default, NULL arguments are skipped; however, the `RESPECT NULLS` modifier can be used to override this behaviour.

**Syntax**

```sql
last_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]]
  OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
        [ROWS or RANGE expression_to_bound_rows_within_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]])
```

Alias: `anyLast`.

:::note
Using the optional modifier `RESPECT NULLS` after `last_value(column_name)` will ensure that `NULL` arguments are not skipped.
See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information.
:::

For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).

**Returned value**

- The last value evaluated within its ordered frame.

**Example**

In this example, the `last_value` function is used to find the lowest-paid footballer in a fictional dataset of Premier League football player salaries.

Query:

```sql
DROP TABLE IF EXISTS salaries;
CREATE TABLE salaries
(
    `team` String,
    `player` String,
    `salary` UInt32,
    `position` String
)
Engine = Memory;

INSERT INTO salaries FORMAT Values
    ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'),
    ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
    ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'),
    ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'),
    ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
    ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
    ('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```

```sql
SELECT player, salary,
       last_value(player) OVER (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_player
FROM salaries;
```

Result:

```response
   ┌─player──────────┬─salary─┬─lowest_paid_player─┐
1. │ Gary Chen       │ 196000 │ Michael Stanley    │
2. │ Robert George   │ 195000 │ Michael Stanley    │
3. │ Charles Juarez  │ 190000 │ Michael Stanley    │
4. │ Scott Harrison  │ 180000 │ Michael Stanley    │
5. │ Douglas Benson  │ 150000 │ Michael Stanley    │
6. │ James Henderson │ 140000 │ Michael Stanley    │
7. │ Michael Stanley │ 100000 │ Michael Stanley    │
   └─────────────────┴────────┴────────────────────┘
```
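The example above widens the frame with `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` on purpose: with the default frame, which ends at the current row, `last_value` effectively returns the current row's value. A minimal sketch (reusing the `salaries` table above; the aliases are only illustrative) contrasting the two frames:

```sql
SELECT player, salary,
       last_value(player) OVER (ORDER BY salary DESC) AS last_in_default_frame,   -- frame ends at the current row
       last_value(player) OVER (ORDER BY salary DESC
           RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_player
FROM salaries;
```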
60
docs/en/sql-reference/window-functions/leadInFrame.md
Normal file
@ -0,0 +1,60 @@
---
slug: /en/sql-reference/window-functions/leadInFrame
sidebar_label: leadInFrame
sidebar_position: 9
---

# leadInFrame

Returns a value evaluated at the row that is offset rows after the current row within the ordered frame.

**Syntax**

```sql
leadInFrame(x[, offset[, default]])
  OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
        [ROWS or RANGE expression_to_bound_rows_within_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]])
```

For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).

**Parameters**
- `x` — Column name.
- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default).
- `default` — Value to return if the calculated row exceeds the boundaries of the window frame. (Optional - `null` by default).

**Returned value**

- Value evaluated at the row that is offset rows after the current row within the ordered frame.

**Example**

This example looks at [historical data](https://www.kaggle.com/datasets/sazidthe1/nobel-prize-data) for Nobel Prize winners and uses the `leadInFrame` function to return a list of successive winners in the physics category.

Query:

```sql
CREATE OR REPLACE VIEW nobel_prize_laureates AS FROM file('nobel_laureates_data.csv') SELECT *;
```

```sql
FROM nobel_prize_laureates
SELECT fullName,
       leadInFrame(year, 1, year) OVER (PARTITION BY category ORDER BY year) AS year,
       category,
       motivation
WHERE category == 'physics'
ORDER BY year DESC
LIMIT 9;
```

Result:

```response
┌─fullName─────────┬─year─┬─category─┬─motivation─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
1. │ Pierre Agostini │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │
2. │ Ferenc Krausz │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │
3. │ Anne L Huillier │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │
4. │ Alain Aspect │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │
5. │ Anton Zeilinger │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │
6. │ John Clauser │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │
7. │ Syukuro Manabe │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │
8. │ Klaus Hasselmann │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │
9. │ Giorgio Parisi │ 2021 │ physics │ for the discovery of the interplay of disorder and fluctuations in physical systems from atomic to planetary scales │
└──────────────────┴──────┴──────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
75
docs/en/sql-reference/window-functions/nth_value.md
Normal file
@ -0,0 +1,75 @@
---
slug: /en/sql-reference/window-functions/nth_value
sidebar_label: nth_value
sidebar_position: 5
---

# nth_value

Returns the first non-NULL value evaluated against the nth row (offset) in its ordered frame.

**Syntax**

```sql
nth_value (x, offset)
  OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
        [ROWS or RANGE expression_to_bound_rows_within_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]])
```

For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).

**Parameters**

- `x` — Column name.
- `offset` — The nth row to evaluate the current row against.

**Returned value**

- The first non-NULL value evaluated against the nth row (offset) in its ordered frame.

**Example**

In this example, the `nth_value` function is used to find the player with the third-highest salary in a fictional dataset of Premier League football player salaries.

Query:

```sql
DROP TABLE IF EXISTS salaries;
CREATE TABLE salaries
(
    `team` String,
    `player` String,
    `salary` UInt32,
    `position` String
)
Engine = Memory;

INSERT INTO salaries FORMAT Values
    ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
    ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
    ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'),
    ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'),
    ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
    ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
    ('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```

```sql
SELECT player, salary, nth_value(player, 3) OVER (ORDER BY salary DESC) AS third_highest_salary FROM salaries;
```

Result:

```response
   ┌─player──────────┬─salary─┬─third_highest_salary─┐
1. │ Gary Chen       │ 195000 │                      │
2. │ Robert George   │ 195000 │                      │
3. │ Charles Juarez  │ 190000 │ Charles Juarez       │
4. │ Scott Harrison  │ 180000 │ Charles Juarez       │
5. │ Douglas Benson  │ 150000 │ Charles Juarez       │
6. │ James Henderson │ 140000 │ Charles Juarez       │
7. │ Michael Stanley │ 100000 │ Charles Juarez       │
   └─────────────────┴────────┴──────────────────────┘
```
74
docs/en/sql-reference/window-functions/rank.md
Normal file
@ -0,0 +1,74 @@
---
slug: /en/sql-reference/window-functions/rank
sidebar_label: rank
sidebar_position: 6
---

# rank

Ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row, it receives the same rank as that previous row.
The rank of the next distinct value is then equal to the previous rank plus a gap equal to the number of times that previous rank was given.

The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking.

**Syntax**

```sql
rank (column_name)
  OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
        [ROWS or RANGE expression_to_bound_rows_within_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]])
```

For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).

**Returned value**

- A number for the current row within its partition, including gaps. [UInt64](../data-types/int-uint.md).

**Example**

The following example is based on the example provided in the instructional video [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA).

Query:

```sql
CREATE TABLE salaries
(
    `team` String,
    `player` String,
    `salary` UInt32,
    `position` String
)
Engine = Memory;

INSERT INTO salaries FORMAT Values
    ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
    ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
    ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
    ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
    ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
    ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
    ('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```

```sql
SELECT player, salary,
       rank() OVER (ORDER BY salary DESC) AS rank
FROM salaries;
```

Result:

```response
   ┌─player──────────┬─salary─┬─rank─┐
1. │ Gary Chen       │ 195000 │    1 │
2. │ Robert George   │ 195000 │    1 │
3. │ Charles Juarez  │ 190000 │    3 │
4. │ Douglas Benson  │ 150000 │    4 │
5. │ Michael Stanley │ 150000 │    4 │
6. │ Scott Harrison  │ 150000 │    4 │
7. │ James Henderson │ 140000 │    7 │
   └─────────────────┴────────┴──────┘
```
67
docs/en/sql-reference/window-functions/row_number.md
Normal file
@ -0,0 +1,67 @@
---
slug: /en/sql-reference/window-functions/row_number
sidebar_label: row_number
sidebar_position: 2
---

# row_number

Numbers the current row within its partition starting from 1.

**Syntax**

```sql
row_number (column_name)
  OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
        [ROWS or RANGE expression_to_bound_rows_within_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]])
```

For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).

**Returned value**

- A number for the current row within its partition. [UInt64](../data-types/int-uint.md).

**Example**

The following example is based on the example provided in the instructional video [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA).

Query:

```sql
CREATE TABLE salaries
(
    `team` String,
    `player` String,
    `salary` UInt32,
    `position` String
)
Engine = Memory;

INSERT INTO salaries FORMAT Values
    ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
    ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
    ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
    ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
    ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M');
```

```sql
SELECT player, salary,
       row_number() OVER (ORDER BY salary DESC) AS row_number
FROM salaries;
```

Result:

```response
   ┌─player──────────┬─salary─┬─row_number─┐
1. │ Gary Chen       │ 195000 │          1 │
2. │ Robert George   │ 195000 │          2 │
3. │ Charles Juarez  │ 190000 │          3 │
4. │ Scott Harrison  │ 150000 │          4 │
5. │ Michael Stanley │ 150000 │          5 │
   └─────────────────┴────────┴────────────┘
```
@ -376,6 +376,7 @@ void LocalServer::setupUsers()
" </networks>"
" <profile>default</profile>"
" <quota>default</quota>"
" <named_collection_control>1</named_collection_control>"
" </default>"
" </users>"
" <quotas>"
@ -516,6 +516,9 @@
/// Save query in history only if it is different.
let previous_query = '';

/// Start of the last query
let last_query_start = 0;

const current_url = new URL(window.location);
const opened_locally = location.protocol == 'file:';

@ -567,6 +570,8 @@
'&password=' + encodeURIComponent(password)
}

last_query_start = performance.now();

const xhr = new XMLHttpRequest;

xhr.open('POST', url, true);
@ -579,7 +584,8 @@
if (posted_request_num != request_num) {
return;
} else if (this.readyState === XMLHttpRequest.DONE) {
renderResponse(this.status, this.response);
const elapsed_msec = performance.now() - last_query_start;
renderResponse(this.status, this.response, elapsed_msec);

/// The query is saved in browser history (in state JSON object)
/// as well as in URL fragment identifier.
@ -587,7 +593,8 @@
const state = {
query: query,
status: this.status,
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
response: this.response.length > 100000 ? null : this.response, /// Lower than the browser's limit.
elapsed_msec: elapsed_msec,
};
const title = "ClickHouse Query: " + query;

@ -617,7 +624,7 @@
xhr.send(query);
}

function renderResponse(status, response) {
function renderResponse(status, response, elapsed_msec) {
document.getElementById('hourglass').style.display = 'none';

if (status === 200) {
@ -632,6 +639,7 @@
renderChart(json);
} else {
renderUnparsedResult(response);
stats.innerText = `Elapsed (client-side): ${(elapsed_msec / 1000).toFixed(3)} sec.`;
}
document.getElementById('check-mark').style.display = 'inline';
} else {
@ -651,7 +659,7 @@
clear();
return;
}
renderResponse(event.state.status, event.state.response);
renderResponse(event.state.status, event.state.response, event.state.elapsed_msec);
};

if (window.location.hash) {
@ -17,6 +17,8 @@ src_paths = ["src", "tests/ci", "tests/sqllogic"]
[tool.pylint.'MESSAGES CONTROL']
# pytest.mark.parametrize is not callable (not-callable)
disable = '''
pointless-string-statement,
line-too-long,
missing-docstring,
too-few-public-methods,
invalid-name,
@ -35,10 +37,10 @@ disable = '''
broad-except,
bare-except,
no-else-return,
global-statement
global-statement,
f-string-without-interpolation,
'''

[tool.pylint.SIMILARITIES]
# due to SQL
min-similarity-lines=1000
@ -38,10 +38,19 @@ namespace ErrorCodes
extern const int CANNOT_MREMAP;
}

void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size)
{
auto & logger = Poco::Logger::root();
LOG_FATAL(&logger, "Logical error: '{}'.", description);
if (trace)
LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace, trace_offset, trace_size));
abort();
}

void abortOnFailedAssertion(const String & description)
{
LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description);
abort();
StackTrace st;
abortOnFailedAssertion(description, st.getFramePointers().data(), st.getOffset(), st.getSize());
}

bool terminate_on_any_exception = false;
@ -58,7 +67,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc
#ifdef ABORT_ON_LOGICAL_ERROR
if (code == ErrorCodes::LOGICAL_ERROR)
{
abortOnFailedAssertion(msg);
abortOnFailedAssertion(msg, trace.data(), 0, trace.size());
}
#endif
@ -25,8 +25,6 @@ namespace DB

class AtomicLogger;

[[noreturn]] void abortOnFailedAssertion(const String & description);

/// This flag can be set for testing purposes - to check that no exceptions are thrown.
extern bool terminate_on_any_exception;

@ -167,6 +165,8 @@ protected:
mutable std::vector<StackTrace::FramePointers> capture_thread_frame_pointers;
};

[[noreturn]] void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size);
[[noreturn]] void abortOnFailedAssertion(const String & description);

std::string getExceptionStackTraceString(const std::exception & e);
std::string getExceptionStackTraceString(std::exception_ptr e);
@ -14,6 +14,7 @@

namespace DB
{

namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
@ -508,6 +508,7 @@ The server successfully detected this situation and will download merged part fr
M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \
M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \
M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \
M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized") \
M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \
M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \
M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \
@ -545,7 +545,7 @@ std::string StackTrace::toString() const
return toStringCached(frame_pointers, offset, size);
}

std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size)
std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offset, size_t size)
{
__msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw));
@ -59,7 +59,7 @@ public:
const FramePointers & getFramePointers() const { return frame_pointers; }
std::string toString() const;

static std::string toString(void ** frame_pointers, size_t offset, size_t size);
static std::string toString(void * const * frame_pointers, size_t offset, size_t size);
static void dropCache();

/// @param fatal - if true, will process inline frames (slower)
@ -996,6 +996,10 @@ void ZooKeeper::receiveEvent()

if (request_info.callback)
request_info.callback(*response);

/// Finalize current session if we receive a hardware error from ZooKeeper
if (err != Error::ZOK && isHardwareError(err))
finalize(/*error_send*/ false, /*error_receive*/ true, fmt::format("Hardware error: {}", err));
}
@ -25,7 +25,7 @@ namespace DB
template <typename To, typename From>
inline To assert_cast(From && from)
{
#ifndef NDEBUG
#ifdef ABORT_ON_LOGICAL_ERROR
try
{
if constexpr (std::is_pointer_v<To>)
@ -228,7 +228,6 @@ Pool::Entry Pool::tryGet()
for (auto connection_it = connections.cbegin(); connection_it != connections.cend();)
{
Connection * connection_ptr = *connection_it;
/// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator
if (connection_ptr->ref_count == 0)
{
{
@ -64,17 +64,6 @@ public:
decrementRefCount();
}

Entry & operator= (const Entry & src) /// NOLINT
{
pool = src.pool;
if (data)
decrementRefCount();
data = src.data;
if (data)
incrementRefCount();
return * this;
}

bool isNull() const
{
return data == nullptr;
@ -13,13 +13,11 @@ mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool)

constexpr size_t max_tries = 3;

mysqlxx::Pool::Entry worker_connection;

for (size_t try_no = 1; try_no <= max_tries; ++try_no)
{
try
{
worker_connection = connections_pool.tryGet();
mysqlxx::Pool::Entry worker_connection = connections_pool.tryGet();

if (!worker_connection.isNull())
{
@ -11,6 +11,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTTLElement.h>
#include <Poco/String.h>
@ -211,6 +212,13 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction
qualified_name.database = table_identifier->getDatabaseName();
qualified_name.table = table_identifier->shortName();
}
else if (arg->as<ASTSubquery>())
{
/// Allow IN subquery.
/// Do not add tables from the subquery into dependencies,
/// because CREATE will succeed anyway.
return;
}
else
{
assert(false);
@ -107,12 +107,24 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name,

StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name)
{
// it is important to call the destructors of not_in_use without
// locked mutex to avoid potential deadlock.
DetachedTables not_in_use;
std::lock_guard lock(mutex);
auto table = DatabaseOrdinary::detachTableUnlocked(name);
table_name_to_path.erase(name);
detached_tables.emplace(table->getStorageID().uuid, table);
not_in_use = cleanupDetachedTables();
StoragePtr table;
{
std::lock_guard lock(mutex);
table = DatabaseOrdinary::detachTableUnlocked(name);
table_name_to_path.erase(name);
detached_tables.emplace(table->getStorageID().uuid, table);
not_in_use = cleanupDetachedTables();
}

if (!not_in_use.empty())
{
not_in_use.clear();
LOG_DEBUG(log, "Finished removing not used detached tables");
}

return table;
}
@ -29,6 +29,7 @@
#include <Common/randomNumber.h>
#include <Common/setThreadName.h>
#include <base/sleep.h>
#include <base/scope_guard.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <Parsers/CommonParsers.h>
@ -532,13 +533,17 @@ static inline void dumpDataForTables(
bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata)
{
bool opened_transaction = false;
mysqlxx::PoolWithFailover::Entry connection;

while (!isCancelled())
{
try
{
connection = pool.tryGet();
mysqlxx::PoolWithFailover::Entry connection = pool.tryGet();
SCOPE_EXIT({
if (opened_transaction)
connection->query("ROLLBACK").execute();
});

if (connection.isNull())
{
if (settings->max_wait_time_when_mysql_unavailable < 0)
@ -602,9 +607,6 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
{
tryLogCurrentException(log);

if (opened_transaction)
connection->query("ROLLBACK").execute();

if (settings->max_wait_time_when_mysql_unavailable < 0)
throw;
1660
src/Formats/JSONExtractTree.cpp
Normal file
File diff suppressed because it is too large
41
src/Formats/JSONExtractTree.h
Normal file
@ -0,0 +1,41 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <Columns/IColumn.h>
#include <Formats/FormatSettings.h>


namespace DB
{

struct JSONExtractInsertSettings
{
/// If false, JSON boolean values won't be inserted into columns with integer types
/// It's used in JSONExtractInt64/JSONExtractUInt64/... functions.
bool convert_bool_to_integer = true;
/// If true, when complex type like Array/Map has both valid and invalid elements,
/// the default value will be inserted on invalid elements.
/// For example, if we have [1, "hello", 2] and type Array(UInt32),
/// we will insert [1, 0, 2] in the column. Used in all JSONExtract functions.
bool insert_default_on_invalid_elements_in_complex_types = false;
};

template <typename JSONParser>
class JSONExtractTreeNode
{
public:
JSONExtractTreeNode() = default;
virtual ~JSONExtractTreeNode() = default;
virtual bool insertResultToColumn(IColumn &, const typename JSONParser::Element &, const JSONExtractInsertSettings & insert_setting, const FormatSettings & format_settings, String & error) const = 0;
};

/// Build a tree for insertion JSON element into a column with provided data type.
template <typename JSONParser>
std::unique_ptr<JSONExtractTreeNode<JSONParser>> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message);

template <typename JSONParser>
void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings);

template <typename JSONParser, typename NumberType>
bool tryGetNumericValueFromJSONElement(NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error);

}
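The `insert_default_on_invalid_elements_in_complex_types` comment above can be illustrated with a small sketch at the SQL level (illustrative only; the literal and the requested type are taken from the comment, and the exact output is not reproduced here): extracting a mixed JSON array into `Array(UInt32)` is expected to place the type's default value where an element cannot be parsed.

```sql
-- The string element cannot be parsed as UInt32, so the default value 0 is expected in its place.
SELECT JSONExtract('[1, "hello", 2]', 'Array(UInt32)');
```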
@ -225,19 +225,6 @@ namespace
Paths paths;
};

bool checkIfTypesAreEqual(const DataTypes & types)
{
if (types.empty())
return true;

for (size_t i = 1; i < types.size(); ++i)
{
if (!types[0]->equals(*types[i]))
return false;
}
return true;
}

void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
type_indexes.clear();
@ -272,24 +259,31 @@ namespace
type_indexes.erase(TypeIndex::Nothing);
}

/// If we have both Int64 and UInt64, convert all Int64 to UInt64,
/// If we have both Int64 and UInt64, convert all not-negative Int64 to UInt64,
/// because UInt64 is inferred only in case of Int64 overflow.
void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes)
void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
{
if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64))
return;

bool have_negative_integers = false;
for (auto & type : data_types)
{
if (WhichDataType(type).isInt64())
type = std::make_shared<DataTypeUInt64>();
{
bool is_negative = json_info && json_info->negative_integers.contains(type.get());
have_negative_integers |= is_negative;
if (!is_negative)
type = std::make_shared<DataTypeUInt64>();
}
}

type_indexes.erase(TypeIndex::Int64);
if (!have_negative_integers)
type_indexes.erase(TypeIndex::Int64);
}

/// If we have both Int64 and Float64 types, convert all Int64 to Float64.
void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes)
void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
{
bool have_floats = type_indexes.contains(TypeIndex::Float64);
bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64);
@ -300,7 +294,12 @@ namespace
{
WhichDataType which(type);
if (which.isInt64() || which.isUInt64())
type = std::make_shared<DataTypeFloat64>();
{
auto new_type = std::make_shared<DataTypeFloat64>();
if (json_info && json_info->numbers_parsed_from_json_strings.erase(type.get()))
json_info->numbers_parsed_from_json_strings.insert(new_type.get());
type = new_type;
}
}

type_indexes.erase(TypeIndex::Int64);
@ -635,9 +634,9 @@ namespace
if (settings.try_infer_integers)
{
/// Transform Int64 to UInt64 if needed.
transformIntegers(data_types, type_indexes);
transformIntegers(data_types, type_indexes, json_info);
/// Transform integers to floats if needed.
transformIntegersAndFloatsToFloats(data_types, type_indexes);
transformIntegersAndFloatsToFloats(data_types, type_indexes, json_info);
}

/// Transform Date to DateTime or both to String if needed.
@ -887,7 +886,7 @@ namespace
}

template <bool is_json>
DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings)
DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
if (buf.eof())
return nullptr;
@ -911,7 +910,12 @@ namespace
Int64 tmp_int;
buf.position() = number_start;
if (tryReadIntText(tmp_int, buf))
return std::make_shared<DataTypeInt64>();
{
auto type = std::make_shared<DataTypeInt64>();
if (json_info && tmp_int < 0)
json_info->negative_integers.insert(type.get());
return type;
}

/// In case of Int64 overflow we can try to infer UInt64.
UInt64 tmp_uint;
@ -934,7 +938,12 @@ namespace

Int64 tmp_int;
if (tryReadIntText(tmp_int, peekable_buf))
return std::make_shared<DataTypeInt64>();
{
auto type = std::make_shared<DataTypeInt64>();
if (json_info && tmp_int < 0)
json_info->negative_integers.insert(type.get());
return type;
}
peekable_buf.rollbackToCheckpoint(/* drop= */ true);

/// In case of Int64 overflow we can try to infer UInt64.
@ -952,7 +961,7 @@ namespace
}

template <bool is_json>
DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings)
DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_inference_info = nullptr)
{
ReadBufferFromString buf(field);

@ -960,7 +969,12 @@ namespace
{
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.eof())
return std::make_shared<DataTypeInt64>();
{
auto type = std::make_shared<DataTypeInt64>();
if (json_inference_info && tmp_int < 0)
json_inference_info->negative_integers.insert(type.get());
return type;
}

/// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
buf.position() = buf.buffer().begin();
@ -1011,7 +1025,7 @@ namespace
{
if (settings.json.try_infer_numbers_from_strings)
{
if (auto number_type = tryInferNumberFromStringImpl<true>(field, settings))
if (auto number_type = tryInferNumberFromStringImpl<true>(field, settings, json_info))
{
json_info->numbers_parsed_from_json_strings.insert(number_type.get());
return number_type;
@ -1254,10 +1268,23 @@ namespace
}

/// Number
return tryInferNumber<is_json>(buf, settings);
return tryInferNumber<is_json>(buf, settings, json_info);
}
}

bool checkIfTypesAreEqual(const DataTypes & types)
{
if (types.empty())
return true;

for (size_t i = 1; i < types.size(); ++i)
{
if (!types[0]->equals(*types[i]))
return false;
}
return true;
}

void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
{
DataTypes types = {first, second};
@ -1275,6 +1302,11 @@ void transformInferredJSONTypesIfNeeded(
second = std::move(types[1]);
}

void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
transformInferredTypesIfNeededImpl<true>(types, settings, json_info);
}

void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
{
JSONInferenceInfo json_info;
@ -1396,6 +1428,12 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting
return tryInferNumberFromStringImpl<false>(field, settings);
}

DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
return tryInferNumberFromStringImpl<false>(field, settings, json_info);

}

DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings)
{
if (settings.try_infer_dates && tryInferDate(field))
@ -2,6 +2,7 @@

#include <DataTypes/IDataType.h>
#include <IO/ReadBuffer.h>
#include <Formats/FormatSettings.h>

#include <vector>

@ -18,6 +19,11 @@ struct JSONInferenceInfo
/// We store numbers that were parsed from strings.
/// It's used in types transformation to change such numbers back to string if needed.
std::unordered_set<const IDataType *> numbers_parsed_from_json_strings;
/// Store integer types that were inferred from negative numbers.
/// It's used to determine common type for Int64 and UInt64
/// TODO: check it not only in JSON formats.
std::unordered_set<const IDataType *> negative_integers;

/// Indicates if currently we are inferring type for Map/Object key.
bool is_object_key = false;
/// When we transform types for the same column from different files
@ -48,6 +54,7 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma
/// Try to parse a number value from a string. By default, it tries to parse Float64,
/// but if setting try_infer_integers is enabled, it also tries to parse Int64.
DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings);
DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info);

/// It takes two types inferred for the same column and tries to transform them to a common type if possible.
/// It's also used when we try to infer some not ordinary types from another types.
@ -77,6 +84,7 @@ void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, c
/// Example 2:
/// We merge DataTypeJSONPaths types to a single DataTypeJSONPaths type with union of all JSON paths.
void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info);
void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info);

/// Make final transform for types inferred in JSON format. It does 3 types of transformation:
/// 1) Checks if type is unnamed Tuple(...), tries to transform nested types to find a common type for them and if all nested types
@ -107,4 +115,6 @@ NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header);
/// Check if type contains Nothing, like Array(Tuple(Nullable(Nothing), String))
bool checkIfTypeIsComplete(const DataTypePtr & type);

bool checkIfTypesAreEqual(const DataTypes & types);

}
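The `negative_integers` bookkeeping added above changes how a common type is chosen when one value can only be represented as `UInt64` (inferred on `Int64` overflow) while another value is negative. A minimal sketch at the SQL level (illustrative only; the behaviour is what the comments above describe, and the exact output is not reproduced here):

```sql
-- One value only fits in UInt64, the other is negative, so the two inferred
-- integer types can no longer be unconditionally merged into UInt64.
DESC format(JSONEachRow, '{"x" : 18446744073709551615} {"x" : -1}');
```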
File diff suppressed because it is too large
File diff suppressed because it is too large
399
src/Functions/changeDate.cpp
Normal file
@ -0,0 +1,399 @@
#include "Common/DateLUTImpl.h"
#include "Common/Exception.h"
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnsDateTime.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Common/DateLUT.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/IDataType.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/castColumn.h>

#include <memory>

namespace DB
{

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

namespace
{

enum class Component
{
Year,
Month,
Day,
Hour,
Minute,
Second
};

}

template <typename Traits>
class FunctionChangeDate : public IFunction
{
public:
static constexpr auto name = Traits::name;

static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionChangeDate>(); }
String getName() const override { return Traits::name; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 2; }

DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
{"date_or_datetime", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date or date with time"},
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), nullptr, "Integer"}
};
validateFunctionArguments(*this, arguments, args);

const auto & input_type = arguments[0].type;

if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second)
{
if (isDate(input_type))
return std::make_shared<DataTypeDateTime>();
if (isDate32(input_type))
return std::make_shared<DataTypeDateTime64>(DataTypeDateTime64::default_scale);
}

return input_type;
}

ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const auto & input_type = arguments[0].type;
if (isDate(input_type))
{
if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second)
return execute<DataTypeDate, DataTypeDateTime>(arguments, input_type, result_type, input_rows_count);
return execute<DataTypeDate, DataTypeDate>(arguments, input_type, result_type, input_rows_count);
}
if (isDate32(input_type))
{
if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second)
return execute<DataTypeDate32, DataTypeDateTime64>(arguments, input_type, result_type, input_rows_count);
return execute<DataTypeDate32, DataTypeDate32>(arguments, input_type, result_type, input_rows_count);
}
if (isDateTime(input_type))
return execute<DataTypeDateTime, DataTypeDateTime>(arguments, input_type, result_type, input_rows_count);
if (isDateTime64(input_type))
return execute<DataTypeDateTime64, DataTypeDateTime64>(arguments, input_type, result_type, input_rows_count);

throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid input type");
}

template <typename InputDataType, typename ResultDataType>
ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const
{
typename ResultDataType::ColumnType::MutablePtr result_col;
if constexpr (std::is_same_v<ResultDataType, DataTypeDateTime64>)
{
auto scale = DataTypeDateTime64::default_scale;
if constexpr (std::is_same_v<InputDataType, DateTime64>)
scale = typeid_cast<const DataTypeDateTime64 &>(*result_type).getScale();
result_col = ResultDataType::ColumnType::create(input_rows_count, scale);
}
else
result_col = ResultDataType::ColumnType::create(input_rows_count);

auto date_time_col = arguments[0].column->convertToFullIfNeeded();
const auto & date_time_col_data = typeid_cast<const typename InputDataType::ColumnType &>(*date_time_col).getData();

auto value_col = castColumn(arguments[1], std::make_shared<DataTypeFloat64>());
value_col = value_col->convertToFullIfNeeded();
const auto & value_col_data = typeid_cast<const ColumnFloat64 &>(*value_col).getData();

auto & result_col_data = result_col->getData();

if constexpr (std::is_same_v<InputDataType, DataTypeDateTime64>)
{
const auto scale = typeid_cast<const DataTypeDateTime64 &>(*result_type).getScale();
const auto & date_lut = typeid_cast<const DataTypeDateTime64 &>(*result_type).getTimeZone();

Int64 deg = 1;
for (size_t j = 0; j < scale; ++j)
deg *= 10;

for (size_t i = 0; i < input_rows_count; ++i)
{
Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i] / deg);
Int64 fraction = date_time_col_data[i] % deg;

result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, scale, fraction);
}
}
else if constexpr (std::is_same_v<InputDataType, DataTypeDate32> && std::is_same_v<ResultDataType, DataTypeDateTime64>)
{
const auto & date_lut = typeid_cast<const DataTypeDateTime64 &>(*result_type).getTimeZone();
for (size_t i = 0; i < input_rows_count; ++i)
{
Int64 time = static_cast<Int64>(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000;
result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, 3, 0);
}
}
else if constexpr (std::is_same_v<InputDataType, DataTypeDate> && std::is_same_v<ResultDataType, DataTypeDateTime>)
{
const auto & date_lut = typeid_cast<const DataTypeDateTime &>(*result_type).getTimeZone();
for (size_t i = 0; i < input_rows_count; ++i)
{
Int64 time = static_cast<Int64>(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000;
result_col_data[i] = static_cast<UInt32>(getChangedDate(time, value_col_data[i], result_type, date_lut));
}
}
else if constexpr (std::is_same_v<InputDataType, DataTypeDateTime>)
{
const auto & date_lut = typeid_cast<const DataTypeDateTime &>(*result_type).getTimeZone();
for (size_t i = 0; i < input_rows_count; ++i)
{
Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i]);
result_col_data[i] = static_cast<UInt32>(getChangedDate(time, value_col_data[i], result_type, date_lut));
}
}
else
{
const auto & date_lut = DateLUT::instance();
for (size_t i = 0; i < input_rows_count; ++i)
{
Int64 time;
if (isDate(input_type))
time = static_cast<Int64>(date_lut.toNumYYYYMMDD(DayNum(date_time_col_data[i]))) * 1'000'000;
else
time = static_cast<Int64>(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000;

if (isDate(result_type))
result_col_data[i] = static_cast<UInt16>(getChangedDate(time, value_col_data[i], result_type, date_lut));
else
result_col_data[i] = static_cast<Int32>(getChangedDate(time, value_col_data[i], result_type, date_lut));
}
}

return result_col;
}

Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const
{
auto year = time / 10'000'000'000;
auto month = (time % 10'000'000'000) / 100'000'000;
auto day = (time % 100'000'000) / 1'000'000;
auto hours = (time % 1'000'000) / 10'000;
auto minutes = (time % 10'000) / 100;
auto seconds = time % 100;

Int64 min_date = 0, max_date = 0;
Int16 min_year, max_year;
if (isDate(result_type))
{
min_date = date_lut.makeDayNum(1970, 1, 1);
max_date = date_lut.makeDayNum(2149, 6, 6);
min_year = 1970;
max_year = 2149;
}
else if (isDate32(result_type))
{
min_date = date_lut.makeDayNum(1900, 1, 1);
max_date = date_lut.makeDayNum(2299, 12, 31);
min_year = 1900;
max_year = 2299;
}
else if (isDateTime(result_type))
{
min_date = 0;
max_date = 0x0FFFFFFFFLL;
min_year = 1970;
max_year = 2106;
}
else
{
min_date = DecimalUtils::decimalFromComponents<DateTime64>(
date_lut.makeDateTime(1900, 1, 1, 0, 0, 0),
static_cast<Int64>(0),
static_cast<UInt32>(scale));
Int64 deg = 1;
for (Int64 j = 0; j < scale; ++j)
deg *= 10;
max_date = DecimalUtils::decimalFromComponents<DateTime64>(
date_lut.makeDateTime(2299, 12, 31, 23, 59, 59),
static_cast<Int64>(deg - 1),
static_cast<UInt32>(scale));
min_year = 1900;
max_year = 2299;
}

switch (Traits::component)
{
case Component::Year:
|
||||
if (new_value < min_year)
|
||||
return min_date;
|
||||
else if (new_value > max_year)
|
||||
return max_date;
|
||||
year = static_cast<Int16>(new_value);
|
||||
break;
|
||||
case Component::Month:
|
||||
if (new_value < 1 || new_value > 12)
|
||||
return min_date;
|
||||
month = static_cast<UInt8>(new_value);
|
||||
break;
|
||||
case Component::Day:
|
||||
if (new_value < 1 || new_value > 31)
|
||||
return min_date;
|
||||
day = static_cast<UInt8>(new_value);
|
||||
break;
|
||||
case Component::Hour:
|
||||
if (new_value < 0 || new_value > 23)
|
||||
return min_date;
|
||||
hours = static_cast<UInt8>(new_value);
|
||||
break;
|
||||
case Component::Minute:
|
||||
if (new_value < 0 || new_value > 59)
|
||||
return min_date;
|
||||
minutes = static_cast<UInt8>(new_value);
|
||||
break;
|
||||
case Component::Second:
|
||||
if (new_value < 0 || new_value > 59)
|
||||
return min_date;
|
||||
seconds = static_cast<UInt8>(new_value);
|
||||
break;
|
||||
}
|
||||
|
||||
Int64 result;
|
||||
if (isDate(result_type) || isDate32(result_type))
|
||||
result = date_lut.makeDayNum(year, month, day);
|
||||
else if (isDateTime(result_type))
|
||||
result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds);
|
||||
else
|
||||
#ifndef __clang_analyzer__
|
||||
/// ^^ This looks funny. It is the least terrible suppression of a false positive reported by clang-analyzer (a sub-class
|
||||
/// of clang-tidy checks) deep down in 'decimalFromComponents'. Usual suppressions of the form NOLINT* don't work here (they
|
||||
/// would only affect code in _this_ file), and suppressing the issue in 'decimalFromComponents' may suppress true positives.
|
||||
result = DecimalUtils::decimalFromComponents<DateTime64>(
|
||||
date_lut.makeDateTime(year, month, day, hours, minutes, seconds),
|
||||
fraction,
|
||||
static_cast<UInt32>(scale));
|
||||
#else
|
||||
{
|
||||
UNUSED(fraction);
|
||||
result = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (result < min_date)
|
||||
return min_date;
|
||||
|
||||
if (result > max_date)
|
||||
return max_date;
|
||||
|
||||
return result;
|
||||
}
|
||||
};
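Note on getChangedDate() above: it operates on a packed YYYYMMDDhhmmss integer, splitting it with integer division and modulo, overwriting one component, clamping out-of-range values, and reassembling the result through the date LUT. A minimal Python sketch of the decomposition and range check (an illustration only, not ClickHouse code; time zones and the LUT reassembly are omitted):

import datetime

def change_component(packed: int, component: str, new_value: int):
    """Split a packed YYYYMMDDhhmmss value, replace one component, reject out-of-range values."""
    parts = {
        "year":   packed // 10_000_000_000,
        "month":  (packed % 10_000_000_000) // 100_000_000,
        "day":    (packed % 100_000_000) // 1_000_000,
        "hour":   (packed % 1_000_000) // 10_000,
        "minute": (packed % 10_000) // 100,
        "second": packed % 100,
    }
    limits = {"month": (1, 12), "day": (1, 31), "hour": (0, 23), "minute": (0, 59), "second": (0, 59)}
    if component in limits:
        lo, hi = limits[component]
        if not lo <= new_value <= hi:
            return None  # the C++ code returns min_date in this case
    parts[component] = new_value
    return datetime.datetime(parts["year"], parts["month"], parts["day"],
                             parts["hour"], parts["minute"], parts["second"])

# changeMonth(2024-07-15 12:34:56, 2) -> 2024-02-15 12:34:56
assert change_component(20240715123456, "month", 2) == datetime.datetime(2024, 2, 15, 12, 34, 56)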
|
||||
|
||||
|
||||
struct ChangeYearTraits
|
||||
{
|
||||
static constexpr auto name = "changeYear";
|
||||
static constexpr auto component = Component::Year;
|
||||
};
|
||||
|
||||
struct ChangeMonthTraits
|
||||
{
|
||||
static constexpr auto name = "changeMonth";
|
||||
static constexpr auto component = Component::Month;
|
||||
};
|
||||
|
||||
struct ChangeDayTraits
|
||||
{
|
||||
static constexpr auto name = "changeDay";
|
||||
static constexpr auto component = Component::Day;
|
||||
};
|
||||
|
||||
struct ChangeHourTraits
|
||||
{
|
||||
static constexpr auto name = "changeHour";
|
||||
static constexpr auto component = Component::Hour;
|
||||
};
|
||||
|
||||
struct ChangeMinuteTraits
|
||||
{
|
||||
static constexpr auto name = "changeMinute";
|
||||
static constexpr auto component = Component::Minute;
|
||||
};
|
||||
|
||||
struct ChangeSecondTraits
|
||||
{
|
||||
static constexpr auto name = "changeSecond";
|
||||
static constexpr auto component = Component::Second;
|
||||
};
|
||||
|
||||
REGISTER_FUNCTION(ChangeDate)
|
||||
{
|
||||
{
|
||||
FunctionDocumentation::Description description = "Changes the year component of a date or date time.";
|
||||
FunctionDocumentation::Syntax syntax = "changeYear(date_or_datetime, value);";
|
||||
FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}};
|
||||
FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime.";
|
||||
FunctionDocumentation::Categories categories = {"Dates and Times"};
|
||||
FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories};
|
||||
factory.registerFunction<FunctionChangeDate<ChangeYearTraits>>(function_documentation);
|
||||
}
|
||||
{
|
||||
FunctionDocumentation::Description description = "Changes the month component of a date or date time.";
|
||||
FunctionDocumentation::Syntax syntax = "changeMonth(date_or_datetime, value);";
|
||||
FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}};
|
||||
FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime.";
|
||||
FunctionDocumentation::Categories categories = {"Dates and Times"};
|
||||
FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories};
|
||||
factory.registerFunction<FunctionChangeDate<ChangeMonthTraits>>(function_documentation);
|
||||
}
|
||||
{
|
||||
FunctionDocumentation::Description description = "Changes the day component of a date or date time.";
|
||||
FunctionDocumentation::Syntax syntax = "changeDay(date_or_datetime, value);";
|
||||
FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}};
|
||||
FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime.";
|
||||
FunctionDocumentation::Categories categories = {"Dates and Times"};
|
||||
FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories};
|
||||
factory.registerFunction<FunctionChangeDate<ChangeDayTraits>>(function_documentation);
|
||||
}
|
||||
{
|
||||
FunctionDocumentation::Description description = "Changes the hour component of a date or date time.";
|
||||
FunctionDocumentation::Syntax syntax = "changeHour(date_or_datetime, value);";
|
||||
FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}};
|
||||
FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64.";
|
||||
FunctionDocumentation::Categories categories = {"Dates and Times"};
|
||||
FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories};
|
||||
factory.registerFunction<FunctionChangeDate<ChangeHourTraits>>(function_documentation);
|
||||
}
|
||||
{
|
||||
FunctionDocumentation::Description description = "Changes the minute component of a date or date time.";
|
||||
FunctionDocumentation::Syntax syntax = "changeMinute(date_or_datetime, value);";
|
||||
FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}};
|
||||
FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64.";
|
||||
FunctionDocumentation::Categories categories = {"Dates and Times"};
|
||||
FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories};
|
||||
factory.registerFunction<FunctionChangeDate<ChangeMinuteTraits>>(function_documentation);
|
||||
}
|
||||
{
|
||||
FunctionDocumentation::Description description = "Changes the second component of a date or date time.";
|
||||
FunctionDocumentation::Syntax syntax = "changeSecond(date_or_datetime, value);";
|
||||
FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}};
|
||||
FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64.";
|
||||
FunctionDocumentation::Categories categories = {"Dates and Times"};
|
||||
FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories};
|
||||
factory.registerFunction<FunctionChangeDate<ChangeSecondTraits>>(function_documentation);
|
||||
}
|
||||
}
|
||||
|
||||
}
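For reference, the type dispatch in executeImpl() above follows the rule spelled out in the documentation blocks: changing the hour, minute, or second of a Date yields a DateTime, of a Date32 yields a DateTime64, and every other combination keeps the input type. A small sketch of that mapping (illustration only, not part of the source):

def change_date_result_type(input_type: str, component: str) -> str:
    """Result type of changeYear/.../changeSecond for a given input type (mirrors executeImpl)."""
    if component in ("Hour", "Minute", "Second"):
        # Date and Date32 carry no time-of-day, so the result is promoted to a time type.
        return {"Date": "DateTime", "Date32": "DateTime64"}.get(input_type, input_type)
    return input_type

assert change_date_result_type("Date", "Hour") == "DateTime"
assert change_date_result_type("Date32", "Second") == "DateTime64"
assert change_date_result_type("DateTime64", "Year") == "DateTime64"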
|
@ -49,7 +49,7 @@ namespace
|
||||
const String & dest_blob_,
|
||||
std::shared_ptr<const AzureBlobStorage::RequestSettings> settings_,
|
||||
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
|
||||
const Poco::Logger * log_)
|
||||
LoggerPtr log_)
|
||||
: create_read_buffer(create_read_buffer_)
|
||||
, client(client_)
|
||||
, offset (offset_)
|
||||
@ -74,7 +74,7 @@ namespace
|
||||
const String & dest_blob;
|
||||
std::shared_ptr<const AzureBlobStorage::RequestSettings> settings;
|
||||
ThreadPoolCallbackRunnerUnsafe<void> schedule;
|
||||
const Poco::Logger * log;
|
||||
const LoggerPtr log;
|
||||
size_t max_single_part_upload_size;
|
||||
|
||||
struct UploadPartTask
|
||||
@ -83,7 +83,6 @@ namespace
|
||||
size_t part_size;
|
||||
std::vector<std::string> block_ids;
|
||||
bool is_finished = false;
|
||||
std::exception_ptr exception;
|
||||
};
|
||||
|
||||
size_t normal_part_size;
|
||||
@ -92,6 +91,7 @@ namespace
|
||||
std::list<UploadPartTask> TSA_GUARDED_BY(bg_tasks_mutex) bg_tasks;
|
||||
int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
|
||||
int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
|
||||
std::exception_ptr bg_exception TSA_GUARDED_BY(bg_tasks_mutex);
|
||||
std::mutex bg_tasks_mutex;
|
||||
std::condition_variable bg_tasks_condvar;
|
||||
|
||||
@ -186,7 +186,7 @@ namespace
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
tryLogCurrentException(log, fmt::format("While performing multipart upload of blob {} in container {}", dest_blob, dest_container_for_logging));
|
||||
waitForAllBackgroundTasks();
|
||||
throw;
|
||||
}
|
||||
@ -242,7 +242,12 @@ namespace
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
task->exception = std::current_exception();
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
if (!bg_exception)
|
||||
{
|
||||
tryLogCurrentException(log, "While writing part");
|
||||
bg_exception = std::current_exception(); /// The exception will be rethrown after all background tasks stop working.
|
||||
}
|
||||
}
|
||||
task_finish_notify();
|
||||
}, Priority{});
|
||||
@ -299,13 +304,13 @@ namespace
|
||||
/// Suppress warnings because bg_tasks_mutex is actually held, but TSA annotations do not understand std::unique_lock
|
||||
bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); });
|
||||
|
||||
auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks);
|
||||
for (auto & task : tasks)
|
||||
{
|
||||
if (task.exception)
|
||||
std::rethrow_exception(task.exception);
|
||||
auto exception = TSA_SUPPRESS_WARNING_FOR_READ(bg_exception);
|
||||
if (exception)
|
||||
std::rethrow_exception(exception);
|
||||
|
||||
const auto & tasks = TSA_SUPPRESS_WARNING_FOR_READ(bg_tasks);
|
||||
for (const auto & task : tasks)
|
||||
block_ids.insert(block_ids.end(),task.block_ids.begin(), task.block_ids.end());
|
||||
}
|
||||
}
|
||||
};
|
||||
}
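The change above replaces per-task exception rethrowing with a single bg_exception slot: each background part upload records only the first failure under bg_tasks_mutex, the caller waits for every task to finish, and only then rethrows, so no task is left running while the stack unwinds. A rough sketch of the same pattern with plain Python threads (an illustration, not the ThreadPoolCallbackRunner API):

import threading

class BackgroundUploadSketch:
    """Run tasks in background threads, remember the first failure, rethrow after all finish."""

    def __init__(self):
        self._lock = threading.Lock()
        self._threads = []
        self._bg_exception = None

    def submit(self, fn, *args):
        def wrapper():
            try:
                fn(*args)
            except Exception as exc:
                with self._lock:
                    if self._bg_exception is None:
                        # Rethrown only after all background tasks stop working.
                        self._bg_exception = exc
        t = threading.Thread(target=wrapper)
        self._threads.append(t)
        t.start()

    def wait_for_all(self):
        for t in self._threads:
            t.join()
        if self._bg_exception is not None:
            raise self._bg_exception

# Usage: helper = BackgroundUploadSketch(); helper.submit(upload_part, 1); helper.wait_for_all()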
|
||||
@ -321,7 +326,8 @@ void copyDataToAzureBlobStorageFile(
|
||||
std::shared_ptr<const AzureBlobStorage::RequestSettings> settings,
|
||||
ThreadPoolCallbackRunnerUnsafe<void> schedule)
|
||||
{
|
||||
UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")};
|
||||
auto log = getLogger("copyDataToAzureBlobStorageFile");
|
||||
UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, log};
|
||||
helper.performCopy();
|
||||
}
|
||||
|
||||
@ -339,9 +345,11 @@ void copyAzureBlobStorageFile(
|
||||
const ReadSettings & read_settings,
|
||||
ThreadPoolCallbackRunnerUnsafe<void> schedule)
|
||||
{
|
||||
auto log = getLogger("copyAzureBlobStorageFile");
|
||||
|
||||
if (settings->use_native_copy)
|
||||
{
|
||||
LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob);
|
||||
LOG_TRACE(log, "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob);
|
||||
ProfileEvents::increment(ProfileEvents::AzureCopyObject);
|
||||
if (dest_client->GetClickhouseOptions().IsClientForDisk)
|
||||
ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject);
|
||||
@ -352,7 +360,7 @@ void copyAzureBlobStorageFile(
|
||||
|
||||
if (size < settings->max_single_part_copy_size)
|
||||
{
|
||||
LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy blob sync {} -> {}", src_blob, dest_blob);
|
||||
LOG_TRACE(log, "Copy blob sync {} -> {}", src_blob, dest_blob);
|
||||
block_blob_client_dest.CopyFromUri(source_uri);
|
||||
}
|
||||
else
|
||||
@ -368,7 +376,7 @@ void copyAzureBlobStorageFile(
|
||||
|
||||
if (copy_status.HasValue() && copy_status.Value() == Azure::Storage::Blobs::Models::CopyStatus::Success)
|
||||
{
|
||||
LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob);
|
||||
LOG_TRACE(log, "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -382,14 +390,14 @@ void copyAzureBlobStorageFile(
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob);
|
||||
LOG_TRACE(log, "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob);
|
||||
auto create_read_buffer = [&]
|
||||
{
|
||||
return std::make_unique<ReadBufferFromAzureBlobStorage>(
|
||||
src_client, src_blob, read_settings, settings->max_single_read_retries, settings->max_single_download_retries);
|
||||
};
|
||||
|
||||
UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyAzureBlobStorageFile")};
|
||||
UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, log};
|
||||
helper.performCopy();
|
||||
}
|
||||
}
|
||||
|
@ -98,7 +98,6 @@ namespace
|
||||
size_t part_size;
|
||||
String tag;
|
||||
bool is_finished = false;
|
||||
std::exception_ptr exception;
|
||||
};
|
||||
|
||||
size_t num_parts;
|
||||
@ -111,6 +110,7 @@ namespace
|
||||
size_t num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
|
||||
size_t num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
|
||||
size_t num_finished_parts TSA_GUARDED_BY(bg_tasks_mutex) = 0;
|
||||
std::exception_ptr bg_exception TSA_GUARDED_BY(bg_tasks_mutex);
|
||||
std::mutex bg_tasks_mutex;
|
||||
std::condition_variable bg_tasks_condvar;
|
||||
|
||||
@ -273,7 +273,7 @@ namespace
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
tryLogCurrentException(log, fmt::format("While performing multipart upload of {}", dest_key));
|
||||
// Multipart upload failed because it wasn't possible to schedule all the tasks.
|
||||
// To avoid execution of already scheduled tasks we abort MultipartUpload.
|
||||
abortMultipartUpload();
|
||||
@ -385,7 +385,12 @@ namespace
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
task->exception = std::current_exception();
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
if (!bg_exception)
|
||||
{
|
||||
tryLogCurrentException(log, fmt::format("While writing part #{}", task->part_number));
|
||||
bg_exception = std::current_exception(); /// The exception will be rethrown after all background tasks stop working.
|
||||
}
|
||||
}
|
||||
task_finish_notify();
|
||||
}, Priority{});
|
||||
@ -435,22 +440,21 @@ namespace
|
||||
/// Suppress warnings because bg_tasks_mutex is actually held, but TSA annotations do not understand std::unique_lock
|
||||
bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); });
|
||||
|
||||
auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks);
|
||||
for (auto & task : tasks)
|
||||
auto exception = TSA_SUPPRESS_WARNING_FOR_READ(bg_exception);
|
||||
if (exception)
|
||||
{
|
||||
if (task.exception)
|
||||
{
|
||||
/// abortMultipartUpload() might be called already, see processUploadPartRequest().
|
||||
/// However if there were concurrent uploads at that time, those part uploads might or might not succeed.
|
||||
/// As a result, it might be necessary to abort a given multipart upload multiple times in order to completely free
|
||||
/// all storage consumed by all parts.
|
||||
abortMultipartUpload();
|
||||
/// abortMultipartUpload() might be called already, see processUploadPartRequest().
|
||||
/// However if there were concurrent uploads at that time, those part uploads might or might not succeed.
|
||||
/// As a result, it might be necessary to abort a given multipart upload multiple times in order to completely free
|
||||
/// all storage consumed by all parts.
|
||||
abortMultipartUpload();
|
||||
|
||||
std::rethrow_exception(task.exception);
|
||||
}
|
||||
|
||||
part_tags.push_back(task.tag);
|
||||
std::rethrow_exception(exception);
|
||||
}
|
||||
|
||||
const auto & tasks = TSA_SUPPRESS_WARNING_FOR_READ(bg_tasks);
|
||||
for (const auto & task : tasks)
|
||||
part_tags.push_back(task.tag);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -30,6 +30,7 @@ namespace ProfileEvents
|
||||
extern const Event FilesystemCacheFailToReserveSpaceBecauseOfLockContention;
|
||||
extern const Event FilesystemCacheFreeSpaceKeepingThreadRun;
|
||||
extern const Event FilesystemCacheFreeSpaceKeepingThreadWorkMilliseconds;
|
||||
extern const Event FilesystemCacheFailToReserveSpaceBecauseOfCacheResize;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -813,7 +814,7 @@ bool FileCache::tryReserve(
|
||||
/// ok compared to the number of cases this check will help.
|
||||
if (cache_is_being_resized.load(std::memory_order_relaxed))
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention);
|
||||
ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfCacheResize);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1281,10 +1281,6 @@ void DatabaseCatalog::rescheduleDropTableTask()
|
||||
auto min_drop_time = getMinDropTime();
|
||||
time_t schedule_after_ms = min_drop_time > current_time ? (min_drop_time - current_time) * 1000 : 0;
|
||||
|
||||
LOG_TRACE(
|
||||
log,
|
||||
"Have {} tables in queue to drop. Schedule background task in {} seconds",
|
||||
tables_marked_dropped.size(), schedule_after_ms / 1000);
|
||||
(*drop_task)->scheduleAfter(schedule_after_ms);
|
||||
}
|
||||
|
||||
|
@ -73,66 +73,55 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v
|
||||
return false;
|
||||
}
|
||||
|
||||
void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast)
|
||||
void OptimizeIfWithConstantConditionVisitorData::visit(ASTFunction & function_node, ASTPtr & ast)
|
||||
{
|
||||
if (!current_ast)
|
||||
return;
|
||||
|
||||
checkStackSize();
|
||||
|
||||
for (ASTPtr & child : current_ast->children)
|
||||
if (function_node.name != "if")
|
||||
return;
|
||||
|
||||
if (!function_node.arguments)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)");
|
||||
|
||||
if (function_node.arguments->children.size() != 3)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Wrong number of arguments for function 'if' ({} instead of 3)",
|
||||
function_node.arguments->children.size());
|
||||
|
||||
const auto * args = function_node.arguments->as<ASTExpressionList>();
|
||||
|
||||
ASTPtr condition_expr = args->children[0];
|
||||
ASTPtr then_expr = args->children[1];
|
||||
ASTPtr else_expr = args->children[2];
|
||||
|
||||
bool condition;
|
||||
if (tryExtractConstValueFromCondition(condition_expr, condition))
|
||||
{
|
||||
auto * function_node = child->as<ASTFunction>();
|
||||
if (!function_node || function_node->name != "if")
|
||||
ASTPtr replace_ast = condition ? then_expr : else_expr;
|
||||
ASTPtr child_copy = ast;
|
||||
String replace_alias = replace_ast->tryGetAlias();
|
||||
String if_alias = ast->tryGetAlias();
|
||||
|
||||
if (replace_alias.empty())
|
||||
{
|
||||
visit(child);
|
||||
continue;
|
||||
replace_ast->setAlias(if_alias);
|
||||
ast = replace_ast;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Only copy of one node is required here.
|
||||
/// But IAST has only method for deep copy of subtree.
|
||||
/// This can be a reason of performance degradation in case of deep queries.
|
||||
ASTPtr replace_ast_deep_copy = replace_ast->clone();
|
||||
replace_ast_deep_copy->setAlias(if_alias);
|
||||
ast = replace_ast_deep_copy;
|
||||
}
|
||||
|
||||
if (!function_node->arguments)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)");
|
||||
|
||||
if (function_node->arguments->children.size() != 3)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Wrong number of arguments for function 'if' ({} instead of 3)",
|
||||
function_node->arguments->children.size());
|
||||
|
||||
visit(function_node->arguments);
|
||||
const auto * args = function_node->arguments->as<ASTExpressionList>();
|
||||
|
||||
ASTPtr condition_expr = args->children[0];
|
||||
ASTPtr then_expr = args->children[1];
|
||||
ASTPtr else_expr = args->children[2];
|
||||
|
||||
bool condition;
|
||||
if (tryExtractConstValueFromCondition(condition_expr, condition))
|
||||
if (!if_alias.empty())
|
||||
{
|
||||
ASTPtr replace_ast = condition ? then_expr : else_expr;
|
||||
ASTPtr child_copy = child;
|
||||
String replace_alias = replace_ast->tryGetAlias();
|
||||
String if_alias = child->tryGetAlias();
|
||||
|
||||
if (replace_alias.empty())
|
||||
{
|
||||
replace_ast->setAlias(if_alias);
|
||||
child = replace_ast;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Only copy of one node is required here.
|
||||
/// But IAST has only method for deep copy of subtree.
|
||||
/// This can be a reason of performance degradation in case of deep queries.
|
||||
ASTPtr replace_ast_deep_copy = replace_ast->clone();
|
||||
replace_ast_deep_copy->setAlias(if_alias);
|
||||
child = replace_ast_deep_copy;
|
||||
}
|
||||
|
||||
if (!if_alias.empty())
|
||||
{
|
||||
auto alias_it = aliases.find(if_alias);
|
||||
if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get())
|
||||
alias_it->second = child;
|
||||
}
|
||||
auto alias_it = aliases.find(if_alias);
|
||||
if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get())
|
||||
alias_it->second = ast;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,23 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Aliases.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// It removes Function_if node from AST if condition is constant.
|
||||
/// TODO: rewrite with InDepthNodeVisitor
|
||||
class OptimizeIfWithConstantConditionVisitor
|
||||
struct OptimizeIfWithConstantConditionVisitorData
|
||||
{
|
||||
public:
|
||||
explicit OptimizeIfWithConstantConditionVisitor(Aliases & aliases_)
|
||||
using TypeToVisit = ASTFunction;
|
||||
|
||||
explicit OptimizeIfWithConstantConditionVisitorData(Aliases & aliases_)
|
||||
: aliases(aliases_)
|
||||
{}
|
||||
|
||||
void visit(ASTPtr & ast);
|
||||
|
||||
void visit(ASTFunction & function_node, ASTPtr & ast);
|
||||
private:
|
||||
Aliases & aliases;
|
||||
};
|
||||
|
||||
/// It removes Function_if node from AST if condition is constant.
|
||||
using OptimizeIfWithConstantConditionVisitor = InDepthNodeVisitor<OneTypeMatcher<OptimizeIfWithConstantConditionVisitorData>, false>;
|
||||
|
||||
}
|
||||
|
@ -577,7 +577,8 @@ void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_
|
||||
optimizeMultiIfToIf(query);
|
||||
|
||||
/// Optimize if with constant condition after constants was substituted instead of scalar subqueries.
|
||||
OptimizeIfWithConstantConditionVisitor(aliases).visit(query);
|
||||
OptimizeIfWithConstantConditionVisitorData visitor_data(aliases);
|
||||
OptimizeIfWithConstantConditionVisitor(visitor_data).visit(query);
|
||||
|
||||
if (if_chain_to_multiif)
|
||||
OptimizeIfChainsVisitor().visit(query);
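The rewrite above turns the hand-rolled recursion into an InDepthNodeVisitor matcher: whenever if(condition, then, else) has a condition that folds to a constant, the whole function node is replaced by the chosen branch, the alias of the if() is carried over, and the alias map is repointed at the replacement. A compact Python sketch of that folding on a toy AST (illustration only; the real visitor operates on ClickHouse ASTs):

def fold_constant_if(node, aliases):
    """Replace ("if", cond, then, else, alias) with the selected branch when cond is a constant bool."""
    if not (isinstance(node, tuple) and node and node[0] == "if"):
        return node
    _, cond, then_branch, else_branch, alias = node
    if not isinstance(cond, bool):       # condition is not a known constant - keep the if()
        return node
    replacement = then_branch if cond else else_branch
    if alias is not None:
        aliases[alias] = replacement     # repoint the alias at the surviving branch
    return replacement

aliases = {}
node = ("if", True, "x + 1", "x - 1", "a")  # SELECT if(1, x + 1, x - 1) AS a
assert fold_constant_if(node, aliases) == "x + 1"
assert aliases["a"] == "x + 1"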
|
||||
|
@ -445,6 +445,9 @@ bool NpyRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & /*
|
||||
elements_in_current_column *= header.shape[i];
|
||||
}
|
||||
|
||||
if (typeid_cast<ColumnArray *>(current_column))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected nesting level of column '{}', expected {}", column->getName(), header.shape.size() - 1);
|
||||
|
||||
for (size_t i = 0; i != elements_in_current_column; ++i)
|
||||
readValue(current_column);
|
||||
|
||||
|
@ -196,6 +196,16 @@ void DistributedAsyncInsertBatch::readText(ReadBuffer & in)
|
||||
UInt64 idx;
|
||||
in >> idx >> "\n";
|
||||
files.push_back(std::filesystem::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string());
|
||||
|
||||
ReadBufferFromFile header_buffer(files.back());
|
||||
const DistributedAsyncInsertHeader & header = DistributedAsyncInsertHeader::read(header_buffer, parent.log);
|
||||
total_bytes += total_bytes;
|
||||
|
||||
if (header.rows)
|
||||
{
|
||||
total_rows += header.rows;
|
||||
total_bytes += header.bytes;
|
||||
}
|
||||
}
|
||||
|
||||
recovered = true;
|
||||
|
@ -101,9 +101,8 @@ struct MergeTreePartInfo
|
||||
|
||||
bool isFakeDropRangePart() const
|
||||
{
|
||||
/// Another max level was previously used for REPLACE/MOVE PARTITION
|
||||
auto another_max_level = std::numeric_limits<decltype(level)>::max();
|
||||
return level == MergeTreePartInfo::MAX_LEVEL || level == another_max_level;
|
||||
/// LEGACY_MAX_LEVEL was previously used for REPLACE/MOVE PARTITION
|
||||
return level == MergeTreePartInfo::MAX_LEVEL || level == MergeTreePartInfo::LEGACY_MAX_LEVEL;
|
||||
}
|
||||
|
||||
String getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const;
|
||||
|
@ -5,20 +5,21 @@
|
||||
|
||||
#include <base/hex.h>
|
||||
#include <base/interpolate.h>
|
||||
#include <Common/FailPoint.h>
|
||||
#include <Common/Macros.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/ProfileEventsScope.h>
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/ZooKeeper/Types.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/noexcept_scope.h>
|
||||
#include <Common/randomDelay.h>
|
||||
#include <Common/thread_local_rng.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <Common/FailPoint.h>
|
||||
#include <Common/randomDelay.h>
|
||||
|
||||
#include <Core/ServerUUID.h>
|
||||
|
||||
@ -5272,6 +5273,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
|
||||
if (shutdown_prepared_called.exchange(true))
|
||||
return;
|
||||
|
||||
LOG_TRACE(log, "Start preparing for shutdown");
|
||||
|
||||
try
|
||||
{
|
||||
auto settings_ptr = getSettings();
|
||||
@ -5282,7 +5285,11 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
|
||||
stopBeingLeader();
|
||||
|
||||
if (attach_thread)
|
||||
{
|
||||
attach_thread->shutdown();
|
||||
LOG_TRACE(log, "The attach thread is shutdown");
|
||||
}
|
||||
|
||||
|
||||
restarting_thread.shutdown(/* part_of_full_shutdown */true);
|
||||
/// Explicitly set the event, because the restarting thread will not set it again
|
||||
@ -5295,6 +5302,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
|
||||
shutdown_deadline.emplace(std::chrono::system_clock::now());
|
||||
throw;
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "Finished preparing for shutdown");
|
||||
}
|
||||
|
||||
void StorageReplicatedMergeTree::partialShutdown()
|
||||
@ -5332,6 +5341,8 @@ void StorageReplicatedMergeTree::shutdown(bool)
|
||||
if (shutdown_called.exchange(true))
|
||||
return;
|
||||
|
||||
LOG_TRACE(log, "Shutdown started");
|
||||
|
||||
flushAndPrepareForShutdown();
|
||||
|
||||
if (!shutdown_deadline.has_value())
|
||||
@ -5374,6 +5385,7 @@ void StorageReplicatedMergeTree::shutdown(bool)
|
||||
/// Wait for all of them
|
||||
std::lock_guard lock(data_parts_exchange_ptr->rwlock);
|
||||
}
|
||||
LOG_TRACE(log, "Shutdown finished");
|
||||
}
|
||||
|
||||
|
||||
|
@ -15,3 +15,4 @@ warn_return_any = True
|
||||
no_implicit_reexport = True
|
||||
strict_equality = True
|
||||
extra_checks = True
|
||||
ignore_missing_imports = True
|
355
tests/ci/artifactory.py
Normal file
@ -0,0 +1,355 @@
|
||||
import argparse
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from shutil import copy2
|
||||
from create_release import PackageDownloader, ReleaseInfo, ShellRunner
|
||||
from ci_utils import WithIter
|
||||
|
||||
|
||||
class MountPointApp(metaclass=WithIter):
|
||||
RCLONE = "rclone"
|
||||
S3FS = "s3fs"
|
||||
|
||||
|
||||
class R2MountPoint:
|
||||
_TEST_BUCKET_NAME = "repo-test"
|
||||
_PROD_BUCKET_NAME = "packages"
|
||||
_CACHE_MAX_SIZE_GB = 20
|
||||
MOUNT_POINT = "/home/ubuntu/mountpoint"
|
||||
API_ENDPOINT = "https://d4fd593eebab2e3a58a599400c4cd64d.r2.cloudflarestorage.com"
|
||||
LOG_FILE = "/home/ubuntu/fuse_mount.log"
|
||||
# mod time is not required by reprepro and createrepo - disable to simplify bucket's mount sync (applicable for rclone)
|
||||
NOMODTIME = True
|
||||
# enable debug messages in mount log
|
||||
DEBUG = True
|
||||
# enable cache for mountpoint
|
||||
CACHE_ENABLED = False
|
||||
# TODO: which mode is better: minimal/writes/full/off
|
||||
_RCLONE_CACHE_MODE = "minimal"
|
||||
UMASK = "0000"
|
||||
|
||||
def __init__(self, app: str, dry_run: bool) -> None:
|
||||
assert app in MountPointApp
|
||||
self.app = app
|
||||
if dry_run:
|
||||
self.bucket_name = self._TEST_BUCKET_NAME
|
||||
else:
|
||||
self.bucket_name = self._PROD_BUCKET_NAME
|
||||
|
||||
self.aux_mount_options = ""
|
||||
self.async_mount = False
|
||||
if self.app == MountPointApp.S3FS:
|
||||
self.cache_dir = "/home/ubuntu/s3fs_cache"
|
||||
# self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs
|
||||
self.aux_mount_options += "--debug " if self.DEBUG else ""
|
||||
self.aux_mount_options += (
|
||||
f"-o use_cache={self.cache_dir} -o cache_size_mb={self._CACHE_MAX_SIZE_GB * 1024} "
|
||||
if self.CACHE_ENABLED
|
||||
else ""
|
||||
)
|
||||
# without -o nomultipart there are errors like "Error 5 writing to /home/ubuntu/***.deb: Input/output error"
|
||||
self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}"
|
||||
elif self.app == MountPointApp.RCLONE:
|
||||
# run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return
|
||||
self.async_mount = True
|
||||
self.cache_dir = "/home/ubuntu/rclone_cache"
|
||||
self.aux_mount_options += "--no-modtime " if self.NOMODTIME else ""
|
||||
self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose
|
||||
self.aux_mount_options += (
|
||||
f"--vfs-cache-mode {self._RCLONE_CACHE_MODE} --vfs-cache-max-size {self._CACHE_MAX_SIZE_GB}G"
|
||||
if self.CACHE_ENABLED
|
||||
else "--vfs-cache-mode off"
|
||||
)
|
||||
# Use --no-modtime to try to avoid: ERROR : rpm/lts/clickhouse-client-24.3.6.5.x86_64.rpm: Failed to apply pending mod time
|
||||
self.mount_cmd = f"rclone mount remote:{self.bucket_name} {self.MOUNT_POINT} --daemon --cache-dir {self.cache_dir} --umask 0000 --log-file {self.LOG_FILE} {self.aux_mount_options}"
|
||||
else:
|
||||
assert False
|
||||
|
||||
def init(self):
|
||||
print(f"Mount bucket [{self.bucket_name}] to [{self.MOUNT_POINT}]")
|
||||
_CLEAN_LOG_FILE_CMD = f"tail -n 1000 {self.LOG_FILE} > {self.LOG_FILE}_tmp && mv {self.LOG_FILE}_tmp {self.LOG_FILE} ||:"
|
||||
_MKDIR_CMD = f"mkdir -p {self.MOUNT_POINT}"
|
||||
_MKDIR_FOR_CACHE = f"mkdir -p {self.cache_dir}"
|
||||
_UNMOUNT_CMD = (
|
||||
f"mount | grep -q {self.MOUNT_POINT} && umount {self.MOUNT_POINT} ||:"
|
||||
)
|
||||
|
||||
_TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}"
|
||||
ShellRunner.run(_CLEAN_LOG_FILE_CMD)
|
||||
ShellRunner.run(_UNMOUNT_CMD)
|
||||
ShellRunner.run(_MKDIR_CMD)
|
||||
ShellRunner.run(_MKDIR_FOR_CACHE)
|
||||
ShellRunner.run(self.mount_cmd, async_=self.async_mount)
|
||||
if self.async_mount:
|
||||
time.sleep(3)
|
||||
ShellRunner.run(_TEST_MOUNT_CMD)
|
||||
|
||||
@classmethod
|
||||
def teardown(cls):
|
||||
print(f"Unmount [{cls.MOUNT_POINT}]")
|
||||
ShellRunner.run(f"umount {cls.MOUNT_POINT}")
|
||||
|
||||
|
||||
class RepoCodenames(metaclass=WithIter):
|
||||
LTS = "lts"
|
||||
STABLE = "stable"
|
||||
|
||||
|
||||
class DebianArtifactory:
|
||||
_TEST_REPO_URL = "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/deb"
|
||||
_PROD_REPO_URL = "https://packages.clickhouse.com/deb"
|
||||
|
||||
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
|
||||
self.codename = release_info.codename
|
||||
self.version = release_info.version
|
||||
if dry_run:
|
||||
self.repo_url = self._TEST_REPO_URL
|
||||
else:
|
||||
self.repo_url = self._PROD_REPO_URL
|
||||
assert self.codename in RepoCodenames
|
||||
self.pd = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
|
||||
def export_packages(self):
|
||||
assert self.pd.local_deb_packages_ready(), "BUG: Packages are not downloaded"
|
||||
print("Start adding packages")
|
||||
paths = [
|
||||
self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_deb_packages_files()
|
||||
]
|
||||
REPREPRO_CMD_PREFIX = f"reprepro --basedir {R2MountPoint.MOUNT_POINT}/configs/deb --outdir {R2MountPoint.MOUNT_POINT}/deb --verbose"
|
||||
cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}"
|
||||
print("Running export command:")
|
||||
print(f" {cmd}")
|
||||
ShellRunner.run(cmd)
|
||||
ShellRunner.run("sync")
|
||||
|
||||
if self.codename == RepoCodenames.LTS:
|
||||
packages_with_version = [
|
||||
package + "=" + self.version for package in self.pd.get_packages_names()
|
||||
]
|
||||
print(
|
||||
f"Copy packages from {RepoCodenames.LTS} to {RepoCodenames.STABLE} repository"
|
||||
)
|
||||
cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}"
|
||||
print("Running copy command:")
|
||||
print(f" {cmd}")
|
||||
ShellRunner.run(cmd)
|
||||
ShellRunner.run("sync")
|
||||
|
||||
def test_packages(self):
|
||||
ShellRunner.run("docker pull ubuntu:latest")
|
||||
print(f"Test packages installation, version [{self.version}]")
|
||||
cmd = f"docker run --rm ubuntu:latest bash -c \"apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-client={self.version}\""
|
||||
print("Running test command:")
|
||||
print(f" {cmd}")
|
||||
ShellRunner.run(cmd)
|
||||
|
||||
|
||||
def _copy_if_not_exists(src: Path, dst: Path) -> Path:
|
||||
if dst.is_dir():
|
||||
dst = dst / src.name
|
||||
if not dst.exists():
|
||||
return copy2(src, dst) # type: ignore
|
||||
if src.stat().st_size == dst.stat().st_size:
|
||||
return dst
|
||||
return copy2(src, dst) # type: ignore
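_copy_if_not_exists() above is an idempotent copy: if the destination is a directory the source file name is appended, and an existing destination is only overwritten when its size differs from the source, so re-running an export does not rewrite unchanged packages. A quick usage sketch (illustration only; the helper is repeated so the sketch is self-contained):

import tempfile
from pathlib import Path
from shutil import copy2

def copy_if_not_exists(src: Path, dst: Path) -> Path:
    # Same behaviour as _copy_if_not_exists() above.
    if dst.is_dir():
        dst = dst / src.name
    if not dst.exists() or src.stat().st_size != dst.stat().st_size:
        return Path(copy2(src, dst))
    return dst

with tempfile.TemporaryDirectory() as tmp:
    src = Path(tmp) / "pkg.deb"
    src.write_bytes(b"payload")
    out_dir = Path(tmp) / "repo"
    out_dir.mkdir()
    first = copy_if_not_exists(src, out_dir)
    second = copy_if_not_exists(src, out_dir)   # same size -> no re-copy, same path returned
    assert first == second == out_dir / "pkg.deb"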
|
||||
|
||||
|
||||
class RpmArtifactory:
|
||||
_TEST_REPO_URL = (
|
||||
"https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/rpm/clickhouse.repo"
|
||||
)
|
||||
_PROD_REPO_URL = "https://packages.clickhouse.com/rpm/clickhouse.repo"
|
||||
_SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38"
|
||||
|
||||
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
|
||||
self.codename = release_info.codename
|
||||
self.version = release_info.version
|
||||
if dry_run:
|
||||
self.repo_url = self._TEST_REPO_URL
|
||||
else:
|
||||
self.repo_url = self._PROD_REPO_URL
|
||||
assert self.codename in RepoCodenames
|
||||
self.pd = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
|
||||
def export_packages(self, codename: Optional[str] = None) -> None:
|
||||
assert self.pd.local_rpm_packages_ready(), "BUG: Packages are not downloaded"
|
||||
codename = codename or self.codename
|
||||
print(f"Start adding packages to [{codename}]")
|
||||
paths = [
|
||||
self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_rpm_packages_files()
|
||||
]
|
||||
|
||||
dest_dir = Path(R2MountPoint.MOUNT_POINT) / "rpm" / codename
|
||||
|
||||
for package in paths:
|
||||
_copy_if_not_exists(Path(package), dest_dir)
|
||||
|
||||
commands = (
|
||||
f"createrepo_c --local-sqlite --workers=2 --update --verbose {dest_dir}",
|
||||
f"gpg --sign-with {self._SIGN_KEY} --detach-sign --batch --yes --armor {dest_dir / 'repodata' / 'repomd.xml'}",
|
||||
)
|
||||
print(f"Exporting RPM packages into [{codename}]")
|
||||
|
||||
for command in commands:
|
||||
print("Running command:")
|
||||
print(f" {command}")
|
||||
ShellRunner.run(command)
|
||||
|
||||
update_public_key = f"gpg --armor --export {self._SIGN_KEY}"
|
||||
pub_key_path = dest_dir / "repodata" / "repomd.xml.key"
|
||||
print("Updating repomd.xml.key")
|
||||
pub_key_path.write_text(ShellRunner.run(update_public_key)[1])
|
||||
if codename == RepoCodenames.LTS:
|
||||
self.export_packages(RepoCodenames.STABLE)
|
||||
ShellRunner.run("sync")
|
||||
|
||||
def test_packages(self):
|
||||
ShellRunner.run("docker pull fedora:latest")
|
||||
print(f"Test package installation, version [{self.version}]")
|
||||
cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"'
|
||||
print("Running test command:")
|
||||
print(f" {cmd}")
|
||||
ShellRunner.run(cmd)
|
||||
|
||||
|
||||
class TgzArtifactory:
|
||||
_TEST_REPO_URL = "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/tgz"
|
||||
_PROD_REPO_URL = "https://packages.clickhouse.com/tgz"
|
||||
|
||||
def __init__(self, release_info: ReleaseInfo, dry_run: bool):
|
||||
self.codename = release_info.codename
|
||||
self.version = release_info.version
|
||||
if dry_run:
|
||||
self.repo_url = self._TEST_REPO_URL
|
||||
else:
|
||||
self.repo_url = self._PROD_REPO_URL
|
||||
assert self.codename in RepoCodenames
|
||||
self.pd = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
|
||||
def export_packages(self, codename: Optional[str] = None) -> None:
|
||||
assert self.pd.local_tgz_packages_ready(), "BUG: Packages are not downloaded"
|
||||
codename = codename or self.codename
|
||||
|
||||
paths = [
|
||||
self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_tgz_packages_files()
|
||||
]
|
||||
|
||||
dest_dir = Path(R2MountPoint.MOUNT_POINT) / "tgz" / codename
|
||||
|
||||
print(f"Exporting TGZ packages into [{codename}]")
|
||||
|
||||
for package in paths:
|
||||
_copy_if_not_exists(Path(package), dest_dir)
|
||||
|
||||
if codename == RepoCodenames.LTS:
|
||||
self.export_packages(RepoCodenames.STABLE)
|
||||
ShellRunner.run("sync")
|
||||
|
||||
def test_packages(self):
|
||||
tgz_file = "/tmp/tmp.tgz"
|
||||
tgz_sha_file = "/tmp/tmp.tgz.sha512"
|
||||
ShellRunner.run(
|
||||
f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz"
|
||||
)
|
||||
ShellRunner.run(
|
||||
f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512"
|
||||
)
|
||||
expected_checksum = ShellRunner.run(f"cut -d ' ' -f 1 {tgz_sha_file}")
|
||||
actual_checksum = ShellRunner.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1")
|
||||
assert (
|
||||
expected_checksum == actual_checksum
|
||||
), f"[{actual_checksum} != {expected_checksum}]"
|
||||
ShellRunner.run("rm /tmp/tmp.tgz*")
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description="Adds release packages to the repository",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--infile",
|
||||
type=str,
|
||||
required=True,
|
||||
help="input file with release info",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--export-debian",
|
||||
action="store_true",
|
||||
help="Export debian packages to repository",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--export-rpm",
|
||||
action="store_true",
|
||||
help="Export rpm packages to repository",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--export-tgz",
|
||||
action="store_true",
|
||||
help="Export tgz packages to repository",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--test-debian",
|
||||
action="store_true",
|
||||
help="Test debian packages installation",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--test-rpm",
|
||||
action="store_true",
|
||||
help="Test rpm packages installation",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--test-tgz",
|
||||
action="store_true",
|
||||
help="Test tgz packages installation",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Dry run mode",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
assert args.dry_run
|
||||
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
"""
|
||||
Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve:
|
||||
ERROR : IO error: NotImplemented: versionId not implemented
|
||||
Failed to copy: NotImplemented: versionId not implemented
|
||||
"""
|
||||
mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run)
|
||||
if args.export_debian:
|
||||
mp.init()
|
||||
DebianArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
if args.export_rpm:
|
||||
mp.init()
|
||||
RpmArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
if args.export_tgz:
|
||||
mp.init()
|
||||
TgzArtifactory(release_info, dry_run=args.dry_run).export_packages()
|
||||
mp.teardown()
|
||||
if args.test_debian:
|
||||
DebianArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
||||
if args.test_tgz:
|
||||
TgzArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
||||
if args.test_rpm:
|
||||
RpmArtifactory(release_info, dry_run=args.dry_run).test_packages()
|
@ -6,6 +6,7 @@ import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
@ -15,7 +16,7 @@ import upload_result_helper
|
||||
from build_check import get_release_or_pr
|
||||
from ci_config import CI
|
||||
from ci_metadata import CiMetadata
|
||||
from ci_utils import GHActions, normalize_string
|
||||
from ci_utils import GHActions, normalize_string, Utils
|
||||
from clickhouse_helper import (
|
||||
CiLogsCredentials,
|
||||
ClickHouseHelper,
|
||||
@ -53,6 +54,7 @@ from stopwatch import Stopwatch
|
||||
from tee_popen import TeePopen
|
||||
from ci_cache import CiCache
|
||||
from ci_settings import CiSettings
|
||||
from ci_buddy import CIBuddy
|
||||
from version_helper import get_version_from_repo
|
||||
|
||||
# pylint: disable=too-many-lines
|
||||
@ -262,6 +264,8 @@ def check_missing_images_on_dockerhub(
|
||||
|
||||
|
||||
def _pre_action(s3, indata, pr_info):
|
||||
print("Clear dmesg")
|
||||
Utils.clear_dmesg()
|
||||
CommitStatusData.cleanup()
|
||||
JobReport.cleanup()
|
||||
BuildResult.cleanup()
|
||||
@ -322,8 +326,8 @@ def _mark_success_action(
|
||||
# do nothing, exit without failure
|
||||
print(f"ERROR: no status file for job [{job}]")
|
||||
|
||||
if job_config.run_always or job_config.run_by_label:
|
||||
print(f"Job [{job}] runs always or by label in CI - do not cache")
|
||||
if job_config.run_by_label or not job_config.has_digest():
|
||||
print(f"Job [{job}] has no digest or run by label in CI - do not cache")
|
||||
else:
|
||||
if pr_info.is_master:
|
||||
pass
|
||||
@ -547,7 +551,17 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None:
|
||||
except Exception as e:
|
||||
raise e
|
||||
print("Going to update overall CI report")
|
||||
set_status_comment(commit, pr_info)
|
||||
for retry in range(2):
|
||||
try:
|
||||
set_status_comment(commit, pr_info)
|
||||
break
|
||||
except Exception as e:
|
||||
print(
|
||||
f"WARNING: Failed to update CI Running status, attempt [{retry + 1}], exception [{e}]"
|
||||
)
|
||||
time.sleep(1)
|
||||
else:
|
||||
print("ERROR: All retry attempts failed.")
|
||||
print("... CI report update - done")
|
||||
|
||||
|
||||
@ -992,7 +1006,11 @@ def main() -> int:
|
||||
ci_settings,
|
||||
args.skip_jobs,
|
||||
)
|
||||
|
||||
ci_cache.print_status()
|
||||
if IS_CI and pr_info.is_pr and not ci_settings.no_ci_cache:
|
||||
ci_cache.filter_out_not_affected_jobs()
|
||||
ci_cache.print_status()
|
||||
|
||||
if IS_CI and not pr_info.is_merge_queue:
|
||||
# wait for pending jobs to be finished, await_jobs is a long blocking call
|
||||
@ -1028,6 +1046,7 @@ def main() -> int:
|
||||
elif args.pre:
|
||||
assert indata, "Run config must be provided via --infile"
|
||||
_pre_action(s3, indata, pr_info)
|
||||
JobReport.create_pre_report().dump()
|
||||
|
||||
### RUN action: start
|
||||
elif args.run:
|
||||
@ -1079,6 +1098,16 @@ def main() -> int:
|
||||
print(status)
|
||||
print("::endgroup::")
|
||||
previous_status = status.state
|
||||
print("Create dummy job report with job_skipped flag")
|
||||
JobReport(
|
||||
status=status.state,
|
||||
description="",
|
||||
test_results=[],
|
||||
start_time="",
|
||||
duration=0.0,
|
||||
additional_files=[],
|
||||
job_skipped=True,
|
||||
).dump()
|
||||
|
||||
# ci cache check
|
||||
if not previous_status and not ci_settings.no_ci_cache:
|
||||
@ -1114,12 +1143,22 @@ def main() -> int:
|
||||
exit_code = 1
|
||||
else:
|
||||
exit_code = _run_test(check_name, args.run_command)
|
||||
job_report = JobReport.load() if JobReport.exist() else None
|
||||
assert (
|
||||
job_report
|
||||
), "BUG. There must be job report either real report, or pre-report if job was killed"
|
||||
job_report.exit_code = exit_code
|
||||
job_report.dump()
|
||||
### RUN action: end
|
||||
|
||||
### POST action: start
|
||||
elif args.post:
|
||||
job_report = JobReport.load() if JobReport.exist() else None
|
||||
if job_report:
|
||||
assert (
|
||||
job_report
|
||||
), "BUG. There must be job report either real report, or pre-report if job was killed"
|
||||
if not job_report.job_skipped and not job_report.pre_report:
|
||||
# it's a real job report
|
||||
ch_helper = ClickHouseHelper()
|
||||
check_url = ""
|
||||
|
||||
@ -1219,9 +1258,32 @@ def main() -> int:
|
||||
indata["build"],
|
||||
ch_helper,
|
||||
)
|
||||
else:
|
||||
# no job report
|
||||
print(f"No job report for {[args.job_name]} - do nothing")
|
||||
elif job_report.job_skipped:
|
||||
print(f"Skipped after rerun check {[args.job_name]} - do nothing")
|
||||
elif job_report.job_skipped:
|
||||
print(f"Job was skipped {[args.job_name]} - do nothing")
|
||||
elif job_report.pre_report:
|
||||
print(f"ERROR: Job was killed - generate evidence")
|
||||
job_report.update_duration()
|
||||
# Job was killed!
|
||||
if Utils.is_killed_with_oom():
|
||||
print("WARNING: OOM while job execution")
|
||||
error = f"Out Of Memory, exit_code {job_report.exit_code}, after {job_report.duration}s"
|
||||
else:
|
||||
error = f"Unknown, exit_code {job_report.exit_code}, after {job_report.duration}s"
|
||||
CIBuddy().post_error(error, job_name=_get_ext_check_name(args.job_name))
|
||||
if CI.is_test_job(args.job_name):
|
||||
gh = GitHub(get_best_robot_token(), per_page=100)
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
post_commit_status(
|
||||
commit,
|
||||
ERROR,
|
||||
"",
|
||||
"Error: " + error,
|
||||
_get_ext_check_name(args.job_name),
|
||||
pr_info,
|
||||
dump_to_file=True,
|
||||
)
|
||||
### POST action: end
|
||||
|
||||
### MARK SUCCESS action: start
|
||||
|
88
tests/ci/ci_buddy.py
Normal file
@ -0,0 +1,88 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import boto3
|
||||
import requests
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from pr_info import PRInfo
|
||||
from ci_utils import Shell
|
||||
|
||||
|
||||
class CIBuddy:
|
||||
_HEADERS = {"Content-Type": "application/json"}
|
||||
|
||||
def __init__(self, dry_run=False):
|
||||
self.repo = os.getenv("GITHUB_REPOSITORY", "")
|
||||
self.dry_run = dry_run
|
||||
res = self._get_webhooks()
|
||||
self.test_channel = ""
|
||||
self.dev_ci_channel = ""
|
||||
if res:
|
||||
self.test_channel = json.loads(res)["test_channel"]
|
||||
self.dev_ci_channel = json.loads(res)["ci_channel"]
|
||||
self.job_name = os.getenv("CHECK_NAME", "unknown")
|
||||
pr_info = PRInfo()
|
||||
self.pr_number = pr_info.number
|
||||
self.head_ref = pr_info.head_ref
|
||||
self.commit_url = pr_info.commit_html_url
|
||||
|
||||
@staticmethod
|
||||
def _get_webhooks():
|
||||
name = "ci_buddy_web_hooks"
|
||||
|
||||
session = boto3.Session(region_name="us-east-1") # Replace with your region
|
||||
ssm_client = session.client("ssm")
|
||||
json_string = None
|
||||
try:
|
||||
response = ssm_client.get_parameter(
|
||||
Name=name,
|
||||
WithDecryption=True, # Set to True if the parameter is a SecureString
|
||||
)
|
||||
json_string = response["Parameter"]["Value"]
|
||||
except ClientError as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
return json_string
|
||||
|
||||
def post(self, message, dry_run=None):
|
||||
if dry_run is None:
|
||||
dry_run = self.dry_run
|
||||
print(f"Posting slack message, dry_run [{dry_run}]")
|
||||
if dry_run:
|
||||
url = self.test_channel
|
||||
else:
|
||||
url = self.dev_ci_channel
|
||||
data = {"text": message}
|
||||
try:
|
||||
requests.post(url, headers=self._HEADERS, data=json.dumps(data), timeout=10)
|
||||
except Exception as e:
|
||||
print(f"ERROR: Failed to post message, ex {e}")
|
||||
|
||||
def post_error(self, error_description, job_name="", with_instance_info=True):
|
||||
instance_id, instance_type = "unknown", "unknown"
|
||||
if with_instance_info:
|
||||
instance_id = Shell.run("ec2metadata --instance-id") or instance_id
|
||||
instance_type = Shell.run("ec2metadata --instance-type") or instance_type
|
||||
if not job_name:
|
||||
job_name = os.getenv("CHECK_NAME", "unknown")
|
||||
line_err = f":red_circle: *Error: {error_description}*\n\n"
|
||||
line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n"
|
||||
line_job = f" *Job:* `{job_name}`\n"
|
||||
line_pr_ = f" *PR:* <https://github.com/{self.repo}/pull/{self.pr_number}|#{self.pr_number}>\n"
|
||||
line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n"
|
||||
message = line_err
|
||||
message += line_job
|
||||
if with_instance_info:
|
||||
message += line_ghr
|
||||
if self.pr_number > 0:
|
||||
message += line_pr_
|
||||
else:
|
||||
message += line_br_
|
||||
self.post(message)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test
|
||||
buddy = CIBuddy(dry_run=True)
|
||||
buddy.post_error("Out of memory")
|
@ -520,6 +520,35 @@ class CiCache:
            self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch
        )

    def has_evidence(self, job: str, job_config: CI.JobConfig) -> bool:
        """
        checks if the job has been seen in master/release CI
        function is to be used to check if change did not affect the job
        :param job_config:
        :param job:
        :return:
        """
        return (
            self.is_successful(
                job=job,
                batch=0,
                num_batches=job_config.num_batches,
                release_branch=True,
            )
            or self.is_pending(
                job=job,
                batch=0,
                num_batches=job_config.num_batches,
                release_branch=True,
            )
            or self.is_failed(
                job=job,
                batch=0,
                num_batches=job_config.num_batches,
                release_branch=True,
            )
        )

    def is_failed(
        self, job: str, batch: int, num_batches: int, release_branch: bool
    ) -> bool:
@ -609,7 +638,7 @@ class CiCache:
|
||||
pushes pending records for all jobs that supposed to be run
|
||||
"""
|
||||
for job, job_config in self.jobs_to_do.items():
|
||||
if job_config.run_always:
|
||||
if not job_config.has_digest():
|
||||
continue
|
||||
pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL)
|
||||
assert job_config.batches
|
||||
@ -674,6 +703,51 @@ class CiCache:
|
||||
bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path
|
||||
)
|
||||
|
||||
    def filter_out_not_affected_jobs(self):
        """
        Filter is to be applied in PRs to remove jobs that are not affected by the change
        :return:
        """
        remove_from_to_do = []
        required_builds = []
        for job_name, job_config in self.jobs_to_do.items():
            if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK:
                if job_config.reference_job_name:
                    reference_name = job_config.reference_job_name
                    reference_config = CI.JOB_CONFIGS[reference_name]
                else:
                    reference_name = job_name
                    reference_config = job_config
                if self.has_evidence(
                    job=reference_name,
                    job_config=reference_config,
                ):
                    remove_from_to_do.append(job_name)
                else:
                    required_builds += (
                        job_config.required_builds if job_config.required_builds else []
                    )

        has_builds_to_do = False
        for job_name, job_config in self.jobs_to_do.items():
            if CI.is_build_job(job_name):
                if job_name not in required_builds:
                    remove_from_to_do.append(job_name)
                else:
                    has_builds_to_do = True

        if not has_builds_to_do:
            remove_from_to_do.append(CI.JobNames.BUILD_CHECK)

        for job in remove_from_to_do:
            print(f"Filter job [{job}] - not affected by the change")
            if job in self.jobs_to_do:
                del self.jobs_to_do[job]
            if job in self.jobs_to_wait:
                del self.jobs_to_wait[job]
            if job in self.jobs_to_skip:
                self.jobs_to_skip.remove(job)
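To make the reference-job mechanism above concrete: a PR-only job leaves no records of its own in the master/release cache, so its config can point at a job that does run there, and has_evidence() is asked about that job instead. The toy below is a self-contained restatement of just that name resolution (the dataclass and job names are invented for illustration, not taken from this commit):

from dataclasses import dataclass

@dataclass
class ToyJobConfig:
    reference_job_name: str = ""

def evidence_job(job_name: str, config: ToyJobConfig) -> str:
    # filter_out_not_affected_jobs() queries the cache for this name
    return config.reference_job_name or job_name

assert evidence_job("Stateless flaky check", ToyJobConfig("Stateless tests (release)")) == "Stateless tests (release)"
assert evidence_job("Integration tests (asan)", ToyJobConfig()) == "Integration tests (asan)"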
|
||||
def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None:
|
||||
"""
|
||||
await pending jobs to be finished
|
||||
@ -691,14 +765,19 @@ class CiCache:
|
||||
MAX_JOB_NUM_TO_WAIT = 3
|
||||
round_cnt = 0
|
||||
|
||||
# FIXME: temporary experiment: lets enable await for PR' workflows but for a shorter time
|
||||
def _has_build_job():
|
||||
for job in self.jobs_to_wait:
|
||||
if CI.is_build_job(job):
|
||||
return True
|
||||
return False
|
||||
|
||||
if not is_release:
|
||||
MAX_ROUNDS_TO_WAIT = 3
|
||||
# in PRs we can wait only for builds, TIMEOUT*MAX_ROUNDS_TO_WAIT=100min is enough
|
||||
MAX_ROUNDS_TO_WAIT = 2
|
||||
|
||||
while (
|
||||
len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT
|
||||
and round_cnt < MAX_ROUNDS_TO_WAIT
|
||||
):
|
||||
len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT or _has_build_job()
|
||||
) and round_cnt < MAX_ROUNDS_TO_WAIT:
|
||||
round_cnt += 1
|
||||
GHActions.print_in_group(
|
||||
f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:",
|
||||
|
@ -13,6 +13,9 @@ class CI:
|
||||
each config item in the below dicts should be an instance of JobConfig class or inherited from it
|
||||
"""
|
||||
|
||||
MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5
|
||||
MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2
|
||||
|
||||
# reimport types to CI class so that they visible as CI.* and mypy is happy
|
||||
# pylint:disable=useless-import-alias,reimported,import-outside-toplevel
|
||||
from ci_definitions import BuildConfig as BuildConfig
|
||||
@ -410,7 +413,9 @@ class CI:
|
||||
release_only=True,
|
||||
),
|
||||
JobNames.INTEGRATION_TEST_FLAKY: CommonJobConfigs.INTEGRATION_TEST.with_properties(
|
||||
required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True
|
||||
required_builds=[BuildNames.PACKAGE_ASAN],
|
||||
pr_only=True,
|
||||
reference_job_name=JobNames.INTEGRATION_TEST_TSAN,
|
||||
),
|
||||
JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties(
|
||||
required_builds=[BuildNames.PACKAGE_RELEASE],
|
||||
@ -452,7 +457,10 @@ class CI:
|
||||
required_builds=[BuildNames.PACKAGE_UBSAN],
|
||||
),
|
||||
JobNames.STATELESS_TEST_FLAKY_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties(
|
||||
required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, timeout=3600
|
||||
required_builds=[BuildNames.PACKAGE_ASAN],
|
||||
pr_only=True,
|
||||
timeout=3600,
|
||||
reference_job_name=JobNames.STATELESS_TEST_RELEASE,
|
||||
),
|
||||
JobNames.JEPSEN_KEEPER: JobConfig(
|
||||
required_builds=[BuildNames.BINARY_RELEASE],
|
||||
@ -637,7 +645,7 @@ class CI:
|
||||
|
||||
@classmethod
|
||||
def is_test_job(cls, job: str) -> bool:
|
||||
return not cls.is_build_job(job) and job != cls.JobNames.STYLE_CHECK
|
||||
return not cls.is_build_job(job)
|
||||
|
||||
@classmethod
|
||||
def is_docs_job(cls, job: str) -> bool:
|
||||
|
@ -284,8 +284,12 @@ class JobConfig:
|
||||
|
||||
# GH Runner type (tag from @Runners)
|
||||
runner_type: str
|
||||
# used for config validation in ci unittests
|
||||
# used in ci unittests for config validation
|
||||
job_name_keyword: str = ""
|
||||
# name of another job that (if provided) should be used to check if job was affected by the change or not (in CiCache.has_evidence(job=@reference_job_name) call)
|
||||
# for example: "Stateless flaky check" can use reference_job_name="Stateless tests (release)". "Stateless flaky check" does not run on master
|
||||
# and there cannot be an evidence for it, so instead "Stateless tests (release)" job name can be used to check the evidence
|
||||
reference_job_name: str = ""
|
||||
# builds required for the job (applicable for test jobs)
|
||||
required_builds: Optional[List[str]] = None
|
||||
# build config for the build job (applicable for builds)
|
||||
@ -327,6 +331,9 @@ class JobConfig:
|
||||
assert self.required_builds
|
||||
return self.required_builds[0]
|
||||
|
||||
def has_digest(self) -> bool:
|
||||
return self.digest != DigestConfig()
|
||||
|
||||
|
||||
class CommonJobConfigs:
|
||||
"""
|
||||
@ -378,7 +385,7 @@ class CommonJobConfigs:
|
||||
),
|
||||
run_command='functional_test_check.py "$CHECK_NAME"',
|
||||
runner_type=Runners.FUNC_TESTER,
|
||||
timeout=10800,
|
||||
timeout=7200,
|
||||
)
|
||||
STATEFUL_TEST = JobConfig(
|
||||
job_name_keyword="stateful",
|
||||
@ -440,7 +447,12 @@ class CommonJobConfigs:
|
||||
)
|
||||
ASTFUZZER_TEST = JobConfig(
|
||||
job_name_keyword="ast",
|
||||
digest=DigestConfig(),
|
||||
digest=DigestConfig(
|
||||
include_paths=[
|
||||
"./tests/ci/ast_fuzzer_check.py",
|
||||
],
|
||||
docker=["clickhouse/fuzzer"],
|
||||
),
|
||||
run_command="ast_fuzzer_check.py",
|
||||
run_always=True,
|
||||
runner_type=Runners.FUZZER_UNIT_TESTER,
|
||||
|
@ -1,7 +1,9 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, List, Union
|
||||
from typing import Any, Iterator, List, Union, Optional
|
||||
|
||||
|
||||
class WithIter(type):
|
||||
@ -42,3 +44,67 @@ class GHActions:
|
||||
for line in lines:
|
||||
print(line)
|
||||
print("::endgroup::")
|
||||
|
||||
|
||||
class Shell:
    @classmethod
    def run_strict(cls, command):
        subprocess.run(
            command + " 2>&1",
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )

    @classmethod
    def run(cls, command):
        res = ""
        result = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
        if result.returncode == 0:
            res = result.stdout
        return res.strip()

    @classmethod
    def check(cls, command):
        result = subprocess.run(
            command + " 2>&1",
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
        return result.returncode == 0


class Utils:
    @staticmethod
    def get_failed_tests_number(description: str) -> Optional[int]:
        description = description.lower()

        pattern = r"fail:\s*(\d+)\s*(?=,|$)"
        match = re.search(pattern, description)
        if match:
            return int(match.group(1))
        return None

    @staticmethod
    def is_killed_with_oom():
        if Shell.check(
            "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'"
        ):
            return True
        return False

    @staticmethod
    def clear_dmesg():
        Shell.run("sudo dmesg --clear ||:")
||||
|
710
tests/ci/create_release.py
Executable file
@ -0,0 +1,710 @@
|
||||
import argparse
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from contextlib import contextmanager
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
from typing import Iterator, List
|
||||
|
||||
from git_helper import Git, GIT_PREFIX
|
||||
from ssh import SSHAgent
|
||||
from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET
|
||||
from s3_helper import S3Helper
|
||||
from autoscale_runners_lambda.lambda_shared.pr import Labels
|
||||
from version_helper import (
|
||||
FILE_WITH_VERSION_PATH,
|
||||
GENERATED_CONTRIBUTORS,
|
||||
get_abs_path,
|
||||
get_version_from_repo,
|
||||
update_cmake_version,
|
||||
update_contributors,
|
||||
VersionType,
|
||||
)
|
||||
from ci_config import CI
|
||||
|
||||
CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH)
|
||||
CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS)
|
||||
|
||||
|
||||
class ShellRunner:
|
||||
|
||||
@classmethod
|
||||
def run(
|
||||
cls, command, check_retcode=True, print_output=True, async_=False, dry_run=False
|
||||
):
|
||||
if dry_run:
|
||||
print(f"Dry-run: Would run shell command: [{command}]")
|
||||
return 0, ""
|
||||
print(f"Running shell command: [{command}]")
|
||||
if async_:
|
||||
subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with
|
||||
return 0, ""
|
||||
result = subprocess.run(
|
||||
command + " 2>&1",
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
if print_output:
|
||||
print(result.stdout)
|
||||
if check_retcode:
|
||||
assert result.returncode == 0, f"Return code [{result.returncode}]"
|
||||
return result.returncode, result.stdout
|
||||
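Unlike ci_utils.Shell, this runner returns a (return_code, stdout) pair and can short-circuit in dry-run mode, which the workflow relies on when the --dry-run input is set. A couple of illustrative calls follow (the command strings and tag name are examples only, not taken from this commit):

retcode, out = ShellRunner.run("git rev-parse HEAD")  # raises on a non-zero exit code
print(out)
ShellRunner.run("git push origin v24.7.2.1-stable", dry_run=True)  # only prints what would run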
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ReleaseInfo:
|
||||
version: str
|
||||
release_tag: str
|
||||
release_branch: str
|
||||
commit_sha: str
|
||||
# lts or stable
|
||||
codename: str
|
||||
|
||||
@staticmethod
|
||||
def from_file(file_path: str) -> "ReleaseInfo":
|
||||
with open(file_path, "r", encoding="utf-8") as json_file:
|
||||
res = json.load(json_file)
|
||||
return ReleaseInfo(**res)
|
||||
|
||||
@staticmethod
|
||||
def prepare(commit_ref: str, release_type: str, outfile: str) -> None:
|
||||
Path(outfile).parent.mkdir(parents=True, exist_ok=True)
|
||||
Path(outfile).unlink(missing_ok=True)
|
||||
version = None
|
||||
release_branch = None
|
||||
release_tag = None
|
||||
codename = None
|
||||
assert release_type in ("patch", "new")
|
||||
if release_type == "new":
|
||||
# check commit_ref is right and on a right branch
|
||||
ShellRunner.run(
|
||||
f"git merge-base --is-ancestor origin/{commit_ref} origin/master"
|
||||
)
|
||||
with checkout(commit_ref):
|
||||
_, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}")
|
||||
# Git() must be inside "with checkout" contextmanager
|
||||
git = Git()
|
||||
version = get_version_from_repo(git=git)
|
||||
release_branch = "master"
|
||||
expected_prev_tag = f"v{version.major}.{version.minor}.1.1-new"
|
||||
version.bump().with_description(VersionType.NEW)
|
||||
assert (
|
||||
git.latest_tag == expected_prev_tag
|
||||
), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]"
|
||||
release_tag = version.describe
|
||||
codename = (
|
||||
VersionType.STABLE
|
||||
) # dummy value (artifactory won't be updated for new release)
|
||||
if release_type == "patch":
|
||||
with checkout(commit_ref):
|
||||
_, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}")
|
||||
# Git() must be inside "with checkout" contextmanager
|
||||
git = Git()
|
||||
version = get_version_from_repo(git=git)
|
||||
codename = version.get_stable_release_type()
|
||||
version.with_description(codename)
|
||||
release_branch = f"{version.major}.{version.minor}"
|
||||
release_tag = version.describe
|
||||
ShellRunner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags")
|
||||
# check commit is right and on a right branch
|
||||
ShellRunner.run(
|
||||
f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}"
|
||||
)
|
||||
if version.patch == 1:
|
||||
expected_version = copy(version)
|
||||
expected_version.bump()
|
||||
expected_tag_prefix = (
|
||||
f"v{expected_version.major}.{expected_version.minor}-"
|
||||
)
|
||||
expected_tag_suffix = "-new"
|
||||
else:
|
||||
expected_tag_prefix = (
|
||||
f"v{version.major}.{version.minor}.{version.patch-1}."
|
||||
)
|
||||
expected_tag_suffix = f"-{version.get_stable_release_type()}"
|
||||
if git.latest_tag.startswith(
|
||||
expected_tag_prefix
|
||||
) and git.latest_tag.endswith(expected_tag_suffix):
|
||||
pass
|
||||
else:
|
||||
assert (
|
||||
False
|
||||
), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]"
|
||||
|
||||
assert (
|
||||
release_branch
|
||||
and commit_sha
|
||||
and release_tag
|
||||
and version
|
||||
and codename in ("lts", "stable")
|
||||
)
|
||||
res = ReleaseInfo(
|
||||
release_branch=release_branch,
|
||||
commit_sha=commit_sha,
|
||||
release_tag=release_tag,
|
||||
version=version.string,
|
||||
codename=codename,
|
||||
)
|
||||
with open(outfile, "w", encoding="utf-8") as f:
|
||||
print(json.dumps(dataclasses.asdict(res), indent=2), file=f)
|
||||
|
||||
def push_release_tag(self, dry_run: bool) -> None:
|
||||
if dry_run:
|
||||
# remove locally created tag from prev run
|
||||
ShellRunner.run(
|
||||
f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:"
|
||||
)
|
||||
# Create release tag
|
||||
print(
|
||||
f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]"
|
||||
)
|
||||
tag_message = f"Release {self.release_tag}"
|
||||
ShellRunner.run(
|
||||
f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}"
|
||||
)
|
||||
cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}"
|
||||
ShellRunner.run(cmd_push_tag, dry_run=dry_run)
|
||||
|
||||
@staticmethod
|
||||
def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None:
|
||||
cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}"
|
||||
ShellRunner.run(cmd, dry_run=dry_run)
|
||||
|
||||
def push_new_release_branch(self, dry_run: bool) -> None:
|
||||
assert (
|
||||
self.release_branch == "master"
|
||||
), "New release branch can be created only for release type [new]"
|
||||
git = Git()
|
||||
version = get_version_from_repo(git=git)
|
||||
new_release_branch = f"{version.major}.{version.minor}"
|
||||
stable_release_type = version.get_stable_release_type()
|
||||
version_after_release = copy(version)
|
||||
version_after_release.bump()
|
||||
assert (
|
||||
version_after_release.string == self.version
|
||||
), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]"
|
||||
if dry_run:
|
||||
# remove locally created branch from prev run
|
||||
ShellRunner.run(
|
||||
f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch} ||:"
|
||||
)
|
||||
print(
|
||||
f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]"
|
||||
)
|
||||
with checkout(self.release_branch):
|
||||
with checkout_new(new_release_branch):
|
||||
pr_labels = f"--label {Labels.RELEASE}"
|
||||
if stable_release_type == VersionType.LTS:
|
||||
pr_labels += f" --label {Labels.RELEASE_LTS}"
|
||||
cmd_push_branch = (
|
||||
f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}"
|
||||
)
|
||||
ShellRunner.run(cmd_push_branch, dry_run=dry_run)
|
||||
|
||||
print("Create and push backport tags for new release branch")
|
||||
ReleaseInfo._create_gh_label(
|
||||
f"v{new_release_branch}-must-backport", "10dbed", dry_run=dry_run
|
||||
)
|
||||
ReleaseInfo._create_gh_label(
|
||||
f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run
|
||||
)
|
||||
ShellRunner.run(
|
||||
f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}'
|
||||
--head {new_release_branch} {pr_labels}
|
||||
--body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'
|
||||
""",
|
||||
dry_run=dry_run,
|
||||
)
|
||||
|
||||
def update_version_and_contributors_list(self, dry_run: bool) -> None:
|
||||
# Bump version, update contributors list, create PR
|
||||
branch_upd_version_contributors = f"bump_version_{self.version}"
|
||||
with checkout(self.commit_sha):
|
||||
git = Git()
|
||||
version = get_version_from_repo(git=git)
|
||||
if self.release_branch == "master":
|
||||
version.bump()
|
||||
version.with_description(VersionType.TESTING)
|
||||
else:
|
||||
version.with_description(version.get_stable_release_type())
|
||||
assert (
|
||||
version.string == self.version
|
||||
), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]"
|
||||
with checkout(self.release_branch):
|
||||
with checkout_new(branch_upd_version_contributors):
|
||||
update_cmake_version(version)
|
||||
update_contributors(raise_error=True)
|
||||
cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'"
|
||||
cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}"
|
||||
body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md")
|
||||
actor = os.getenv("GITHUB_ACTOR", "") or "me"
|
||||
cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}"
|
||||
ShellRunner.run(cmd_commit_version_upd, dry_run=dry_run)
|
||||
ShellRunner.run(cmd_push_branch, dry_run=dry_run)
|
||||
ShellRunner.run(cmd_create_pr, dry_run=dry_run)
|
||||
if dry_run:
|
||||
ShellRunner.run(
|
||||
f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'"
|
||||
)
|
||||
ShellRunner.run(
|
||||
f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'"
|
||||
)
|
||||
|
||||
def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None:
|
||||
repo = os.getenv("GITHUB_REPOSITORY")
|
||||
assert repo
|
||||
cmds = []
|
||||
cmds.append(
|
||||
f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}"
|
||||
)
|
||||
for file in packages_files:
|
||||
cmds.append(f"gh release upload {self.release_tag} {file}")
|
||||
if not dry_run:
|
||||
for cmd in cmds:
|
||||
ShellRunner.run(cmd)
|
||||
else:
|
||||
print("Dry-run, would run commands:")
|
||||
print("\n * ".join(cmds))
|
||||
|
||||
|
||||
class RepoTypes:
|
||||
RPM = "rpm"
|
||||
DEBIAN = "deb"
|
||||
TGZ = "tgz"
|
||||
|
||||
|
||||
class PackageDownloader:
|
||||
PACKAGES = (
|
||||
"clickhouse-client",
|
||||
"clickhouse-common-static",
|
||||
"clickhouse-common-static-dbg",
|
||||
"clickhouse-keeper",
|
||||
"clickhouse-keeper-dbg",
|
||||
"clickhouse-server",
|
||||
)
|
||||
|
||||
EXTRA_PACKAGES = (
|
||||
"clickhouse-library-bridge",
|
||||
"clickhouse-odbc-bridge",
|
||||
)
|
||||
PACKAGE_TYPES = (CI.BuildNames.PACKAGE_RELEASE, CI.BuildNames.PACKAGE_AARCH64)
|
||||
MACOS_PACKAGE_TO_BIN_SUFFIX = {
|
||||
CI.BuildNames.BINARY_DARWIN: "macos",
|
||||
CI.BuildNames.BINARY_DARWIN_AARCH64: "macos-aarch64",
|
||||
}
|
||||
LOCAL_DIR = "/tmp/packages"
|
||||
|
||||
@classmethod
|
||||
def _get_arch_suffix(cls, package_arch, repo_type):
|
||||
if package_arch == CI.BuildNames.PACKAGE_RELEASE:
|
||||
return (
|
||||
"amd64" if repo_type in (RepoTypes.DEBIAN, RepoTypes.TGZ) else "x86_64"
|
||||
)
|
||||
elif package_arch == CI.BuildNames.PACKAGE_AARCH64:
|
||||
return (
|
||||
"arm64" if repo_type in (RepoTypes.DEBIAN, RepoTypes.TGZ) else "aarch64"
|
||||
)
|
||||
else:
|
||||
assert False, "BUG"
|
||||
|
||||
def __init__(self, release, commit_sha, version):
|
||||
assert version.startswith(release), "Invalid release branch or version"
|
||||
major, minor = map(int, release.split("."))
|
||||
self.package_names = list(self.PACKAGES)
|
||||
if major > 24 or (major == 24 and minor > 3):
|
||||
self.package_names += list(self.EXTRA_PACKAGES)
|
||||
self.release = release
|
||||
self.commit_sha = commit_sha
|
||||
self.version = version
|
||||
self.s3 = S3Helper()
|
||||
self.deb_package_files = []
|
||||
self.rpm_package_files = []
|
||||
self.tgz_package_files = []
|
||||
# just binaries for macos
|
||||
self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"]
|
||||
self.file_to_type = {}
|
||||
|
||||
ShellRunner.run(f"mkdir -p {self.LOCAL_DIR}")
|
||||
|
||||
for package_type in self.PACKAGE_TYPES:
|
||||
for package in self.package_names:
|
||||
deb_package_file_name = f"{package}_{self.version}_{self._get_arch_suffix(package_type, RepoTypes.DEBIAN)}.deb"
|
||||
self.deb_package_files.append(deb_package_file_name)
|
||||
self.file_to_type[deb_package_file_name] = package_type
|
||||
|
||||
rpm_package_file_name = f"{package}-{self.version}.{self._get_arch_suffix(package_type, RepoTypes.RPM)}.rpm"
|
||||
self.rpm_package_files.append(rpm_package_file_name)
|
||||
self.file_to_type[rpm_package_file_name] = package_type
|
||||
|
||||
tgz_package_file_name = f"{package}-{self.version}-{self._get_arch_suffix(package_type, RepoTypes.TGZ)}.tgz"
|
||||
self.tgz_package_files.append(tgz_package_file_name)
|
||||
self.file_to_type[tgz_package_file_name] = package_type
|
||||
tgz_package_file_name += ".sha512"
|
||||
self.tgz_package_files.append(tgz_package_file_name)
|
||||
self.file_to_type[tgz_package_file_name] = package_type
|
||||
|
||||
def get_deb_packages_files(self):
|
||||
return self.deb_package_files
|
||||
|
||||
def get_rpm_packages_files(self):
|
||||
return self.rpm_package_files
|
||||
|
||||
def get_tgz_packages_files(self):
|
||||
return self.tgz_package_files
|
||||
|
||||
def get_macos_packages_files(self):
|
||||
return self.macos_package_files
|
||||
|
||||
def get_packages_names(self):
|
||||
return self.package_names
|
||||
|
||||
def get_all_packages_files(self):
|
||||
assert self.local_tgz_packages_ready()
|
||||
assert self.local_deb_packages_ready()
|
||||
assert self.local_rpm_packages_ready()
|
||||
assert self.local_macos_packages_ready()
|
||||
res = []
|
||||
for package_file in (
|
||||
self.deb_package_files
|
||||
+ self.rpm_package_files
|
||||
+ self.tgz_package_files
|
||||
+ self.macos_package_files
|
||||
):
|
||||
res.append(self.LOCAL_DIR + "/" + package_file)
|
||||
return res
|
||||
|
||||
def run(self):
|
||||
ShellRunner.run(f"rm -rf {self.LOCAL_DIR}/*")
|
||||
for package_file in (
|
||||
self.deb_package_files + self.rpm_package_files + self.tgz_package_files
|
||||
):
|
||||
print(f"Downloading: [{package_file}]")
|
||||
s3_path = "/".join(
|
||||
[
|
||||
self.release,
|
||||
self.commit_sha,
|
||||
self.file_to_type[package_file],
|
||||
package_file,
|
||||
]
|
||||
)
|
||||
self.s3.download_file(
|
||||
bucket=S3_BUILDS_BUCKET,
|
||||
s3_path=s3_path,
|
||||
local_file_path="/".join([self.LOCAL_DIR, package_file]),
|
||||
)
|
||||
|
||||
for macos_package, bin_suffix in self.MACOS_PACKAGE_TO_BIN_SUFFIX.items():
|
||||
binary_name = "clickhouse"
|
||||
destination_binary_name = f"{binary_name}-{bin_suffix}"
|
||||
assert destination_binary_name in self.macos_package_files
|
||||
print(
|
||||
f"Downloading: [{macos_package}] binary to [{destination_binary_name}]"
|
||||
)
|
||||
s3_path = "/".join(
|
||||
[
|
||||
self.release,
|
||||
self.commit_sha,
|
||||
macos_package,
|
||||
binary_name,
|
||||
]
|
||||
)
|
||||
self.s3.download_file(
|
||||
bucket=S3_BUILDS_BUCKET,
|
||||
s3_path=s3_path,
|
||||
local_file_path="/".join([self.LOCAL_DIR, destination_binary_name]),
|
||||
)
|
||||
|
||||
def local_deb_packages_ready(self) -> bool:
|
||||
assert self.deb_package_files
|
||||
for package_file in self.deb_package_files:
|
||||
print(f"Check package is downloaded [{package_file}]")
|
||||
if not Path(self.LOCAL_DIR + "/" + package_file).is_file():
|
||||
return False
|
||||
return True
|
||||
|
||||
def local_rpm_packages_ready(self) -> bool:
|
||||
assert self.rpm_package_files
|
||||
for package_file in self.rpm_package_files:
|
||||
print(f"Check package is downloaded [{package_file}]")
|
||||
if not Path(self.LOCAL_DIR + "/" + package_file).is_file():
|
||||
return False
|
||||
return True
|
||||
|
||||
def local_tgz_packages_ready(self) -> bool:
|
||||
assert self.tgz_package_files
|
||||
for package_file in self.tgz_package_files:
|
||||
print(f"Check package is downloaded [{package_file}]")
|
||||
if not Path(self.LOCAL_DIR + "/" + package_file).is_file():
|
||||
return False
|
||||
return True
|
||||
|
||||
def local_macos_packages_ready(self) -> bool:
|
||||
assert self.macos_package_files
|
||||
for package_file in self.macos_package_files:
|
||||
print(f"Check package is downloaded [{package_file}]")
|
||||
if not Path(self.LOCAL_DIR + "/" + package_file).is_file():
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description="Creates release",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prepare-release-info",
|
||||
action="store_true",
|
||||
help="Initial step to prepare info like release branch, release tag, etc.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--push-release-tag",
|
||||
action="store_true",
|
||||
help="Creates and pushes git tag",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--push-new-release-branch",
|
||||
action="store_true",
|
||||
help="Creates and pushes new release branch and corresponding service gh tags for backports",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--create-bump-version-pr",
|
||||
action="store_true",
|
||||
help="Updates version, contributors' list and creates PR",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--download-packages",
|
||||
action="store_true",
|
||||
help="Downloads all required packages from s3",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--create-gh-release",
|
||||
action="store_true",
|
||||
help="Create GH Release object and attach all packages",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ref",
|
||||
type=str,
|
||||
help="the commit hash or branch",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--release-type",
|
||||
choices=("new", "patch"),
|
||||
# dest="release_type",
|
||||
help="a release type to bump the major.minor.patch version part, "
|
||||
"new branch is created only for the value 'new'",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="do not make any actual changes in the repo, just show what will be done",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--outfile",
|
||||
default="",
|
||||
type=str,
|
||||
help="output file to write json result to, if not set - stdout",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--infile",
|
||||
default="",
|
||||
type=str,
|
||||
help="input file with release info",
|
||||
)
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@contextmanager
def checkout(ref: str) -> Iterator[None]:
    _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD")
    rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
    assert orig_ref
    if ref not in (orig_ref,):
        ShellRunner.run(f"{GIT_PREFIX} checkout {ref}")
    try:
        yield
    except (Exception, KeyboardInterrupt) as e:
        print(f"ERROR: Exception [{e}]")
        ShellRunner.run(rollback_cmd)
        raise
    ShellRunner.run(rollback_cmd)


@contextmanager
def checkout_new(ref: str) -> Iterator[None]:
    _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD")
    rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}"
    assert orig_ref
    ShellRunner.run(f"{GIT_PREFIX} checkout -b {ref}")
    try:
        yield
    except (Exception, KeyboardInterrupt) as e:
        print(f"ERROR: Exception [{e}]")
        ShellRunner.run(rollback_cmd)
        raise
    ShellRunner.run(rollback_cmd)
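Both context managers return the working tree to the original branch even when the body raises, which is what update_version_and_contributors_list() relies on. A typical call site looks roughly like the sketch below (branch names and the commit are invented for illustration):

# Sketch of the intended usage, assuming a checked-out repo and the helpers above.
with checkout("24.7"):  # jump to the release branch, restore HEAD afterwards
    with checkout_new("bump_version_24.7.2.1"):  # create a work branch on top of it
        ShellRunner.run(f"{GIT_PREFIX} commit --allow-empty -m 'hypothetical change'")
        ShellRunner.run(
            f"{GIT_PREFIX} push --set-upstream origin bump_version_24.7.2.1", dry_run=True
        )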
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
assert args.dry_run
|
||||
|
||||
# prepare ssh for git if needed
|
||||
_ssh_agent = None
|
||||
_key_pub = None
|
||||
if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""):
|
||||
_key = os.getenv("ROBOT_CLICKHOUSE_SSH_KEY")
|
||||
_ssh_agent = SSHAgent()
|
||||
_key_pub = _ssh_agent.add(_key)
|
||||
_ssh_agent.print_keys()
|
||||
|
||||
if args.prepare_release_info:
|
||||
assert (
|
||||
args.ref and args.release_type and args.outfile
|
||||
), "--ref, --release-type and --outfile must be provided with --prepare-release-info"
|
||||
ReleaseInfo.prepare(
|
||||
commit_ref=args.ref, release_type=args.release_type, outfile=args.outfile
|
||||
)
|
||||
if args.push_release_tag:
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
release_info.push_release_tag(dry_run=args.dry_run)
|
||||
if args.push_new_release_branch:
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
release_info.push_new_release_branch(dry_run=args.dry_run)
|
||||
if args.create_bump_version_pr:
|
||||
# TODO: store link to PR in release info
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
release_info.update_version_and_contributors_list(dry_run=args.dry_run)
|
||||
if args.download_packages:
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
p = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
p.run()
|
||||
if args.create_gh_release:
|
||||
assert args.infile, "--infile <release info file path> must be provided"
|
||||
release_info = ReleaseInfo.from_file(args.infile)
|
||||
p = PackageDownloader(
|
||||
release=release_info.release_branch,
|
||||
commit_sha=release_info.commit_sha,
|
||||
version=release_info.version,
|
||||
)
|
||||
release_info.create_gh_release(p.get_all_packages_files(), args.dry_run)
|
||||
|
||||
# tear down ssh
|
||||
if _ssh_agent and _key_pub:
|
||||
_ssh_agent.remove(_key_pub)
|
||||
|
||||
|
||||
"""
|
||||
Prepare release machine:
|
||||
|
||||
### INSTALL PACKAGES
|
||||
sudo apt update
|
||||
sudo apt install --yes --no-install-recommends python3-dev python3-pip gh unzip
|
||||
sudo apt install --yes python3-boto3
|
||||
sudo apt install --yes python3-github
|
||||
sudo apt install --yes python3-unidiff
|
||||
sudo apt install --yes s3fs
|
||||
|
||||
### INSTALL AWS CLI
|
||||
cd /tmp
|
||||
curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip"
|
||||
unzip awscliv2.zip
|
||||
sudo ./aws/install
|
||||
rm -rf aws*
|
||||
cd -
|
||||
|
||||
### INSTALL GH ACTIONS RUNNER:
|
||||
# Create a folder
|
||||
RUNNER_VERSION=2.317.0
|
||||
cd ~
|
||||
mkdir actions-runner && cd actions-runner
|
||||
# Download the latest runner package
|
||||
runner_arch() {
|
||||
case $(uname -m) in
|
||||
x86_64 )
|
||||
echo x64;;
|
||||
aarch64 )
|
||||
echo arm64;;
|
||||
esac
|
||||
}
|
||||
curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz
|
||||
# Extract the installer
|
||||
tar xzf ./actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz
|
||||
rm ./actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz
|
||||
|
||||
### Install reprepro:
|
||||
cd ~
|
||||
sudo apt install dpkg-dev libgpgme-dev libdb-dev libbz2-dev liblzma-dev libarchive-dev shunit2 db-util debhelper
|
||||
git clone https://salsa.debian.org/debian/reprepro.git
|
||||
cd reprepro
|
||||
dpkg-buildpackage -b --no-sign && sudo dpkg -i ../reprepro_$(dpkg-parsechangelog --show-field Version)_$(dpkg-architecture -q DEB_HOST_ARCH).deb
|
||||
|
||||
### Install createrepo-c:
|
||||
sudo apt install createrepo-c
|
||||
createrepo_c --version
|
||||
#Version: 0.17.3 (Features: DeltaRPM LegacyWeakdeps )
|
||||
|
||||
### Import gpg sign key
|
||||
gpg --import key.pgp
|
||||
gpg --list-secret-keys
|
||||
|
||||
### Install docker
|
||||
sudo su; cd ~
|
||||
|
||||
deb_arch() {
|
||||
case $(uname -m) in
|
||||
x86_64 )
|
||||
echo amd64;;
|
||||
aarch64 )
|
||||
echo arm64;;
|
||||
esac
|
||||
}
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
|
||||
|
||||
echo "deb [arch=$(deb_arch) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
|
||||
sudo apt-get update
|
||||
sudo apt-get install --yes --no-install-recommends docker-ce docker-buildx-plugin docker-ce-cli containerd.io
|
||||
|
||||
sudo usermod -aG docker ubuntu
|
||||
|
||||
# enable ipv6 in containers (fixed-cidr-v6 is some random network mask)
|
||||
cat <<EOT > /etc/docker/daemon.json
|
||||
{
|
||||
"ipv6": true,
|
||||
"fixed-cidr-v6": "2001:db8:1::/64",
|
||||
"log-driver": "json-file",
|
||||
"log-opts": {
|
||||
"max-file": "5",
|
||||
"max-size": "1000m"
|
||||
},
|
||||
"insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"],
|
||||
"registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
|
||||
}
|
||||
EOT
|
||||
|
||||
# if docker build does not work:
|
||||
sudo systemctl restart docker
|
||||
docker buildx rm mybuilder
|
||||
docker buildx create --name mybuilder --driver docker-container --use
|
||||
docker buildx inspect mybuilder --bootstrap
|
||||
|
||||
### Install tailscale
|
||||
|
||||
### Configure GH runner
|
||||
"""
|
@ -11,7 +11,6 @@ from os import path as p
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from build_check import get_release_or_pr
|
||||
from build_download_helper import read_build_urls
|
||||
from docker_images_helper import DockerImageData, docker_login
|
||||
from env_helper import (
|
||||
@ -22,7 +21,7 @@ from env_helper import (
|
||||
TEMP_PATH,
|
||||
)
|
||||
from git_helper import Git
|
||||
from pr_info import PRInfo
|
||||
from pr_info import PRInfo, EventType
|
||||
from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults
|
||||
from stopwatch import Stopwatch
|
||||
from tee_popen import TeePopen
|
||||
@ -63,6 +62,12 @@ def parse_args() -> argparse.Namespace:
|
||||
help="a version to build, automaticaly got from version_helper, accepts either "
|
||||
"tag ('refs/tags/' is removed automatically) or a normal 22.2.2.2 format",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sha",
|
||||
type=str,
|
||||
default="",
|
||||
help="sha of the commit to use packages from",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--release-type",
|
||||
type=str,
|
||||
@ -122,7 +127,7 @@ def parse_args() -> argparse.Namespace:
|
||||
|
||||
|
||||
def retry_popen(cmd: str, log_file: Path) -> int:
|
||||
max_retries = 5
|
||||
max_retries = 2
|
||||
for retry in range(max_retries):
|
||||
# From time to time docker build may failed. Curl issues, or even push
|
||||
# It will sleep progressively 5, 15, 30 and 50 seconds between retries
|
||||
@ -370,13 +375,22 @@ def main():
|
||||
tags = gen_tags(args.version, args.release_type)
|
||||
repo_urls = {}
|
||||
direct_urls: Dict[str, List[str]] = {}
|
||||
release_or_pr, _ = get_release_or_pr(pr_info, args.version)
|
||||
if pr_info.event_type == EventType.PULL_REQUEST:
|
||||
release_or_pr = str(pr_info.number)
|
||||
sha = pr_info.sha
|
||||
elif pr_info.event_type == EventType.PUSH and pr_info.is_master:
|
||||
release_or_pr = str(0)
|
||||
sha = pr_info.sha
|
||||
else:
|
||||
release_or_pr = f"{args.version.major}.{args.version.minor}"
|
||||
sha = args.sha
|
||||
assert sha
|
||||
|
||||
for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")):
|
||||
if not args.bucket_prefix:
|
||||
repo_urls[arch] = (
|
||||
f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/"
|
||||
f"{release_or_pr}/{pr_info.sha}/{build_name}"
|
||||
f"{release_or_pr}/{sha}/{build_name}"
|
||||
)
|
||||
else:
|
||||
repo_urls[arch] = f"{args.bucket_prefix}/{build_name}"
|
||||
|
@ -108,6 +108,7 @@ def get_run_command(
|
||||
"--privileged "
|
||||
f"{ci_logs_args}"
|
||||
f"--volume={repo_path}/tests:/usr/share/clickhouse-test "
|
||||
f"--volume={repo_path}/utils/grpc-client:/usr/share/clickhouse-utils/grpc-client "
|
||||
f"{volume_with_broken_test}"
|
||||
f"--volume={result_path}:/test_output "
|
||||
f"--volume={server_log_path}:/var/log/clickhouse-server "
|
||||
|
@ -1,4 +1,5 @@
|
||||
"""Module to get the token for GitHub"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import time
|
||||
|
@ -26,6 +26,8 @@ from pr_info import PRInfo
|
||||
from report import SUCCESS, FAILURE
|
||||
from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY
|
||||
from synchronizer_utils import SYNC_BRANCH_PREFIX
|
||||
from ci_config import CI
|
||||
from ci_utils import Utils
|
||||
|
||||
# The team name for accepted approvals
|
||||
TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core")
|
||||
@ -251,23 +253,77 @@ def main():
|
||||
# set mergeable check status and exit
|
||||
commit = get_commit(gh, args.pr_info.sha)
|
||||
statuses = get_commit_filtered_statuses(commit)
|
||||
state = trigger_mergeable_check(
|
||||
commit,
|
||||
statuses,
|
||||
workflow_failed=(args.wf_status != "success"),
|
||||
)
|
||||
|
||||
# Process upstream StatusNames.SYNC
|
||||
pr_info = PRInfo()
|
||||
if (
|
||||
pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/")
|
||||
and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY
|
||||
):
|
||||
print("Updating upstream statuses")
|
||||
update_upstream_sync_status(pr_info, state)
|
||||
max_failed_tests_per_job = 0
|
||||
job_name_with_max_failures = None
|
||||
total_failed_tests = 0
|
||||
failed_to_get_info = False
|
||||
has_failed_statuses = False
|
||||
for status in statuses:
|
||||
if not CI.is_required(status.context):
|
||||
continue
|
||||
if status.state == FAILURE:
|
||||
has_failed_statuses = True
|
||||
failed_cnt = Utils.get_failed_tests_number(status.description)
|
||||
if failed_cnt is None:
|
||||
failed_to_get_info = True
|
||||
else:
|
||||
if failed_cnt > max_failed_tests_per_job:
|
||||
job_name_with_max_failures = status.context
|
||||
max_failed_tests_per_job = failed_cnt
|
||||
total_failed_tests += failed_cnt
|
||||
elif status.state != SUCCESS and status.context not in (
|
||||
CI.StatusNames.SYNC,
|
||||
CI.StatusNames.PR_CHECK,
|
||||
):
|
||||
# do not block CI on failures in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK)
|
||||
has_failed_statuses = True
|
||||
print(
|
||||
f"Unexpected status for [{status.context}]: [{status.state}] - block further testing"
|
||||
)
|
||||
failed_to_get_info = True
|
||||
|
||||
if args.wf_status != "success":
|
||||
# exit with 1 to rerun on workflow failed job restart
|
||||
can_continue = True
|
||||
if total_failed_tests > CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI:
|
||||
print(
|
||||
f"Required check has [{total_failed_tests}] failed - block further testing"
|
||||
)
|
||||
can_continue = False
|
||||
if max_failed_tests_per_job > CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI:
|
||||
print(
|
||||
f"Job [{job_name_with_max_failures}] has [{max_failed_tests_per_job}] failures - block further testing"
|
||||
)
|
||||
can_continue = False
|
||||
if failed_to_get_info:
|
||||
print("Unexpected commit status state - block further testing")
|
||||
can_continue = False
|
||||
if args.wf_status != SUCCESS and not has_failed_statuses:
|
||||
# workflow failed but reason is unknown as no failed statuses present
|
||||
can_continue = False
|
||||
print(
|
||||
"WARNING: Either the runner is faulty or the operating status is unknown. The first is self-healing, the second requires investigation."
|
||||
)
|
||||
|
||||
if args.wf_status == SUCCESS or has_failed_statuses:
|
||||
# do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted
|
||||
state = trigger_mergeable_check(
|
||||
commit,
|
||||
statuses,
|
||||
)
|
||||
# Process upstream StatusNames.SYNC
|
||||
pr_info = PRInfo()
|
||||
if (
|
||||
pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/")
|
||||
and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY
|
||||
):
|
||||
print("Updating upstream statuses")
|
||||
update_upstream_sync_status(pr_info, state)
|
||||
else:
|
||||
print(
|
||||
"Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status"
|
||||
)
|
||||
|
||||
if not can_continue:
|
||||
sys.exit(1)
|
||||
sys.exit(0)
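Condensed, the auto-block rule introduced above reduces to a small predicate. The helper below is only a readability restatement under the thresholds added to ci_config.py in this commit (5 total failures, 2 per job); it ignores the died-runner case, which the code above handles separately:

def ci_can_continue(total_failed: int, max_failed_in_one_job: int, info_missing: bool) -> bool:
    # Block further testing when failures look systemic rather than flaky.
    if total_failed > 5:  # CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI
        return False
    if max_failed_in_one_job > 2:  # CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI
        return False
    if info_missing:  # a failed status description could not be parsed
        return False
    return True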
|
||||
|
||||
|
@ -23,7 +23,7 @@ from typing import (
|
||||
from build_download_helper import get_gh_api
|
||||
from ci_config import CI
|
||||
from ci_utils import normalize_string
|
||||
from env_helper import REPORT_PATH, TEMP_PATH
|
||||
from env_helper import REPORT_PATH, GITHUB_WORKSPACE
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -244,7 +244,8 @@ HTML_TEST_PART = """
|
||||
"""
|
||||
|
||||
BASE_HEADERS = ["Test name", "Test status"]
|
||||
JOB_REPORT_FILE = Path(TEMP_PATH) / "job_report.json"
|
||||
# should not be in TEMP directory or any directory that may be cleaned during the job execution
|
||||
JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json"
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -296,6 +297,33 @@ class JobReport:
|
||||
build_dir_for_upload: Union[Path, str] = ""
|
||||
# if False no GH commit status will be created by CI
|
||||
need_commit_status: bool = True
|
||||
# indicates that this is not real job report but report for the job that was skipped by rerun check
|
||||
job_skipped: bool = False
|
||||
# indicates that report generated by CI script in order to check later if job was killed before real report is generated
|
||||
pre_report: bool = False
|
||||
exit_code: int = -1
|
||||
|
||||
@staticmethod
|
||||
def create_pre_report() -> "JobReport":
|
||||
return JobReport(
|
||||
status=ERROR,
|
||||
description="",
|
||||
test_results=[],
|
||||
start_time=datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
duration=0.0,
|
||||
additional_files=[],
|
||||
pre_report=True,
|
||||
)
|
||||
|
||||
def update_duration(self):
|
||||
if not self.start_time:
|
||||
self.duration = 0.0
|
||||
else:
|
||||
start_time = datetime.datetime.strptime(
|
||||
self.start_time, "%Y-%m-%d %H:%M:%S"
|
||||
)
|
||||
current_time = datetime.datetime.utcnow()
|
||||
self.duration = (current_time - start_time).total_seconds()
|
||||
|
||||
def __post_init__(self):
|
||||
assert self.status in (SUCCESS, ERROR, FAILURE, PENDING)
|
||||
|
@ -37,9 +37,9 @@ class SSHAgent:
|
||||
ssh_options = (
|
||||
"," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else ""
|
||||
)
|
||||
os.environ[
|
||||
"SSH_OPTIONS"
|
||||
] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no"
|
||||
os.environ["SSH_OPTIONS"] = (
|
||||
f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no"
|
||||
)
|
||||
|
||||
def add(self, key):
|
||||
key_pub = self._key_pub(key)
|
||||
|
@ -16,7 +16,15 @@ from docker_images_helper import get_docker_image, pull_image
|
||||
from env_helper import IS_CI, REPO_COPY, TEMP_PATH, GITHUB_EVENT_PATH
|
||||
from git_helper import GIT_PREFIX, git_runner
|
||||
from pr_info import PRInfo
|
||||
from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results
|
||||
from report import (
|
||||
ERROR,
|
||||
FAILURE,
|
||||
SUCCESS,
|
||||
JobReport,
|
||||
TestResults,
|
||||
read_test_results,
|
||||
FAIL,
|
||||
)
|
||||
from ssh import SSHKey
|
||||
from stopwatch import Stopwatch
|
||||
|
||||
@ -192,15 +200,6 @@ def main():
|
||||
future = executor.submit(subprocess.run, cmd_shell, shell=True)
|
||||
_ = future.result()
|
||||
|
||||
autofix_description = ""
|
||||
if args.push:
|
||||
try:
|
||||
commit_push_staged(pr_info)
|
||||
except subprocess.SubprocessError:
|
||||
# do not fail the whole script if the autofix didn't work out
|
||||
logging.error("Unable to push the autofix. Continue.")
|
||||
autofix_description = "Failed to push autofix to the PR. "
|
||||
|
||||
subprocess.check_call(
|
||||
f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} "
|
||||
f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || "
|
||||
@ -210,6 +209,21 @@ def main():
|
||||
|
||||
state, description, test_results, additional_files = process_result(temp_path)
|
||||
|
||||
autofix_description = ""
|
||||
fail_cnt = 0
|
||||
for result in test_results:
|
||||
if result.status in (FAILURE, FAIL):
|
||||
# do not autofix if not only black failed
|
||||
fail_cnt += 1
|
||||
|
||||
if args.push and fail_cnt == 1:
|
||||
try:
|
||||
commit_push_staged(pr_info)
|
||||
except subprocess.SubprocessError:
|
||||
# do not fail the whole script if the autofix didn't work out
|
||||
logging.error("Unable to push the autofix. Continue.")
|
||||
autofix_description = "Failed to push autofix to the PR. "
|
||||
|
||||
JobReport(
|
||||
description=f"{autofix_description}{description}",
|
||||
test_results=test_results,
|
||||
|
@ -1,6 +1,8 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import unittest
|
||||
import random
|
||||
|
||||
from ci_config import CI
|
||||
import ci as CIPY
|
||||
from ci_settings import CiSettings
|
||||
@ -57,6 +59,18 @@ class TestCIConfig(unittest.TestCase):
|
||||
f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]",
|
||||
)
|
||||
|
||||
def test_job_config_has_proper_values(self):
|
||||
for job in CI.JobNames:
|
||||
if CI.JOB_CONFIGS[job].reference_job_name:
|
||||
reference_job_config = CI.JOB_CONFIGS[
|
||||
CI.JOB_CONFIGS[job].reference_job_name
|
||||
]
|
||||
# reference job must run in all workflows and has digest
|
||||
self.assertTrue(reference_job_config.pr_only == False)
|
||||
self.assertTrue(reference_job_config.release_only == False)
|
||||
self.assertTrue(reference_job_config.run_always == False)
|
||||
self.assertTrue(reference_job_config.digest != CI.DigestConfig())
|
||||
|
||||
def test_required_checks(self):
|
||||
for job in CI.REQUIRED_CHECKS:
|
||||
if job in (CI.StatusNames.PR_CHECK, CI.StatusNames.SYNC):
|
||||
@ -417,7 +431,7 @@ class TestCIConfig(unittest.TestCase):
|
||||
assert not ci_cache.jobs_to_skip
|
||||
assert not ci_cache.jobs_to_wait
|
||||
|
||||
# pretend there are pending jobs that we neet to wait
|
||||
# pretend there are pending jobs that we need to wait
|
||||
ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do)
|
||||
for job, config in ci_cache.jobs_to_wait.items():
|
||||
assert not config.pending_batches
|
||||
@ -489,3 +503,76 @@ class TestCIConfig(unittest.TestCase):
|
||||
self.assertCountEqual(
|
||||
list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, all_jobs_in_wf
|
||||
)
|
||||
|
||||
def test_ci_py_filters_not_affected_jobs_in_prs(self):
|
||||
"""
|
||||
checks ci.py filters not affected jobs in PRs
|
||||
"""
|
||||
settings = CiSettings()
|
||||
settings.no_ci_cache = True
|
||||
pr_info = PRInfo(github_event=_TEST_EVENT_JSON)
|
||||
pr_info.event_type = EventType.PULL_REQUEST
|
||||
pr_info.number = 123
|
||||
assert pr_info.is_pr
|
||||
ci_cache = CIPY._configure_jobs(
|
||||
S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True
|
||||
)
|
||||
self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list")
|
||||
assert not ci_cache.jobs_to_wait
|
||||
assert not ci_cache.jobs_to_skip
|
||||
|
||||
MOCK_AFFECTED_JOBS = [
|
||||
CI.JobNames.STATELESS_TEST_S3_DEBUG,
|
||||
CI.JobNames.STRESS_TEST_TSAN,
|
||||
]
|
||||
MOCK_REQUIRED_BUILDS = []
|
||||
|
||||
# pretend there are pending jobs that we need to wait
|
||||
for job, job_config in ci_cache.jobs_to_do.items():
|
||||
if job in MOCK_AFFECTED_JOBS:
|
||||
MOCK_REQUIRED_BUILDS += job_config.required_builds
|
||||
elif job not in MOCK_AFFECTED_JOBS:
|
||||
ci_cache.jobs_to_wait[job] = job_config
|
||||
|
||||
for job, job_config in ci_cache.jobs_to_do.items():
|
||||
if job_config.reference_job_name:
|
||||
# jobs with reference_job_name in config are not supposed to have records in the cache - continue
|
||||
continue
|
||||
if job in MOCK_AFFECTED_JOBS:
|
||||
continue
|
||||
for batch in range(job_config.num_batches):
|
||||
# add any record into cache
|
||||
record = CiCache.Record(
|
||||
record_type=random.choice(
|
||||
[
|
||||
CiCache.RecordType.FAILED,
|
||||
CiCache.RecordType.PENDING,
|
||||
CiCache.RecordType.SUCCESSFUL,
|
||||
]
|
||||
),
|
||||
job_name=job,
|
||||
job_digest=ci_cache.job_digests[job],
|
||||
batch=batch,
|
||||
num_batches=job_config.num_batches,
|
||||
release_branch=True,
|
||||
)
|
||||
for record_t_, records_ in ci_cache.records.items():
|
||||
if record_t_.value == CiCache.RecordType.FAILED.value:
|
||||
records_[record.to_str_key()] = record
|
||||
|
||||
ci_cache.filter_out_not_affected_jobs()
|
||||
expected_to_do = (
|
||||
[
|
||||
CI.JobNames.BUILD_CHECK,
|
||||
]
|
||||
+ MOCK_AFFECTED_JOBS
|
||||
+ MOCK_REQUIRED_BUILDS
|
||||
)
|
||||
self.assertCountEqual(
|
||||
list(ci_cache.jobs_to_wait),
|
||||
[
|
||||
CI.JobNames.BUILD_CHECK,
|
||||
]
|
||||
+ MOCK_REQUIRED_BUILDS,
|
||||
)
|
||||
self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do)
|
||||
|
@ -172,14 +172,10 @@ class TestCIOptions(unittest.TestCase):
|
||||
job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER)
|
||||
for job in _TEST_JOB_LIST
|
||||
}
|
||||
jobs_configs[
|
||||
"fuzzers"
|
||||
].run_by_label = (
|
||||
jobs_configs["fuzzers"].run_by_label = (
|
||||
"TEST_LABEL" # check "fuzzers" appears in the result due to the label
|
||||
)
|
||||
jobs_configs[
|
||||
"Integration tests (asan)"
|
||||
].release_only = (
|
||||
jobs_configs["Integration tests (asan)"].release_only = (
|
||||
True # still must be included as it's set with include keywords
|
||||
)
|
||||
filtered_jobs = list(
|
||||
@ -311,9 +307,9 @@ class TestCIOptions(unittest.TestCase):
|
||||
job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER)
|
||||
for job in _TEST_JOB_LIST
|
||||
}
|
||||
jobs_configs[
|
||||
"fuzzers"
|
||||
].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result
|
||||
jobs_configs["fuzzers"].run_by_label = (
|
||||
"TEST_LABEL" # check "fuzzers" does not appears in the result
|
||||
)
|
||||
jobs_configs["Integration tests (asan)"].release_only = True
|
||||
filtered_jobs = list(
|
||||
ci_options.apply(
|
||||
|
@ -72,6 +72,19 @@ class ClickHouseVersion:
|
||||
return self.patch_update()
|
||||
raise KeyError(f"wrong part {part} is used")
|
||||
|
||||
    def bump(self) -> "ClickHouseVersion":
        if self.minor < 12:
            self._minor += 1
            self._revision += 1
            self._patch = 1
            self._tweak = 1
        else:
            self._major += 1
            self._revision += 1
            self._patch = 1
            self._tweak = 1
        return self
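For orientation, a minimal sketch of the version arithmetic above, written as a standalone function (it is hypothetical, not the repo's class; the December rollover is shown under the assumption that a new major starts again at minor 1, which the method above leaves to other helpers):

def next_release_numbers(major: int, minor: int) -> tuple:
    # Eleven releases per year bump the minor; the twelfth starts a new major.
    # Patch and tweak always restart at 1.
    if minor < 12:
        return (major, minor + 1, 1, 1)
    return (major + 1, 1, 1, 1)  # assumption: minor restarts at 1 on a major bump

assert next_release_numbers(24, 7) == (24, 8, 1, 1)    # v24.7 is followed by v24.8
assert next_release_numbers(24, 12) == (25, 1, 1, 1)   # v24.12 is followed by v25.1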
|
||||
|
||||
def major_update(self) -> "ClickHouseVersion":
|
||||
if self._git is not None:
|
||||
self._git.update()
|
||||
@ -148,6 +161,11 @@ class ClickHouseVersion:
|
||||
"""our X.3 and X.8 are LTS"""
|
||||
return self.minor % 5 == 3
|
||||
|
||||
def get_stable_release_type(self) -> str:
|
||||
if self.is_lts:
|
||||
return VersionType.LTS
|
||||
return VersionType.STABLE
|
||||
|
||||
def as_dict(self) -> VERSIONS:
|
||||
return {
|
||||
"revision": self.revision,
|
||||
@ -168,6 +186,7 @@ class ClickHouseVersion:
|
||||
raise ValueError(f"version type {version_type} not in {VersionType.VALID}")
|
||||
self._description = version_type
|
||||
self._describe = f"v{self.string}-{version_type}"
|
||||
return self
|
||||
|
||||
def copy(self) -> "ClickHouseVersion":
|
||||
copy = ClickHouseVersion(
|
||||
|
@ -711,9 +711,9 @@ def get_localzone():
|
||||
|
||||
class SettingsRandomizer:
|
||||
settings = {
|
||||
"max_insert_threads": lambda: 32
|
||||
if random.random() < 0.03
|
||||
else random.randint(1, 3),
|
||||
"max_insert_threads": lambda: (
|
||||
12 if random.random() < 0.03 else random.randint(1, 3)
|
||||
),
|
||||
"group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000),
|
||||
"group_by_two_level_threshold_bytes": threshold_generator(
|
||||
0.2, 0.2, 1, 50000000
|
||||
@ -729,7 +729,7 @@ class SettingsRandomizer:
|
||||
"prefer_localhost_replica": lambda: random.randint(0, 1),
|
||||
"max_block_size": lambda: random.randint(8000, 100000),
|
||||
"max_joined_block_size_rows": lambda: random.randint(8000, 100000),
|
||||
"max_threads": lambda: 64 if random.random() < 0.03 else random.randint(1, 3),
|
||||
"max_threads": lambda: 32 if random.random() < 0.03 else random.randint(1, 3),
|
||||
"optimize_append_index": lambda: random.randint(0, 1),
|
||||
"optimize_if_chain_to_multiif": lambda: random.randint(0, 1),
|
||||
"optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1),
|
||||
@ -1750,7 +1750,7 @@ class TestCase:
|
||||
return TestResult(
|
||||
self.name,
|
||||
TestStatus.FAIL,
|
||||
FailureReason.INTERNAL_QUERY_FAIL,
|
||||
FailureReason.TIMEOUT,
|
||||
total_time,
|
||||
self.add_info_about_settings(
|
||||
self.get_description_from_exception_info(sys.exc_info())
|
||||
@ -2189,11 +2189,26 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool
|
||||
sys.stdout.flush()
|
||||
|
||||
while True:
|
||||
test_result = test_case.run(
|
||||
args, test_suite, client_options, server_logs_level
|
||||
)
|
||||
test_result = test_case.process_result(test_result, MESSAGES)
|
||||
if not test_result.need_retry:
|
||||
# This is the upper level timeout
|
||||
# It helps with completely frozen processes, like in case of gdb errors
|
||||
def timeout_handler(signum, frame):
|
||||
raise TimeoutError("Test execution timed out")
|
||||
|
||||
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
signal.alarm(int(args.timeout * 1.1))
|
||||
test_result = None
|
||||
try:
|
||||
test_result = test_case.run(
|
||||
args, test_suite, client_options, server_logs_level
|
||||
)
|
||||
test_result = test_case.process_result(test_result, MESSAGES)
|
||||
break
|
||||
except TimeoutError:
|
||||
break
|
||||
finally:
|
||||
signal.alarm(0)
|
||||
|
||||
if not test_result or not test_result.need_retry:
|
||||
break
|
||||
restarted_tests.append(test_result)
|
||||
|
||||
@ -2452,6 +2467,10 @@ def override_envs(*args_, **kwargs):
|
||||
run_tests_array(*args_, **kwargs)
|
||||
|
||||
|
||||
def run_tests_process(*args, **kwargs):
|
||||
return run_tests_array(*args, **kwargs)
|
||||
|
||||
|
||||
def do_run_tests(jobs, test_suite: TestSuite):
|
||||
if jobs > 1 and len(test_suite.parallel_tests) > 0:
|
||||
print(
|
||||
@ -2475,39 +2494,70 @@ def do_run_tests(jobs, test_suite: TestSuite):
        # of failures will be nearly the same for all tests from the group.
        random.shuffle(test_suite.parallel_tests)

        batch_size = max(1, len(test_suite.parallel_tests) // jobs)
        batch_size = max(1, (len(test_suite.parallel_tests) // jobs) + 1)
        parallel_tests_array = []
        for job in range(jobs):
            range_ = job * batch_size, job * batch_size + batch_size
            batch = test_suite.parallel_tests[range_[0] : range_[1]]
            parallel_tests_array.append((batch, batch_size, test_suite, True))

        try:
            with multiprocessing.Pool(processes=jobs + 1) as pool:
                future = pool.map_async(run_tests_array, parallel_tests_array)
        processes = []

                if args.run_sequential_tests_in_parallel:
                    # Run parallel tests and sequential tests at the same time
                    # Sequential tests will use different ClickHouse instance
                    # In this process we can safely override values in `args` and `os.environ`
                    future_seq = pool.map_async(
                        override_envs,
                        [
                            (
                                test_suite.sequential_tests,
                                len(test_suite.sequential_tests),
                                test_suite,
                                False,
                            )
                        ],
                    )
                    future_seq.wait()
        for test_batch in parallel_tests_array:
            process = multiprocessing.Process(
                target=run_tests_process, args=(test_batch,)
            )
            processes.append(process)
            process.start()

                future.wait()
        finally:
            pool.terminate()
            pool.close()
            pool.join()
        if args.run_sequential_tests_in_parallel:
            # Run parallel tests and sequential tests at the same time
            # Sequential tests will use different ClickHouse instance
            # In this process we can safely override values in `args` and `os.environ`
            process = multiprocessing.Process(
                target=override_envs,
                args=(
                    (
                        test_suite.sequential_tests,
                        len(test_suite.sequential_tests),
                        test_suite,
                        False,
                    ),
                ),
            )
            processes.append(process)
            process.start()

        while processes:
            sys.stdout.flush()
            # Periodically check the server for hangs
            # and stop all processes in this case
            try:
                clickhouse_execute(
                    args,
                    query="SELECT 1 /*hang up check*/",
                    max_http_retries=5,
                    timeout=20,
                )
            except Exception:
                print("Hang up check failed")
                server_died.set()

            if server_died.is_set():
                print("Server died, terminating all processes...")
                kill_gdb_if_any()
                # Wait for test results
                sleep(args.timeout)
                for p in processes:
                    if p.is_alive():
                        p.terminate()
                break

            for p in processes[:]:
                if not p.is_alive():
                    processes.remove(p)

            sleep(5)

        if not args.run_sequential_tests_in_parallel:
            run_tests_array(
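With this change do_run_tests no longer relies on multiprocessing.Pool: each batch of parallel tests runs in its own multiprocessing.Process, and the parent polls the server with a cheap SELECT 1 to detect hangs. The sketch below is a simplified stand-alone version of that scheme under assumed names (run_batch, is_server_alive); it is not the script's actual code.

import multiprocessing
import sys
import time


def run_batch(batch):
    # Stand-in worker; in clickhouse-test this role is played by run_tests_process.
    for test in batch:
        print(f"running {test}")


def run_batches_in_processes(batches, is_server_alive, poll_interval=5):
    # Start one process per batch, then poll until all workers finish
    # or the liveness probe fails.
    processes = []
    for batch in batches:
        p = multiprocessing.Process(target=run_batch, args=(batch,))
        processes.append(p)
        p.start()

    while processes:
        sys.stdout.flush()
        if not is_server_alive():  # assumed probe, e.g. a "SELECT 1" over HTTP
            print("Server died, terminating all processes...")
            for p in processes:
                if p.is_alive():
                    p.terminate()
            break
        for p in processes[:]:  # reap workers that have finished
            if not p.is_alive():
                processes.remove(p)
        time.sleep(poll_interval)


if __name__ == "__main__":
    run_batches_in_processes(
        [["test_a", "test_b"], ["test_c"]],
        is_server_alive=lambda: True,
        poll_interval=1,
    )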
@ -3358,6 +3408,14 @@ def parse_args():
    return parser.parse_args()


class Terminated(KeyboardInterrupt):
    pass


def signal_handler(sig, frame):
    raise Terminated(f"Terminated with {sig} signal")


if __name__ == "__main__":
    stop_time = None
    exit_code = multiprocessing.Value("i", 0)
@ -3369,6 +3427,9 @@ if __name__ == "__main__":
    # infinite tests processes left
    # (new process group is required to avoid killing some parent processes)
    os.setpgid(0, 0)
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGHUP, signal_handler)

    try:
        args = parse_args()
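The new Terminated exception and the registered handlers make SIGTERM, SIGINT and SIGHUP unwind the runner the same way Ctrl+C would, so existing cleanup paths still run; putting the runner in its own process group keeps a later group-wide kill from taking down unrelated parent processes. A rough, self-contained illustration of the idea follows (the sleep stands in for the real main loop):

import os
import signal
import time


class Terminated(KeyboardInterrupt):
    # Subclassing KeyboardInterrupt lets existing except-blocks treat
    # external termination exactly like Ctrl+C.
    pass


def signal_handler(sig, frame):
    raise Terminated(f"Terminated with {sig} signal")


if __name__ == "__main__":
    os.setpgid(0, 0)  # POSIX only: become the leader of a fresh process group
    for sig in (signal.SIGTERM, signal.SIGINT, signal.SIGHUP):
        signal.signal(sig, signal_handler)

    try:
        time.sleep(3600)  # stand-in for the test runner's main loop
    except Terminated as exc:
        print(f"shutting down cleanly: {exc}")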
3
tests/config/config.d/grpc_protocol.xml
Normal file
3
tests/config/config.d/grpc_protocol.xml
Normal file
@ -0,0 +1,3 @@
<clickhouse>
    <grpc_port>9100</grpc_port>
</clickhouse>
@ -27,6 +27,7 @@ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/graphite_alternative.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/grpc_protocol.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/merge_tree_settings.xml $DEST_SERVER_PATH/config.d/
@ -1454,9 +1454,9 @@ class ClickHouseCluster:
    def setup_azurite_cmd(self, instance, env_variables, docker_compose_yml_dir):
        self.with_azurite = True
        env_variables["AZURITE_PORT"] = str(self.azurite_port)
        env_variables[
            "AZURITE_STORAGE_ACCOUNT_URL"
        ] = f"http://azurite1:{env_variables['AZURITE_PORT']}/devstoreaccount1"
        env_variables["AZURITE_STORAGE_ACCOUNT_URL"] = (
            f"http://azurite1:{env_variables['AZURITE_PORT']}/devstoreaccount1"
        )
        env_variables["AZURITE_CONNECTION_STRING"] = (
            f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;"
            f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
@ -1653,9 +1653,9 @@ class ClickHouseCluster:

        # Code coverage files will be placed in database directory
        # (affect only WITH_COVERAGE=1 build)
        env_variables[
            "LLVM_PROFILE_FILE"
        ] = "/var/lib/clickhouse/server_%h_%p_%m.profraw"
        env_variables["LLVM_PROFILE_FILE"] = (
            "/var/lib/clickhouse/server_%h_%p_%m.profraw"
        )

        clickhouse_start_command = CLICKHOUSE_START_COMMAND
        if clickhouse_log_file:
@ -1668,9 +1668,9 @@ class ClickHouseCluster:
            cluster=self,
            base_path=self.base_dir,
            name=name,
            base_config_dir=base_config_dir
            if base_config_dir
            else self.base_config_dir,
            base_config_dir=(
                base_config_dir if base_config_dir else self.base_config_dir
            ),
            custom_main_configs=main_configs or [],
            custom_user_configs=user_configs or [],
            custom_dictionaries=dictionaries or [],
@ -19,9 +19,9 @@ def cluster():
    cluster = ClickHouseCluster(__file__)
    cluster.add_instance(
        "node",
        main_configs=["configs/storage_arm.xml"]
        if is_arm()
        else ["configs/storage_amd.xml"],
        main_configs=(
            ["configs/storage_arm.xml"] if is_arm() else ["configs/storage_amd.xml"]
        ),
        with_minio=True,
        with_hdfs=not is_arm(),
    )
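The cluster.py and test-fixture hunks above carry no behaviour change; they are mechanical rewrites in the parenthesized style that newer Black releases emit, wrapping multi-line conditional expressions and long subscripted assignments in explicit parentheses. A tiny before/after illustration with made-up names (not code from the repository):

import random

# Before: the conditional expression leans on the enclosing call's parentheses
# and spreads across several lines.
old_style = dict(
    max_threads=64
    if random.random() < 0.03
    else random.randint(1, 3),
)

# After: the same expression is wrapped in its own parentheses.
new_style = dict(
    max_threads=(64 if random.random() < 0.03 else random.randint(1, 3)),
)

assert set(old_style) == set(new_style) == {"max_threads"}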
@ -5,6 +5,7 @@ in this test we write into per-node tables and read from the distributed table.
The default database in the distributed table definition is left empty on purpose to test
default database deduction.
"""

import pytest

from helpers.client import QueryRuntimeException
@ -1,4 +0,0 @@
<clickhouse>
    <disable_internal_dns_cache>1</disable_internal_dns_cache>
    <max_concurrent_queries>250</max_concurrent_queries>
</clickhouse>
@ -1,11 +0,0 @@
<clickhouse>
    <users>
        <test_dns>
            <password/>
            <networks>
                <host_regexp>test1\.example\.com$</host_regexp>
            </networks>
            <profile>default</profile>
        </test_dns>
    </users>
</clickhouse>
@ -1,5 +0,0 @@
<clickhouse>
    <listen_host>::</listen_host>
    <listen_host>0.0.0.0</listen_host>
    <listen_try>1</listen_try>
</clickhouse>
@ -1,8 +0,0 @@
. {
    hosts /example.com {
        reload "20ms"
        fallthrough
    }
    forward . 127.0.0.11
    log
}
Some files were not shown because too many files have changed in this diff.