Merge branch 'master' into fix_assertion_ddl_worker
Commit: 13bc93c171

.github/workflows/nightly.yml
@@ -7,6 +7,7 @@ env:
"on":
schedule:
- cron: '13 3 * * *'
workflow_dispatch:

jobs:
DockerHubPushAarch64:
.github/workflows/pull_request.yml
@@ -1733,6 +1733,51 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
TestsBugfixCheck:
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/tests_bugfix_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Tests bugfix validate check (actions)
KILL_TIMEOUT=3600
REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Bugfix test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"

TEMP_PATH="${TEMP_PATH}/integration" \
REPORTS_PATH="${REPORTS_PATH}/integration" \
python3 integration_test_check.py "Integration tests bugfix validate check" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'

TEMP_PATH="${TEMP_PATH}/stateless" \
REPORTS_PATH="${REPORTS_PATH}/stateless" \
python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'

python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
############################ FUNCTIONAl STATEFUL TESTS #######################################
##############################################################################################
@@ -267,7 +267,10 @@ endif ()

# Allows to build stripped binary in a separate directory
if (OBJCOPY_PATH AND READELF_PATH)
set(BUILD_STRIPPED_BINARIES_PREFIX "" CACHE STRING "Build stripped binaries with debug info in separate directory")
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
if (INSTALL_STRIPPED_BINARIES)
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
endif()
endif()

cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
@@ -4,11 +4,12 @@
import sys
import json


def parse_block(block=[], options=[]):

#print('block is here', block)
#show_query = False
#show_query = options.show_query
# print('block is here', block)
# show_query = False
# show_query = options.show_query
result = []
query = block[0].strip()
if len(block) > 4:
@@ -20,9 +21,9 @@ def parse_block(block=[], options=[]):
timing2 = block[2].strip().split()[1]
timing3 = block[3].strip().split()[1]
if options.show_queries:
result.append( query )
result.append(query)
if not options.show_first_timings:
result += [ timing1 , timing2, timing3 ]
result += [timing1, timing2, timing3]
else:
result.append(timing1)
return result
@@ -37,12 +38,12 @@ def read_stats_file(options, fname):

for line in f.readlines():

if 'SELECT' in line:
if "SELECT" in line:
if len(block) > 1:
result.append( parse_block(block, options) )
block = [ line ]
elif 'Time:' in line:
block.append( line )
result.append(parse_block(block, options))
block = [line]
elif "Time:" in line:
block.append(line)

return result

@@ -50,7 +51,7 @@ def read_stats_file(options, fname):
def compare_stats_files(options, arguments):
result = []
file_output = []
pyplot_colors = ['y', 'b', 'g', 'r']
pyplot_colors = ["y", "b", "g", "r"]
for fname in arguments[1:]:
file_output.append((read_stats_file(options, fname)))
if len(file_output[0]) > 0:
@@ -58,65 +59,92 @@ def compare_stats_files(options, arguments):
for idx, data_set in enumerate(file_output):
int_result = []
for timing in data_set:
int_result.append(float(timing[0])) #y values
result.append([[x for x in range(0, len(int_result)) ], int_result,
pyplot_colors[idx] + '^' ] )
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )
int_result.append(float(timing[0]))  # y values
result.append(
[
[x for x in range(0, len(int_result))],
int_result,
pyplot_colors[idx] + "^",
]
)
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )

return result


def parse_args():
from optparse import OptionParser
parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries")
parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings")
parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode")

parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
parser.add_option(
"-q",
"--show-queries",
help="Show statements along with timings",
action="store_true",
dest="show_queries",
)
parser.add_option(
"-f",
"--show-first-timings",
help="Show only first tries timings",
action="store_true",
dest="show_first_timings",
)
parser.add_option(
"-c",
"--compare-mode",
help="Prepare output for pyplot comparing result files.",
action="store",
dest="compare_mode",
)
(options, arguments) = parser.parse_args(sys.argv)
if len(arguments) < 2:
parser.print_usage()
sys.exit(1)
return ( options, arguments )
return (options, arguments)


def gen_pyplot_code(options, arguments):
result = ''
result = ""
data_sets = compare_stats_files(options, arguments)
for idx, data_set in enumerate(data_sets, start=0):
x_values, y_values, line_style = data_set
result += '\nplt.plot('
result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style )
result += ', label=\'%s try\')' % idx
print('import matplotlib.pyplot as plt')
result += "\nplt.plot("
result += "%s, %s, '%s'" % (x_values, y_values, line_style)
result += ", label='%s try')" % idx
print("import matplotlib.pyplot as plt")
print(result)
print( 'plt.xlabel(\'Try number\')' )
print( 'plt.ylabel(\'Timing\')' )
print( 'plt.title(\'Benchmark query timings\')' )
print('plt.legend()')
print('plt.show()')
print("plt.xlabel('Try number')")
print("plt.ylabel('Timing')")
print("plt.title('Benchmark query timings')")
print("plt.legend()")
print("plt.show()")


def gen_html_json(options, arguments):
tuples = read_stats_file(options, arguments[1])
print('{')
print("{")
print('"system: GreenPlum(x2),')
print(('"version": "%s",' % '4.3.9.1'))
print(('"version": "%s",' % "4.3.9.1"))
print('"data_size": 10000000,')
print('"time": "",')
print('"comments": "",')
print('"result":')
print('[')
print("[")
for s in tuples:
print(s)
print(']')
print('}')
print("]")
print("}")


def main():
( options, arguments ) = parse_args()
(options, arguments) = parse_args()
if len(arguments) > 2:
gen_pyplot_code(options, arguments)
else:
gen_html_json(options, arguments)

if __name__ == '__main__':

if __name__ == "__main__":
main()
@@ -1,15 +1,14 @@
#!/usr/bin/env bash

BINARY_PATH=$1
BINARY_NAME=$(basename $BINARY_PATH)
BINARY_NAME=$(basename "$BINARY_PATH")
DESTINATION_STRIPPED_DIR=$2
OBJCOPY_PATH=${3:objcopy}
READELF_PATH=${4:readelf}

BUILD_ID=$($READELF_PATH -n $1 | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID_PREFIX=${BUILD_ID:0:2}
BUILD_ID_SUFFIX=${BUILD_ID:2}
TEMP_BINARY_PATH="${BINARY_PATH}_temp"

DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
@@ -17,9 +16,13 @@ DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"

$OBJCOPY_PATH --only-keep-debug "$BINARY_PATH" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"

touch "$TEMP_BINARY_PATH"
$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$BINARY_PATH" "$TEMP_BINARY_PATH"
$OBJCOPY_PATH --strip-all "$TEMP_BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
rm -f "$TEMP_BINARY_PATH"
cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"

strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
@@ -95,6 +95,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
&& apt-get install gcc-11 g++-11 --yes \
&& apt-get clean

# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
ARG NFPM_VERSION=2.15.0

RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \
&& dpkg -i /tmp/nfpm.deb \
&& rm /tmp/nfpm.deb

COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1 | ts"]
CMD ["bash", "-c", "/build.sh 2>&1"]
@@ -1,7 +1,13 @@
#!/usr/bin/env bash

exec &> >(ts)
set -x -e

cache_status () {
ccache --show-config ||:
ccache --show-stats ||:
}

mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64
@@ -19,15 +25,23 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
env
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..

ccache --show-config ||:
ccache --show-stats ||:
cache_status
# clear cache stats
ccache --zero-stats ||:

# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086
ninja $NINJA_FLAGS clickhouse-bundle

ccache --show-config ||:
ccache --show-stats ||:
cache_status

if [ -n "$MAKE_DEB" ]; then
rm -rf /build/packages/root
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086
DESTDIR=/build/packages/root ninja $NINJA_FLAGS install
bash -x /build/packages/build
fi

mv ./programs/clickhouse* /output
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
@@ -84,8 +98,7 @@ fi
# ../docker/packager/other/fuzzer.sh
# fi

ccache --show-config ||:
ccache --show-stats ||:
cache_status

if [ "${CCACHE_DEBUG:-}" == "1" ]
then
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import subprocess
import os
import argparse
@@ -8,36 +8,39 @@ import sys

SCRIPT_PATH = os.path.realpath(__file__)

IMAGE_MAP = {
"deb": "clickhouse/deb-builder",
"binary": "clickhouse/binary-builder",
}

def check_image_exists_locally(image_name):
try:
output = subprocess.check_output("docker images -q {} 2> /dev/null".format(image_name), shell=True)
output = subprocess.check_output(
f"docker images -q {image_name} 2> /dev/null", shell=True
)
return output != ""
except subprocess.CalledProcessError as ex:
except subprocess.CalledProcessError:
return False


def pull_image(image_name):
try:
subprocess.check_call("docker pull {}".format(image_name), shell=True)
subprocess.check_call(f"docker pull {image_name}", shell=True)
return True
except subprocess.CalledProcessError as ex:
logging.info("Cannot pull image {}".format(image_name))
except subprocess.CalledProcessError:
logging.info(f"Cannot pull image {image_name}".format())
return False


def build_image(image_name, filepath):
context = os.path.dirname(filepath)
build_cmd = "docker build --network=host -t {} -f {} {}".format(image_name, filepath, context)
logging.info("Will build image with cmd: '{}'".format(build_cmd))
build_cmd = f"docker build --network=host -t {image_name} -f {filepath} {context}"
logging.info("Will build image with cmd: '%s'", build_cmd)
subprocess.check_call(
build_cmd,
shell=True,
)

def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version):

def run_docker_image_with_env(
image_name, output, env_variables, ch_root, ccache_dir, docker_image_version
):
env_part = " -e ".join(env_variables)
if env_part:
env_part = " -e " + env_part
@@ -47,28 +50,52 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
else:
interactive = ""

cmd = "docker run --network=host --rm --volume={output_path}:/output --volume={ch_root}:/build --volume={ccache_dir}:/ccache {env} {interactive} {img_name}".format(
output_path=output,
ch_root=ch_root,
ccache_dir=ccache_dir,
env=env_part,
img_name=image_name + ":" + docker_image_version,
interactive=interactive
cmd = (
f"docker run --network=host --rm --volume={output}:/output "
f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} "
f"{interactive} {image_name}:{docker_image_version}"
)

logging.info("Will build ClickHouse pkg with cmd: '{}'".format(cmd))
logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd)

subprocess.check_call(cmd, shell=True)

def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):

def is_release_build(build_type, package_type, sanitizer, split_binary):
return (
build_type == ""
and package_type == "deb"
and sanitizer == ""
and not split_binary
)


def parse_env_variables(
build_type,
compiler,
sanitizer,
package_type,
image_type,
cache,
distcc_hosts,
split_binary,
clang_tidy,
version,
author,
official,
additional_pkgs,
with_coverage,
with_binaries,
):
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
ARM_SUFFIX = "-aarch64"
FREEBSD_SUFFIX = "-freebsd"
PPC_SUFFIX = '-ppc64le'
PPC_SUFFIX = "-ppc64le"

result = []
cmake_flags = ['$CMAKE_FLAGS']
result.append("OUTPUT_DIR=/output")
cmake_flags = ["$CMAKE_FLAGS"]

is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
@@ -77,46 +104,72 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)

if is_cross_darwin:
cc = compiler[:-len(DARWIN_SUFFIX)]
cc = compiler[: -len(DARWIN_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/x86_64-apple-darwin-ar")
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/x86_64-apple-darwin-install_name_tool")
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib")
cmake_flags.append(
"-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/"
"x86_64-apple-darwin-install_name_tool"
)
cmake_flags.append(
"-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib"
)
cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake")
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake"
)
elif is_cross_darwin_arm:
cc = compiler[:-len(DARWIN_ARM_SUFFIX)]
cc = compiler[: -len(DARWIN_ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool")
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib")
cmake_flags.append(
"-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/"
"aarch64-apple-darwin-install_name_tool"
)
cmake_flags.append(
"-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib"
)
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake")
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake"
)
elif is_cross_arm:
cc = compiler[:-len(ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
result.append("DEB_ARCH_FLAG=-aarm64")
cc = compiler[: -len(ARM_SUFFIX)]
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake"
)
result.append("DEB_ARCH=arm64")
elif is_cross_freebsd:
cc = compiler[:-len(FREEBSD_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake")
cc = compiler[: -len(FREEBSD_SUFFIX)]
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake"
)
elif is_cross_ppc:
cc = compiler[:-len(PPC_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake")
cc = compiler[: -len(PPC_SUFFIX)]
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
)
else:
cc = compiler
result.append("DEB_ARCH_FLAG=-aamd64")
result.append("DEB_ARCH=amd64")

cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++')
cxx = cc.replace("gcc", "g++").replace("clang", "clang++")

if image_type == "deb":
result.append("DEB_CC={}".format(cc))
result.append("DEB_CXX={}".format(cxx))
# For building fuzzers
result.append("CC={}".format(cc))
result.append("CXX={}".format(cxx))
elif image_type == "binary":
result.append("CC={}".format(cc))
result.append("CXX={}".format(cxx))
cmake_flags.append('-DCMAKE_C_COMPILER=`which {}`'.format(cc))
cmake_flags.append('-DCMAKE_CXX_COMPILER=`which {}`'.format(cxx))
result.append("MAKE_DEB=true")
cmake_flags.append("-DENABLE_TESTS=0")
cmake_flags.append("-DENABLE_UTILS=0")
cmake_flags.append("-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON")
cmake_flags.append("-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON")
cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON")
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
if is_release_build(build_type, package_type, sanitizer, split_binary):
cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")

result.append(f"CC={cc}")
result.append(f"CXX={cxx}")
cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}")
cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")

# Create combined output archive for split build and for performance tests.
if package_type == "performance":
@@ -126,12 +179,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
result.append("COMBINED_OUTPUT=shared_build")

if sanitizer:
result.append("SANITIZER={}".format(sanitizer))
result.append(f"SANITIZER={sanitizer}")
if build_type:
result.append("BUILD_TYPE={}".format(build_type))
result.append(f"BUILD_TYPE={build_type.capitalize()}")
else:
result.append("BUILD_TYPE=None")

if cache == 'distcc':
result.append("CCACHE_PREFIX={}".format(cache))
if cache == "distcc":
result.append(f"CCACHE_PREFIX={cache}")

if cache:
result.append("CCACHE_DIR=/ccache")
@@ -142,109 +197,188 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
# result.append("CCACHE_UMASK=777")

if distcc_hosts:
hosts_with_params = ["{}/24,lzo".format(host) for host in distcc_hosts] + ["localhost/`nproc`"]
result.append('DISTCC_HOSTS="{}"'.format(" ".join(hosts_with_params)))
hosts_with_params = [f"{host}/24,lzo" for host in distcc_hosts] + [
"localhost/`nproc`"
]
result.append('DISTCC_HOSTS="' + " ".join(hosts_with_params) + '"')
elif cache == "distcc":
result.append('DISTCC_HOSTS="{}"'.format("localhost/`nproc`"))
result.append('DISTCC_HOSTS="localhost/`nproc`"')

if alien_pkgs:
result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'")
if additional_pkgs:
result.append("MAKE_APK=true")
result.append("MAKE_RPM=true")
result.append("MAKE_TGZ=true")

if with_binaries == "programs":
result.append('BINARY_OUTPUT=programs')
result.append("BINARY_OUTPUT=programs")
elif with_binaries == "tests":
result.append('ENABLE_TESTS=1')
result.append('BINARY_OUTPUT=tests')
cmake_flags.append('-DENABLE_TESTS=1')
result.append("ENABLE_TESTS=1")
result.append("BINARY_OUTPUT=tests")
cmake_flags.append("-DENABLE_TESTS=1")

if split_binary:
cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1')
cmake_flags.append(
"-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 "
"-DCLICKHOUSE_SPLIT_BINARY=1"
)
# We can't always build utils because it requires too much space, but
# we have to build them at least in some way in CI. The split build is
# probably the least heavy disk-wise.
cmake_flags.append('-DENABLE_UTILS=1')
cmake_flags.append("-DENABLE_UTILS=1")

if clang_tidy:
cmake_flags.append('-DENABLE_CLANG_TIDY=1')
cmake_flags.append('-DENABLE_UTILS=1')
cmake_flags.append('-DENABLE_TESTS=1')
cmake_flags.append('-DENABLE_EXAMPLES=1')
cmake_flags.append("-DENABLE_CLANG_TIDY=1")
cmake_flags.append("-DENABLE_UTILS=1")
cmake_flags.append("-DENABLE_TESTS=1")
cmake_flags.append("-DENABLE_EXAMPLES=1")
# Don't stop on first error to find more clang-tidy errors in one run.
result.append('NINJA_FLAGS=-k0')
result.append("NINJA_FLAGS=-k0")

if with_coverage:
cmake_flags.append('-DWITH_COVERAGE=1')
cmake_flags.append("-DWITH_COVERAGE=1")

if version:
result.append("VERSION_STRING='{}'".format(version))
result.append(f"VERSION_STRING='{version}'")

if author:
result.append("AUTHOR='{}'".format(author))
result.append(f"AUTHOR='{author}'")

if official:
cmake_flags.append('-DYANDEX_OFFICIAL_BUILD=1')
cmake_flags.append("-DYANDEX_OFFICIAL_BUILD=1")

result.append('CMAKE_FLAGS="' + ' '.join(cmake_flags) + '"')
result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')

return result


if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse building script using prebuilt Docker image")
# 'performance' creates a combined .tgz with server and configs to be used for performance test.
parser.add_argument("--package-type", choices=['deb', 'binary', 'performance'], required=True)
parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="ClickHouse building script using prebuilt Docker image",
)
# 'performance' creates a combined .tgz with server
# and configs to be used for performance test.
parser.add_argument(
"--package-type",
choices=("deb", "binary", "performance"),
required=True,
help="a build type",
)
parser.add_argument(
"--clickhouse-repo-path",
default=os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir
),
help="ClickHouse git repository",
)
parser.add_argument("--output-dir", required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
"clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64",
"clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", "clang-13-ppc64le",
"clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13")
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
parser.add_argument(
"--compiler",
choices=(
"clang-11",
"clang-11-darwin",
"clang-11-darwin-aarch64",
"clang-11-aarch64",
"clang-12",
"clang-12-darwin",
"clang-12-darwin-aarch64",
"clang-12-aarch64",
"clang-13",
"clang-13-darwin",
"clang-13-darwin-aarch64",
"clang-13-aarch64",
"clang-13-ppc64le",
"clang-11-freebsd",
"clang-12-freebsd",
"clang-13-freebsd",
"gcc-11",
),
default="clang-13",
help="a compiler to use",
)
parser.add_argument(
"--sanitizer",
choices=("address", "thread", "memory", "undefined", ""),
default="",
)
parser.add_argument("--split-binary", action="store_true")
parser.add_argument("--clang-tidy", action="store_true")
parser.add_argument("--cache", choices=("", "ccache", "distcc"), default="")
parser.add_argument("--ccache_dir", default= os.getenv("HOME", "") + '/.ccache')
parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="")
parser.add_argument(
"--ccache_dir",
default=os.getenv("HOME", "") + "/.ccache",
help="a directory with ccache",
)
parser.add_argument("--distcc-hosts", nargs="+")
parser.add_argument("--force-build-image", action="store_true")
parser.add_argument("--version")
parser.add_argument("--author", default="clickhouse")
parser.add_argument("--author", default="clickhouse", help="a package author")
parser.add_argument("--official", action="store_true")
parser.add_argument("--alien-pkgs", nargs='+', default=[])
parser.add_argument("--additional-pkgs", action="store_true")
parser.add_argument("--with-coverage", action="store_true")
parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="")
parser.add_argument("--docker-image-version", default="latest")
parser.add_argument(
"--with-binaries", choices=("programs", "tests", ""), default=""
)
parser.add_argument(
"--docker-image-version", default="latest", help="docker image tag to use"
)

args = parser.parse_args()
if not os.path.isabs(args.output_dir):
args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir))

image_type = 'binary' if args.package_type == 'performance' else args.package_type
image_name = IMAGE_MAP[image_type]
image_type = "binary" if args.package_type == "performance" else args.package_type
image_name = "clickhouse/binary-builder"

if not os.path.isabs(args.clickhouse_repo_path):
ch_root = os.path.abspath(os.path.join(os.getcwd(), args.clickhouse_repo_path))
else:
ch_root = args.clickhouse_repo_path

if args.alien_pkgs and not image_type == "deb":
raise Exception("Can add alien packages only in deb build")
if args.additional_pkgs and image_type != "deb":
raise Exception("Can build additional packages only in deb build")

if args.with_binaries != "" and not image_type == "deb":
if args.with_binaries != "" and image_type != "deb":
raise Exception("Can add additional binaries only in deb build")

if args.with_binaries != "" and image_type == "deb":
logging.info("Should place {} to output".format(args.with_binaries))
logging.info("Should place %s to output", args.with_binaries)

dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile")
image_with_version = image_name + ":" + args.docker_image_version
if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image:
if (
image_type != "freebsd"
and not check_image_exists_locally(image_name)
or args.force_build_image
):
if not pull_image(image_with_version) or args.force_build_image:
build_image(image_with_version, dockerfile)
env_prepared = parse_env_variables(
args.build_type, args.compiler, args.sanitizer, args.package_type, image_type,
args.cache, args.distcc_hosts, args.split_binary, args.clang_tidy,
args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries)
args.build_type,
args.compiler,
args.sanitizer,
args.package_type,
image_type,
args.cache,
args.distcc_hosts,
args.split_binary,
args.clang_tidy,
args.version,
args.author,
args.official,
args.additional_pkgs,
args.with_coverage,
args.with_binaries,
)

run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir, args.docker_image_version)
logging.info("Output placed into {}".format(args.output_dir))
run_docker_image_with_env(
image_name,
args.output_dir,
env_prepared,
ch_root,
args.ccache_dir,
args.docker_image_version,
)
logging.info("Output placed into %s", args.output_dir)
@@ -11,7 +11,7 @@ def removesuffix(text, suffix):
https://www.python.org/dev/peps/pep-0616/
"""
if suffix and text.endswith(suffix):
return text[:-len(suffix)]
return text[: -len(suffix)]
else:
return text[:]

@@ -3,55 +3,55 @@ import subprocess
import datetime
from flask import Flask, flash, request, redirect, url_for


def run_command(command, wait=False):
print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
lines = []
p = subprocess.Popen(command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
shell=True)
p = subprocess.Popen(
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
)
if wait:
for l in iter(p.stdout.readline, b''):
for l in iter(p.stdout.readline, b""):
lines.append(l)
p.poll()
return (lines, p.returncode)
else:
return(iter(p.stdout.readline, b''), 0)
return (iter(p.stdout.readline, b""), 0)


UPLOAD_FOLDER = './'
ALLOWED_EXTENSIONS = {'txt', 'sh'}
UPLOAD_FOLDER = "./"
ALLOWED_EXTENSIONS = {"txt", "sh"}
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER

@app.route('/')

@app.route("/")
def hello_world():
return 'Hello World'
return "Hello World"


def allowed_file(filename):
return '.' in filename and \
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload', methods=['GET', 'POST'])
@app.route("/upload", methods=["GET", "POST"])
def upload_file():
if request.method == 'POST':
if request.method == "POST":
# check if the post request has the file part
if 'file' not in request.files:
flash('No file part')
if "file" not in request.files:
flash("No file part")
return redirect(request.url)
file = request.files['file']
file = request.files["file"]
# If the user does not select a file, the browser submits an
# empty file without a filename.
if file.filename == '':
flash('No selected file')
if file.filename == "":
flash("No selected file")
return redirect(request.url)
if file and allowed_file(file.filename):
filename = file.filename
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
return redirect(url_for('upload_file', name=filename))
return '''
file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
return redirect(url_for("upload_file", name=filename))
return """
<!doctype html>
<title>Upload new File</title>
<h1>Upload new File</h1>
@@ -59,12 +59,15 @@ def upload_file():
<input type=file name=file>
<input type=submit value=Upload>
</form>
'''
@app.route('/run', methods=['GET', 'POST'])
"""


@app.route("/run", methods=["GET", "POST"])
def parse_request():
data = request.data # data is empty
run_command(data, wait=True)
return 'Ok'
return "Ok"

if __name__ == '__main__':
app.run(port=5011)

if __name__ == "__main__":
app.run(port=5011)
@@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
from threading import Thread
from scipy import stats

logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
logging.basicConfig(
format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
)

total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds


def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds

current = time.perf_counter()
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
print(
f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
)
stage_start_seconds = current


def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
return (
s.replace("\\", "\\\\")
.replace("\t", "\\t")
.replace("\n", "\\n")
.replace("\r", "")
)


parser = argparse.ArgumentParser(description='Run performance test.')
parser = argparse.ArgumentParser(description="Run performance test.")
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
parser.add_argument(
"file",
metavar="FILE",
type=argparse.FileType("r", encoding="utf-8"),
nargs=1,
help="test description file",
)
parser.add_argument(
"--host",
nargs="*",
default=["localhost"],
help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
)
parser.add_argument(
"--port",
nargs="*",
default=[9000],
help="Space-separated list of server port(s). Corresponds to '--host' options.",
)
parser.add_argument(
"--runs", type=int, default=1, help="Number of query runs per server."
)
parser.add_argument(
"--max-queries",
type=int,
default=None,
help="Test no more than this number of queries, chosen at random.",
)
parser.add_argument(
"--queries-to-run",
nargs="*",
type=int,
default=None,
help="Space-separated list of indexes of queries to test.",
)
parser.add_argument(
"--max-query-seconds",
type=int,
default=15,
help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--prewarm-max-query-seconds",
type=int,
default=180,
help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--profile-seconds",
type=int,
default=0,
help="For how many seconds to profile a query for which the performance has changed.",
)
parser.add_argument(
"--long", action="store_true", help="Do not skip the tests tagged as long."
)
parser.add_argument(
"--print-queries", action="store_true", help="Print test queries and exit."
)
parser.add_argument(
"--print-settings", action="store_true", help="Print test settings and exit."
)
parser.add_argument(
"--keep-created-tables",
action="store_true",
help="Don't drop the created tables after the test.",
)
parser.add_argument(
"--use-existing-tables",
action="store_true",
help="Don't create or drop the tables, use the existing ones instead.",
)
args = parser.parse_args()

reportStageEnd('start')
reportStageEnd("start")

test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]

tree = et.parse(args.file[0])
root = tree.getroot()

reportStageEnd('parse')
reportStageEnd("parse")

# Process query parameters
subst_elems = root.findall('substitutions/substitution')
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
subst_elems = root.findall("substitutions/substitution")
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
name = e.find('name').text
values = [v.text for v in e.findall('values/value')]
name = e.find("name").text
values = [v.text for v in e.findall("values/value")]
if not values:
raise Exception(f'No values given for substitution {{{name}}}')
raise Exception(f"No values given for substitution {{{name}}}")

available_parameters[name] = values

@@ -78,7 +146,7 @@ for e in subst_elems:
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates, other_templates = []):
def substitute_parameters(query_templates, other_templates=[]):
query_results = []
other_results = [[]] * (len(other_templates))
for i, q in enumerate(query_templates):
@@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
# and reporting the queries marked as short.
test_queries = []
is_short = []
for e in root.findall('query'):
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]])
for e in root.findall("query"):
new_queries, [new_is_short] = substitute_parameters(
[e.text], [[e.attrib.get("short", "0")]]
)
test_queries += new_queries
is_short += [eval(s) for s in new_is_short]

assert(len(test_queries) == len(is_short))
assert len(test_queries) == len(is_short)

# If we're given a list of queries to run, check that it makes sense.
for i in args.queries_to_run or []:
if i < 0 or i >= len(test_queries):
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
print(
f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
)
exit(1)

# If we're only asked to print the queries, do that and exit.
@@ -125,60 +197,65 @@ if args.print_queries:
# Print short queries
for i, s in enumerate(is_short):
if s:
print(f'short\t{i}')
print(f"short\t{i}")

# If we're only asked to print the settings, do that and exit. These are settings
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
# '--max_memory_usage=10000000'.
if args.print_settings:
for s in root.findall('settings/*'):
print(f'--{s.tag}={s.text}')
for s in root.findall("settings/*"):
print(f"--{s.tag}={s.text}")

exit(0)

# Skip long tests
if not args.long:
for tag in root.findall('.//tag'):
if tag.text == 'long':
print('skipped\tTest is tagged as long.')
for tag in root.findall(".//tag"):
if tag.text == "long":
print("skipped\tTest is tagged as long.")
sys.exit(0)

# Print report threshold for the test if it is set.
ignored_relative_change = 0.05
if 'max_ignored_relative_change' in root.attrib:
if "max_ignored_relative_change" in root.attrib:
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
print(f'report-threshold\t{ignored_relative_change}')
print(f"report-threshold\t{ignored_relative_change}")

reportStageEnd('before-connect')
reportStageEnd("before-connect")

# Open connections
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
servers = [
{"host": host or args.host[0], "port": port or args.port[0]}
for (host, port) in itertools.zip_longest(args.host, args.port)
]
# Force settings_is_important to fail queries on unknown settings.
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
all_connections = [
clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
]

for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')

reportStageEnd('connect')
reportStageEnd("connect")

if not args.use_existing_tables:
# Run drop queries, ignoring errors. Do this before all other activity,
# because clickhouse_driver disconnects on error (this is not configurable),
# and the new connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_query_templates = [q.text for q in root.findall("drop_query")]
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
except:
pass

reportStageEnd('drop-1')
reportStageEnd("drop-1")

# Apply settings.
settings = root.findall('settings/*')
settings = root.findall("settings/*")
for conn_index, c in enumerate(all_connections):
for s in settings:
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
@@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
# the test, which is wrong.
c.execute("select 1")

reportStageEnd('settings')
reportStageEnd("settings")

# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
tables = [e.text for e in root.findall("preconditions/table_exists")]
for t in tables:
for c in all_connections:
try:
res = c.execute("select 1 from {} limit 1".format(t))
except:
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2])
print(f'skipped\t{tsv_escape(skipped_message)}')
skipped_message = " ".join(exception_message.split("\n")[:2])
print(f"skipped\t{tsv_escape(skipped_message)}")
sys.exit(0)

reportStageEnd('preconditions')
reportStageEnd("preconditions")

if not args.use_existing_tables:
# Run create and fill queries. We will run them simultaneously for both
# servers, to save time. The weird XML search + filter is because we want to
# keep the relative order of elements, and etree doesn't support the
# appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*')
if q.tag in ('create_query', 'fill_query')]
create_query_templates = [
q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
]
create_queries = substitute_parameters(create_query_templates)

# Disallow temporary tables, because the clickhouse_driver reconnects on
# errors, and temporary tables are destroyed. We want to be able to continue
# after some errors.
for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'",
file = sys.stderr)
if re.search("create temporary table", q, flags=re.IGNORECASE):
print(
f"Temporary tables are not allowed in performance tests: '{q}'",
file=sys.stderr,
)
sys.exit(1)

def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")

threads = [
Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
Thread(target=do_create, args=(connection, index, create_queries))
for index, connection in enumerate(all_connections)
]

for t in threads:
t.start()
@@ -238,14 +319,16 @@ if not args.use_existing_tables:
for t in threads:
t.join()

reportStageEnd('create')
reportStageEnd("create")

# By default, test all queries.
queries_to_run = range(0, len(test_queries))

if args.max_queries:
# If specified, test a limited number of queries chosen at random.
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
queries_to_run = random.sample(
range(0, len(test_queries)), min(len(test_queries), args.max_queries)
)

if args.queries_to_run:
# Run the specified queries.
@@ -255,16 +338,16 @@ if args.queries_to_run:
profile_total_seconds = 0
for query_index in queries_to_run:
q = test_queries[query_index]
query_prefix = f'{test_name}.query{query_index}'
query_prefix = f"{test_name}.query{query_index}"

# We have some crazy long queries (about 100kB), so trim them to a sane
# length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index.
query_display_name = q
if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({query_index})'
query_display_name = f"{query_display_name[:1000]}...({query_index})"

print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")

# Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc.
@@ -272,10 +355,10 @@ for query_index in queries_to_run:
# new one. We want to run them on the new server only, so that the PR author
# can ensure that the test works properly. Remember the errors we had on
# each server.
query_error_on_connection = [None] * len(all_connections);
query_error_on_connection = [None] * len(all_connections)
for conn_index, c in enumerate(all_connections):
try:
prewarm_id = f'{query_prefix}.prewarm0'
prewarm_id = f"{query_prefix}.prewarm0"

try:
# During the warmup runs, we will also:
@@ -283,25 +366,30 @@ for query_index in queries_to_run:
# * collect profiler traces, which might be helpful for analyzing
# test coverage. We disable profiler for normal runs because
# it makes the results unstable.
res = c.execute(q, query_id = prewarm_id,
settings = {
'max_execution_time': args.prewarm_max_query_seconds,
'query_profiler_real_time_period_ns': 10000000,
'memory_profiler_step': '4Mi',
})
res = c.execute(
q,
query_id=prewarm_id,
settings={
"max_execution_time": args.prewarm_max_query_seconds,
"query_profiler_real_time_period_ns": 10000000,
"memory_profiler_step": "4Mi",
},
)
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message
e.message = prewarm_id + ": " + e.message
raise

print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
print(
f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except KeyboardInterrupt:
raise
except:
# FIXME the driver reconnects on error and we lose settings, so this
# might lead to further errors or unexpected behavior.
query_error_on_connection[conn_index] = traceback.format_exc();
query_error_on_connection[conn_index] = traceback.format_exc()
continue

# Report all errors that ocurred during prewarm and decide what to do next.
@@ -311,14 +399,14 @@ for query_index in queries_to_run:
no_errors = []
for i, e in enumerate(query_error_on_connection):
if e:
print(e, file = sys.stderr)
print(e, file=sys.stderr)
else:
no_errors.append(i)

if len(no_errors) == 0:
continue
elif len(no_errors) < len(all_connections):
print(f'partial\t{query_index}\t{no_errors}')
print(f"partial\t{query_index}\t{no_errors}")

this_query_connections = [all_connections[index] for index in no_errors]

@@ -337,27 +425,34 @@ for query_index in queries_to_run:
all_server_times.append([])

while True:
run_id = f'{query_prefix}.run{run}'
run_id = f"{query_prefix}.run{run}"

for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
res = c.execute(
q,
query_id=run_id,
settings={"max_execution_time": args.max_query_seconds},
)
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
e.message = run_id + ": " + e.message
raise

elapsed = c.last_query.elapsed
all_server_times[conn_index].append(elapsed)

server_seconds += elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")

if elapsed > args.max_query_seconds:
# Do not stop processing pathologically slow queries,
# since this may hide errors in other queries.
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
print(
f"The query no. {query_index} is taking too long to run ({elapsed} s)",
file=sys.stderr,
)

# Be careful with the counter, after this line it's the next iteration
# already.
@@ -386,7 +481,7 @@ for query_index in queries_to_run:
break

client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")

# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much
@@ -397,13 +492,15 @@ for query_index in queries_to_run:
# Don't fail if for some reason there are not enough measurements.
continue

pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
pvalue = stats.ttest_ind(
all_server_times[0], all_server_times[1], equal_var=False
).pvalue
median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
|
||||
continue
|
||||
|
||||
@ -412,25 +509,31 @@ for query_index in queries_to_run:
|
||||
profile_start_seconds = time.perf_counter()
|
||||
run = 0
|
||||
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
|
||||
run_id = f'{query_prefix}.profile{run}'
|
||||
run_id = f"{query_prefix}.profile{run}"
|
||||
|
||||
for conn_index, c in enumerate(this_query_connections):
|
||||
try:
|
||||
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
|
||||
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=run_id,
|
||||
settings={"query_profiler_real_time_period_ns": 10000000},
|
||||
)
|
||||
print(
|
||||
f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (run_id, *e.args)
|
||||
e.message = run_id + ': ' + e.message
|
||||
e.message = run_id + ": " + e.message
|
||||
raise
|
||||
|
||||
run += 1
|
||||
|
||||
profile_total_seconds += time.perf_counter() - profile_start_seconds
|
||||
|
||||
print(f'profile-total\t{profile_total_seconds}')
|
||||
print(f"profile-total\t{profile_total_seconds}")
|
||||
|
||||
reportStageEnd('run')
|
||||
reportStageEnd("run")
|
||||
|
||||
# Run drop queries
if not args.keep_created_tables and not args.use_existing_tables:
@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")

reportStageEnd('drop-2')
reportStageEnd("drop-2")

@ -12,9 +12,13 @@ import pprint
import sys
import traceback

parser = argparse.ArgumentParser(description='Create performance test report')
parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
help='Which report to build')
parser = argparse.ArgumentParser(description="Create performance test report")
parser.add_argument(
"--report",
default="main",
choices=["main", "all-queries"],
help="Which report to build",
)
args = parser.parse_args()

tables = []
|
||||
@ -31,8 +35,8 @@ unstable_partial_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2

color_bad='#ffb0c0'
color_good='#b0d050'
color_bad = "#ffb0c0"
color_good = "#b0d050"

header_template = """
|
||||
<!DOCTYPE html>
|
||||
@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
|
||||
table_anchor = 0
|
||||
row_anchor = 0
|
||||
|
||||
|
||||
def currentTableAnchor():
|
||||
global table_anchor
|
||||
return f'{table_anchor}'
|
||||
return f"{table_anchor}"
|
||||
|
||||
|
||||
def newTableAnchor():
|
||||
global table_anchor
|
||||
table_anchor += 1
|
||||
return currentTableAnchor()
|
||||
|
||||
|
||||
def currentRowAnchor():
|
||||
global row_anchor
|
||||
global table_anchor
|
||||
return f'{table_anchor}.{row_anchor}'
|
||||
return f"{table_anchor}.{row_anchor}"
|
||||
|
||||
|
||||
def nextRowAnchor():
|
||||
global row_anchor
|
||||
global table_anchor
|
||||
return f'{table_anchor}.{row_anchor + 1}'
|
||||
return f"{table_anchor}.{row_anchor + 1}"
|
||||
|
||||
|
||||
def advanceRowAnchor():
|
||||
global row_anchor
|
||||
@ -178,43 +187,58 @@ def advanceRowAnchor():
|
||||
|
||||
|
||||
def tr(x, anchor=None):
|
||||
#return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
|
||||
# return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
|
||||
anchor = anchor if anchor else advanceRowAnchor()
|
||||
return f'<tr id={anchor}>{x}</tr>'
|
||||
return f"<tr id={anchor}>{x}</tr>"
|
||||
|
||||
def td(value, cell_attributes = ''):
|
||||
return '<td {cell_attributes}>{value}</td>'.format(
|
||||
cell_attributes = cell_attributes,
|
||||
value = value)
|
||||
|
||||
def th(value, cell_attributes = ''):
|
||||
return '<th {cell_attributes}>{value}</th>'.format(
|
||||
cell_attributes = cell_attributes,
|
||||
value = value)
|
||||
def td(value, cell_attributes=""):
|
||||
return "<td {cell_attributes}>{value}</td>".format(
|
||||
cell_attributes=cell_attributes, value=value
|
||||
)
|
||||
|
||||
def tableRow(cell_values, cell_attributes = [], anchor=None):
|
||||
|
||||
def th(value, cell_attributes=""):
|
||||
return "<th {cell_attributes}>{value}</th>".format(
|
||||
cell_attributes=cell_attributes, value=value
|
||||
)
|
||||
|
||||
|
||||
def tableRow(cell_values, cell_attributes=[], anchor=None):
|
||||
return tr(
|
||||
''.join([td(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes,
|
||||
fillvalue = '')
|
||||
if a is not None and v is not None]),
|
||||
anchor)
|
||||
"".join(
|
||||
[
|
||||
td(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes, fillvalue=""
|
||||
)
|
||||
if a is not None and v is not None
|
||||
]
|
||||
),
|
||||
anchor,
|
||||
)
|
||||
|
||||
def tableHeader(cell_values, cell_attributes = []):
|
||||
|
||||
def tableHeader(cell_values, cell_attributes=[]):
|
||||
return tr(
|
||||
''.join([th(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes,
|
||||
fillvalue = '')
|
||||
if a is not None and v is not None]))
|
||||
"".join(
|
||||
[
|
||||
th(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes, fillvalue=""
|
||||
)
|
||||
if a is not None and v is not None
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def tableStart(title):
|
||||
cls = '-'.join(title.lower().split(' ')[:3]);
|
||||
cls = "-".join(title.lower().split(" ")[:3])
|
||||
global table_anchor
|
||||
table_anchor = cls
|
||||
anchor = currentTableAnchor()
|
||||
help_anchor = '-'.join(title.lower().split(' '));
|
||||
help_anchor = "-".join(title.lower().split(" "))
|
||||
return f"""
|
||||
<h2 id="{anchor}">
|
||||
<a class="cancela" href="#{anchor}">{title}</a>
|
||||
@ -223,12 +247,14 @@ def tableStart(title):
|
||||
<table class="{cls}">
|
||||
"""
|
||||
|
||||
|
||||
def tableEnd():
|
||||
return '</table>'
|
||||
return "</table>"
|
||||
|
||||
|
||||
def tsvRows(n):
|
||||
try:
|
||||
with open(n, encoding='utf-8') as fd:
|
||||
with open(n, encoding="utf-8") as fd:
|
||||
result = []
|
||||
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
|
||||
new_row = []
|
||||
@ -237,27 +263,32 @@ def tsvRows(n):
|
||||
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
# 'Чем зАнимаешЬся' (mojibake from double encoding) is transformed back into 'Чем занимаешься'.
|
||||
|
||||
new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8'))
|
||||
new_row.append(
|
||||
e.encode("utf-8")
|
||||
.decode("unicode-escape")
|
||||
.encode("latin1")
|
||||
.decode("utf-8")
|
||||
)
|
||||
result.append(new_row)
|
||||
return result
|
||||
|
||||
except:
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
return []
|
||||
|
||||
|
||||
def htmlRows(n):
|
||||
rawRows = tsvRows(n)
|
||||
result = ''
|
||||
result = ""
|
||||
for row in rawRows:
|
||||
result += tableRow(row)
|
||||
return result
|
||||
|
||||
|
||||
def addSimpleTable(caption, columns, rows, pos=None):
|
||||
global tables
|
||||
text = ''
|
||||
text = ""
|
||||
if not rows:
|
||||
return
|
||||
|
||||
@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None):
|
||||
text += tableEnd()
|
||||
tables.insert(pos if pos else len(tables), text)
|
||||
|
||||
|
||||
def add_tested_commits():
|
||||
global report_errors
|
||||
try:
|
||||
addSimpleTable('Tested Commits', ['Old', 'New'],
|
||||
[['<pre>{}</pre>'.format(x) for x in
|
||||
[open('left-commit.txt').read(),
|
||||
open('right-commit.txt').read()]]])
|
||||
addSimpleTable(
|
||||
"Tested Commits",
|
||||
["Old", "New"],
|
||||
[
|
||||
[
|
||||
"<pre>{}</pre>".format(x)
|
||||
for x in [
|
||||
open("left-commit.txt").read(),
|
||||
open("right-commit.txt").read(),
|
||||
]
|
||||
]
|
||||
],
|
||||
)
|
||||
except:
|
||||
# Don't fail if no commit info -- maybe it's a manual run.
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
|
||||
|
||||
def add_report_errors():
|
||||
global tables
|
||||
global report_errors
|
||||
# Add the errors reported by various steps of comparison script
|
||||
try:
|
||||
report_errors += [l.strip() for l in open('report/errors.log')]
|
||||
report_errors += [l.strip() for l in open("report/errors.log")]
|
||||
except:
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
|
||||
if not report_errors:
|
||||
return
|
||||
|
||||
text = tableStart('Errors while Building the Report')
|
||||
text += tableHeader(['Error'])
|
||||
text = tableStart("Errors while Building the Report")
|
||||
text += tableHeader(["Error"])
|
||||
for x in report_errors:
|
||||
text += tableRow([x])
|
||||
text += tableEnd()
|
||||
# Insert after Tested Commits
|
||||
tables.insert(1, text)
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def add_errors_explained():
|
||||
if not errors_explained:
|
||||
return
|
||||
|
||||
text = '<a name="fail1"/>'
|
||||
text += tableStart('Error Summary')
|
||||
text += tableHeader(['Description'])
|
||||
text += tableStart("Error Summary")
|
||||
text += tableHeader(["Description"])
|
||||
for row in errors_explained:
|
||||
text += tableRow(row)
|
||||
text += tableEnd()
|
||||
@ -321,59 +364,81 @@ def add_errors_explained():
|
||||
tables.insert(1, text)
|
||||
|
||||
|
||||
if args.report == 'main':
|
||||
if args.report == "main":
|
||||
print((header_template.format()))
|
||||
|
||||
add_tested_commits()
|
||||
|
||||
|
||||
run_error_rows = tsvRows('run-errors.tsv')
|
||||
run_error_rows = tsvRows("run-errors.tsv")
|
||||
error_tests += len(run_error_rows)
|
||||
addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows)
|
||||
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
|
||||
if run_error_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
|
||||
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
|
||||
error_tests += len(slow_on_client_rows)
|
||||
addSimpleTable('Slow on Client',
|
||||
['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'],
|
||||
slow_on_client_rows)
|
||||
addSimpleTable(
|
||||
"Slow on Client",
|
||||
["Client time, s", "Server time, s", "Ratio", "Test", "Query"],
|
||||
slow_on_client_rows,
|
||||
)
|
||||
if slow_on_client_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
|
||||
]
|
||||
)
|
||||
|
||||
unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv')
|
||||
unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
|
||||
error_tests += len(unmarked_short_rows)
|
||||
addSimpleTable('Unexpected Query Duration',
|
||||
['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'],
|
||||
unmarked_short_rows)
|
||||
addSimpleTable(
|
||||
"Unexpected Query Duration",
|
||||
["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
|
||||
unmarked_short_rows,
|
||||
)
|
||||
if unmarked_short_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
|
||||
]
|
||||
)
|
||||
|
||||
def add_partial():
|
||||
rows = tsvRows('report/partial-queries-report.tsv')
|
||||
rows = tsvRows("report/partial-queries-report.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
global unstable_partial_queries, slow_average_tests, tables
|
||||
text = tableStart('Partial Queries')
|
||||
columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
|
||||
text = tableStart("Partial Queries")
|
||||
columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
|
||||
text += tableHeader(columns)
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
for row in rows:
|
||||
anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}'
|
||||
anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
|
||||
if float(row[1]) > 0.10:
|
||||
attrs[1] = f'style="background: {color_bad}"'
|
||||
unstable_partial_queries += 1
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[1] = ''
|
||||
attrs[1] = ""
|
||||
if float(row[0]) > allowed_single_run_time:
|
||||
attrs[0] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
|
||||
]
|
||||
)
|
||||
slow_average_tests += 1
|
||||
else:
|
||||
attrs[0] = ''
|
||||
attrs[0] = ""
|
||||
text += tableRow(row, attrs, anchor)
|
||||
text += tableEnd()
|
||||
tables.append(text)
|
||||
@ -381,41 +446,45 @@ if args.report == 'main':
|
||||
add_partial()
|
||||
|
||||
def add_changes():
|
||||
rows = tsvRows('report/changed-perf.tsv')
|
||||
rows = tsvRows("report/changed-perf.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
global faster_queries, slower_queries, tables
|
||||
|
||||
text = tableStart('Changes in Performance')
|
||||
text = tableStart("Changes in Performance")
|
||||
columns = [
|
||||
'Old, s', # 0
|
||||
'New, s', # 1
|
||||
'Ratio of speedup (-) or slowdown (+)', # 2
|
||||
'Relative difference (new − old) / old', # 3
|
||||
'p < 0.01 threshold', # 4
|
||||
'', # Failed # 5
|
||||
'Test', # 6
|
||||
'#', # 7
|
||||
'Query', # 8
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
"Old, s", # 0
|
||||
"New, s", # 1
|
||||
"Ratio of speedup (-) or slowdown (+)", # 2
|
||||
"Relative difference (new − old) / old", # 3
|
||||
"p < 0.01 threshold", # 4
|
||||
"", # Failed # 5
|
||||
"Test", # 6
|
||||
"#", # 7
|
||||
"Query", # 8
|
||||
]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[5] = None
|
||||
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for row in rows:
|
||||
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}'
|
||||
anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
|
||||
if int(row[5]):
|
||||
if float(row[3]) < 0.:
|
||||
if float(row[3]) < 0.0:
|
||||
faster_queries += 1
|
||||
attrs[2] = attrs[3] = f'style="background: {color_good}"'
|
||||
else:
|
||||
slower_queries += 1
|
||||
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[2] = attrs[3] = ''
|
||||
attrs[2] = attrs[3] = ""
|
||||
|
||||
text += tableRow(row, attrs, anchor)
|
||||
|
||||
@ -427,35 +496,35 @@ if args.report == 'main':
|
||||
def add_unstable_queries():
|
||||
global unstable_queries, very_unstable_queries, tables
|
||||
|
||||
unstable_rows = tsvRows('report/unstable-queries.tsv')
|
||||
unstable_rows = tsvRows("report/unstable-queries.tsv")
|
||||
if not unstable_rows:
|
||||
return
|
||||
|
||||
unstable_queries += len(unstable_rows)
|
||||
|
||||
columns = [
|
||||
'Old, s', #0
|
||||
'New, s', #1
|
||||
'Relative difference (new - old)/old', #2
|
||||
'p < 0.01 threshold', #3
|
||||
'', # Failed #4
|
||||
'Test', #5
|
||||
'#', #6
|
||||
'Query' #7
|
||||
"Old, s", # 0
|
||||
"New, s", # 1
|
||||
"Relative difference (new - old)/old", # 2
|
||||
"p < 0.01 threshold", # 3
|
||||
"", # Failed #4
|
||||
"Test", # 5
|
||||
"#", # 6
|
||||
"Query", # 7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[4] = None
|
||||
|
||||
text = tableStart('Unstable Queries')
|
||||
text = tableStart("Unstable Queries")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for r in unstable_rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
|
||||
if int(r[4]):
|
||||
very_unstable_queries += 1
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[3] = ''
|
||||
attrs[3] = ""
|
||||
# Just don't add the slightly unstable queries we don't consider
|
||||
# errors. It's not clear what the user should do with them.
|
||||
continue
|
||||
@ -470,53 +539,70 @@ if args.report == 'main':
|
||||
|
||||
add_unstable_queries()
|
||||
|
||||
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
|
||||
addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows)
|
||||
skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
|
||||
addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
|
||||
|
||||
addSimpleTable('Test Performance Changes',
|
||||
['Test', 'Ratio of speedup (-) or slowdown (+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'],
|
||||
tsvRows('report/test-perf-changes.tsv'))
|
||||
addSimpleTable(
|
||||
"Test Performance Changes",
|
||||
[
|
||||
"Test",
|
||||
"Ratio of speedup (-) or slowdown (+)",
|
||||
"Queries",
|
||||
"Total not OK",
|
||||
"Changed perf",
|
||||
"Unstable",
|
||||
],
|
||||
tsvRows("report/test-perf-changes.tsv"),
|
||||
)
|
||||
|
||||
def add_test_times():
|
||||
global slow_average_tests, tables
|
||||
rows = tsvRows('report/test-times.tsv')
|
||||
rows = tsvRows("report/test-times.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = [
|
||||
'Test', #0
|
||||
'Wall clock time, entire test, s', #1
|
||||
'Total client time for measured query runs, s', #2
|
||||
'Queries', #3
|
||||
'Longest query, total for measured runs, s', #4
|
||||
'Wall clock time per query, s', #5
|
||||
'Shortest query, total for measured runs, s', #6
|
||||
'', # Runs #7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
"Test", # 0
|
||||
"Wall clock time, entire test, s", # 1
|
||||
"Total client time for measured query runs, s", # 2
|
||||
"Queries", # 3
|
||||
"Longest query, total for measured runs, s", # 4
|
||||
"Wall clock time per query, s", # 5
|
||||
"Shortest query, total for measured runs, s", # 6
|
||||
"", # Runs #7
|
||||
]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[7] = None
|
||||
|
||||
text = tableStart('Test Times')
|
||||
text = tableStart("Test Times")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
|
||||
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[0]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[0]}"
|
||||
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
|
||||
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs:
|
||||
if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
|
||||
# FIXME should be 15s max -- investigate parallel_insert
|
||||
slow_average_tests += 1
|
||||
attrs[5] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[5] = ''
|
||||
attrs[5] = ""
|
||||
|
||||
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs:
|
||||
if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
|
||||
slow_average_tests += 1
|
||||
attrs[4] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[4] = ''
|
||||
attrs[4] = ""
|
||||
|
||||
text += tableRow(r, attrs, anchor)
|
||||
|
||||
@ -525,10 +611,17 @@ if args.report == 'main':
|
||||
|
||||
add_test_times()
|
||||
|
||||
addSimpleTable('Metric Changes',
|
||||
['Metric', 'Old median value', 'New median value',
|
||||
'Relative difference', 'Times difference'],
|
||||
tsvRows('metrics/changes.tsv'))
|
||||
addSimpleTable(
|
||||
"Metric Changes",
|
||||
[
|
||||
"Metric",
|
||||
"Old median value",
|
||||
"New median value",
|
||||
"Relative difference",
|
||||
"Times difference",
|
||||
],
|
||||
tsvRows("metrics/changes.tsv"),
|
||||
)
|
||||
|
||||
add_report_errors()
|
||||
add_errors_explained()
|
||||
@ -536,7 +629,8 @@ if args.report == 'main':
|
||||
for t in tables:
|
||||
print(t)
|
||||
|
||||
print(f"""
|
||||
print(
|
||||
f"""
|
||||
</div>
|
||||
<p class="links">
|
||||
<a href="all-queries.html">All queries</a>
|
||||
@ -546,104 +640,111 @@ if args.report == 'main':
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
status = 'success'
|
||||
message = 'See the report'
|
||||
status = "success"
|
||||
message = "See the report"
|
||||
message_array = []
|
||||
|
||||
if slow_average_tests:
|
||||
status = 'failure'
|
||||
message_array.append(str(slow_average_tests) + ' too long')
|
||||
status = "failure"
|
||||
message_array.append(str(slow_average_tests) + " too long")
|
||||
|
||||
if faster_queries:
|
||||
message_array.append(str(faster_queries) + ' faster')
|
||||
message_array.append(str(faster_queries) + " faster")
|
||||
|
||||
if slower_queries:
|
||||
if slower_queries > 3:
|
||||
status = 'failure'
|
||||
message_array.append(str(slower_queries) + ' slower')
|
||||
status = "failure"
|
||||
message_array.append(str(slower_queries) + " slower")
|
||||
|
||||
if unstable_partial_queries:
|
||||
very_unstable_queries += unstable_partial_queries
|
||||
status = 'failure'
|
||||
status = "failure"
|
||||
|
||||
# Don't show mildly unstable queries, only the very unstable ones we
|
||||
# treat as errors.
|
||||
if very_unstable_queries:
|
||||
if very_unstable_queries > 5:
|
||||
error_tests += very_unstable_queries
|
||||
status = 'failure'
|
||||
message_array.append(str(very_unstable_queries) + ' unstable')
|
||||
status = "failure"
|
||||
message_array.append(str(very_unstable_queries) + " unstable")
|
||||
|
||||
error_tests += slow_average_tests
|
||||
if error_tests:
|
||||
status = 'failure'
|
||||
message_array.insert(0, str(error_tests) + ' errors')
|
||||
status = "failure"
|
||||
message_array.insert(0, str(error_tests) + " errors")
|
||||
|
||||
if message_array:
|
||||
message = ', '.join(message_array)
|
||||
message = ", ".join(message_array)
|
||||
|
||||
if report_errors:
|
||||
status = 'failure'
|
||||
message = 'Errors while building the report.'
|
||||
status = "failure"
|
||||
message = "Errors while building the report."
|
||||
|
||||
print(("""
|
||||
print(
|
||||
(
|
||||
"""
|
||||
<!--status: {status}-->
|
||||
<!--message: {message}-->
|
||||
""".format(status=status, message=message)))
|
||||
""".format(
|
||||
status=status, message=message
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
elif args.report == 'all-queries':
|
||||
elif args.report == "all-queries":
|
||||
|
||||
print((header_template.format()))
|
||||
|
||||
add_tested_commits()
|
||||
|
||||
def add_all_queries():
|
||||
rows = tsvRows('report/all-queries.tsv')
|
||||
rows = tsvRows("report/all-queries.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = [
|
||||
'', # Changed #0
|
||||
'', # Unstable #1
|
||||
'Old, s', #2
|
||||
'New, s', #3
|
||||
'Ratio of speedup (-) or slowdown (+)', #4
|
||||
'Relative difference (new − old) / old', #5
|
||||
'p < 0.01 threshold', #6
|
||||
'Test', #7
|
||||
'#', #8
|
||||
'Query', #9
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
"", # Changed #0
|
||||
"", # Unstable #1
|
||||
"Old, s", # 2
|
||||
"New, s", # 3
|
||||
"Ratio of speedup (-) or slowdown (+)", # 4
|
||||
"Relative difference (new − old) / old", # 5
|
||||
"p < 0.01 threshold", # 6
|
||||
"Test", # 7
|
||||
"#", # 8
|
||||
"Query", # 9
|
||||
]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[0] = None
|
||||
attrs[1] = None
|
||||
|
||||
text = tableStart('All Query Times')
|
||||
text = tableStart("All Query Times")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
|
||||
if int(r[1]):
|
||||
attrs[6] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[6] = ''
|
||||
attrs[6] = ""
|
||||
|
||||
if int(r[0]):
|
||||
if float(r[5]) > 0.:
|
||||
if float(r[5]) > 0.0:
|
||||
attrs[4] = attrs[5] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[4] = attrs[5] = f'style="background: {color_good}"'
|
||||
else:
|
||||
attrs[4] = attrs[5] = ''
|
||||
attrs[4] = attrs[5] = ""
|
||||
|
||||
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
|
||||
attrs[2] = f'style="background: {color_bad}"'
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[2] = ''
|
||||
attrs[3] = ''
|
||||
attrs[2] = ""
|
||||
attrs[3] = ""
|
||||
|
||||
text += tableRow(r, attrs, anchor)
|
||||
|
||||
@ -655,7 +756,8 @@ elif args.report == 'all-queries':
|
||||
for t in tables:
|
||||
print(t)
|
||||
|
||||
print(f"""
|
||||
print(
|
||||
f"""
|
||||
</div>
|
||||
<p class="links">
|
||||
<a href="report.html">Main report</a>
|
||||
@ -665,4 +767,5 @@ elif args.report == 'all-queries':
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
@ -7,18 +7,19 @@ import csv
|
||||
|
||||
RESULT_LOG_NAME = "run.log"
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
|
||||
status = "success"
|
||||
description = 'Server started and responded'
|
||||
description = "Server started and responded"
|
||||
summary = [("Smoke test", "OK")]
|
||||
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log:
|
||||
lines = run_log.read().split('\n')
|
||||
if not lines or lines[0].strip() != 'OK':
|
||||
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
|
||||
lines = run_log.read().split("\n")
|
||||
if not lines or lines[0].strip() != "OK":
|
||||
status = "failure"
|
||||
logging.info("Lines is not ok: %s", str('\n'.join(lines)))
|
||||
logging.info("Lines is not ok: %s", str("\n".join(lines)))
|
||||
summary = [("Smoke test", "FAIL")]
|
||||
description = 'Server failed to respond, see result in logs'
|
||||
description = "Server failed to respond, see result in logs"
|
||||
|
||||
result_logs = []
|
||||
server_log_path = os.path.join(result_folder, "clickhouse-server.log")
|
||||
@ -38,20 +39,22 @@ def process_result(result_folder):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of split build smoke test"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
|
@ -10,11 +10,18 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
summary = []
|
||||
paths = []
|
||||
tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"]
|
||||
tests = [
|
||||
"TLPWhere",
|
||||
"TLPGroupBy",
|
||||
"TLPHaving",
|
||||
"TLPWhereGroupBy",
|
||||
"TLPDistinct",
|
||||
"TLPAggregate",
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
err_path = '{}/{}.err'.format(result_folder, test)
|
||||
out_path = '{}/{}.out'.format(result_folder, test)
|
||||
err_path = "{}/{}.err".format(result_folder, test)
|
||||
out_path = "{}/{}.out".format(result_folder, test)
|
||||
if not os.path.exists(err_path):
|
||||
logging.info("No output err on path %s", err_path)
|
||||
summary.append((test, "SKIPPED"))
|
||||
@ -23,24 +30,24 @@ def process_result(result_folder):
|
||||
else:
|
||||
paths.append(err_path)
|
||||
paths.append(out_path)
|
||||
with open(err_path, 'r') as f:
|
||||
if 'AssertionError' in f.read():
|
||||
with open(err_path, "r") as f:
|
||||
if "AssertionError" in f.read():
|
||||
summary.append((test, "FAIL"))
|
||||
status = 'failure'
|
||||
status = "failure"
|
||||
else:
|
||||
summary.append((test, "OK"))
|
||||
|
||||
logs_path = '{}/logs.tar.gz'.format(result_folder)
|
||||
logs_path = "{}/logs.tar.gz".format(result_folder)
|
||||
if not os.path.exists(logs_path):
|
||||
logging.info("No logs tar on path %s", logs_path)
|
||||
else:
|
||||
paths.append(logs_path)
|
||||
stdout_path = '{}/stdout.log'.format(result_folder)
|
||||
stdout_path = "{}/stdout.log".format(result_folder)
|
||||
if not os.path.exists(stdout_path):
|
||||
logging.info("No stdout log on path %s", stdout_path)
|
||||
else:
|
||||
paths.append(stdout_path)
|
||||
stderr_path = '{}/stderr.log'.format(result_folder)
|
||||
stderr_path = "{}/stderr.log".format(result_folder)
|
||||
if not os.path.exists(stderr_path):
|
||||
logging.info("No stderr log on path %s", stderr_path)
|
||||
else:
|
||||
@ -52,20 +59,22 @@ def process_result(result_folder):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of sqlancer test"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
|
@ -4,6 +4,9 @@ import requests
import re
import os

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags"

CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb"
@ -66,8 +69,18 @@ def get_previous_release(server_version):
return previous_release


def download_packet(url, local_file_name):
response = requests.get(url)
def download_packet(url, local_file_name, retries=10, backoff_factor=0.3):
session = requests.Session()
retry = Retry(
total=retries,
read=retries,
connect=retries,
backoff_factor=backoff_factor,
)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
response = session.get(url)
print(url)
if response.ok:
open(PACKETS_DIR + local_file_name, 'wb').write(response.content)

@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
&& pip3 install codespell PyGithub boto3 unidiff dohq-artifactory
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff

# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

@ -14,6 +14,7 @@ def process_result(result_folder):
("header duplicates", "duplicate_output.txt"),
("shellcheck", "shellcheck_output.txt"),
("style", "style_output.txt"),
("black", "black_output.txt"),
("typos", "typos_output.txt"),
("whitespaces", "whitespaces_output.txt"),
("workflows", "workflows_output.txt"),

@ -7,11 +7,13 @@ echo "Check duplicates" | ts
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
echo "Check style" | ts
./check-style -n |& tee /test_output/style_output.txt
echo "Check python formatting with black" | ts
./check-black -n |& tee /test_output/black_output.txt
echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt
echo "Check whitespaces" | ts
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
echo "Check sorkflows" | ts
echo "Check workflows" | ts
./check-workflows |& tee /test_output/workflows_output.txt
echo "Check shell scripts with shellcheck" | ts
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt

@ -22,9 +22,9 @@ def process_result(result_folder):
|
||||
total_other = 0
|
||||
test_results = []
|
||||
for test in results["tests"]:
|
||||
test_name = test['test']['test_name']
|
||||
test_result = test['result']['result_type'].upper()
|
||||
test_time = str(test['result']['message_rtime'])
|
||||
test_name = test["test"]["test_name"]
|
||||
test_result = test["result"]["result_type"].upper()
|
||||
test_time = str(test["result"]["message_rtime"])
|
||||
total_tests += 1
|
||||
if test_result == "OK":
|
||||
total_ok += 1
|
||||
@ -39,24 +39,29 @@ def process_result(result_folder):
|
||||
else:
|
||||
status = "success"
|
||||
|
||||
description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other)
|
||||
description = "failed: {}, passed: {}, other: {}".format(
|
||||
total_fail, total_ok, total_other
|
||||
)
|
||||
return status, description, test_results, [json_path, test_binary_log]
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests")
|
||||
parser.add_argument("--in-results-dir", default='./')
|
||||
parser.add_argument("--out-results-file", default='./test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='./check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of Testflows tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="./")
|
||||
parser.add_argument("--out-results-file", default="./test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="./check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
@ -64,4 +69,3 @@ if __name__ == "__main__":
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
||||
|
@ -5,24 +5,26 @@ import logging
import argparse
import csv

OK_SIGN = 'OK ]'
FAILED_SIGN = 'FAILED ]'
SEGFAULT = 'Segmentation fault'
SIGNAL = 'received signal SIG'
PASSED = 'PASSED'
OK_SIGN = "OK ]"
FAILED_SIGN = "FAILED ]"
SEGFAULT = "Segmentation fault"
SIGNAL = "received signal SIG"
PASSED = "PASSED"


def get_test_name(line):
|
||||
elements = reversed(line.split(' '))
|
||||
elements = reversed(line.split(" "))
|
||||
for element in elements:
|
||||
if '(' not in element and ')' not in element:
|
||||
if "(" not in element and ")" not in element:
|
||||
return element
|
||||
raise Exception("No test name in line '{}'".format(line))
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
summary = []
|
||||
total_counter = 0
|
||||
failed_counter = 0
|
||||
result_log_path = '{}/test_result.txt'.format(result_folder)
|
||||
result_log_path = "{}/test_result.txt".format(result_folder)
|
||||
if not os.path.exists(result_log_path):
|
||||
logging.info("No output log on path %s", result_log_path)
|
||||
return "exception", "No output log", []
|
||||
@ -30,7 +32,7 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
description = ""
|
||||
passed = False
|
||||
with open(result_log_path, 'r') as test_result:
|
||||
with open(result_log_path, "r") as test_result:
|
||||
for line in test_result:
|
||||
if OK_SIGN in line:
|
||||
logging.info("Found ok line: '%s'", line)
|
||||
@ -38,7 +40,7 @@ def process_result(result_folder):
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
summary.append((test_name, "OK"))
|
||||
total_counter += 1
|
||||
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line:
|
||||
elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
|
||||
logging.info("Found fail line: '%s'", line)
|
||||
test_name = get_test_name(line.strip())
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
@ -67,25 +69,30 @@ def process_result(result_folder):
|
||||
status = "failure"
|
||||
|
||||
if not description:
|
||||
description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter)
|
||||
description += "fail: {}, passed: {}".format(
|
||||
failed_counter, total_counter - failed_counter
|
||||
)
|
||||
|
||||
return status, description, summary
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of unit tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
|
||||
@ -93,4 +100,3 @@ if __name__ == "__main__":
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
||||
|
@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
|
||||
|
||||
RETRIES_SIGN = "Some tests were restarted"
|
||||
|
||||
|
||||
def process_test_log(log_path):
|
||||
total = 0
|
||||
skipped = 0
|
||||
@ -26,7 +27,7 @@ def process_test_log(log_path):
|
||||
retries = False
|
||||
task_timeout = True
|
||||
test_results = []
|
||||
with open(log_path, 'r') as test_file:
|
||||
with open(log_path, "r") as test_file:
|
||||
for line in test_file:
|
||||
original_line = line
|
||||
line = line.strip()
|
||||
@ -36,12 +37,15 @@ def process_test_log(log_path):
|
||||
hung = True
|
||||
if RETRIES_SIGN in line:
|
||||
retries = True
|
||||
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
|
||||
test_name = line.split(' ')[2].split(':')[0]
|
||||
if any(
|
||||
sign in line
|
||||
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
|
||||
):
|
||||
test_name = line.split(" ")[2].split(":")[0]
|
||||
|
||||
test_time = ''
|
||||
test_time = ""
|
||||
try:
|
||||
time_token = line.split(']')[1].strip().split()[0]
|
||||
time_token = line.split("]")[1].strip().split()[0]
|
||||
float(time_token)
|
||||
test_time = time_token
|
||||
except:
|
||||
@ -66,9 +70,22 @@ def process_test_log(log_path):
|
||||
elif len(test_results) > 0 and test_results[-1][1] == "FAIL":
|
||||
test_results[-1][3].append(original_line)
|
||||
|
||||
test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results]
|
||||
test_results = [
|
||||
(test[0], test[1], test[2], "".join(test[3])) for test in test_results
|
||||
]
|
||||
|
||||
return (
|
||||
total,
|
||||
skipped,
|
||||
unknown,
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
task_timeout,
|
||||
retries,
|
||||
test_results,
|
||||
)
|
||||
|
||||
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
|
||||
|
||||
def process_result(result_path):
|
||||
test_results = []
|
||||
@ -76,16 +93,26 @@ def process_result(result_path):
|
||||
description = ""
|
||||
files = os.listdir(result_path)
|
||||
if files:
|
||||
logging.info("Find files in result folder %s", ','.join(files))
|
||||
result_path = os.path.join(result_path, 'test_result.txt')
|
||||
logging.info("Find files in result folder %s", ",".join(files))
|
||||
result_path = os.path.join(result_path, "test_result.txt")
|
||||
else:
|
||||
result_path = None
|
||||
description = "No output log"
|
||||
state = "error"
|
||||
|
||||
if result_path and os.path.exists(result_path):
|
||||
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
|
||||
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
|
||||
(
|
||||
total,
|
||||
skipped,
|
||||
unknown,
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
task_timeout,
|
||||
retries,
|
||||
test_results,
|
||||
) = process_test_log(result_path)
|
||||
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
|
||||
logging.info("Is flacky check: %s", is_flacky_check)
|
||||
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
|
||||
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
|
||||
@ -120,20 +147,22 @@ def process_result(result_path):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of functional tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
|
||||
|
@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).

Python code is checked with [black](https://github.com/psf/black/).
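A minimal sketch of running the same formatting check locally before pushing (the path below is an assumption, not taken from the CI configuration; point it at the Python sources you actually changed):

```bash
# Install the formatter, then report what would change without rewriting files.
# "tests/ci" is an assumed target directory -- substitute the files you touched.
python3 -m pip install black
python3 -m black --check --diff tests/ci
```

Dropping `--check --diff` lets black rewrite the files in place, which is usually what you want before committing.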

### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the errors found by the check (invalid tabulation etc); a blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).

@ -5,7 +5,7 @@ toc_title: Caches

# Cache Types {#cache-types}

When performing queries, ClichHouse uses different caches.
When performing queries, ClickHouse uses different caches.

Main cache types:

@ -15,24 +15,24 @@ import website
|
||||
|
||||
def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
|
||||
src_path = root
|
||||
src_index = os.path.join(src_path, 'index.html')
|
||||
src_index = os.path.join(src_path, "index.html")
|
||||
rel_path = os.path.relpath(src_path, site_temp)
|
||||
dst_path = os.path.join(main_site_dir, rel_path, 'amp')
|
||||
dst_index = os.path.join(dst_path, 'index.html')
|
||||
dst_path = os.path.join(main_site_dir, rel_path, "amp")
|
||||
dst_index = os.path.join(dst_path, "index.html")
|
||||
|
||||
logging.debug(f'Generating AMP version for {rel_path} ({lang})')
|
||||
logging.debug(f"Generating AMP version for {rel_path} ({lang})")
|
||||
os.makedirs(dst_path)
|
||||
with open(src_index, 'r') as f:
|
||||
with open(src_index, "r") as f:
|
||||
content = f.read()
|
||||
css_in = ' '.join(website.get_css_in(args))
|
||||
css_in = " ".join(website.get_css_in(args))
|
||||
command = f"purifycss --min {css_in} '{src_index}'"
|
||||
logging.debug(command)
|
||||
inline_css = subprocess.check_output(command, shell=True).decode('utf-8')
|
||||
inline_css = inline_css.replace('!important', '').replace('/*!', '/*')
|
||||
inline_css = subprocess.check_output(command, shell=True).decode("utf-8")
|
||||
inline_css = inline_css.replace("!important", "").replace("/*!", "/*")
|
||||
inline_css = cssmin.cssmin(inline_css)
|
||||
content = content.replace('CUSTOM_CSS_PLACEHOLDER', inline_css)
|
||||
content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css)
|
||||
|
||||
with open(dst_index, 'w') as f:
|
||||
with open(dst_index, "w") as f:
|
||||
f.write(content)
|
||||
|
||||
return dst_index
|
||||
@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
|
||||
|
||||
def build_amp(lang, args, cfg):
|
||||
# AMP docs: https://amp.dev/documentation/
|
||||
logging.info(f'Building AMP version for {lang}')
|
||||
logging.info(f"Building AMP version for {lang}")
|
||||
with util.temp_dir() as site_temp:
|
||||
extra = cfg.data['extra']
|
||||
main_site_dir = cfg.data['site_dir']
|
||||
extra['is_amp'] = True
|
||||
cfg.load_dict({
|
||||
'site_dir': site_temp,
|
||||
'extra': extra
|
||||
})
|
||||
extra = cfg.data["extra"]
|
||||
main_site_dir = cfg.data["site_dir"]
|
||||
extra["is_amp"] = True
|
||||
cfg.load_dict({"site_dir": site_temp, "extra": extra})
|
||||
|
||||
try:
|
||||
mkdocs.commands.build.build(cfg)
|
||||
@ -60,50 +57,49 @@ def build_amp(lang, args, cfg):
|
||||
|
||||
paths = []
|
||||
for root, _, filenames in os.walk(site_temp):
|
||||
if 'index.html' in filenames:
|
||||
paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir))
|
||||
logging.info(f'Finished building AMP version for {lang}')
|
||||
if "index.html" in filenames:
|
||||
paths.append(
|
||||
prepare_amp_html(lang, args, root, site_temp, main_site_dir)
|
||||
)
|
||||
logging.info(f"Finished building AMP version for {lang}")
|
||||
|
||||
|
||||
def html_to_amp(content):
|
||||
soup = bs4.BeautifulSoup(
|
||||
content,
|
||||
features='html.parser'
|
||||
)
|
||||
soup = bs4.BeautifulSoup(content, features="html.parser")
|
||||
|
||||
for tag in soup.find_all():
|
||||
if tag.attrs.get('id') == 'tostring':
|
||||
tag.attrs['id'] = '_tostring'
|
||||
if tag.name == 'img':
|
||||
tag.name = 'amp-img'
|
||||
tag.attrs['layout'] = 'responsive'
|
||||
src = tag.attrs['src']
|
||||
if not (src.startswith('/') or src.startswith('http')):
|
||||
tag.attrs['src'] = f'../{src}'
|
||||
if not tag.attrs.get('width'):
|
||||
tag.attrs['width'] = '640'
|
||||
if not tag.attrs.get('height'):
|
||||
tag.attrs['height'] = '320'
|
||||
if tag.name == 'iframe':
|
||||
tag.name = 'amp-iframe'
|
||||
tag.attrs['layout'] = 'responsive'
|
||||
del tag.attrs['alt']
|
||||
del tag.attrs['allowfullscreen']
|
||||
if not tag.attrs.get('width'):
|
||||
tag.attrs['width'] = '640'
|
||||
if not tag.attrs.get('height'):
|
||||
tag.attrs['height'] = '320'
|
||||
elif tag.name == 'a':
|
||||
href = tag.attrs.get('href')
|
||||
if tag.attrs.get("id") == "tostring":
|
||||
tag.attrs["id"] = "_tostring"
|
||||
if tag.name == "img":
|
||||
tag.name = "amp-img"
|
||||
tag.attrs["layout"] = "responsive"
|
||||
src = tag.attrs["src"]
|
||||
if not (src.startswith("/") or src.startswith("http")):
|
||||
tag.attrs["src"] = f"../{src}"
|
||||
if not tag.attrs.get("width"):
|
||||
tag.attrs["width"] = "640"
|
||||
if not tag.attrs.get("height"):
|
||||
tag.attrs["height"] = "320"
|
||||
if tag.name == "iframe":
|
||||
tag.name = "amp-iframe"
|
||||
tag.attrs["layout"] = "responsive"
|
||||
del tag.attrs["alt"]
|
||||
del tag.attrs["allowfullscreen"]
|
||||
if not tag.attrs.get("width"):
|
||||
tag.attrs["width"] = "640"
|
||||
if not tag.attrs.get("height"):
|
||||
tag.attrs["height"] = "320"
|
||||
elif tag.name == "a":
|
||||
href = tag.attrs.get("href")
|
||||
if href:
|
||||
if not (href.startswith('/') or href.startswith('http')):
|
||||
if '#' in href:
|
||||
href, anchor = href.split('#')
|
||||
if not (href.startswith("/") or href.startswith("http")):
|
||||
if "#" in href:
|
||||
href, anchor = href.split("#")
|
||||
else:
|
||||
anchor = None
|
||||
href = f'../{href}amp/'
|
||||
href = f"../{href}amp/"
|
||||
if anchor:
|
||||
href = f'{href}#{anchor}'
|
||||
tag.attrs['href'] = href
|
||||
href = f"{href}#{anchor}"
|
||||
tag.attrs["href"] = href
|
||||
content = str(soup)
|
||||
return website.minify_html(content)
|
||||
|
@ -17,54 +17,52 @@ import util
|
||||
|
||||
|
||||
def build_for_lang(lang, args):
|
||||
logging.info(f'Building {lang} blog')
|
||||
logging.info(f"Building {lang} blog")
|
||||
|
||||
try:
|
||||
theme_cfg = {
|
||||
'name': None,
|
||||
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
|
||||
'language': lang,
|
||||
'direction': 'ltr',
|
||||
'static_templates': ['404.html'],
|
||||
'extra': {
|
||||
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
|
||||
}
|
||||
"name": None,
|
||||
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
|
||||
"language": lang,
|
||||
"direction": "ltr",
|
||||
"static_templates": ["404.html"],
|
||||
"extra": {
|
||||
"now": int(
|
||||
time.mktime(datetime.datetime.now().timetuple())
|
||||
) # TODO better way to avoid caching
|
||||
},
|
||||
}
|
||||
|
||||
# the following list of languages is sorted according to
|
||||
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
|
||||
languages = {
|
||||
'en': 'English'
|
||||
}
|
||||
languages = {"en": "English"}
|
||||
|
||||
site_names = {
|
||||
'en': 'ClickHouse Blog'
|
||||
}
|
||||
site_names = {"en": "ClickHouse Blog"}
|
||||
|
||||
assert len(site_names) == len(languages)
|
||||
|
||||
site_dir = os.path.join(args.blog_output_dir, lang)
|
||||
|
||||
plugins = ['macros']
|
||||
plugins = ["macros"]
|
||||
if args.htmlproofer:
|
||||
plugins.append('htmlproofer')
|
||||
plugins.append("htmlproofer")
|
||||
|
||||
website_url = 'https://clickhouse.com'
|
||||
site_name = site_names.get(lang, site_names['en'])
|
||||
website_url = "https://clickhouse.com"
|
||||
site_name = site_names.get(lang, site_names["en"])
|
||||
blog_nav, post_meta = nav.build_blog_nav(lang, args)
|
||||
raw_config = dict(
|
||||
site_name=site_name,
|
||||
site_url=f'{website_url}/blog/{lang}/',
|
||||
site_url=f"{website_url}/blog/{lang}/",
|
||||
docs_dir=os.path.join(args.blog_dir, lang),
|
||||
site_dir=site_dir,
|
||||
strict=True,
|
||||
theme=theme_cfg,
|
||||
nav=blog_nav,
|
||||
copyright='©2016–2022 ClickHouse, Inc.',
|
||||
copyright="©2016–2022 ClickHouse, Inc.",
|
||||
use_directory_urls=True,
|
||||
repo_name='ClickHouse/ClickHouse',
|
||||
repo_url='https://github.com/ClickHouse/ClickHouse/',
|
||||
edit_uri=f'edit/master/website/blog/{lang}',
|
||||
repo_name="ClickHouse/ClickHouse",
|
||||
repo_url="https://github.com/ClickHouse/ClickHouse/",
|
||||
edit_uri=f"edit/master/website/blog/{lang}",
|
||||
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
|
||||
plugins=plugins,
|
||||
extra=dict(
|
||||
@ -75,12 +73,12 @@ def build_for_lang(lang, args):
|
||||
website_url=website_url,
|
||||
events=args.events,
|
||||
languages=languages,
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
|
||||
is_amp=False,
|
||||
is_blog=True,
|
||||
post_meta=post_meta,
|
||||
today=datetime.date.today().isoformat()
|
||||
)
|
||||
today=datetime.date.today().isoformat(),
|
||||
),
|
||||
)
|
||||
|
||||
cfg = config.load_config(**raw_config)
|
||||
@ -89,21 +87,28 @@ def build_for_lang(lang, args):
|
||||
redirects.build_blog_redirects(args)
|
||||
|
||||
env = util.init_jinja2_env(args)
|
||||
with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f:
|
||||
rss_template_string = f.read().decode('utf-8').strip()
|
||||
with open(
|
||||
os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb"
|
||||
) as f:
|
||||
rss_template_string = f.read().decode("utf-8").strip()
|
||||
rss_template = env.from_string(rss_template_string)
|
||||
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f:
|
||||
f.write(rss_template.render({'config': raw_config}))
|
||||
with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f:
|
||||
f.write(rss_template.render({"config": raw_config}))
|
||||
|
||||
logging.info(f'Finished building {lang} blog')
|
||||
logging.info(f"Finished building {lang} blog")
|
||||
|
||||
except exceptions.ConfigurationError as e:
|
||||
raise SystemExit('\n' + str(e))
|
||||
raise SystemExit("\n" + str(e))
|
||||
|
||||
|
||||
def build_blog(args):
|
||||
tasks = []
|
||||
for lang in args.blog_lang.split(','):
|
||||
for lang in args.blog_lang.split(","):
|
||||
if lang:
|
||||
tasks.append((lang, args,))
|
||||
tasks.append(
|
||||
(
|
||||
lang,
|
||||
args,
|
||||
)
|
||||
)
|
||||
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
|
||||
|
@ -30,76 +30,76 @@ import website
|
||||
|
||||
from cmake_in_clickhouse_generator import generate_cmake_flags_files
|
||||
|
||||
|
||||
class ClickHouseMarkdown(markdown.extensions.Extension):
|
||||
class ClickHousePreprocessor(markdown.util.Processor):
|
||||
def run(self, lines):
|
||||
for line in lines:
|
||||
if '<!--hide-->' not in line:
|
||||
if "<!--hide-->" not in line:
|
||||
yield line
|
||||
|
||||
def extendMarkdown(self, md):
|
||||
md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31)
|
||||
md.preprocessors.register(
|
||||
self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31
|
||||
)
|
||||
|
||||
|
||||
markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown
|
||||
|
||||
|
||||
def build_for_lang(lang, args):
|
||||
logging.info(f'Building {lang} docs')
|
||||
os.environ['SINGLE_PAGE'] = '0'
|
||||
logging.info(f"Building {lang} docs")
|
||||
os.environ["SINGLE_PAGE"] = "0"
|
||||
|
||||
try:
|
||||
theme_cfg = {
|
||||
'name': None,
|
||||
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
|
||||
'language': lang,
|
||||
'direction': 'rtl' if lang == 'fa' else 'ltr',
|
||||
'static_templates': ['404.html'],
|
||||
'extra': {
|
||||
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
|
||||
}
|
||||
"name": None,
|
||||
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
|
||||
"language": lang,
|
||||
"direction": "rtl" if lang == "fa" else "ltr",
|
||||
"static_templates": ["404.html"],
|
||||
"extra": {
|
||||
"now": int(
|
||||
time.mktime(datetime.datetime.now().timetuple())
|
||||
) # TODO better way to avoid caching
|
||||
},
|
||||
}
|
||||
|
||||
# the following list of languages is sorted according to
|
||||
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
|
||||
languages = {
|
||||
'en': 'English',
|
||||
'zh': '中文',
|
||||
'ru': 'Русский',
|
||||
'ja': '日本語'
|
||||
}
|
||||
languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"}
|
||||
|
||||
site_names = {
|
||||
'en': 'ClickHouse %s Documentation',
|
||||
'zh': 'ClickHouse文档 %s',
|
||||
'ru': 'Документация ClickHouse %s',
|
||||
'ja': 'ClickHouseドキュメント %s'
|
||||
"en": "ClickHouse %s Documentation",
|
||||
"zh": "ClickHouse文档 %s",
|
||||
"ru": "Документация ClickHouse %s",
|
||||
"ja": "ClickHouseドキュメント %s",
|
||||
}
|
||||
|
||||
assert len(site_names) == len(languages)
|
||||
|
||||
site_dir = os.path.join(args.docs_output_dir, lang)
|
||||
|
||||
plugins = ['macros']
|
||||
plugins = ["macros"]
|
||||
if args.htmlproofer:
|
||||
plugins.append('htmlproofer')
|
||||
plugins.append("htmlproofer")
|
||||
|
||||
website_url = 'https://clickhouse.com'
|
||||
site_name = site_names.get(lang, site_names['en']) % ''
|
||||
site_name = site_name.replace(' ', ' ')
|
||||
website_url = "https://clickhouse.com"
|
||||
site_name = site_names.get(lang, site_names["en"]) % ""
|
||||
site_name = site_name.replace(" ", " ")
|
||||
|
||||
raw_config = dict(
|
||||
site_name=site_name,
|
||||
site_url=f'{website_url}/docs/{lang}/',
|
||||
site_url=f"{website_url}/docs/{lang}/",
|
||||
docs_dir=os.path.join(args.docs_dir, lang),
|
||||
site_dir=site_dir,
|
||||
strict=True,
|
||||
theme=theme_cfg,
|
||||
copyright='©2016–2022 ClickHouse, Inc.',
|
||||
copyright="©2016–2022 ClickHouse, Inc.",
|
||||
use_directory_urls=True,
|
||||
repo_name='ClickHouse/ClickHouse',
|
||||
repo_url='https://github.com/ClickHouse/ClickHouse/',
|
||||
edit_uri=f'edit/master/docs/{lang}',
|
||||
repo_name="ClickHouse/ClickHouse",
|
||||
repo_url="https://github.com/ClickHouse/ClickHouse/",
|
||||
edit_uri=f"edit/master/docs/{lang}",
|
||||
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
|
||||
plugins=plugins,
|
||||
extra=dict(
|
||||
@ -111,16 +111,16 @@ def build_for_lang(lang, args):
|
||||
website_url=website_url,
|
||||
events=args.events,
|
||||
languages=languages,
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
|
||||
is_amp=False,
|
||||
is_blog=False
|
||||
)
|
||||
is_blog=False,
|
||||
),
|
||||
)
|
||||
|
||||
# Clean to be safe if last build finished abnormally
|
||||
single_page.remove_temporary_files(lang, args)
|
||||
|
||||
raw_config['nav'] = nav.build_docs_nav(lang, args)
|
||||
raw_config["nav"] = nav.build_docs_nav(lang, args)
|
||||
|
||||
cfg = config.load_config(**raw_config)
|
||||
|
||||
@ -131,21 +131,28 @@ def build_for_lang(lang, args):
|
||||
amp.build_amp(lang, args, cfg)
|
||||
|
||||
if not args.skip_single_page:
|
||||
single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg)
|
||||
single_page.build_single_page_version(
|
||||
lang, args, raw_config.get("nav"), cfg
|
||||
)
|
||||
|
||||
mdx_clickhouse.PatchedMacrosPlugin.disabled = False
|
||||
|
||||
logging.info(f'Finished building {lang} docs')
|
||||
logging.info(f"Finished building {lang} docs")
|
||||
|
||||
except exceptions.ConfigurationError as e:
|
||||
raise SystemExit('\n' + str(e))
|
||||
raise SystemExit("\n" + str(e))
|
||||
|
||||
|
||||
def build_docs(args):
|
||||
tasks = []
|
||||
for lang in args.lang.split(','):
|
||||
for lang in args.lang.split(","):
|
||||
if lang:
|
||||
tasks.append((lang, args,))
|
||||
tasks.append(
|
||||
(
|
||||
lang,
|
||||
args,
|
||||
)
|
||||
)
|
||||
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
|
||||
redirects.build_docs_redirects(args)
|
||||
|
||||
@ -171,56 +178,64 @@ def build(args):
|
||||
redirects.build_static_redirects(args)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if __name__ == "__main__":
|
||||
os.chdir(os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
# A root path to ClickHouse source code.
|
||||
src_dir = '..'
|
||||
src_dir = ".."
|
||||
|
||||
website_dir = os.path.join(src_dir, 'website')
|
||||
website_dir = os.path.join(src_dir, "website")
|
||||
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
arg_parser.add_argument('--lang', default='en,ru,zh,ja')
|
||||
arg_parser.add_argument('--blog-lang', default='en')
|
||||
arg_parser.add_argument('--docs-dir', default='.')
|
||||
arg_parser.add_argument('--theme-dir', default=website_dir)
|
||||
arg_parser.add_argument('--website-dir', default=website_dir)
|
||||
arg_parser.add_argument('--src-dir', default=src_dir)
|
||||
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog'))
|
||||
arg_parser.add_argument('--output-dir', default='build')
|
||||
arg_parser.add_argument('--nav-limit', type=int, default='0')
|
||||
arg_parser.add_argument('--skip-multi-page', action='store_true')
|
||||
arg_parser.add_argument('--skip-single-page', action='store_true')
|
||||
arg_parser.add_argument('--skip-amp', action='store_true')
|
||||
arg_parser.add_argument('--skip-website', action='store_true')
|
||||
arg_parser.add_argument('--skip-blog', action='store_true')
|
||||
arg_parser.add_argument('--skip-git-log', action='store_true')
|
||||
arg_parser.add_argument('--skip-docs', action='store_true')
|
||||
arg_parser.add_argument('--test-only', action='store_true')
|
||||
arg_parser.add_argument('--minify', action='store_true')
|
||||
arg_parser.add_argument('--htmlproofer', action='store_true')
|
||||
arg_parser.add_argument('--no-docs-macros', action='store_true')
|
||||
arg_parser.add_argument('--save-raw-single-page', type=str)
|
||||
arg_parser.add_argument('--livereload', type=int, default='0')
|
||||
arg_parser.add_argument('--verbose', action='store_true')
|
||||
arg_parser.add_argument("--lang", default="en,ru,zh,ja")
|
||||
arg_parser.add_argument("--blog-lang", default="en")
|
||||
arg_parser.add_argument("--docs-dir", default=".")
|
||||
arg_parser.add_argument("--theme-dir", default=website_dir)
|
||||
arg_parser.add_argument("--website-dir", default=website_dir)
|
||||
arg_parser.add_argument("--src-dir", default=src_dir)
|
||||
arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog"))
|
||||
arg_parser.add_argument("--output-dir", default="build")
|
||||
arg_parser.add_argument("--nav-limit", type=int, default="0")
|
||||
arg_parser.add_argument("--skip-multi-page", action="store_true")
|
||||
arg_parser.add_argument("--skip-single-page", action="store_true")
|
||||
arg_parser.add_argument("--skip-amp", action="store_true")
|
||||
arg_parser.add_argument("--skip-website", action="store_true")
|
||||
arg_parser.add_argument("--skip-blog", action="store_true")
|
||||
arg_parser.add_argument("--skip-git-log", action="store_true")
|
||||
arg_parser.add_argument("--skip-docs", action="store_true")
|
||||
arg_parser.add_argument("--test-only", action="store_true")
|
||||
arg_parser.add_argument("--minify", action="store_true")
|
||||
arg_parser.add_argument("--htmlproofer", action="store_true")
|
||||
arg_parser.add_argument("--no-docs-macros", action="store_true")
|
||||
arg_parser.add_argument("--save-raw-single-page", type=str)
|
||||
arg_parser.add_argument("--livereload", type=int, default="0")
|
||||
arg_parser.add_argument("--verbose", action="store_true")
|
||||
|
||||
args = arg_parser.parse_args()
|
||||
args.minify = False # TODO remove
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG if args.verbose else logging.INFO,
|
||||
stream=sys.stderr
|
||||
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
|
||||
)
|
||||
|
||||
logging.getLogger('MARKDOWN').setLevel(logging.INFO)
|
||||
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
|
||||
|
||||
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs')
|
||||
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog')
|
||||
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs")
|
||||
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog")
|
||||
|
||||
from github import get_events
|
||||
args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
|
||||
args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip()
|
||||
args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}'
|
||||
|
||||
args.rev = (
|
||||
subprocess.check_output("git rev-parse HEAD", shell=True)
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
args.rev_short = (
|
||||
subprocess.check_output("git rev-parse --short HEAD", shell=True)
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
|
||||
args.events = get_events(args)
|
||||
|
||||
if args.test_only:
|
||||
@ -233,18 +248,20 @@ if __name__ == '__main__':
|
||||
mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True
|
||||
|
||||
from build import build
|
||||
|
||||
build(args)
|
||||
|
||||
if args.livereload:
|
||||
new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')]
|
||||
new_args = sys.executable + ' ' + ' '.join(new_args)
|
||||
new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
|
||||
new_args = sys.executable + " " + " ".join(new_args)
|
||||
|
||||
server = livereload.Server()
|
||||
server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
|
||||
server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
|
||||
server.serve(
|
||||
root=args.output_dir,
|
||||
host='0.0.0.0',
|
||||
port=args.livereload
|
||||
server.watch(
|
||||
args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True)
|
||||
)
|
||||
server.watch(
|
||||
args.website_dir + "**/*",
|
||||
livereload.shell(new_args, cwd="tools", shell=True),
|
||||
)
|
||||
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
|
||||
sys.exit(0)
|
||||
|
@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict
Entity = Tuple[str, str, str]

# https://regex101.com/r/R6iogw/12
cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
cmake_option_regex: str = (
    r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
)

ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/"

name_str: str = "<a name=\"{anchor}\"></a>[`{name}`](" + ch_master_url + "{path}#L{line})"
name_str: str = '<a name="{anchor}"></a>[`{name}`](' + ch_master_url + "{path}#L{line})"
default_anchor_str: str = "[`{name}`](#{anchor})"

comment_var_regex: str = r"\${(.+)}"
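For context on the generator change above, here is a minimal illustrative sketch, not part of this commit, of what cmake_option_regex captures from an option() call; the sample CMake line is a made-up example.

```python
import re

# Same pattern as cmake_option_regex above.
pattern = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"

# Hypothetical CMakeLists.txt line, for illustration only.
sample = 'option(ENABLE_TESTS "Build ClickHouse unit tests" ON)'

# re.findall returns one (name, description, default) tuple per option() call,
# matching the Entity type used by the generator.
print(re.findall(pattern, sample, re.MULTILINE))
# [('ENABLE_TESTS', 'Build ClickHouse unit tests', 'ON')]
```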
@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {}


def make_anchor(t: str) -> str:
    return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"])
    return "".join(
        ["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]
    )


def process_comment(comment: str) -> str:
    return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE)


def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None:
    (line, comment) = line_comment
    (name, description, default) = entity
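A brief sketch, again illustrative only, of how make_anchor and the name_str template defined earlier combine into the anchor and link that build_entity renders; the flag name and line number below are made-up values.

```python
def make_anchor(t: str) -> str:
    # Same logic as the generator: keep letters and underscores, lowercase, '_' -> '-'.
    return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"])

ch_master_url = "https://github.com/clickhouse/clickhouse/blob/master/"
name_str = '<a name="{anchor}"></a>[`{name}`](' + ch_master_url + "{path}#L{line})"

print(make_anchor("ENABLE_TESTS"))  # enable-tests
# The line number below is hypothetical.
print(name_str.format(anchor=make_anchor("ENABLE_TESTS"), name="ENABLE_TESTS",
                      path="CMakeLists.txt", line=100))
# <a name="enable-tests"></a>[`ENABLE_TESTS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L100)
```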
@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
|
||||
formatted_default: str = "`" + default + "`"
|
||||
|
||||
formatted_name: str = name_str.format(
|
||||
anchor=make_anchor(name),
|
||||
name=name,
|
||||
path=path,
|
||||
line=line)
|
||||
anchor=make_anchor(name), name=name, path=path, line=line
|
||||
)
|
||||
|
||||
formatted_description: str = "".join(description.split("\n"))
|
||||
|
||||
formatted_comment: str = process_comment(comment)
|
||||
|
||||
formatted_entity: str = "| {} | {} | {} | {} |".format(
|
||||
formatted_name, formatted_default, formatted_description, formatted_comment)
|
||||
formatted_name, formatted_default, formatted_description, formatted_comment
|
||||
)
|
||||
|
||||
entities[name] = path, formatted_entity
|
||||
|
||||
|
||||
def process_file(root_path: str, file_path: str, file_name: str) -> None:
|
||||
with open(os.path.join(file_path, file_name), 'r') as cmake_file:
|
||||
with open(os.path.join(file_path, file_name), "r") as cmake_file:
|
||||
contents: str = cmake_file.read()
|
||||
|
||||
def get_line_and_comment(target: str) -> Tuple[int, str]:
|
||||
@ -70,10 +76,10 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
|
||||
comment: str = ""
|
||||
|
||||
for n, line in enumerate(contents_list):
|
||||
if 'option' not in line.lower() or target not in line:
|
||||
if "option" not in line.lower() or target not in line:
|
||||
continue
|
||||
|
||||
for maybe_comment_line in contents_list[n - 1::-1]:
|
||||
for maybe_comment_line in contents_list[n - 1 :: -1]:
|
||||
if not re.match("\s*#\s*", maybe_comment_line):
|
||||
break
|
||||
|
||||
@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
|
||||
# line numbering starts with 1
|
||||
return n + 1, comment
|
||||
|
||||
matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE)
|
||||
matches: Optional[List[Entity]] = re.findall(
|
||||
cmake_option_regex, contents, re.MULTILINE
|
||||
)
|
||||
|
||||
|
||||
file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name)
|
||||
if file_rel_path_with_name.startswith('/'):
|
||||
file_rel_path_with_name: str = os.path.join(
|
||||
file_path[len(root_path) :], file_name
|
||||
)
|
||||
if file_rel_path_with_name.startswith("/"):
|
||||
file_rel_path_with_name = file_rel_path_with_name[1:]
|
||||
|
||||
if matches:
|
||||
for entity in matches:
|
||||
build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0]))
|
||||
build_entity(
|
||||
file_rel_path_with_name, entity, get_line_and_comment(entity[0])
|
||||
)
|
||||
|
||||
|
||||
def process_folder(root_path: str, name: str) -> None:
|
||||
for root, _, files in os.walk(os.path.join(root_path, name)):
|
||||
@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None:
|
||||
if f == "CMakeLists.txt" or ".cmake" in f:
|
||||
process_file(root_path, root, f)
|
||||
|
||||
def generate_cmake_flags_files() -> None:
|
||||
root_path: str = os.path.join(os.path.dirname(__file__), '..', '..')
|
||||
|
||||
output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md")
|
||||
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md")
|
||||
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
|
||||
def generate_cmake_flags_files() -> None:
|
||||
root_path: str = os.path.join(os.path.dirname(__file__), "..", "..")
|
||||
|
||||
output_file_name: str = os.path.join(
|
||||
root_path, "docs/en/development/cmake-in-clickhouse.md"
|
||||
)
|
||||
header_file_name: str = os.path.join(
|
||||
root_path, "docs/_includes/cmake_in_clickhouse_header.md"
|
||||
)
|
||||
footer_file_name: str = os.path.join(
|
||||
root_path, "docs/_includes/cmake_in_clickhouse_footer.md"
|
||||
)
|
||||
|
||||
process_file(root_path, root_path, "CMakeLists.txt")
|
||||
process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt")
|
||||
@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None:
|
||||
f.write(entities[k][1] + "\n")
|
||||
ignored_keys.append(k)
|
||||
|
||||
f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" +
|
||||
table_header)
|
||||
f.write(
|
||||
"\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n"
|
||||
+ table_header
|
||||
)
|
||||
|
||||
for k in sorted_keys:
|
||||
if k.startswith("ENABLE_") and ".cmake" in entities[k][0]:
|
||||
@ -143,15 +164,18 @@ def generate_cmake_flags_files() -> None:
|
||||
with open(footer_file_name, "r") as footer:
|
||||
f.write(footer.read())
|
||||
|
||||
other_languages = ["docs/ja/development/cmake-in-clickhouse.md",
|
||||
"docs/zh/development/cmake-in-clickhouse.md",
|
||||
"docs/ru/development/cmake-in-clickhouse.md"]
|
||||
other_languages = [
|
||||
"docs/ja/development/cmake-in-clickhouse.md",
|
||||
"docs/zh/development/cmake-in-clickhouse.md",
|
||||
"docs/ru/development/cmake-in-clickhouse.md",
|
||||
]
|
||||
|
||||
for lang in other_languages:
|
||||
other_file_name = os.path.join(root_path, lang)
|
||||
if os.path.exists(other_file_name):
|
||||
os.unlink(other_file_name)
|
||||
os.unlink(other_file_name)
|
||||
os.symlink(output_file_name, other_file_name)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
generate_cmake_flags_files()
|
||||
|
@ -8,7 +8,7 @@ import contextlib
|
||||
from git import cmd
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
SCRIPT_DESCRIPTION = '''
|
||||
SCRIPT_DESCRIPTION = """
|
||||
usage: ./easy_diff.py language/document path
|
||||
|
||||
Show the difference between a language document and an English document.
|
||||
@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = '''
|
||||
OPTIONS:
|
||||
-h, --help show this help message and exit
|
||||
--no-pager use stdout as difference result output
|
||||
'''
|
||||
"""
|
||||
|
||||
SCRIPT_PATH = os.path.abspath(__file__)
|
||||
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..')
|
||||
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..")
|
||||
SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME)
|
||||
|
||||
SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False)
|
||||
|
||||
|
||||
def execute(commands):
|
||||
@ -70,19 +70,41 @@ def execute(commands):
|
||||
|
||||
|
||||
def get_hash(file_name):
|
||||
return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name])
|
||||
return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name])
|
||||
|
||||
|
||||
def diff_file(reference_file, working_file, out):
|
||||
if not os.path.exists(reference_file):
|
||||
raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.')
|
||||
raise RuntimeError(
|
||||
"reference file [" + os.path.abspath(reference_file) + "] is not exists."
|
||||
)
|
||||
|
||||
if os.path.islink(working_file):
|
||||
out.writelines(["Need translate document:" + os.path.abspath(reference_file)])
|
||||
elif not os.path.exists(working_file):
|
||||
out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)])
|
||||
out.writelines(
|
||||
[
|
||||
"Need link document "
|
||||
+ os.path.abspath(reference_file)
|
||||
+ " to "
|
||||
+ os.path.abspath(working_file)
|
||||
]
|
||||
)
|
||||
elif get_hash(working_file) != get_hash(reference_file):
|
||||
out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))])
|
||||
out.writelines(
|
||||
[
|
||||
(
|
||||
execute(
|
||||
[
|
||||
"git",
|
||||
"diff",
|
||||
get_hash(working_file).strip('"'),
|
||||
reference_file,
|
||||
]
|
||||
).encode("utf-8")
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out):
|
||||
for list_item in os.listdir(reference_directory):
|
||||
working_item = os.path.join(working_directory, list_item)
|
||||
reference_item = os.path.join(reference_directory, list_item)
|
||||
if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0:
|
||||
if (
|
||||
diff_file(reference_item, working_item, out)
|
||||
if os.path.isfile(reference_item)
|
||||
else diff_directory(reference_item, working_item, out) != 0
|
||||
):
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def find_language_doc(custom_document, other_language='en', children=[]):
|
||||
def find_language_doc(custom_document, other_language="en", children=[]):
|
||||
if len(custom_document) == 0:
|
||||
raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.")
|
||||
raise RuntimeError(
|
||||
"The "
|
||||
+ os.path.join(custom_document, *children)
|
||||
+ " is not in docs directory."
|
||||
)
|
||||
|
||||
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document):
|
||||
return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:])
|
||||
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document):
|
||||
return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:])
|
||||
children.insert(0, os.path.split(custom_document)[1])
|
||||
return find_language_doc(os.path.split(custom_document)[0], other_language, children)
|
||||
return find_language_doc(
|
||||
os.path.split(custom_document)[0], other_language, children
|
||||
)
|
||||
|
||||
|
||||
class ToPager:
|
||||
@ -119,7 +151,7 @@ class ToPager:
|
||||
|
||||
def close(self):
|
||||
self.temp_named_file.flush()
|
||||
git_pager = execute(['git', 'var', 'GIT_PAGER'])
|
||||
git_pager = execute(["git", "var", "GIT_PAGER"])
|
||||
subprocess.check_call([git_pager, self.temp_named_file.name])
|
||||
self.temp_named_file.close()
|
||||
|
||||
@ -135,12 +167,20 @@ class ToStdOut:
|
||||
self.system_stdout_stream = system_stdout_stream
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
arguments = SCRIPT_COMMAND_PARSER.parse_args()
|
||||
if arguments.help or not arguments.path:
|
||||
sys.stdout.write(SCRIPT_DESCRIPTION)
|
||||
sys.exit(0)
|
||||
|
||||
working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path)
|
||||
with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer:
|
||||
exit(diff_directory(find_language_doc(working_language), working_language, writer))
|
||||
working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path)
|
||||
with contextlib.closing(
|
||||
ToStdOut(sys.stdout)
|
||||
if arguments.no_pager
|
||||
else ToPager(NamedTemporaryFile("r+"))
|
||||
) as writer:
|
||||
exit(
|
||||
diff_directory(
|
||||
find_language_doc(working_language), working_language, writer
|
||||
)
|
||||
)
|
||||
|
@ -16,27 +16,26 @@ import util
|
||||
def get_events(args):
|
||||
events = []
|
||||
skip = True
|
||||
with open(os.path.join(args.docs_dir, '..', 'README.md')) as f:
|
||||
with open(os.path.join(args.docs_dir, "..", "README.md")) as f:
|
||||
for line in f:
|
||||
if skip:
|
||||
if 'Upcoming Events' in line:
|
||||
if "Upcoming Events" in line:
|
||||
skip = False
|
||||
else:
|
||||
if not line:
|
||||
continue
|
||||
line = line.strip().split('](')
|
||||
line = line.strip().split("](")
|
||||
if len(line) == 2:
|
||||
tail = line[1].split(') ')
|
||||
events.append({
|
||||
'signup_link': tail[0],
|
||||
'event_name': line[0].replace('* [', ''),
|
||||
'event_date': tail[1].replace('on ', '').replace('.', '')
|
||||
})
|
||||
tail = line[1].split(") ")
|
||||
events.append(
|
||||
{
|
||||
"signup_link": tail[0],
|
||||
"event_name": line[0].replace("* [", ""),
|
||||
"event_date": tail[1].replace("on ", "").replace(".", ""),
|
||||
}
|
||||
)
|
||||
return events
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
stream=sys.stderr
|
||||
)
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
|
||||
|
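A short illustrative sketch of the event dictionary that get_events above builds from an "Upcoming Events" bullet in README.md; the bullet line itself is a made-up example.

```python
# Hypothetical README.md bullet under "Upcoming Events".
line = "* [ClickHouse Meetup](https://example.com/signup) on December 1, 2021."

parts = line.strip().split("](")
tail = parts[1].split(") ")
event = {
    "signup_link": tail[0],
    "event_name": parts[0].replace("* [", ""),
    "event_date": tail[1].replace("on ", "").replace(".", ""),
}
print(event)
# {'signup_link': 'https://example.com/signup',
#  'event_name': 'ClickHouse Meetup',
#  'event_date': 'December 1, 2021'}
```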
@ -16,74 +16,79 @@ import slugify as slugify_impl
|
||||
|
||||
|
||||
def slugify(value, separator):
|
||||
return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True)
|
||||
return slugify_impl.slugify(
|
||||
value, separator=separator, word_boundary=True, save_order=True
|
||||
)
|
||||
|
||||
|
||||
MARKDOWN_EXTENSIONS = [
|
||||
'mdx_clickhouse',
|
||||
'admonition',
|
||||
'attr_list',
|
||||
'def_list',
|
||||
'codehilite',
|
||||
'nl2br',
|
||||
'sane_lists',
|
||||
'pymdownx.details',
|
||||
'pymdownx.magiclink',
|
||||
'pymdownx.superfences',
|
||||
'extra',
|
||||
{
|
||||
'toc': {
|
||||
'permalink': True,
|
||||
'slugify': slugify
|
||||
}
|
||||
}
|
||||
"mdx_clickhouse",
|
||||
"admonition",
|
||||
"attr_list",
|
||||
"def_list",
|
||||
"codehilite",
|
||||
"nl2br",
|
||||
"sane_lists",
|
||||
"pymdownx.details",
|
||||
"pymdownx.magiclink",
|
||||
"pymdownx.superfences",
|
||||
"extra",
|
||||
{"toc": {"permalink": True, "slugify": slugify}},
|
||||
]
|
||||
|
||||
|
||||
class ClickHouseLinkMixin(object):
|
||||
|
||||
def handleMatch(self, m, data):
|
||||
single_page = (os.environ.get('SINGLE_PAGE') == '1')
|
||||
single_page = os.environ.get("SINGLE_PAGE") == "1"
|
||||
try:
|
||||
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
|
||||
except IndexError:
|
||||
return
|
||||
|
||||
if el is not None:
|
||||
href = el.get('href') or ''
|
||||
is_external = href.startswith('http:') or href.startswith('https:')
|
||||
href = el.get("href") or ""
|
||||
is_external = href.startswith("http:") or href.startswith("https:")
|
||||
if is_external:
|
||||
if not href.startswith('https://clickhouse.com'):
|
||||
el.set('rel', 'external nofollow noreferrer')
|
||||
if not href.startswith("https://clickhouse.com"):
|
||||
el.set("rel", "external nofollow noreferrer")
|
||||
elif single_page:
|
||||
if '#' in href:
|
||||
el.set('href', '#' + href.split('#', 1)[1])
|
||||
if "#" in href:
|
||||
el.set("href", "#" + href.split("#", 1)[1])
|
||||
else:
|
||||
el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/'))
|
||||
el.set(
|
||||
"href", "#" + href.replace("/index.md", "/").replace(".md", "/")
|
||||
)
|
||||
return el, start, end
|
||||
|
||||
|
||||
class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor):
|
||||
class ClickHouseAutolinkPattern(
|
||||
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor):
|
||||
class ClickHouseLinkPattern(
|
||||
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
class ClickHousePreprocessor(markdown.util.Processor):
|
||||
def run(self, lines):
|
||||
for line in lines:
|
||||
if '<!--hide-->' not in line:
|
||||
if "<!--hide-->" not in line:
|
||||
yield line
|
||||
|
||||
|
||||
class ClickHouseMarkdown(markdown.extensions.Extension):
|
||||
|
||||
def extendMarkdown(self, md, md_globals):
|
||||
md.preprocessors['clickhouse'] = ClickHousePreprocessor()
|
||||
md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md)
|
||||
md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md)
|
||||
md.preprocessors["clickhouse"] = ClickHousePreprocessor()
|
||||
md.inlinePatterns["link"] = ClickHouseLinkPattern(
|
||||
markdown.inlinepatterns.LINK_RE, md
|
||||
)
|
||||
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
|
||||
markdown.inlinepatterns.AUTOLINK_RE, md
|
||||
)
|
||||
|
||||
|
||||
def makeExtension(**kwargs):
|
||||
@ -92,10 +97,8 @@ def makeExtension(**kwargs):
|
||||
|
||||
def get_translations(dirname, lang):
|
||||
import babel.support
|
||||
return babel.support.Translations.load(
|
||||
dirname=dirname,
|
||||
locales=[lang, 'en']
|
||||
)
|
||||
|
||||
return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
|
||||
|
||||
|
||||
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
|
||||
def on_config(self, config):
|
||||
super(PatchedMacrosPlugin, self).on_config(config)
|
||||
self.env.comment_start_string = '{##'
|
||||
self.env.comment_end_string = '##}'
|
||||
self.env.loader = jinja2.FileSystemLoader([
|
||||
os.path.join(config.data['site_dir']),
|
||||
os.path.join(config.data['extra']['includes_dir'])
|
||||
])
|
||||
self.env.comment_start_string = "{##"
|
||||
self.env.comment_end_string = "##}"
|
||||
self.env.loader = jinja2.FileSystemLoader(
|
||||
[
|
||||
os.path.join(config.data["site_dir"]),
|
||||
os.path.join(config.data["extra"]["includes_dir"]),
|
||||
]
|
||||
)
|
||||
|
||||
def on_env(self, env, config, files):
|
||||
import util
|
||||
env.add_extension('jinja2.ext.i18n')
|
||||
dirname = os.path.join(config.data['theme'].dirs[0], 'locale')
|
||||
lang = config.data['theme']['language']
|
||||
env.install_gettext_translations(
|
||||
get_translations(dirname, lang),
|
||||
newstyle=True
|
||||
)
|
||||
|
||||
env.add_extension("jinja2.ext.i18n")
|
||||
dirname = os.path.join(config.data["theme"].dirs[0], "locale")
|
||||
lang = config.data["theme"]["language"]
|
||||
env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
|
||||
util.init_jinja2_filters(env)
|
||||
return env
|
||||
|
||||
@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
return markdown
|
||||
|
||||
def on_page_markdown(self, markdown, page, config, files):
|
||||
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files)
|
||||
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
|
||||
markdown, page, config, files
|
||||
)
|
||||
|
||||
if os.path.islink(page.file.abs_src_path):
|
||||
lang = config.data['theme']['language']
|
||||
page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1)
|
||||
lang = config.data["theme"]["language"]
|
||||
page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
|
||||
|
||||
if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'):
|
||||
if config.data["extra"].get("version_prefix") or config.data["extra"].get(
|
||||
"single_page"
|
||||
):
|
||||
return markdown
|
||||
if self.skip_git_log:
|
||||
return markdown
|
||||
|
@ -10,57 +10,59 @@ import util
|
||||
|
||||
|
||||
def find_first_header(content):
    for line in content.split('\n'):
        if line.startswith('#'):
            no_hash = line.lstrip('#')
            return no_hash.split('{', 1)[0].strip()
    for line in content.split("\n"):
        if line.startswith("#"):
            no_hash = line.lstrip("#")
            return no_hash.split("{", 1)[0].strip()

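An illustrative sketch of what find_first_header above extracts when a page has no explicit toc_title; the sample page content is made up.

```python
def find_first_header(content):
    for line in content.split("\n"):
        if line.startswith("#"):
            no_hash = line.lstrip("#")
            return no_hash.split("{", 1)[0].strip()

sample_page = "# SELECT Query {#select-query}\n\nSELECT queries perform data retrieval."
print(find_first_header(sample_page))  # SELECT Query
```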
def build_nav_entry(root, args):
|
||||
if root.endswith('images'):
|
||||
if root.endswith("images"):
|
||||
return None, None, None
|
||||
result_items = []
|
||||
index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md'))
|
||||
current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title'))
|
||||
current_title = current_title or index_meta.get('title', find_first_header(index_content))
|
||||
index_meta, index_content = util.read_md_file(os.path.join(root, "index.md"))
|
||||
current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title"))
|
||||
current_title = current_title or index_meta.get(
|
||||
"title", find_first_header(index_content)
|
||||
)
|
||||
for filename in os.listdir(root):
|
||||
path = os.path.join(root, filename)
|
||||
if os.path.isdir(path):
|
||||
prio, title, payload = build_nav_entry(path, args)
|
||||
if title and payload:
|
||||
result_items.append((prio, title, payload))
|
||||
elif filename.endswith('.md'):
|
||||
elif filename.endswith(".md"):
|
||||
path = os.path.join(root, filename)
|
||||
|
||||
meta = ''
|
||||
content = ''
|
||||
meta = ""
|
||||
content = ""
|
||||
|
||||
try:
|
||||
meta, content = util.read_md_file(path)
|
||||
except:
|
||||
print('Error in file: {}'.format(path))
|
||||
print("Error in file: {}".format(path))
|
||||
raise
|
||||
|
||||
path = path.split('/', 2)[-1]
|
||||
title = meta.get('toc_title', find_first_header(content))
|
||||
path = path.split("/", 2)[-1]
|
||||
title = meta.get("toc_title", find_first_header(content))
|
||||
if title:
|
||||
title = title.strip().rstrip('.')
|
||||
title = title.strip().rstrip(".")
|
||||
else:
|
||||
title = meta.get('toc_folder_title', 'hidden')
|
||||
prio = meta.get('toc_priority', 9999)
|
||||
logging.debug(f'Nav entry: {prio}, {title}, {path}')
|
||||
if meta.get('toc_hidden') or not content.strip():
|
||||
title = 'hidden'
|
||||
if title == 'hidden':
|
||||
title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest()
|
||||
title = meta.get("toc_folder_title", "hidden")
|
||||
prio = meta.get("toc_priority", 9999)
|
||||
logging.debug(f"Nav entry: {prio}, {title}, {path}")
|
||||
if meta.get("toc_hidden") or not content.strip():
|
||||
title = "hidden"
|
||||
if title == "hidden":
|
||||
title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest()
|
||||
if args.nav_limit and len(result_items) >= args.nav_limit:
|
||||
break
|
||||
result_items.append((prio, title, path))
|
||||
result_items = sorted(result_items, key=lambda x: (x[0], x[1]))
|
||||
result = collections.OrderedDict([(item[1], item[2]) for item in result_items])
|
||||
if index_meta.get('toc_hidden_folder'):
|
||||
current_title += '|hidden-folder'
|
||||
return index_meta.get('toc_priority', 10000), current_title, result
|
||||
if index_meta.get("toc_hidden_folder"):
|
||||
current_title += "|hidden-folder"
|
||||
return index_meta.get("toc_priority", 10000), current_title, result
|
||||
|
||||
|
||||
def build_docs_nav(lang, args):
|
||||
@ -70,7 +72,7 @@ def build_docs_nav(lang, args):
|
||||
index_key = None
|
||||
for key, value in list(nav.items()):
|
||||
if key and value:
|
||||
if value == 'index.md':
|
||||
if value == "index.md":
|
||||
index_key = key
|
||||
continue
|
||||
result.append({key: value})
|
||||
@ -78,7 +80,7 @@ def build_docs_nav(lang, args):
|
||||
break
|
||||
if index_key:
|
||||
key = list(result[0].keys())[0]
|
||||
result[0][key][index_key] = 'index.md'
|
||||
result[0][key][index_key] = "index.md"
|
||||
result[0][key].move_to_end(index_key, last=False)
|
||||
return result
|
||||
|
||||
@ -86,7 +88,7 @@ def build_docs_nav(lang, args):
|
||||
def build_blog_nav(lang, args):
|
||||
blog_dir = os.path.join(args.blog_dir, lang)
|
||||
years = sorted(os.listdir(blog_dir), reverse=True)
|
||||
result_nav = [{'hidden': 'index.md'}]
|
||||
result_nav = [{"hidden": "index.md"}]
|
||||
post_meta = collections.OrderedDict()
|
||||
for year in years:
|
||||
year_dir = os.path.join(blog_dir, year)
|
||||
@ -97,38 +99,53 @@ def build_blog_nav(lang, args):
|
||||
post_meta_items = []
|
||||
for post in os.listdir(year_dir):
|
||||
post_path = os.path.join(year_dir, post)
|
||||
if not post.endswith('.md'):
|
||||
raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}')
|
||||
if not post.endswith(".md"):
|
||||
raise RuntimeError(
|
||||
f"Unexpected non-md file in posts folder: {post_path}"
|
||||
)
|
||||
meta, _ = util.read_md_file(post_path)
|
||||
post_date = meta['date']
|
||||
post_title = meta['title']
|
||||
post_date = meta["date"]
|
||||
post_title = meta["title"]
|
||||
if datetime.date.fromisoformat(post_date) > datetime.date.today():
|
||||
continue
|
||||
posts.append(
|
||||
(post_date, post_title, os.path.join(year, post),)
|
||||
(
|
||||
post_date,
|
||||
post_title,
|
||||
os.path.join(year, post),
|
||||
)
|
||||
)
|
||||
if post_title in post_meta:
|
||||
raise RuntimeError(f'Duplicate post title: {post_title}')
|
||||
if not post_date.startswith(f'{year}-'):
|
||||
raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}')
|
||||
post_url_part = post.replace('.md', '')
|
||||
post_meta_items.append((post_date, {
|
||||
'date': post_date,
|
||||
'title': post_title,
|
||||
'image': meta.get('image'),
|
||||
'url': f'/blog/{lang}/{year}/{post_url_part}/'
|
||||
},))
|
||||
raise RuntimeError(f"Duplicate post title: {post_title}")
|
||||
if not post_date.startswith(f"{year}-"):
|
||||
raise RuntimeError(
|
||||
f"Post date {post_date} doesn't match the folder year {year}: {post_title}"
|
||||
)
|
||||
post_url_part = post.replace(".md", "")
|
||||
post_meta_items.append(
|
||||
(
|
||||
post_date,
|
||||
{
|
||||
"date": post_date,
|
||||
"title": post_title,
|
||||
"image": meta.get("image"),
|
||||
"url": f"/blog/{lang}/{year}/{post_url_part}/",
|
||||
},
|
||||
)
|
||||
)
|
||||
for _, title, path in sorted(posts, reverse=True):
|
||||
result_nav[-1][year][title] = path
|
||||
for _, post_meta_item in sorted(post_meta_items,
|
||||
reverse=True,
|
||||
key=lambda item: item[0]):
|
||||
post_meta[post_meta_item['title']] = post_meta_item
|
||||
for _, post_meta_item in sorted(
|
||||
post_meta_items, reverse=True, key=lambda item: item[0]
|
||||
):
|
||||
post_meta[post_meta_item["title"]] = post_meta_item
|
||||
return result_nav, post_meta
|
||||
|
||||
|
||||
def _custom_get_navigation(files, config):
|
||||
nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages())
|
||||
nav_config = config["nav"] or mkdocs.structure.nav.nest_paths(
|
||||
f.src_path for f in files.documentation_pages()
|
||||
)
|
||||
items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config)
|
||||
if not isinstance(items, list):
|
||||
items = [items]
|
||||
@ -138,19 +155,25 @@ def _custom_get_navigation(files, config):
|
||||
mkdocs.structure.nav._add_previous_and_next_links(pages)
|
||||
mkdocs.structure.nav._add_parent_links(items)
|
||||
|
||||
missing_from_config = [file for file in files.documentation_pages() if file.page is None]
|
||||
missing_from_config = [
|
||||
file for file in files.documentation_pages() if file.page is None
|
||||
]
|
||||
if missing_from_config:
|
||||
files._files = [file for file in files._files if file not in missing_from_config]
|
||||
files._files = [
|
||||
file for file in files._files if file not in missing_from_config
|
||||
]
|
||||
|
||||
links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link)
|
||||
for link in links:
|
||||
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url)
|
||||
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(
|
||||
link.url
|
||||
)
|
||||
if scheme or netloc:
|
||||
mkdocs.structure.nav.log.debug(
|
||||
"An external link to '{}' is included in "
|
||||
"the 'nav' configuration.".format(link.url)
|
||||
)
|
||||
elif link.url.startswith('/'):
|
||||
elif link.url.startswith("/"):
|
||||
mkdocs.structure.nav.log.debug(
|
||||
"An absolute path to '{}' is included in the 'nav' configuration, "
|
||||
"which presumably points to an external resource.".format(link.url)
|
||||
|
@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url):
|
||||
os.makedirs(out_dir)
|
||||
except OSError:
|
||||
pass
|
||||
with open(out_path, 'w') as f:
|
||||
f.write(f'''<!--[if IE 6]> Redirect: {to_url} <![endif]-->
|
||||
with open(out_path, "w") as f:
|
||||
f.write(
|
||||
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
|
||||
<!DOCTYPE HTML>
|
||||
<html lang="en-US">
|
||||
<head>
|
||||
@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url):
|
||||
<body>
|
||||
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
|
||||
</body>
|
||||
</html>''')
|
||||
</html>"""
|
||||
)
|
||||
|
||||
|
||||
def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path):
    out_path = os.path.join(
        output_dir, lang,
        from_path.replace('/index.md', '/index.html').replace('.md', '/index.html')
        output_dir,
        lang,
        from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"),
    )
    target_path = to_path.replace('/index.md', '/').replace('.md', '/')
    target_path = to_path.replace("/index.md", "/").replace(".md", "/")

    if target_path[0:7] != 'http://' and target_path[0:8] != 'https://':
        to_url = f'/{base_prefix}/{lang}/{target_path}'
    if target_path[0:7] != "http://" and target_path[0:8] != "https://":
        to_url = f"/{base_prefix}/{lang}/{target_path}"
    else:
        to_url = target_path
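As a sanity check of the path handling above, here is a small sketch of the output file and target URL that build_redirect_html derives; the redirects.txt entry used here is a made-up example.

```python
import os

def redirect_paths(base_prefix, lang, output_dir, from_path, to_path):
    # Mirrors the path logic of build_redirect_html above.
    out_path = os.path.join(
        output_dir,
        lang,
        from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"),
    )
    target_path = to_path.replace("/index.md", "/").replace(".md", "/")
    if target_path[0:7] != "http://" and target_path[0:8] != "https://":
        to_url = f"/{base_prefix}/{lang}/{target_path}"
    else:
        to_url = target_path
    return out_path, to_url

# Hypothetical redirects.txt entry: "faq/terms.md whats-new/changelog/index.md"
print(redirect_paths("docs", "en", "build/docs", "faq/terms.md", "whats-new/changelog/index.md"))
# ('build/docs/en/faq/terms/index.html', '/docs/en/whats-new/changelog/')
```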
@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)
|
||||
|
||||
|
||||
def build_docs_redirects(args):
|
||||
with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f:
|
||||
with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f:
|
||||
for line in f:
|
||||
for lang in args.lang.split(','):
|
||||
from_path, to_path = line.split(' ', 1)
|
||||
build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path)
|
||||
for lang in args.lang.split(","):
|
||||
from_path, to_path = line.split(" ", 1)
|
||||
build_redirect_html(
|
||||
args, "docs", lang, args.docs_output_dir, from_path, to_path
|
||||
)
|
||||
|
||||
|
||||
def build_blog_redirects(args):
|
||||
for lang in args.blog_lang.split(','):
|
||||
redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt')
|
||||
for lang in args.blog_lang.split(","):
|
||||
redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt")
|
||||
if os.path.exists(redirects_path):
|
||||
with open(redirects_path, 'r') as f:
|
||||
with open(redirects_path, "r") as f:
|
||||
for line in f:
|
||||
from_path, to_path = line.split(' ', 1)
|
||||
build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path)
|
||||
from_path, to_path = line.split(" ", 1)
|
||||
build_redirect_html(
|
||||
args, "blog", lang, args.blog_output_dir, from_path, to_path
|
||||
)
|
||||
|
||||
|
||||
def build_static_redirects(args):
|
||||
for static_redirect in [
|
||||
('benchmark.html', '/benchmark/dbms/'),
|
||||
('benchmark_hardware.html', '/benchmark/hardware/'),
|
||||
('tutorial.html', '/docs/en/getting_started/tutorial/',),
|
||||
('reference_en.html', '/docs/en/single/', ),
|
||||
('reference_ru.html', '/docs/ru/single/',),
|
||||
('docs/index.html', '/docs/en/',),
|
||||
("benchmark.html", "/benchmark/dbms/"),
|
||||
("benchmark_hardware.html", "/benchmark/hardware/"),
|
||||
(
|
||||
"tutorial.html",
|
||||
"/docs/en/getting_started/tutorial/",
|
||||
),
|
||||
(
|
||||
"reference_en.html",
|
||||
"/docs/en/single/",
|
||||
),
|
||||
(
|
||||
"reference_ru.html",
|
||||
"/docs/ru/single/",
|
||||
),
|
||||
(
|
||||
"docs/index.html",
|
||||
"/docs/en/",
|
||||
),
|
||||
]:
|
||||
write_redirect_html(
|
||||
os.path.join(args.output_dir, static_redirect[0]),
|
||||
static_redirect[1]
|
||||
os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
|
||||
)
|
||||
|
@ -12,7 +12,8 @@ import test
|
||||
import util
|
||||
import website
|
||||
|
||||
TEMPORARY_FILE_NAME = 'single.md'
|
||||
TEMPORARY_FILE_NAME = "single.md"
|
||||
|
||||
|
||||
def recursive_values(item):
|
||||
if isinstance(item, dict):
|
||||
@ -25,11 +26,14 @@ def recursive_values(item):
|
||||
yield item
|
||||
|
||||
|
||||
anchor_not_allowed_chars = re.compile(r'[^\w\-]')
def generate_anchor_from_path(path):
    return re.sub(anchor_not_allowed_chars, '-', path)
anchor_not_allowed_chars = re.compile(r"[^\w\-]")

absolute_link = re.compile(r'^https?://')

def generate_anchor_from_path(path):
    return re.sub(anchor_not_allowed_chars, "-", path)


absolute_link = re.compile(r"^https?://")
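An illustrative sketch of the anchors these definitions produce when the single-page build rewrites intra-doc links; the path is a made-up example.

```python
import re

anchor_not_allowed_chars = re.compile(r"[^\w\-]")

def generate_anchor_from_path(path):
    # Every character other than letters, digits, '_' and '-' becomes '-'.
    return re.sub(anchor_not_allowed_chars, "-", path)

print(generate_anchor_from_path("sql-reference/statements/select/index.md"))
# sql-reference-statements-select-index-md
```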
|
||||
|
||||
def replace_link(match, path):
|
||||
@ -40,46 +44,55 @@ def replace_link(match, path):
|
||||
if re.search(absolute_link, link):
|
||||
return match.group(0)
|
||||
|
||||
if link.endswith('/'):
|
||||
link = link[0:-1] + '.md'
|
||||
if link.endswith("/"):
|
||||
link = link[0:-1] + ".md"
|
||||
|
||||
return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link))))
|
||||
return "{}(#{})".format(
|
||||
title,
|
||||
generate_anchor_from_path(
|
||||
os.path.normpath(os.path.join(os.path.dirname(path), link))
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# Concatenates Markdown files to a single file.
|
||||
def concatenate(lang, docs_path, single_page_file, nav):
|
||||
lang_path = os.path.join(docs_path, lang)
|
||||
|
||||
proj_config = f'{docs_path}/toc_{lang}.yml'
|
||||
proj_config = f"{docs_path}/toc_{lang}.yml"
|
||||
if os.path.exists(proj_config):
|
||||
with open(proj_config) as cfg_file:
|
||||
nav = yaml.full_load(cfg_file.read())['nav']
|
||||
nav = yaml.full_load(cfg_file.read())["nav"]
|
||||
|
||||
files_to_concatenate = list(recursive_values(nav))
|
||||
files_count = len(files_to_concatenate)
|
||||
logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.')
|
||||
logging.debug('Concatenating: ' + ', '.join(files_to_concatenate))
|
||||
assert files_count > 0, f'Empty single-page for {lang}'
|
||||
logging.info(
|
||||
f"{files_count} files will be concatenated into single md-file for {lang}."
|
||||
)
|
||||
logging.debug("Concatenating: " + ", ".join(files_to_concatenate))
|
||||
assert files_count > 0, f"Empty single-page for {lang}"
|
||||
|
||||
link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)')
|
||||
link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")
|
||||
|
||||
for path in files_to_concatenate:
|
||||
try:
|
||||
with open(os.path.join(lang_path, path)) as f:
|
||||
# Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file.
|
||||
single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path))
|
||||
single_page_file.write(
|
||||
'\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)
|
||||
)
|
||||
|
||||
in_metadata = False
|
||||
for line in f:
|
||||
# Skip YAML metadata.
|
||||
if line == '---\n':
|
||||
if line == "---\n":
|
||||
in_metadata = not in_metadata
|
||||
continue
|
||||
|
||||
if not in_metadata:
|
||||
# Increase the level of headers.
|
||||
if line.startswith('#'):
|
||||
line = '#' + line
|
||||
if line.startswith("#"):
|
||||
line = "#" + line
|
||||
|
||||
# Replace links within the docs.
|
||||
|
||||
@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav):
|
||||
line = re.sub(
|
||||
link_regexp,
|
||||
lambda match: replace_link(match, path),
|
||||
line)
|
||||
line,
|
||||
)
|
||||
|
||||
# If failed to replace the relative link, print to log
|
||||
# But with some exceptions:
|
||||
# - "../src/" -- for cmake-in-clickhouse.md (link to sources)
|
||||
# - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
|
||||
if '../' in line and (not '../usr/share' in line) and (not '../src/' in line):
|
||||
logging.info('Failed to resolve relative link:')
|
||||
if (
|
||||
"../" in line
|
||||
and (not "../usr/share" in line)
|
||||
and (not "../src/" in line)
|
||||
):
|
||||
logging.info("Failed to resolve relative link:")
|
||||
logging.info(path)
|
||||
logging.info(line)
|
||||
|
||||
@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav):
|
||||
|
||||
single_page_file.flush()
|
||||
|
||||
|
||||
def get_temporary_file_name(lang, args):
|
||||
return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME)
|
||||
|
||||
|
||||
def remove_temporary_files(lang, args):
|
||||
single_md_path = get_temporary_file_name(lang, args)
|
||||
if os.path.exists(single_md_path):
|
||||
@ -115,14 +135,14 @@ def remove_temporary_files(lang, args):
|
||||
|
||||
|
||||
def build_single_page_version(lang, args, nav, cfg):
|
||||
logging.info(f'Building single page version for {lang}')
|
||||
os.environ['SINGLE_PAGE'] = '1'
|
||||
extra = cfg.data['extra']
|
||||
extra['single_page'] = True
|
||||
extra['is_amp'] = False
|
||||
logging.info(f"Building single page version for {lang}")
|
||||
os.environ["SINGLE_PAGE"] = "1"
|
||||
extra = cfg.data["extra"]
|
||||
extra["single_page"] = True
|
||||
extra["is_amp"] = False
|
||||
|
||||
single_md_path = get_temporary_file_name(lang, args)
|
||||
with open(single_md_path, 'w') as single_md:
|
||||
with open(single_md_path, "w") as single_md:
|
||||
concatenate(lang, args.docs_dir, single_md, nav)
|
||||
|
||||
with util.temp_dir() as site_temp:
|
||||
@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg):
|
||||
shutil.copytree(docs_src_lang, docs_temp_lang)
|
||||
for root, _, filenames in os.walk(docs_temp_lang):
|
||||
for filename in filenames:
|
||||
if filename != 'single.md' and filename.endswith('.md'):
|
||||
if filename != "single.md" and filename.endswith(".md"):
|
||||
os.unlink(os.path.join(root, filename))
|
||||
|
||||
cfg.load_dict({
|
||||
'docs_dir': docs_temp_lang,
|
||||
'site_dir': site_temp,
|
||||
'extra': extra,
|
||||
'nav': [
|
||||
{cfg.data.get('site_name'): 'single.md'}
|
||||
]
|
||||
})
|
||||
cfg.load_dict(
|
||||
{
|
||||
"docs_dir": docs_temp_lang,
|
||||
"site_dir": site_temp,
|
||||
"extra": extra,
|
||||
"nav": [{cfg.data.get("site_name"): "single.md"}],
|
||||
}
|
||||
)
|
||||
|
||||
if not args.test_only:
|
||||
mkdocs.commands.build.build(cfg)
|
||||
|
||||
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
|
||||
single_page_output_path = os.path.join(
|
||||
args.docs_dir, args.docs_output_dir, lang, "single"
|
||||
)
|
||||
|
||||
if os.path.exists(single_page_output_path):
|
||||
shutil.rmtree(single_page_output_path)
|
||||
|
||||
shutil.copytree(
|
||||
os.path.join(site_temp, 'single'),
|
||||
single_page_output_path
|
||||
os.path.join(site_temp, "single"), single_page_output_path
|
||||
)
|
||||
|
||||
single_page_index_html = os.path.join(single_page_output_path, 'index.html')
|
||||
single_page_content_js = os.path.join(single_page_output_path, 'content.js')
|
||||
single_page_index_html = os.path.join(
|
||||
single_page_output_path, "index.html"
|
||||
)
|
||||
single_page_content_js = os.path.join(
|
||||
single_page_output_path, "content.js"
|
||||
)
|
||||
|
||||
with open(single_page_index_html, 'r') as f:
|
||||
sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->')
|
||||
with open(single_page_index_html, "r") as f:
|
||||
sp_prefix, sp_js, sp_suffix = f.read().split("<!-- BREAK -->")
|
||||
|
||||
with open(single_page_index_html, 'w') as f:
|
||||
with open(single_page_index_html, "w") as f:
|
||||
f.write(sp_prefix)
|
||||
f.write(sp_suffix)
|
||||
|
||||
with open(single_page_content_js, 'w') as f:
|
||||
with open(single_page_content_js, "w") as f:
|
||||
if args.minify:
|
||||
import jsmin
|
||||
|
||||
sp_js = jsmin.jsmin(sp_js)
|
||||
f.write(sp_js)
|
||||
|
||||
logging.info(f'Re-building single page for {lang} pdf/test')
|
||||
logging.info(f"Re-building single page for {lang} pdf/test")
|
||||
with util.temp_dir() as test_dir:
|
||||
extra['single_page'] = False
|
||||
cfg.load_dict({
|
||||
'docs_dir': docs_temp_lang,
|
||||
'site_dir': test_dir,
|
||||
'extra': extra,
|
||||
'nav': [
|
||||
{cfg.data.get('site_name'): 'single.md'}
|
||||
]
|
||||
})
|
||||
extra["single_page"] = False
|
||||
cfg.load_dict(
|
||||
{
|
||||
"docs_dir": docs_temp_lang,
|
||||
"site_dir": test_dir,
|
||||
"extra": extra,
|
||||
"nav": [{cfg.data.get("site_name"): "single.md"}],
|
||||
}
|
||||
)
|
||||
mkdocs.commands.build.build(cfg)
|
||||
|
||||
css_in = ' '.join(website.get_css_in(args))
|
||||
js_in = ' '.join(website.get_js_in(args))
|
||||
subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
|
||||
subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)
|
||||
css_in = " ".join(website.get_css_in(args))
|
||||
js_in = " ".join(website.get_js_in(args))
|
||||
subprocess.check_call(
|
||||
f"cat {css_in} > {test_dir}/css/base.css", shell=True
|
||||
)
|
||||
subprocess.check_call(
|
||||
f"cat {js_in} > {test_dir}/js/base.js", shell=True
|
||||
)
|
||||
|
||||
if args.save_raw_single_page:
|
||||
shutil.copytree(test_dir, args.save_raw_single_page)
|
||||
|
||||
logging.info(f'Running tests for {lang}')
|
||||
logging.info(f"Running tests for {lang}")
|
||||
test.test_single_page(
|
||||
os.path.join(test_dir, 'single', 'index.html'), lang)
|
||||
os.path.join(test_dir, "single", "index.html"), lang
|
||||
)
|
||||
|
||||
logging.info(f'Finished building single page version for {lang}')
|
||||
logging.info(f"Finished building single page version for {lang}")
|
||||
|
||||
remove_temporary_files(lang, args)
|
||||
|
@ -8,14 +8,11 @@ import subprocess
|
||||
|
||||
|
||||
def test_single_page(input_path, lang):
|
||||
if not (lang == 'en'):
|
||||
if not (lang == "en"):
|
||||
return
|
||||
|
||||
with open(input_path) as f:
|
||||
soup = bs4.BeautifulSoup(
|
||||
f,
|
||||
features='html.parser'
|
||||
)
|
||||
soup = bs4.BeautifulSoup(f, features="html.parser")
|
||||
|
||||
anchor_points = set()
|
||||
|
||||
@ -23,30 +20,27 @@ def test_single_page(input_path, lang):
|
||||
links_to_nowhere = 0
|
||||
|
||||
for tag in soup.find_all():
|
||||
for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]:
|
||||
for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]:
|
||||
if anchor_point:
|
||||
anchor_points.add(anchor_point)
|
||||
|
||||
for tag in soup.find_all():
|
||||
href = tag.attrs.get('href')
|
||||
if href and href.startswith('#') and href != '#':
|
||||
href = tag.attrs.get("href")
|
||||
if href and href.startswith("#") and href != "#":
|
||||
if href[1:] not in anchor_points:
|
||||
links_to_nowhere += 1
|
||||
logging.info("Tag %s", tag)
|
||||
logging.info('Link to nowhere: %s' % href)
|
||||
logging.info("Link to nowhere: %s" % href)
|
||||
|
||||
if links_to_nowhere:
|
||||
logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}')
|
||||
logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}")
|
||||
sys.exit(1)
|
||||
|
||||
if len(anchor_points) <= 10:
|
||||
logging.error('Html parsing is probably broken')
|
||||
logging.error("Html parsing is probably broken")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
stream=sys.stderr
|
||||
)
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
|
||||
test_single_page(sys.argv[1], sys.argv[2])
|
||||
|
@ -15,7 +15,7 @@ import yaml
|
||||
|
||||
@contextlib.contextmanager
|
||||
def temp_dir():
|
||||
path = tempfile.mkdtemp(dir=os.environ.get('TEMP'))
|
||||
path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
|
||||
try:
|
||||
yield path
|
||||
finally:
|
||||
@ -34,7 +34,7 @@ def cd(new_cwd):
|
||||
|
||||
def get_free_port():
|
||||
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
|
||||
s.bind(('', 0))
|
||||
s.bind(("", 0))
|
||||
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
return s.getsockname()[1]
|
||||
|
||||
@ -61,12 +61,12 @@ def read_md_file(path):
|
||||
meta_text = []
|
||||
content = []
|
||||
if os.path.exists(path):
|
||||
with open(path, 'r') as f:
|
||||
with open(path, "r") as f:
|
||||
for line in f:
|
||||
if line.startswith('---'):
|
||||
if line.startswith("---"):
|
||||
if in_meta:
|
||||
in_meta = False
|
||||
meta = yaml.full_load(''.join(meta_text))
|
||||
meta = yaml.full_load("".join(meta_text))
|
||||
else:
|
||||
in_meta = True
|
||||
else:
|
||||
@ -74,7 +74,7 @@ def read_md_file(path):
|
||||
meta_text.append(line)
|
||||
else:
|
||||
content.append(line)
|
||||
return meta, ''.join(content)
|
||||
return meta, "".join(content)
|
||||
|
||||
|
||||
def write_md_file(path, meta, content):
|
||||
@ -82,13 +82,13 @@ def write_md_file(path, meta, content):
|
||||
if not os.path.exists(dirname):
|
||||
os.makedirs(dirname)
|
||||
|
||||
with open(path, 'w') as f:
|
||||
with open(path, "w") as f:
|
||||
if meta:
|
||||
print('---', file=f)
|
||||
print("---", file=f)
|
||||
yaml.dump(meta, f)
|
||||
print('---', file=f)
|
||||
if not content.startswith('\n'):
|
||||
print('', file=f)
|
||||
print("---", file=f)
|
||||
if not content.startswith("\n"):
|
||||
print("", file=f)
|
||||
f.write(content)
|
||||
|
||||
|
||||
@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data):
|
||||
|
||||
value.append((node_key, node_value))
|
||||
|
||||
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)
|
||||
return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
|
||||
|
||||
|
||||
yaml.add_representer(collections.OrderedDict, represent_ordereddict)
|
||||
@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict)
|
||||
def init_jinja2_filters(env):
|
||||
import amp
|
||||
import website
|
||||
|
||||
chunk_size = 10240
|
||||
env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)]
|
||||
env.filters['html_to_amp'] = amp.html_to_amp
|
||||
env.filters['adjust_markdown_html'] = website.adjust_markdown_html
|
||||
env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT')
|
||||
env.filters["chunks"] = lambda line: [
|
||||
line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
|
||||
]
|
||||
env.filters["html_to_amp"] = amp.html_to_amp
|
||||
env.filters["adjust_markdown_html"] = website.adjust_markdown_html
|
||||
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
|
||||
d, "%Y-%m-%d"
|
||||
).strftime("%a, %d %b %Y %H:%M:%S GMT")
|
||||
|
||||
|
||||
def init_jinja2_env(args):
|
||||
import mdx_clickhouse
|
||||
|
||||
env = jinja2.Environment(
|
||||
loader=jinja2.FileSystemLoader([
|
||||
args.website_dir,
|
||||
os.path.join(args.docs_dir, '_includes')
|
||||
]),
|
||||
extensions=[
|
||||
'jinja2.ext.i18n',
|
||||
'jinja2_highlight.HighlightExtension'
|
||||
]
|
||||
loader=jinja2.FileSystemLoader(
|
||||
[args.website_dir, os.path.join(args.docs_dir, "_includes")]
|
||||
),
|
||||
extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
|
||||
)
|
||||
env.extend(jinja2_highlight_cssclass='syntax p-3 my-3')
|
||||
translations_dir = os.path.join(args.website_dir, 'locale')
|
||||
env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
|
||||
translations_dir = os.path.join(args.website_dir, "locale")
|
||||
env.install_gettext_translations(
|
||||
mdx_clickhouse.get_translations(translations_dir, 'en'),
|
||||
newstyle=True
|
||||
mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
|
||||
)
|
||||
init_jinja2_filters(env)
|
||||
return env
|
||||
|
@ -17,108 +17,112 @@ import util
|
||||
|
||||
|
||||
def handle_iframe(iframe, soup):
|
||||
allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/']
|
||||
allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"]
|
||||
illegal_domain = True
|
||||
iframe_src = iframe.attrs['src']
|
||||
iframe_src = iframe.attrs["src"]
|
||||
for domain in allowed_domains:
|
||||
if iframe_src.startswith(domain):
|
||||
illegal_domain = False
|
||||
break
|
||||
if illegal_domain:
|
||||
raise RuntimeError(f'iframe from illegal domain: {iframe_src}')
|
||||
wrapper = soup.new_tag('div')
|
||||
wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9']
|
||||
raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
|
||||
wrapper = soup.new_tag("div")
|
||||
wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
|
||||
iframe.insert_before(wrapper)
|
||||
iframe.extract()
|
||||
wrapper.insert(0, iframe)
|
||||
if 'width' in iframe.attrs:
|
||||
del iframe.attrs['width']
|
||||
if 'height' in iframe.attrs:
|
||||
del iframe.attrs['height']
|
||||
iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture'
|
||||
iframe.attrs['class'] = 'embed-responsive-item'
|
||||
iframe.attrs['frameborder'] = '0'
|
||||
iframe.attrs['allowfullscreen'] = '1'
|
||||
if "width" in iframe.attrs:
|
||||
del iframe.attrs["width"]
|
||||
if "height" in iframe.attrs:
|
||||
del iframe.attrs["height"]
|
||||
iframe.attrs[
|
||||
"allow"
|
||||
] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
|
||||
iframe.attrs["class"] = "embed-responsive-item"
|
||||
iframe.attrs["frameborder"] = "0"
|
||||
iframe.attrs["allowfullscreen"] = "1"
|
||||
|
||||
|
||||
def adjust_markdown_html(content):
|
||||
soup = bs4.BeautifulSoup(
|
||||
content,
|
||||
features='html.parser'
|
||||
)
|
||||
soup = bs4.BeautifulSoup(content, features="html.parser")
|
||||
|
||||
for a in soup.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
a_href = a.attrs.get('href')
|
||||
if a_class and 'headerlink' in a_class:
|
||||
a.string = '\xa0'
|
||||
if a_href and a_href.startswith('http'):
|
||||
a.attrs['target'] = '_blank'
|
||||
for a in soup.find_all("a"):
|
||||
a_class = a.attrs.get("class")
|
||||
a_href = a.attrs.get("href")
|
||||
if a_class and "headerlink" in a_class:
|
||||
a.string = "\xa0"
|
||||
if a_href and a_href.startswith("http"):
|
||||
a.attrs["target"] = "_blank"
|
||||
|
||||
for code in soup.find_all('code'):
|
||||
code_class = code.attrs.get('class')
|
||||
for code in soup.find_all("code"):
|
||||
code_class = code.attrs.get("class")
|
||||
if code_class:
|
||||
code.attrs['class'] = code_class + ['syntax']
|
||||
code.attrs["class"] = code_class + ["syntax"]
|
||||
else:
|
||||
code.attrs['class'] = 'syntax'
|
||||
code.attrs["class"] = "syntax"
|
||||
|
||||
for iframe in soup.find_all('iframe'):
|
||||
for iframe in soup.find_all("iframe"):
|
||||
handle_iframe(iframe, soup)
|
||||
|
||||
for img in soup.find_all('img'):
|
||||
if img.attrs.get('alt') == 'iframe':
|
||||
img.name = 'iframe'
|
||||
img.string = ''
|
||||
for img in soup.find_all("img"):
|
||||
if img.attrs.get("alt") == "iframe":
|
||||
img.name = "iframe"
|
||||
img.string = ""
|
||||
handle_iframe(img, soup)
|
||||
continue
|
||||
img_class = img.attrs.get('class')
|
||||
img_class = img.attrs.get("class")
|
||||
if img_class:
|
||||
img.attrs['class'] = img_class + ['img-fluid']
|
||||
img.attrs["class"] = img_class + ["img-fluid"]
|
||||
else:
|
||||
img.attrs['class'] = 'img-fluid'
|
||||
img.attrs["class"] = "img-fluid"
|
||||
|
||||
for details in soup.find_all('details'):
|
||||
for summary in details.find_all('summary'):
|
||||
for details in soup.find_all("details"):
|
||||
for summary in details.find_all("summary"):
|
||||
if summary.parent != details:
|
||||
summary.extract()
|
||||
details.insert(0, summary)
|
||||
|
||||
for dd in soup.find_all('dd'):
|
||||
dd_class = dd.attrs.get('class')
|
||||
for dd in soup.find_all("dd"):
|
||||
dd_class = dd.attrs.get("class")
|
||||
if dd_class:
|
||||
dd.attrs['class'] = dd_class + ['pl-3']
|
||||
dd.attrs["class"] = dd_class + ["pl-3"]
|
||||
else:
|
||||
dd.attrs['class'] = 'pl-3'
|
||||
dd.attrs["class"] = "pl-3"
|
||||
|
||||
for div in soup.find_all('div'):
|
||||
div_class = div.attrs.get('class')
|
||||
is_admonition = div_class and 'admonition' in div.attrs.get('class')
|
||||
for div in soup.find_all("div"):
|
||||
div_class = div.attrs.get("class")
|
||||
is_admonition = div_class and "admonition" in div.attrs.get("class")
|
||||
if is_admonition:
|
||||
for a in div.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
for a in div.find_all("a"):
|
||||
a_class = a.attrs.get("class")
|
||||
if a_class:
|
||||
a.attrs['class'] = a_class + ['alert-link']
|
||||
a.attrs["class"] = a_class + ["alert-link"]
|
||||
else:
|
||||
a.attrs['class'] = 'alert-link'
|
||||
a.attrs["class"] = "alert-link"
|
||||
|
||||
for p in div.find_all('p'):
|
||||
p_class = p.attrs.get('class')
|
||||
if is_admonition and p_class and ('admonition-title' in p_class):
|
||||
p.attrs['class'] = p_class + ['alert-heading', 'display-4', 'text-reset', 'mb-2']
|
||||
for p in div.find_all("p"):
|
||||
p_class = p.attrs.get("class")
|
||||
if is_admonition and p_class and ("admonition-title" in p_class):
|
||||
p.attrs["class"] = p_class + [
|
||||
"alert-heading",
|
||||
"display-4",
|
||||
"text-reset",
|
||||
"mb-2",
|
||||
]
|
||||
|
||||
if is_admonition:
|
||||
div.attrs['role'] = 'alert'
|
||||
if ('info' in div_class) or ('note' in div_class):
|
||||
mode = 'alert-primary'
|
||||
elif ('attention' in div_class) or ('warning' in div_class):
|
||||
mode = 'alert-warning'
|
||||
elif 'important' in div_class:
|
||||
mode = 'alert-danger'
|
||||
elif 'tip' in div_class:
|
||||
mode = 'alert-info'
|
||||
div.attrs["role"] = "alert"
|
||||
if ("info" in div_class) or ("note" in div_class):
|
||||
mode = "alert-primary"
|
||||
elif ("attention" in div_class) or ("warning" in div_class):
|
||||
mode = "alert-warning"
|
||||
elif "important" in div_class:
|
||||
mode = "alert-danger"
|
||||
elif "tip" in div_class:
|
||||
mode = "alert-info"
|
||||
else:
|
||||
mode = 'alert-secondary'
|
||||
div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode]
|
||||
mode = "alert-secondary"
|
||||
div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
|
||||
|
||||
return str(soup)
|
||||
|
||||
@ -128,61 +132,63 @@ def minify_html(content):
|
||||
|
||||
|
||||
def build_website(args):
|
||||
logging.info('Building website')
|
||||
logging.info("Building website")
|
||||
env = util.init_jinja2_env(args)
|
||||
|
||||
shutil.copytree(
|
||||
args.website_dir,
|
||||
args.output_dir,
|
||||
ignore=shutil.ignore_patterns(
|
||||
'*.md',
|
||||
'*.sh',
|
||||
'*.css',
|
||||
'*.json',
|
||||
'js/*.js',
|
||||
'build',
|
||||
'docs',
|
||||
'public',
|
||||
'node_modules',
|
||||
'src',
|
||||
'templates',
|
||||
'locale',
|
||||
'.gitkeep'
|
||||
)
|
||||
"*.md",
|
||||
"*.sh",
|
||||
"*.css",
|
||||
"*.json",
|
||||
"js/*.js",
|
||||
"build",
|
||||
"docs",
|
||||
"public",
|
||||
"node_modules",
|
||||
"src",
|
||||
"templates",
|
||||
"locale",
|
||||
".gitkeep",
|
||||
),
|
||||
)
|
||||
|
||||
shutil.copytree(
|
||||
os.path.join(args.website_dir, 'images'),
|
||||
os.path.join(args.output_dir, 'docs', 'images')
|
||||
os.path.join(args.website_dir, "images"),
|
||||
os.path.join(args.output_dir, "docs", "images"),
|
||||
)
|
||||
|
||||
# This file can be requested to check for available ClickHouse releases.
|
||||
shutil.copy2(
|
||||
os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'),
|
||||
os.path.join(args.output_dir, 'data', 'version_date.tsv'))
|
||||
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
|
||||
os.path.join(args.output_dir, "data", "version_date.tsv"),
|
||||
)
|
||||
|
||||
# This file can be requested to install ClickHouse.
|
||||
shutil.copy2(
|
||||
os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh'),
|
||||
os.path.join(args.output_dir, 'data', 'install.sh'))
|
||||
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
|
||||
os.path.join(args.output_dir, "data", "install.sh"),
|
||||
)
|
||||
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
if filename == 'main.html':
|
||||
if filename == "main.html":
|
||||
continue
|
||||
|
||||
path = os.path.join(root, filename)
|
||||
if not filename.endswith('.html'):
|
||||
if not filename.endswith(".html"):
|
||||
continue
|
||||
logging.info('Processing %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read().decode('utf-8')
|
||||
logging.info("Processing %s", path)
|
||||
with open(path, "rb") as f:
|
||||
content = f.read().decode("utf-8")
|
||||
|
||||
template = env.from_string(content)
|
||||
content = template.render(args.__dict__)
|
||||
|
||||
with open(path, 'wb') as f:
|
||||
f.write(content.encode('utf-8'))
|
||||
with open(path, "wb") as f:
|
||||
f.write(content.encode("utf-8"))
|
||||
|
||||
|
||||
def get_css_in(args):
|
||||
@ -193,7 +199,7 @@ def get_css_in(args):
|
||||
f"'{args.website_dir}/css/blog.css'",
|
||||
f"'{args.website_dir}/css/docs.css'",
|
||||
f"'{args.website_dir}/css/highlight.css'",
|
||||
f"'{args.website_dir}/css/main.css'"
|
||||
f"'{args.website_dir}/css/main.css'",
|
||||
]
|
||||
|
||||
|
||||
@ -207,42 +213,41 @@ def get_js_in(args):
|
||||
f"'{args.website_dir}/js/index.js'",
|
||||
f"'{args.website_dir}/js/docsearch.js'",
|
||||
f"'{args.website_dir}/js/docs.js'",
|
||||
f"'{args.website_dir}/js/main.js'"
|
||||
f"'{args.website_dir}/js/main.js'",
|
||||
]
|
||||
|
||||
|
||||
def minify_file(path, css_digest, js_digest):
|
||||
if not (
|
||||
path.endswith('.html') or
|
||||
path.endswith('.css')
|
||||
):
|
||||
if not (path.endswith(".html") or path.endswith(".css")):
|
||||
return
|
||||
|
||||
logging.info('Minifying %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read().decode('utf-8')
|
||||
if path.endswith('.html'):
|
||||
logging.info("Minifying %s", path)
|
||||
with open(path, "rb") as f:
|
||||
content = f.read().decode("utf-8")
|
||||
if path.endswith(".html"):
|
||||
content = minify_html(content)
|
||||
content = content.replace('base.css?css_digest', f'base.css?{css_digest}')
|
||||
content = content.replace('base.js?js_digest', f'base.js?{js_digest}')
|
||||
# TODO: restore cssmin
|
||||
# elif path.endswith('.css'):
|
||||
# content = cssmin.cssmin(content)
|
||||
# TODO: restore jsmin
|
||||
# elif path.endswith('.js'):
|
||||
# content = jsmin.jsmin(content)
|
||||
with open(path, 'wb') as f:
|
||||
f.write(content.encode('utf-8'))
|
||||
content = content.replace("base.css?css_digest", f"base.css?{css_digest}")
|
||||
content = content.replace("base.js?js_digest", f"base.js?{js_digest}")
|
||||
# TODO: restore cssmin
|
||||
# elif path.endswith('.css'):
|
||||
# content = cssmin.cssmin(content)
|
||||
# TODO: restore jsmin
|
||||
# elif path.endswith('.js'):
|
||||
# content = jsmin.jsmin(content)
|
||||
with open(path, "wb") as f:
|
||||
f.write(content.encode("utf-8"))
|
||||
|
||||
|
||||
def minify_website(args):
|
||||
css_in = ' '.join(get_css_in(args))
|
||||
css_out = f'{args.output_dir}/docs/css/base.css'
|
||||
os.makedirs(f'{args.output_dir}/docs/css')
|
||||
css_in = " ".join(get_css_in(args))
|
||||
css_out = f"{args.output_dir}/docs/css/base.css"
|
||||
os.makedirs(f"{args.output_dir}/docs/css")
|
||||
|
||||
if args.minify and False: # TODO: return closure
|
||||
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \
|
||||
command = (
|
||||
f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' "
|
||||
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
|
||||
)
|
||||
logging.info(css_in)
|
||||
logging.info(command)
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
@ -251,51 +256,60 @@ def minify_website(args):
|
||||
else:
|
||||
command = f"cat {css_in}"
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
with open(css_out, 'wb+') as f:
|
||||
with open(css_out, "wb+") as f:
|
||||
f.write(output)
|
||||
|
||||
with open(css_out, 'rb') as f:
|
||||
with open(css_out, "rb") as f:
|
||||
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
||||
|
||||
js_in = ' '.join(get_js_in(args))
|
||||
js_out = f'{args.output_dir}/docs/js/base.js'
|
||||
os.makedirs(f'{args.output_dir}/docs/js')
|
||||
js_in = " ".join(get_js_in(args))
|
||||
js_out = f"{args.output_dir}/docs/js/base.js"
|
||||
os.makedirs(f"{args.output_dir}/docs/js")
|
||||
|
||||
if args.minify and False: # TODO: return closure
|
||||
js_in = [js[1:-1] for js in js_in]
|
||||
closure_args = [
|
||||
'--js', *js_in, '--js_output_file', js_out,
|
||||
'--compilation_level', 'SIMPLE',
|
||||
'--dependency_mode', 'NONE',
|
||||
'--third_party', '--use_types_for_optimization',
|
||||
'--isolation_mode', 'IIFE'
|
||||
"--js",
|
||||
*js_in,
|
||||
"--js_output_file",
|
||||
js_out,
|
||||
"--compilation_level",
|
||||
"SIMPLE",
|
||||
"--dependency_mode",
|
||||
"NONE",
|
||||
"--third_party",
|
||||
"--use_types_for_optimization",
|
||||
"--isolation_mode",
|
||||
"IIFE",
|
||||
]
|
||||
logging.info(closure_args)
|
||||
if closure.run(*closure_args):
|
||||
raise RuntimeError('failed to run closure compiler')
|
||||
with open(js_out, 'r') as f:
|
||||
raise RuntimeError("failed to run closure compiler")
|
||||
with open(js_out, "r") as f:
|
||||
js_content = jsmin.jsmin(f.read())
|
||||
with open(js_out, 'w') as f:
|
||||
with open(js_out, "w") as f:
|
||||
f.write(js_content)
|
||||
|
||||
else:
|
||||
command = f"cat {js_in}"
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
with open(js_out, 'wb+') as f:
|
||||
with open(js_out, "wb+") as f:
|
||||
f.write(output)
|
||||
|
||||
with open(js_out, 'rb') as f:
|
||||
with open(js_out, "rb") as f:
|
||||
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
||||
logging.info(js_digest)
|
||||
|
||||
if args.minify:
|
||||
logging.info('Minifying website')
|
||||
logging.info("Minifying website")
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
path = os.path.join(root, filename)
|
||||
futures.append(executor.submit(minify_file, path, css_digest, js_digest))
|
||||
futures.append(
|
||||
executor.submit(minify_file, path, css_digest, js_digest)
|
||||
)
|
||||
for future in futures:
|
||||
exc = future.exception()
|
||||
if exc:
|
||||
@ -304,24 +318,28 @@ def minify_website(args):
|
||||
|
||||
|
||||
def process_benchmark_results(args):
|
||||
benchmark_root = os.path.join(args.website_dir, 'benchmark')
|
||||
benchmark_root = os.path.join(args.website_dir, "benchmark")
|
||||
required_keys = {
|
||||
'dbms': ['result'],
|
||||
'hardware': ['result', 'system', 'system_full', 'kind']
|
||||
"dbms": ["result"],
|
||||
"hardware": ["result", "system", "system_full", "kind"],
|
||||
}
|
||||
for benchmark_kind in ['dbms', 'hardware']:
|
||||
for benchmark_kind in ["dbms", "hardware"]:
|
||||
results = []
|
||||
results_root = os.path.join(benchmark_root, benchmark_kind, 'results')
|
||||
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
|
||||
for result in sorted(os.listdir(results_root)):
|
||||
result_file = os.path.join(results_root, result)
|
||||
logging.debug(f'Reading benchmark result from {result_file}')
|
||||
with open(result_file, 'r') as f:
|
||||
logging.debug(f"Reading benchmark result from {result_file}")
|
||||
with open(result_file, "r") as f:
|
||||
result = json.loads(f.read())
|
||||
for item in result:
|
||||
for required_key in required_keys[benchmark_kind]:
|
||||
assert required_key in item, f'No "{required_key}" in {result_file}'
|
||||
assert (
|
||||
required_key in item
|
||||
), f'No "{required_key}" in {result_file}'
|
||||
results += result
|
||||
results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js')
|
||||
with open(results_js, 'w') as f:
|
||||
results_js = os.path.join(
|
||||
args.output_dir, "benchmark", benchmark_kind, "results.js"
|
||||
)
|
||||
with open(results_js, "w") as f:
|
||||
data = json.dumps(results)
|
||||
f.write(f'var results = {data};')
|
||||
f.write(f"var results = {data};")
|
||||
|
@ -42,6 +42,8 @@ git push
|
||||
Run some simple regex-based code style checks with the `utils/check-style/check-style` binary (note that it can also be run locally).
If the check fails, fix the style errors by following the [code style guide](./style.md).

Python code is checked with [black](https://github.com/psf/black/); a local invocation is sketched below.
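A rough sketch of running both checks locally; the `check-style` path comes from the text above, while the working directory, the temporary virtualenv, and the directories passed to `black` are assumptions rather than part of the documented workflow.

```bash
# Regex-based style checks (script path from the text above; working directory assumed).
cd utils/check-style && ./check-style

# Python formatting check with black; the virtualenv and target directories are assumed.
python3 -m venv /tmp/venv && /tmp/venv/bin/pip install black
/tmp/venv/bin/black --check tests/ci docs/tools
```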
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `docs_output.txt` records the check errors (invalid tables, etc.); an empty page means no errors. [Example of a successful result](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)
|
1
packages/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
*/
|
156
packages/build
Executable file
@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
# Avoid dependency on locale
|
||||
LC_ALL=C
|
||||
|
||||
# Normalize output directory
|
||||
if [ -n "$OUTPUT_DIR" ]; then
|
||||
OUTPUT_DIR=$(realpath -m "$OUTPUT_DIR")
|
||||
fi
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
cd "$CUR_DIR"
|
||||
|
||||
ROOT_DIR=$(readlink -f "$(git rev-parse --show-cdup)")
|
||||
|
||||
PKG_ROOT='root'
|
||||
|
||||
DEB_ARCH=${DEB_ARCH:-amd64}
|
||||
OUTPUT_DIR=${OUTPUT_DIR:-$ROOT_DIR}
|
||||
[ -d "${OUTPUT_DIR}" ] || mkdir -p "${OUTPUT_DIR}"
|
||||
SANITIZER=${SANITIZER:-""}
|
||||
SOURCE=${SOURCE:-$PKG_ROOT}
|
||||
|
||||
HELP="${0} [--test] [--rpm] [-h|--help]
|
||||
--test - adds '+test' suffix to version
|
||||
--apk - build APK packages
|
||||
--rpm - build RPM packages
|
||||
--tgz - build tarball package
|
||||
--help - show this help and exit
|
||||
|
||||
Used envs:
|
||||
DEB_ARCH='${DEB_ARCH}'
|
||||
OUTPUT_DIR='${OUTPUT_DIR}' - where the artifact will be placed
|
||||
SANITIZER='${SANITIZER}' - if any sanitizer is used, affects version string
|
||||
SOURCE='${SOURCE}' - directory with sources tree
|
||||
VERSION_STRING='${VERSION_STRING}' - the package version to overwrite
|
||||
"
|
||||
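# Hypothetical usage sketch, not part of the original script: the flags and
# environment variables are the ones documented in HELP above, while the
# concrete paths and version below are assumptions.
#   OUTPUT_DIR=/tmp/packages ./packages/build --rpm --tgz
#   VERSION_STRING=22.3.2.1 ./packages/build --test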
|
||||
if [ -z "${VERSION_STRING}" ]; then
|
||||
# Get CLICKHOUSE_VERSION_STRING from the current git repo
|
||||
eval "$("$ROOT_DIR/tests/ci/version_helper.py" -e)"
|
||||
else
|
||||
CLICKHOUSE_VERSION_STRING=${VERSION_STRING}
|
||||
fi
|
||||
export CLICKHOUSE_VERSION_STRING
|
||||
|
||||
|
||||
|
||||
while [[ $1 == --* ]]
|
||||
do
|
||||
case "$1" in
|
||||
--test )
|
||||
VERSION_POSTFIX+='+test'
|
||||
shift ;;
|
||||
--apk )
|
||||
MAKE_APK=1
|
||||
shift ;;
|
||||
--rpm )
|
||||
MAKE_RPM=1
|
||||
shift ;;
|
||||
--tgz )
|
||||
MAKE_TGZ=1
|
||||
shift ;;
|
||||
--help )
|
||||
echo "$HELP"
|
||||
exit ;;
|
||||
* )
|
||||
echo "Unknown option $1"
|
||||
exit 2 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
function deb2tgz {
|
||||
local FILE PKG_NAME PKG_DIR PKG_PATH TARBALL
|
||||
FILE=$1
|
||||
PKG_NAME=${FILE##*/}; PKG_NAME=${PKG_NAME%%_*}
|
||||
PKG_DIR="$PKG_NAME-$CLICKHOUSE_VERSION_STRING"
|
||||
PKG_PATH="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING"
|
||||
TARBALL="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING-$DEB_ARCH.tgz"
|
||||
rm -rf "$PKG_PATH"
|
||||
dpkg-deb -R "$FILE" "$PKG_PATH"
|
||||
mkdir -p "$PKG_PATH/install"
|
||||
cat > "$PKG_PATH/install/doinst.sh" << 'EOF'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
for filepath in `find $SCRIPTPATH/.. -type f -or -type l | grep -v "\.\./install/"`; do
|
||||
destpath=${filepath##$SCRIPTPATH/..}
|
||||
mkdir -p $(dirname "$destpath")
|
||||
cp -r "$filepath" "$destpath"
|
||||
done
|
||||
EOF
|
||||
chmod +x "$PKG_PATH/install/doinst.sh"
|
||||
if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then
|
||||
tail +2 "$PKG_PATH/DEBIAN/postinst" > "$PKG_PATH/install/doinst.sh"
|
||||
fi
|
||||
rm -rf "$PKG_PATH/DEBIAN"
|
||||
if [ -f "/usr/bin/pigz" ]; then
|
||||
tar --use-compress-program=pigz -cf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
|
||||
else
|
||||
tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
|
||||
fi
|
||||
|
||||
rm -r "$PKG_PATH"
|
||||
}
|
||||
|
||||
# Build options
|
||||
if [ -n "$SANITIZER" ]; then
|
||||
if [[ "$SANITIZER" == "address" ]]; then VERSION_POSTFIX+="+asan"
|
||||
elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan"
|
||||
elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan"
|
||||
elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan"
|
||||
else
|
||||
echo "Unknown value of SANITIZER variable: $SANITIZER"
|
||||
exit 3
|
||||
fi
|
||||
elif [[ $BUILD_TYPE == 'debug' ]]; then
|
||||
VERSION_POSTFIX+="+debug"
|
||||
fi
|
||||
|
||||
if [[ "$PKG_ROOT" != "$SOURCE" ]]; then
|
||||
# packages are built only from $PKG_ROOT
rm -rf "./$PKG_ROOT"
ln -sf "$SOURCE" "$PKG_ROOT"
|
||||
fi
|
||||
|
||||
CLICKHOUSE_VERSION_STRING+=$VERSION_POSTFIX
|
||||
echo -e "\nCurrent version is $CLICKHOUSE_VERSION_STRING"
|
||||
|
||||
for config in clickhouse*.yaml; do
|
||||
echo "Building deb package for $config"
|
||||
|
||||
# Preserve package path
|
||||
exec 9>&1
|
||||
PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9)
|
||||
PKG_PATH=${PKG_PATH##*created package: }
|
||||
exec 9>&-
|
||||
|
||||
if [ -n "$MAKE_APK" ]; then
|
||||
echo "Building apk package for $config"
|
||||
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk
|
||||
fi
|
||||
if [ -n "$MAKE_RPM" ]; then
|
||||
echo "Building rpm package for $config"
|
||||
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm
|
||||
fi
|
||||
if [ -n "$MAKE_TGZ" ]; then
|
||||
echo "Building tarball for $config"
|
||||
deb2tgz "$PKG_PATH"
|
||||
fi
|
||||
done
|
||||
|
||||
# vim: ts=4: sw=4: sts=4: expandtab
|
57
packages/clickhouse-client.yaml
Normal file
@ -0,0 +1,57 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
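# Hypothetical invocation sketch: the nfpm flags mirror the ones used in packages/build,
# while the output directory below is an assumption.
#   cd packages && nfpm package --config clickhouse-client.yaml --packager deb --target /tmp/output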
name: "clickhouse-client"
|
||||
arch: "all"
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
replaces:
|
||||
- clickhouse-compressor
|
||||
conflicts:
|
||||
- clickhouse-compressor
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
Client binary for ClickHouse
|
||||
ClickHouse is a column-oriented database management system
|
||||
that allows generating analytical data reports in real time.
|
||||
This package provides clickhouse-client, clickhouse-local and clickhouse-benchmark
|
||||
|
||||
overrides:
|
||||
deb:
|
||||
depends:
|
||||
- clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING})
|
||||
rpm:
|
||||
depends:
|
||||
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
|
||||
|
||||
contents:
|
||||
- src: root/etc/clickhouse-client/config.xml
|
||||
dst: /etc/clickhouse-client/config.xml
|
||||
type: config
|
||||
- src: root/usr/bin/clickhouse-benchmark
|
||||
dst: /usr/bin/clickhouse-benchmark
|
||||
- src: root/usr/bin/clickhouse-compressor
|
||||
dst: /usr/bin/clickhouse-compressor
|
||||
- src: root/usr/bin/clickhouse-format
|
||||
dst: /usr/bin/clickhouse-format
|
||||
- src: root/usr/bin/clickhouse-client
|
||||
dst: /usr/bin/clickhouse-client
|
||||
- src: root/usr/bin/clickhouse-local
|
||||
dst: /usr/bin/clickhouse-local
|
||||
- src: root/usr/bin/clickhouse-obfuscator
|
||||
dst: /usr/bin/clickhouse-obfuscator
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-client/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-client/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-client/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-client/README.md
|
34
packages/clickhouse-common-static-dbg.yaml
Normal file
@ -0,0 +1,34 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-common-static-dbg"
|
||||
arch: "${DEB_ARCH}" # amd64, arm64
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
replaces:
|
||||
- clickhouse-common-dbg
|
||||
conflicts:
|
||||
- clickhouse-common-dbg
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
debugging symbols for clickhouse-common-static
|
||||
This package contains the debugging symbols for clickhouse-common.
|
||||
|
||||
contents:
|
||||
- src: root/usr/lib/debug
|
||||
dst: /usr/lib/debug
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/README.md
|
48
packages/clickhouse-common-static.yaml
Normal file
@ -0,0 +1,48 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-common-static"
|
||||
arch: "${DEB_ARCH}" # amd64, arm64
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
replaces:
|
||||
- clickhouse-common
|
||||
- clickhouse-server-base
|
||||
provides:
|
||||
- clickhouse-common
|
||||
- clickhouse-server-base
|
||||
suggests:
|
||||
- clickhouse-common-static-dbg
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
Common files for ClickHouse
|
||||
ClickHouse is a column-oriented database management system
|
||||
that allows generating analytical data reports in real time.
|
||||
This package provides common files for both clickhouse server and client
|
||||
|
||||
contents:
|
||||
- src: root/usr/bin/clickhouse
|
||||
dst: /usr/bin/clickhouse
|
||||
- src: root/usr/bin/clickhouse-odbc-bridge
|
||||
dst: /usr/bin/clickhouse-odbc-bridge
|
||||
- src: root/usr/bin/clickhouse-library-bridge
|
||||
dst: /usr/bin/clickhouse-library-bridge
|
||||
- src: root/usr/bin/clickhouse-extract-from-config
|
||||
dst: /usr/bin/clickhouse-extract-from-config
|
||||
- src: root/usr/share/bash-completion/completions
|
||||
dst: /usr/share/bash-completion/completions
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-common-static/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-common-static/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-common-static/README.md
|
227
packages/clickhouse-server.init
Executable file
@ -0,0 +1,227 @@
|
||||
#!/bin/sh
|
||||
### BEGIN INIT INFO
|
||||
# Provides: clickhouse-server
|
||||
# Default-Start: 2 3 4 5
|
||||
# Default-Stop: 0 1 6
|
||||
# Should-Start: $time $network
|
||||
# Should-Stop: $network
|
||||
# Short-Description: clickhouse-server daemon
|
||||
### END INIT INFO
|
||||
#
|
||||
# NOTES:
|
||||
# - Should-* -- script can start if the listed facilities are missing, unlike Required-*
|
||||
#
|
||||
# For the documentation, see [1]:
|
||||
#
|
||||
# [1]: https://wiki.debian.org/LSBInitScripts
|
||||
|
||||
CLICKHOUSE_USER=clickhouse
|
||||
CLICKHOUSE_GROUP=${CLICKHOUSE_USER}
|
||||
SHELL=/bin/bash
|
||||
PROGRAM=clickhouse-server
|
||||
CLICKHOUSE_GENERIC_PROGRAM=clickhouse
|
||||
CLICKHOUSE_PROGRAM_ENV=""
|
||||
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
|
||||
CLICKHOUSE_CONFDIR=/etc/$PROGRAM
|
||||
CLICKHOUSE_LOGDIR=/var/log/clickhouse-server
|
||||
CLICKHOUSE_LOGDIR_USER=root
|
||||
CLICKHOUSE_DATADIR=/var/lib/clickhouse
|
||||
if [ -d "/var/lock" ]; then
|
||||
LOCALSTATEDIR=/var/lock
|
||||
else
|
||||
LOCALSTATEDIR=/run/lock
|
||||
fi
|
||||
|
||||
if [ ! -d "$LOCALSTATEDIR" ]; then
|
||||
mkdir -p "$LOCALSTATEDIR"
|
||||
fi
|
||||
|
||||
CLICKHOUSE_BINDIR=/usr/bin
|
||||
CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server
|
||||
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
|
||||
LOCKFILE=$LOCALSTATEDIR/$PROGRAM
|
||||
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
|
||||
CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid"
|
||||
# CLICKHOUSE_STOP_TIMEOUT=60 # Disabled by default. Place it in /etc/default/clickhouse if you need it.
|
||||
|
||||
# Some systems lack "flock"
|
||||
command -v flock >/dev/null && FLOCK=flock
|
||||
|
||||
# Override defaults from optional config file
|
||||
test -f /etc/default/clickhouse && . /etc/default/clickhouse
|
||||
|
||||
|
||||
die()
|
||||
{
|
||||
echo $1 >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
|
||||
# Check that configuration file is Ok.
|
||||
check_config()
|
||||
{
|
||||
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
|
||||
su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure.";
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
initdb()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
start()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
stop()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
restart()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
forcestop()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
service_or_func()
|
||||
{
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
systemctl $1 $PROGRAM
|
||||
else
|
||||
$1
|
||||
fi
|
||||
}
|
||||
|
||||
forcerestart()
|
||||
{
|
||||
forcestop
|
||||
# Should not use 'start' function if systemd is active
|
||||
service_or_func start
|
||||
}
|
||||
|
||||
use_cron()
|
||||
{
|
||||
# 1. running systemd
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
return 1
|
||||
fi
|
||||
# 2. disabled by config
|
||||
if [ -z "$CLICKHOUSE_CRONFILE" ]; then
|
||||
return 2
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
# returns false if cron disabled (with systemd)
|
||||
enable_cron()
|
||||
{
|
||||
use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE"
|
||||
}
|
||||
# returns false if cron disabled (with systemd)
|
||||
disable_cron()
|
||||
{
|
||||
use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE"
|
||||
}
|
||||
|
||||
|
||||
is_cron_disabled()
|
||||
{
|
||||
use_cron || return 0
|
||||
|
||||
# Assumes that either no lines are commented or all lines are commented.
|
||||
# Also please note that currently the cron file for ClickHouse has only one line (but some time ago there were more).
|
||||
grep -q -E '^#' "$CLICKHOUSE_CRONFILE";
|
||||
}
|
||||
|
||||
|
||||
main()
|
||||
{
|
||||
# See how we were called.
|
||||
EXIT_STATUS=0
|
||||
case "$1" in
|
||||
start)
|
||||
service_or_func start && enable_cron
|
||||
;;
|
||||
stop)
|
||||
disable_cron
|
||||
service_or_func stop
|
||||
;;
|
||||
restart)
|
||||
service_or_func restart && enable_cron
|
||||
;;
|
||||
forcestop)
|
||||
disable_cron
|
||||
forcestop
|
||||
;;
|
||||
forcerestart)
|
||||
forcerestart && enable_cron
|
||||
;;
|
||||
reload)
|
||||
service_or_func restart
|
||||
;;
|
||||
condstart)
|
||||
service_or_func start
|
||||
;;
|
||||
condstop)
|
||||
service_or_func stop
|
||||
;;
|
||||
condrestart)
|
||||
service_or_func restart
|
||||
;;
|
||||
condreload)
|
||||
service_or_func restart
|
||||
;;
|
||||
initdb)
|
||||
initdb
|
||||
;;
|
||||
enable_cron)
|
||||
enable_cron
|
||||
;;
|
||||
disable_cron)
|
||||
disable_cron
|
||||
;;
|
||||
*)
|
||||
echo "Usage: $0 {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
exit $EXIT_STATUS
|
||||
}
|
||||
|
||||
|
||||
status()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
# Running commands without need of locking
|
||||
case "$1" in
|
||||
status)
|
||||
status
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
(
|
||||
if $FLOCK -n 9; then
|
||||
main "$@"
|
||||
else
|
||||
echo "Init script is already running" && exit 1
|
||||
fi
|
||||
) 9> $LOCKFILE
|
47
packages/clickhouse-server.postinstall
Normal file
@ -0,0 +1,47 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
# set -x
|
||||
|
||||
PROGRAM=clickhouse-server
|
||||
CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
|
||||
CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
|
||||
# Please note that we don't support paths with whitespaces. This is rather ignorant.
|
||||
CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR:=/etc/clickhouse-server}
|
||||
CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
|
||||
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
|
||||
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
|
||||
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
|
||||
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
|
||||
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
|
||||
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
|
||||
|
||||
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
|
||||
[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
|
||||
|
||||
if [ ! -f "/etc/debian_version" ]; then
|
||||
not_deb_os=1
|
||||
fi
|
||||
|
||||
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
|
||||
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
|
||||
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
# if old rc.d service present - remove it
|
||||
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
|
||||
/usr/sbin/update-rc.d clickhouse-server remove
|
||||
fi
|
||||
|
||||
/bin/systemctl daemon-reload
|
||||
/bin/systemctl enable clickhouse-server
|
||||
else
|
||||
# If you are downgrading to a version older than 1.1.54336, run: systemctl disable clickhouse-server
|
||||
if [ -x "/etc/init.d/clickhouse-server" ]; then
|
||||
if [ -x "/usr/sbin/update-rc.d" ]; then
|
||||
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
|
||||
else
|
||||
echo # Other OS
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
27
packages/clickhouse-server.service
Normal file
@ -0,0 +1,27 @@
|
||||
[Unit]
|
||||
Description=ClickHouse Server (analytic DBMS for big data)
|
||||
Requires=network-online.target
|
||||
# NOTE: After/Wants=time-sync.target is not enough; you need to ensure
|
||||
# that the time was adjusted already, if you use systemd-timesyncd you are
|
||||
# safe, but if you use ntp or some other daemon, you should configure it
|
||||
# additionally.
|
||||
After=time-sync.target network-online.target
|
||||
Wants=time-sync.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=clickhouse
|
||||
Group=clickhouse
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
RuntimeDirectory=clickhouse-server
|
||||
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
|
||||
# Minus means that this file is optional.
|
||||
EnvironmentFile=-/etc/default/clickhouse
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
|
||||
|
||||
[Install]
|
||||
# ClickHouse should not start from the rescue shell (rescue.target).
|
||||
WantedBy=multi-user.target
|
68
packages/clickhouse-server.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-server"
|
||||
arch: "all"
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
conflicts:
|
||||
- clickhouse-keeper
|
||||
depends:
|
||||
- adduser
|
||||
replaces:
|
||||
- clickhouse-server-common
|
||||
- clickhouse-server-base
|
||||
provides:
|
||||
- clickhouse-server-common
|
||||
recommends:
|
||||
- libcap2-bin
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
Server binary for ClickHouse
|
||||
ClickHouse is a column-oriented database management system
|
||||
that allows generating analytical data reports in real time.
|
||||
This package provides clickhouse common configuration files
|
||||
|
||||
overrides:
|
||||
deb:
|
||||
depends:
|
||||
- clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING})
|
||||
rpm:
|
||||
depends:
|
||||
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
|
||||
|
||||
contents:
|
||||
- src: root/etc/clickhouse-server
|
||||
dst: /etc/clickhouse-server
|
||||
type: config
|
||||
- src: clickhouse-server.init
|
||||
dst: /etc/init.d/clickhouse-server
|
||||
- src: clickhouse-server.service
|
||||
dst: /lib/systemd/system/clickhouse-server.service
|
||||
- src: root/usr/bin/clickhouse-copier
|
||||
dst: /usr/bin/clickhouse-copier
|
||||
- src: clickhouse
|
||||
dst: /usr/bin/clickhouse-keeper
|
||||
type: symlink
|
||||
- src: root/usr/bin/clickhouse-report
|
||||
dst: /usr/bin/clickhouse-report
|
||||
- src: root/usr/bin/clickhouse-server
|
||||
dst: /usr/bin/clickhouse-server
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-server/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-server/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-server/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-server/README.md
|
||||
|
||||
scripts:
|
||||
postinstall: ./clickhouse-server.postinstall
|
@ -460,10 +460,6 @@ else ()
|
||||
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
|
||||
endif ()
|
||||
|
||||
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
|
||||
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE})
|
||||
|
||||
if (USE_GDB_ADD_INDEX)
|
||||
@ -474,11 +470,19 @@ else ()
|
||||
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM)
|
||||
endif()
|
||||
|
||||
if (BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH clickhouse)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
|
||||
else()
|
||||
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT INSTALL_STRIPPED_BINARIES)
|
||||
# Install dummy debug directory
|
||||
# TODO: move logic to every place where clickhouse_strip_binary is used
|
||||
add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
|
||||
endif()
|
||||
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
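A hedged configure-time sketch: the INSTALL_STRIPPED_BINARIES option and the separate stripped output directory come from this diff, while the build directory and generator below are assumptions.

    cmake -S . -B build -G Ninja -DINSTALL_STRIPPED_BINARIES=ON
    cmake --build build --target clickhouse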
|
@ -792,9 +792,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
|
||||
std::string command = fmt::format("command -v setcap >/dev/null"
|
||||
" && command -v capsh >/dev/null"
|
||||
" && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice+ep >/dev/null 2>&1"
|
||||
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0}"
|
||||
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
|
||||
" && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep >/dev/null 2>&1"
|
||||
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep' {0}"
|
||||
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' or 'net_bind_service' capability for clickhouse binary."
|
||||
" This is optional. Taskstats accounting will be disabled."
|
||||
" To enable taskstats accounting you may add the required capability later manually.\"",
|
||||
fs::canonical(main_bin_path).string());
|
||||
|
@ -24,10 +24,8 @@ target_link_libraries(clickhouse-library-bridge PRIVATE
|
||||
|
||||
set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
|
||||
|
||||
if (BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-library-bridge)
|
||||
endif()
|
||||
|
||||
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
|
||||
else()
|
||||
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
|
@ -39,11 +39,9 @@ if (USE_GDB_ADD_INDEX)
|
||||
add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)
|
||||
endif()
|
||||
|
||||
if (BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-odbc-bridge)
|
||||
endif()
|
||||
|
||||
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
|
||||
else()
|
||||
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
|
||||
|
@ -45,6 +45,7 @@
#include <Core/ServerUUID.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/IOThreadPool.h>
#include <IO/UseSSL.h>
#include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h>
@ -554,6 +555,10 @@ if (ThreadFuzzer::instance().isEffective())
config().getUInt("thread_pool_queue_size", 10000)
);
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
@ -1022,8 +1027,8 @@ if (ThreadFuzzer::instance().isEffective())
std::make_unique<TCPServer>(
new KeeperTCPHandlerFactory(
config_getter, global_context->getKeeperDispatcher(),
global_context->getSettingsRef().receive_timeout,
global_context->getSettingsRef().send_timeout,
global_context->getSettingsRef().receive_timeout.totalSeconds(),
global_context->getSettingsRef().send_timeout.totalSeconds(),
false), server_pool, socket));
});
@ -1045,8 +1050,8 @@ if (ThreadFuzzer::instance().isEffective())
std::make_unique<TCPServer>(
new KeeperTCPHandlerFactory(
config_getter, global_context->getKeeperDispatcher(),
global_context->getSettingsRef().receive_timeout,
global_context->getSettingsRef().send_timeout, true), server_pool, socket));
global_context->getSettingsRef().receive_timeout.totalSeconds(),
global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket));
#else
UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
@ -85,7 +85,9 @@ size_t extractMaskNumericImpl(
{
size_t ones_count = 0;
size_t data_index = 0;
for (size_t i = 0; i != mask.size(); ++i)
size_t mask_size = mask.size();
for (size_t i = 0; i != mask_size; ++i)
{
// Change mask only where value is 1.
if (!mask[i])
@ -113,5 +113,35 @@ public:
}
};
class SynchronizedArenaWithFreeLists : private ArenaWithFreeLists
{
public:
explicit SynchronizedArenaWithFreeLists(
const size_t initial_size = 4096, const size_t growth_factor = 2,
const size_t linear_growth_threshold = 128 * 1024 * 1024)
: ArenaWithFreeLists{initial_size, growth_factor, linear_growth_threshold}
{}
char * alloc(const size_t size)
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::alloc(size);
}
void free(char * ptr, const size_t size)
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::free(ptr, size);
}
/// Size of the allocated pool in bytes
size_t size() const
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::size();
}
private:
mutable std::mutex mutex;
};
}
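The SynchronizedArenaWithFreeLists added above is nothing more than the existing ArenaWithFreeLists hidden behind private inheritance, with every call serialized by a single mutex so the parallel download threads can share one pool. A minimal self-contained sketch of that wrapper pattern follows; it is an illustration only and uses a toy heap-backed arena instead of ClickHouse's ArenaWithFreeLists:

#include <cstddef>
#include <mutex>
#include <new>

/// Toy stand-in for ArenaWithFreeLists: just forwards to the global heap.
class ToyArena
{
public:
    char * alloc(size_t size) { return static_cast<char *>(::operator new(size)); }
    void free(char * ptr, size_t /*size*/) { ::operator delete(ptr); }
};

/// Same shape as SynchronizedArenaWithFreeLists: private inheritance plus a mutex,
/// so the unsynchronized interface cannot be reached by accident.
class SynchronizedToyArena : private ToyArena
{
public:
    char * alloc(size_t size)
    {
        std::lock_guard lock{mutex};
        return ToyArena::alloc(size);
    }
    void free(char * ptr, size_t size)
    {
        std::lock_guard lock{mutex};
        ToyArena::free(ptr, size);
    }
private:
    mutable std::mutex mutex;
};

int main()
{
    SynchronizedToyArena arena;
    char * p = arena.alloc(128); // safe to call concurrently from several threads
    arena.free(p, 128);
}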
@ -25,7 +25,6 @@
#include <Common/Dwarf.h>
#include <Common/Exception.h>
#define DW_CHILDREN_no 0
#define DW_FORM_addr 1
#define DW_FORM_block1 0x0a
@ -125,7 +124,7 @@ template <typename T>
requires std::is_trivial_v<T> && std::is_standard_layout_v<T>
T read(std::string_view & sp)
{
SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
SAFE_CHECK(sp.size() >= sizeof(T), fmt::format("underflow: expected bytes {}, got bytes {}", sizeof(T), sp.size()));
T x;
memcpy(&x, sp.data(), sizeof(T));
sp.remove_prefix(sizeof(T));
@ -690,7 +689,7 @@ bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uin
Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const
{
SAFE_CHECK(offset < info_.size(), "unexpected offset");
SAFE_CHECK(offset < info_.size(), fmt::format("unexpected offset {}, info size {}", offset, info_.size()));
Die die;
std::string_view sp{info_.data() + offset, cu.offset + cu.size - offset};
die.offset = offset;
@ -708,19 +707,6 @@ Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit & cu, uint64_t offset) co
return die;
}
Dwarf::Die Dwarf::findDefinitionDie(const CompilationUnit & cu, const Die & die) const
{
// Find the real definition instead of declaration.
// DW_AT_specification: Incomplete, non-defining, or separate declaration
// corresponding to a declaration
auto offset = getAttribute<uint64_t>(cu, die, DW_AT_specification);
if (!offset)
{
return die;
}
return getDieAtOffset(cu, cu.offset + offset.value());
}
/**
* Find the @locationInfo for @address in the compilation unit represented
* by the @sp .debug_info entry.
@ -861,7 +847,10 @@ bool Dwarf::findLocation(
SymbolizedFrame inline_frame;
inline_frame.found = true;
inline_frame.addr = address;
inline_frame.name = call_location.name.data();
if (!call_location.name.empty())
inline_frame.name = call_location.name.data();
else
inline_frame.name = nullptr;
inline_frame.location.has_file_and_line = true;
inline_frame.location.file = call_location.file;
inline_frame.location.line = call_location.line;
@ -1034,17 +1023,54 @@ void Dwarf::findInlinedSubroutineDieForAddress(
location.file = line_vm.getFullFileName(*call_file);
location.line = *call_line;
/// Something wrong with receiving debug info about inline.
/// If set to true we stop parsing DWARF.
bool die_for_inline_broken = false;
auto get_function_name = [&](const CompilationUnit & srcu, uint64_t die_offset)
{
auto decl_die = getDieAtOffset(srcu, die_offset);
Die decl_die = getDieAtOffset(srcu, die_offset);
auto & die_to_look_for_name = decl_die;
Die def_die;
// Jump to the actual function definition instead of declaration for name
// and line info.
auto def_die = findDefinitionDie(srcu, decl_die);
// DW_AT_specification: Incomplete, non-defining, or separate declaration
// corresponding to a declaration
auto offset = getAttribute<uint64_t>(srcu, decl_die, DW_AT_specification);
if (offset)
{
/// FIXME: actually it's a bug in our DWARF parser.
///
/// Most of the times compilation unit offset (srcu.offset) is some big number inside .debug_info (like 434782255).
/// Offset of DIE definition is some small relative number to srcu.offset (like 3518).
/// However in some unknown cases offset looks like global, non relative number (like 434672579) and in this
/// case we obviously doing something wrong parsing DWARF.
///
/// What is important -- this bug? reproduces only with -flto=thin in release mode.
/// Also llvm-dwarfdump --verify ./clickhouse says that our DWARF is ok, so it's another prove
/// that we just doing something wrong.
///
/// FIXME: Currently we just give up parsing DWARF for inlines when we got into this situation.
if (srcu.offset + offset.value() >= info_.size())
{
die_for_inline_broken = true;
}
else
{
def_die = getDieAtOffset(srcu, srcu.offset + offset.value());
die_to_look_for_name = def_die;
}
}
std::string_view name;
if (die_for_inline_broken)
return name;
// The file and line will be set in the next inline subroutine based on
// its DW_AT_call_file and DW_AT_call_line.
forEachAttribute(srcu, def_die, [&](const Attribute & attr)
forEachAttribute(srcu, die_to_look_for_name, [&](const Attribute & attr)
{
switch (attr.spec.name)
{
@ -1083,6 +1109,10 @@ void Dwarf::findInlinedSubroutineDieForAddress(
? get_function_name(cu, cu.offset + *abstract_origin)
: get_function_name(findCompilationUnit(info_, *abstract_origin), *abstract_origin);
/// FIXME: see comment above
if (die_for_inline_broken)
return false;
locations.push_back(location);
findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu, locations, max_size);
@ -260,11 +260,6 @@ private:
/** cu must exist during the life cycle of created detail::Die. */
Die getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const;
/**
* Find the actual definition DIE instead of declaration for the given die.
*/
Die findDefinitionDie(const CompilationUnit & cu, const Die & die) const;
bool findLocation(
uintptr_t address,
LocationInfoMode mode,
@ -127,7 +127,14 @@ PoolWithFailover::Entry PoolWithFailover::get()
/// If we cannot connect to some replica due to pool overflow, than we will wait and connect.
PoolPtr * full_pool = nullptr;
std::map<std::string, std::tuple<std::string, int>> error_detail;
struct ErrorDetail
{
int code;
std::string description;
};
std::unordered_map<std::string, ErrorDetail> replica_name_to_error_detail;
for (size_t try_no = 0; try_no < max_tries; ++try_no)
{
@ -161,15 +168,8 @@ PoolWithFailover::Entry PoolWithFailover::get()
}
app.logger().warning("Connection to " + pool->getDescription() + " failed: " + e.displayText());
//save all errors to error_detail
if (error_detail.contains(pool->getDescription()))
{
error_detail[pool->getDescription()] = {e.displayText(), e.code()};
}
else
{
error_detail.insert({pool->getDescription(), {e.displayText(), e.code()}});
}
replica_name_to_error_detail.insert_or_assign(pool->getDescription(), ErrorDetail{e.code(), e.displayText()});
continue;
}
@ -189,15 +189,19 @@ PoolWithFailover::Entry PoolWithFailover::get()
DB::WriteBufferFromOwnString message;
message << "Connections to all replicas failed: ";
for (auto it = replicas_by_priority.begin(); it != replicas_by_priority.end(); ++it)
{
for (auto jt = it->second.begin(); jt != it->second.end(); ++jt)
{
message << (it == replicas_by_priority.begin() && jt == it->second.begin() ? "" : ", ") << (*jt)->getDescription();
if (error_detail.contains((*jt)->getDescription()))
if (auto error_detail_it = replica_name_to_error_detail.find(((*jt)->getDescription()));
error_detail_it != replica_name_to_error_detail.end())
{
std::tuple<std::string, int> error_and_code = error_detail[(*jt)->getDescription()];
message << ", ERROR " << std::get<1>(error_and_code) << " : " << std::get<0>(error_and_code);
const auto & [code, description] = error_detail_it->second;
message << ", ERROR " << code << " : " << description;
}
}
}
throw Poco::Exception(message.str());
}
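The mysqlxx::PoolWithFailover change above swaps the map of unnamed tuples for a named ErrorDetail struct in an std::unordered_map, keeps only the latest error per replica via insert_or_assign, and unpacks it with structured bindings when composing the final message. A small standalone sketch of that bookkeeping, with replica names and error codes invented for the example:

#include <iostream>
#include <string>
#include <unordered_map>

struct ErrorDetail
{
    int code;
    std::string description;
};

int main()
{
    std::unordered_map<std::string, ErrorDetail> replica_name_to_error_detail;

    // insert_or_assign keeps only the most recent error recorded for each replica.
    replica_name_to_error_detail.insert_or_assign("replica-1:3306", ErrorDetail{2002, "connection refused"});
    replica_name_to_error_detail.insert_or_assign("replica-1:3306", ErrorDetail{1040, "too many connections"});
    replica_name_to_error_detail.insert_or_assign("replica-2:3306", ErrorDetail{2013, "lost connection"});

    // Compose a failure message the same way the final reporting loop does.
    std::string message = "Connections to all replicas failed: ";
    bool first = true;
    for (const auto & [replica, detail] : replica_name_to_error_detail)
    {
        const auto & [code, description] = detail;
        message += (first ? "" : ", ") + replica + ", ERROR " + std::to_string(code) + " : " + description;
        first = false;
    }
    std::cout << message << '\n';
}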
@ -47,6 +47,8 @@ class IColumn;
M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \
M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \
M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \
@ -75,7 +75,7 @@ private:
writeChar(':', out);
writeIntText(location.line, out);
if (frame)
if (frame && frame->name != nullptr)
{
writeChar(':', out);
int status = 0;
@ -894,13 +894,20 @@ private:
/// If then is NULL, we create Nullable column with null mask OR-ed with condition.
if (then_is_null)
{
ColumnPtr arg_else_column;
/// In case when arg_else column type differs with result
/// column type we should cast it to result type.
if (removeNullable(arg_else.type)->getName() != removeNullable(result_type)->getName())
arg_else_column = castColumn(arg_else, result_type);
else
arg_else_column = arg_else.column;
if (cond_col)
{
auto arg_else_column = arg_else.column;
auto result_column = IColumn::mutate(std::move(arg_else_column));
if (else_is_short)
result_column->expand(cond_col->getData(), true);
if (isColumnNullable(*arg_else.column))
if (isColumnNullable(*result_column))
{
assert_cast<ColumnNullable &>(*result_column).applyNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column));
return result_column;
@ -913,7 +920,7 @@ private:
if (cond_const_col->getValue<UInt8>())
return result_type->createColumn()->cloneResized(input_rows_count);
else
return makeNullableColumnIfNot(arg_else.column);
return makeNullableColumnIfNot(arg_else_column);
}
else
throw Exception("Illegal column " + arg_cond.column->getName() + " of first argument of function " + getName()
@ -924,14 +931,21 @@ private:
/// If else is NULL, we create Nullable column with null mask OR-ed with negated condition.
if (else_is_null)
{
ColumnPtr arg_then_column;
/// In case when arg_then column type differs with result
/// column type we should cast it to result type.
if (removeNullable(arg_then.type)->getName() != removeNullable(result_type)->getName())
arg_then_column = castColumn(arg_then, result_type);
else
arg_then_column = arg_then.column;
if (cond_col)
{
auto arg_then_column = arg_then.column;
auto result_column = IColumn::mutate(std::move(arg_then_column));
if (then_is_short)
result_column->expand(cond_col->getData(), false);
if (isColumnNullable(*arg_then.column))
if (isColumnNullable(*result_column))
{
assert_cast<ColumnNullable &>(*result_column).applyNegatedNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column));
return result_column;
@ -954,7 +968,7 @@ private:
else if (cond_const_col)
{
if (cond_const_col->getValue<UInt8>())
return makeNullableColumnIfNot(arg_then.column);
return makeNullableColumnIfNot(arg_then_column);
else
return result_type->createColumn()->cloneResized(input_rows_count);
}
@ -131,8 +131,10 @@ public:
message.value_or("Value passed to '" + getName() + "' function is non zero"));
}
size_t result_size = in_untyped->size();
/// We return non constant to avoid constant folding.
return ColumnUInt8::create(in_data.size(), 0);
return ColumnUInt8::create(result_size, 0);
}
return nullptr;
34
src/IO/IOThreadPool.cpp
Normal file
@ -0,0 +1,34 @@
#include <IO/IOThreadPool.h>
#include "Core/Field.h"
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
std::unique_ptr<ThreadPool> IOThreadPool::instance;
void IOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size)
{
if (instance)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is initialized twice");
}
instance = std::make_unique<ThreadPool>(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/);
}
ThreadPool & IOThreadPool::get()
{
if (!instance)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is not initialized");
}
return *instance;
}
}
20
src/IO/IOThreadPool.h
Normal file
@ -0,0 +1,20 @@
#pragma once
#include <Common/ThreadPool.h>
namespace DB
{
/*
* ThreadPool used for the IO.
*/
class IOThreadPool
{
static std::unique_ptr<ThreadPool> instance;
public:
static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size);
static ThreadPool & get();
};
}
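The new IOThreadPool is an initialize-once global: Server.cpp calls initialize() with the max_io_thread_pool_* settings during startup, and get() throws LOGICAL_ERROR if that ordering is violated. A self-contained sketch of the same guard pattern, built on a trivial stand-in pool rather than ClickHouse's ThreadPool:

#include <functional>
#include <memory>
#include <stdexcept>
#include <thread>
#include <vector>

/// Trivial stand-in for the real thread pool: runs each job on its own thread.
class TinyPool
{
public:
    explicit TinyPool(size_t /*max_threads*/) {}
    void schedule(std::function<void()> job) { workers.emplace_back(std::move(job)); }
    ~TinyPool() { for (auto & t : workers) t.join(); }
private:
    std::vector<std::thread> workers;
};

/// Same shape as IOThreadPool: a static instance that must be initialized exactly once.
class IOPool
{
    static std::unique_ptr<TinyPool> instance;
public:
    static void initialize(size_t max_threads)
    {
        if (instance)
            throw std::logic_error("The IO thread pool is initialized twice");
        instance = std::make_unique<TinyPool>(max_threads);
    }
    static TinyPool & get()
    {
        if (!instance)
            throw std::logic_error("The IO thread pool is not initialized");
        return *instance;
    }
};

std::unique_ptr<TinyPool> IOPool::instance;

int main()
{
    IOPool::initialize(4);                    // done once at startup
    IOPool::get().schedule([] { /* IO work */ });
}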
290
src/IO/ParallelReadBuffer.cpp
Normal file
@ -0,0 +1,290 @@
#include <IO/ParallelReadBuffer.h>
#include <base/logger_useful.h>
#include <Poco/Logger.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int SEEK_POSITION_OUT_OF_BOUND;
}
ParallelReadBuffer::ParallelReadBuffer(
std::unique_ptr<ReadBufferFactory> reader_factory_,
ThreadPool * pool_,
size_t max_working_readers_,
WorkerSetup worker_setup_,
WorkerCleanup worker_cleanup_)
: SeekableReadBufferWithSize(nullptr, 0)
, pool(pool_)
, max_working_readers(max_working_readers_)
, reader_factory(std::move(reader_factory_))
, worker_setup(std::move(worker_setup_))
, worker_cleanup(std::move(worker_cleanup_))
{
std::unique_lock<std::mutex> lock{mutex};
addReaders(lock);
}
bool ParallelReadBuffer::addReaderToPool(std::unique_lock<std::mutex> & /*buffer_lock*/)
{
auto reader = reader_factory->getReader();
if (!reader)
{
return false;
}
auto worker = read_workers.emplace_back(std::make_shared<ReadWorker>(std::move(reader)));
pool->scheduleOrThrow(
[&, this, worker = std::move(worker)]() mutable
{
ThreadStatus thread_status;
{
std::lock_guard lock{mutex};
++active_working_reader;
}
SCOPE_EXIT({
worker_cleanup(thread_status);
std::lock_guard lock{mutex};
--active_working_reader;
if (active_working_reader == 0)
{
readers_done.notify_all();
}
});
worker_setup(thread_status);
readerThreadFunction(std::move(worker));
});
return true;
}
void ParallelReadBuffer::addReaders(std::unique_lock<std::mutex> & buffer_lock)
{
while (read_workers.size() < max_working_readers && addReaderToPool(buffer_lock))
;
}
off_t ParallelReadBuffer::seek(off_t offset, int whence)
{
if (whence != SEEK_SET)
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
if (offset < 0)
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
if (!working_buffer.empty() && static_cast<size_t>(offset) >= current_position - working_buffer.size() && offset < current_position)
{
pos = working_buffer.end() - (current_position - offset);
assert(pos >= working_buffer.begin());
assert(pos <= working_buffer.end());
return offset;
}
std::unique_lock lock{mutex};
const auto offset_is_in_range
= [&](const auto & range) { return static_cast<size_t>(offset) >= range.left && static_cast<size_t>(offset) <= *range.right; };
while (!read_workers.empty() && (offset < current_position || !offset_is_in_range(read_workers.front()->range)))
{
read_workers.front()->cancel = true;
read_workers.pop_front();
}
if (!read_workers.empty())
{
auto & front_worker = read_workers.front();
auto & segments = front_worker->segments;
current_position = front_worker->range.left;
while (true)
{
next_condvar.wait(lock, [&] { return emergency_stop || !segments.empty(); });
if (emergency_stop)
handleEmergencyStop();
auto next_segment = front_worker->nextSegment();
if (static_cast<size_t>(offset) < current_position + next_segment.size())
{
current_segment = std::move(next_segment);
working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
current_position += current_segment.size();
pos = working_buffer.end() - (current_position - offset);
addReaders(lock);
return offset;
}
current_position += next_segment.size();
}
}
lock.unlock();
finishAndWait();
reader_factory->seek(offset, whence);
all_completed = false;
read_workers.clear();
current_position = offset;
resetWorkingBuffer();
emergency_stop = false;
lock.lock();
addReaders(lock);
return offset;
}
std::optional<size_t> ParallelReadBuffer::getTotalSize()
{
std::lock_guard lock{mutex};
return reader_factory->getTotalSize();
}
off_t ParallelReadBuffer::getPosition()
{
return current_position - available();
}
bool ParallelReadBuffer::currentWorkerReady() const
{
assert(!read_workers.empty());
return read_workers.front()->finished || !read_workers.front()->segments.empty();
}
bool ParallelReadBuffer::currentWorkerCompleted() const
{
assert(!read_workers.empty());
return read_workers.front()->finished && read_workers.front()->segments.empty();
}
void ParallelReadBuffer::handleEmergencyStop()
{
// this can only be called from the main thread when there is an exception
assert(background_exception);
if (background_exception)
std::rethrow_exception(background_exception);
}
bool ParallelReadBuffer::nextImpl()
{
if (all_completed)
return false;
while (true)
{
std::unique_lock lock(mutex);
next_condvar.wait(
lock,
[this]()
{
/// Check if no more readers left or current reader can be processed
return emergency_stop || currentWorkerReady();
});
bool worker_removed = false;
/// Remove completed units
while (!read_workers.empty() && currentWorkerCompleted() && !emergency_stop)
{
read_workers.pop_front();
worker_removed = true;
}
if (emergency_stop)
handleEmergencyStop();
if (worker_removed)
addReaders(lock);
/// All readers processed, stop
if (read_workers.empty())
{
all_completed = true;
return false;
}
auto & front_worker = read_workers.front();
/// Read data from first segment of the first reader
if (!front_worker->segments.empty())
{
current_segment = front_worker->nextSegment();
if (currentWorkerCompleted())
{
read_workers.pop_front();
all_completed = !addReaderToPool(lock) && read_workers.empty();
}
break;
}
}
working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
current_position += working_buffer.size();
return true;
}
void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
{
try
{
while (!emergency_stop && !read_worker->cancel)
{
if (!read_worker->reader->next())
throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
if (emergency_stop || read_worker->cancel)
break;
Buffer buffer = read_worker->reader->buffer();
size_t bytes_to_copy = std::min(buffer.size(), read_worker->bytes_left);
Segment new_segment(bytes_to_copy, &arena);
memcpy(new_segment.data(), buffer.begin(), bytes_to_copy);
read_worker->reader->ignore(bytes_to_copy);
read_worker->bytes_left -= bytes_to_copy;
{
/// New data ready to be read
std::lock_guard lock(mutex);
read_worker->segments.emplace_back(std::move(new_segment));
read_worker->finished = read_worker->bytes_left == 0;
next_condvar.notify_all();
}
if (read_worker->finished)
{
break;
}
}
}
catch (...)
{
onBackgroundException();
}
}
void ParallelReadBuffer::onBackgroundException()
{
std::lock_guard lock(mutex);
if (!background_exception)
{
background_exception = std::current_exception();
}
emergency_stop = true;
next_condvar.notify_all();
}
void ParallelReadBuffer::finishAndWait()
{
emergency_stop = true;
std::unique_lock lock{mutex};
readers_done.wait(lock, [&] { return active_working_reader == 0; });
}
}
174
src/IO/ParallelReadBuffer.h
Normal file
@ -0,0 +1,174 @@
#pragma once
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <IO/SeekableReadBuffer.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ThreadPool.h>
namespace DB
{
/**
* Reads from multiple ReadBuffers in parallel.
* Preserves order of readers obtained from ReadBufferFactory.
*
* It consumes multiple readers and yields data from them in order as it passed.
* Each working reader save segments of data to internal queue.
*
* ParallelReadBuffer in nextImpl method take first available segment from first reader in deque and fed it to user.
* When first reader finish reading, they will be removed from worker deque and data from next reader consumed.
*
* Number of working readers limited by max_working_readers.
*/
class ParallelReadBuffer : public SeekableReadBufferWithSize
{
private:
/// Blocks until data occurred in the first reader or this reader indicate finishing
/// Finished readers removed from queue and data from next readers processed
bool nextImpl() override;
class Segment : private boost::noncopyable
{
public:
Segment(size_t size_, SynchronizedArenaWithFreeLists * arena_) : arena(arena_), m_data(arena->alloc(size_)), m_size(size_) { }
Segment() = default;
Segment(Segment && other) noexcept : arena(other.arena)
{
std::swap(m_data, other.m_data);
std::swap(m_size, other.m_size);
}
Segment & operator=(Segment && other) noexcept
{
arena = other.arena;
std::swap(m_data, other.m_data);
std::swap(m_size, other.m_size);
return *this;
}
~Segment()
{
if (m_data)
{
arena->free(m_data, m_size);
}
}
auto data() const noexcept { return m_data; }
auto size() const noexcept { return m_size; }
private:
SynchronizedArenaWithFreeLists * arena{nullptr};
char * m_data{nullptr};
size_t m_size{0};
};
public:
class ReadBufferFactory
{
public:
virtual SeekableReadBufferPtr getReader() = 0;
virtual ~ReadBufferFactory() = default;
virtual off_t seek(off_t off, int whence) = 0;
virtual std::optional<size_t> getTotalSize() = 0;
};
using WorkerSetup = std::function<void(ThreadStatus &)>;
using WorkerCleanup = std::function<void(ThreadStatus &)>;
explicit ParallelReadBuffer(
std::unique_ptr<ReadBufferFactory> reader_factory_,
ThreadPool * pool,
size_t max_working_readers,
WorkerSetup worker_setup = {},
WorkerCleanup worker_cleanup = {});
~ParallelReadBuffer() override { finishAndWait(); }
off_t seek(off_t off, int whence) override;
std::optional<size_t> getTotalSize() override;
off_t getPosition() override;
private:
/// Reader in progress with a list of read segments
struct ReadWorker
{
explicit ReadWorker(SeekableReadBufferPtr reader_) : reader(std::move(reader_)), range(reader->getRemainingReadRange())
{
assert(range.right);
bytes_left = *range.right - range.left + 1;
}
Segment nextSegment()
{
assert(!segments.empty());
auto next_segment = std::move(segments.front());
segments.pop_front();
range.left += next_segment.size();
return next_segment;
}
SeekableReadBufferPtr reader;
std::deque<Segment> segments;
bool finished{false};
SeekableReadBuffer::Range range;
size_t bytes_left{0};
std::atomic_bool cancel{false};
};
using ReadWorkerPtr = std::shared_ptr<ReadWorker>;
/// First worker in deque have new data or processed all available amount
bool currentWorkerReady() const;
/// First worker in deque processed and flushed all data
bool currentWorkerCompleted() const;
void handleEmergencyStop();
void addReaders(std::unique_lock<std::mutex> & buffer_lock);
bool addReaderToPool(std::unique_lock<std::mutex> & buffer_lock);
/// Process read_worker, read data and save into internal segments queue
void readerThreadFunction(ReadWorkerPtr read_worker);
void onBackgroundException();
void finishAndWait();
SynchronizedArenaWithFreeLists arena;
Segment current_segment;
ThreadPool * pool;
size_t max_working_readers;
size_t active_working_reader{0};
// Triggered when all reader workers are done
std::condition_variable readers_done;
std::unique_ptr<ReadBufferFactory> reader_factory;
WorkerSetup worker_setup;
WorkerCleanup worker_cleanup;
/**
* FIFO queue of readers.
* Each worker contains reader itself and downloaded segments.
* When reader read all available data it will be removed from
* deque and data from next reader will be consumed to user.
*/
std::deque<ReadWorkerPtr> read_workers;
std::mutex mutex;
/// Triggered when new data available
std::condition_variable next_condvar;
std::exception_ptr background_exception = nullptr;
std::atomic_bool emergency_stop{false};
off_t current_position{0};
bool all_completed{false};
};
}
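The header comment above describes the contract: every worker downloads its own byte range and appends segments to a per-worker queue, while the consumer drains the workers strictly in creation order, so bytes come out in order even though downloads overlap. A deliberately simplified, self-contained model of that hand-off follows; plain std::thread and std::string segments stand in for the real readers and segments:

#include <condition_variable>
#include <deque>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

struct Worker
{
    std::deque<std::string> segments; // downloaded pieces, in arrival order
    bool finished = false;
};

int main()
{
    // Three "ranges" of one object; each worker fills only its own queue.
    std::vector<std::string> ranges = {"AAAA", "BBBB", "CC"};
    std::vector<Worker> workers(ranges.size());
    std::mutex mutex;
    std::condition_variable next_condvar;

    std::vector<std::thread> threads;
    for (size_t i = 0; i < ranges.size(); ++i)
        threads.emplace_back([&, i]
        {
            // Pretend to download the range in 2-byte segments.
            for (size_t pos = 0; pos < ranges[i].size(); pos += 2)
            {
                std::lock_guard lock{mutex};
                workers[i].segments.push_back(ranges[i].substr(pos, 2));
                next_condvar.notify_all();
            }
            std::lock_guard lock{mutex};
            workers[i].finished = true;
            next_condvar.notify_all();
        });

    // Consumer: like nextImpl(), always drain the front worker before moving on.
    std::string result;
    for (auto & worker : workers)
    {
        std::unique_lock lock{mutex};
        while (true)
        {
            next_condvar.wait(lock, [&] { return worker.finished || !worker.segments.empty(); });
            if (worker.segments.empty())
                break; // this worker is done, switch to the next one
            result += worker.segments.front();
            worker.segments.pop_front();
        }
    }

    for (auto & t : threads)
        t.join();
    std::cout << result << '\n'; // prints AAAABBBBCC: order is preserved
}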
@ -1,32 +1,33 @@
#pragma once
#include <functional>
#include <base/types.h>
#include <base/sleep.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <IO/ParallelReadBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferFromIStream.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadSettings.h>
#include <base/logger_useful.h>
#include <base/sleep.h>
#include <base/types.h>
#include <Poco/Any.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>
#include <Poco/URIStreamFactory.h>
#include <Poco/Version.h>
#include <Common/DNSResolver.h>
#include <Common/RemoteHostFilter.h>
#include <Common/config.h>
#include <Common/config_version.h>
#include <base/logger_useful.h>
#include <Poco/URIStreamFactory.h>
namespace ProfileEvents
{
extern const Event ReadBufferSeekCancelConnection;
extern const Event ReadBufferSeekCancelConnection;
}
namespace DB
@ -48,7 +49,7 @@ class UpdatableSessionBase
{
protected:
SessionPtr session;
UInt64 redirects { 0 };
UInt64 redirects{0};
Poco::URI initial_uri;
ConnectionTimeouts timeouts;
UInt64 max_redirects;
@ -56,19 +57,12 @@ protected:
public:
virtual void buildNewSession(const Poco::URI & uri) = 0;
explicit UpdatableSessionBase(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
UInt64 max_redirects_)
: initial_uri { uri }
, timeouts { timeouts_ }
, max_redirects { max_redirects_ }
explicit UpdatableSessionBase(const Poco::URI uri, const ConnectionTimeouts & timeouts_, UInt64 max_redirects_)
: initial_uri{uri}, timeouts{timeouts_}, max_redirects{max_redirects_}
{
}
SessionPtr getSession()
{
return session;
}
SessionPtr getSession() { return session; }
void updateSession(const Poco::URI & uri)
{
@ -99,7 +93,7 @@ namespace detail
/// HTTP range, including right bound [begin, end].
struct Range
{
size_t begin = 0;
std::optional<size_t> begin;
std::optional<size_t> end;
};
@ -144,10 +138,9 @@ namespace detail
return read_range.begin || read_range.end || retry_with_range_header;
}
size_t getOffset() const
{
return read_range.begin + offset_from_begin_pos;
}
size_t getRangeBegin() const { return read_range.begin.value_or(0); }
size_t getOffset() const { return getRangeBegin() + offset_from_begin_pos; }
std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
{
@ -161,7 +154,7 @@ namespace detail
if (out_stream_callback)
request.setChunkedTransferEncoding(true);
for (auto & http_header_entry: http_header_entries)
for (auto & http_header_entry : http_header_entries)
request.set(std::get<0>(http_header_entry), std::get<1>(http_header_entry));
if (withPartialContent())
@ -207,26 +200,14 @@ namespace detail
std::optional<size_t> getTotalSize() override
{
if (read_range.end)
return *read_range.end - read_range.begin;
return *read_range.end - getRangeBegin();
Poco::Net::HTTPResponse response;
for (size_t i = 0; i < 10; ++i)
{
try
{
call(response, Poco::Net::HTTPRequest::HTTP_HEAD);
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
session->updateSession(uri_redirect);
istr = callImpl(uri_redirect, response, method);
}
callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD);
break;
}
catch (const Poco::Exception & e)
@ -236,7 +217,7 @@ namespace detail
}
if (response.hasContentLength())
read_range.end = read_range.begin + response.getContentLength();
read_range.end = getRangeBegin() + response.getContentLength();
return read_range.end;
}
@ -252,6 +233,21 @@ namespace detail
InitializeError initialization_error = InitializeError::NONE;
private:
void setupExternalBuffer()
{
/**
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data,
* each nextImpl() call we can fill a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
}
public:
using NextCallback = std::function<void(size_t)>;
using OutStreamCallback = std::function<void(std::ostream &)>;
@ -276,7 +272,7 @@ namespace detail
, session {session_}
, out_stream_callback {out_stream_callback_}
, credentials {credentials_}
, http_header_entries {http_header_entries_}
, http_header_entries {std::move(http_header_entries_)}
, remote_host_filter {remote_host_filter_}
, buffer_size {buffer_size_}
, use_external_buffer {use_external_buffer_}
@ -287,18 +283,21 @@ namespace detail
{
if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0
|| settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Invalid setting for http backoff, "
"must be http_max_tries >= 1 (current is {}) and "
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms);
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Invalid setting for http backoff, "
"must be http_max_tries >= 1 (current is {}) and "
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
settings.http_max_tries,
settings.http_retry_initial_backoff_ms,
settings.http_retry_max_backoff_ms);
// Configure User-Agent if it not already set.
const std::string user_agent = "User-Agent";
auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), [&user_agent](const HTTPHeaderEntry & entry)
{
return std::get<0>(entry) == user_agent;
});
auto iter = std::find_if(
http_header_entries.begin(),
http_header_entries.end(),
[&user_agent](const HTTPHeaderEntry & entry) { return std::get<0>(entry) == user_agent; });
if (iter == http_header_entries.end())
{
@ -313,7 +312,36 @@ namespace detail
}
}
void call(Poco::Net::HTTPResponse & response, const String & method_)
static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept
{
constexpr std::array non_retriable_errors{
Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED};
return std::all_of(
non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; });
}
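isRetriableError() inverts a fixed blacklist: only the five listed client-error statuses are treated as permanent, so anything else (for example 429 or 5xx responses) stays eligible for the retry-with-backoff loop. The same std::all_of check, as a standalone sketch over plain integer status codes:

#include <algorithm>
#include <array>
#include <iostream>

static bool isRetriableError(int http_status) noexcept
{
    // Mirrors the blacklist above: 400, 401, 403, 404, 405 never make sense to
    // retry; everything else is assumed transient.
    constexpr std::array non_retriable_errors{400, 401, 403, 404, 405};
    return std::all_of(non_retriable_errors.begin(), non_retriable_errors.end(),
                       [&](int status) { return http_status != status; });
}

int main()
{
    std::cout << std::boolalpha
              << isRetriableError(404) << '\n'   // false: give up immediately
              << isRetriableError(503) << '\n';  // true: retry with backoff
}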
void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
{
call(response, method_, throw_on_all_errors);
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
session->updateSession(uri_redirect);
istr = callImpl(uri_redirect, response, method);
}
}
void call(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
{
try
{
@ -321,18 +349,18 @@ namespace detail
}
catch (...)
{
if (throw_on_all_errors)
{
throw;
}
auto http_status = response.getStatus();
if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
&& http_skip_not_found_url)
if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url)
{
initialization_error = InitializeError::SKIP_NOT_FOUND_URL;
}
else if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED)
else if (!isRetriableError(http_status))
{
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
exception = std::current_exception();
@ -372,12 +400,14 @@ namespace detail
if (withPartialContent() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
{
/// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0.
if (read_range.begin)
if (read_range.begin && *read_range.begin != 0)
{
if (!exception)
exception = std::make_exception_ptr(
Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
"Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-'));
exception = std::make_exception_ptr(Exception(
ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
"Cannot read with range: [{}, {}]",
*read_range.begin,
read_range.end ? *read_range.end : '-'));
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
return;
@ -386,12 +416,12 @@ namespace detail
{
/// We could have range.begin == 0 and range.end != 0 in case of DiskWeb and failing to read with partial content
/// will affect only performance, so a warning is enough.
LOG_WARNING(log, "Unable to read with range header: [{}, {}]", read_range.begin, *read_range.end);
LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end);
}
}
if (!offset_from_begin_pos && !read_range.end && response.hasContentLength())
read_range.end = read_range.begin + response.getContentLength();
read_range.end = getRangeBegin() + response.getContentLength();
try
{
@ -399,12 +429,7 @@ namespace detail
if (use_external_buffer)
{
/**
* See comment 30 lines below.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
}
catch (const Poco::Exception & e)
@ -426,23 +451,17 @@ namespace detail
if (next_callback)
next_callback(count());
if (read_range.end && getOffset() == read_range.end.value())
if (read_range.end && getOffset() > read_range.end.value())
{
assert(getOffset() == read_range.end.value() + 1);
return false;
}
if (impl)
{
if (use_external_buffer)
{
/**
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data,
* each nextImpl() call we can fill a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
else
{
@ -477,10 +496,7 @@ namespace detail
if (use_external_buffer)
{
/// See comment 40 lines above.
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
}
@ -498,13 +514,18 @@ namespace detail
if (!can_retry_request)
throw;
LOG_ERROR(log,
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
"Error: {}. (Current backoff wait is {}/{} ms)",
uri.toString(), i + 1, settings.http_max_tries,
getOffset(), read_range.end ? toString(*read_range.end) : "unknown",
e.displayText(),
milliseconds_to_wait, settings.http_retry_max_backoff_ms);
LOG_ERROR(
log,
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
"Error: {}. (Current backoff wait is {}/{} ms)",
uri.toString(),
i + 1,
settings.http_max_tries,
getOffset(),
read_range.end ? toString(*read_range.end) : "unknown",
e.displayText(),
milliseconds_to_wait,
settings.http_retry_max_backoff_ms);
retry_with_range_header = true;
exception = std::current_exception();
@ -529,10 +550,7 @@ namespace detail
return true;
}
off_t getPosition() override
{
return getOffset() - available();
}
off_t getPosition() override { return getOffset() - available(); }
off_t seek(off_t offset_, int whence) override
{
@ -540,12 +558,11 @@ namespace detail
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
if (offset_ < 0)
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
throw Exception(
"Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
off_t current_offset = getOffset();
if (!working_buffer.empty()
&& size_t(offset_) >= current_offset - working_buffer.size()
&& offset_ < current_offset)
if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset)
{
pos = working_buffer.end() - (current_offset - offset_);
assert(pos >= working_buffer.begin());
@ -567,7 +584,6 @@ namespace detail
if (impl)
{
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
impl.reset();
}
@ -580,6 +596,8 @@ namespace detail
return offset_;
}
SeekableReadBuffer::Range getRemainingReadRange() const override { return {getOffset(), read_range.end}; }
std::string getResponseCookie(const std::string & name, const std::string & def) const
{
for (const auto & cookie : cookies)
@ -599,10 +617,7 @@ namespace detail
next_callback(count());
}
const std::string & getCompressionMethod() const
{
return content_encoding;
}
const std::string & getCompressionMethod() const { return content_encoding; }
};
}
@ -611,19 +626,50 @@ class UpdatableSession : public UpdatableSessionBase<HTTPSessionPtr>
using Parent = UpdatableSessionBase<HTTPSessionPtr>;
public:
UpdatableSession(
const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const UInt64 max_redirects_)
UpdatableSession(const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_)
: Parent(uri, timeouts_, max_redirects_)
{
session = makeHTTPSession(initial_uri, timeouts);
}
void buildNewSession(const Poco::URI & uri) override
void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
};
class RangeGenerator
{
public:
explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
: from(range_start), range_step(range_step_), total_size(total_size_)
{
session = makeHTTPSession(uri, timeouts);
}
size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
using Range = std::pair<size_t, size_t>;
// return upper exclusive range of values, i.e. [from_range, to_range>
std::optional<Range> nextRange()
{
if (from >= total_size)
{
return std::nullopt;
}
auto to = from + range_step;
if (to >= total_size)
{
to = total_size;
}
Range range{from, to};
from = to;
return std::move(range);
}
private:
size_t from;
size_t range_step;
size_t total_size;
};
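RangeGenerator walks [range_start, total_size) in range_step chunks and hands out half-open pairs; RangedReadWriteBufferFromHTTPFactory below then subtracts one from the right bound because HTTP Range headers use inclusive bounds. A standalone copy of the generator with a worked example: a 10-byte object split with step 4 yields [0, 4), [4, 8), [8, 10):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <optional>
#include <utility>

class RangeGenerator
{
public:
    using Range = std::pair<size_t, size_t>; // half-open: [first, second)

    RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
        : from(range_start), range_step(range_step_), total_size(total_size_) {}

    std::optional<Range> nextRange()
    {
        if (from >= total_size)
            return std::nullopt;
        size_t to = std::min(from + range_step, total_size);
        Range range{from, to};
        from = to;
        return range;
    }

private:
    size_t from;
    size_t range_step;
    size_t total_size;
};

int main()
{
    RangeGenerator gen(10, 4); // 10-byte object, 4-byte download step
    while (auto range = gen.nextRange())
    {
        // The HTTP Range header is inclusive, hence the "- 1" on the right bound.
        std::cout << "half-open [" << range->first << ", " << range->second << ")"
                  << " -> bytes=" << range->first << "-" << range->second - 1 << '\n';
    }
}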
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
@ -631,7 +677,7 @@ class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
public:
ReadWriteBufferFromHTTP(
ReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_,
OutStreamCallback out_stream_callback_,
@ -646,14 +692,117 @@ public:
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false)
: Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
uri_, credentials_, method_, out_stream_callback_, buffer_size_,
settings_, http_header_entries_, read_range_, remote_host_filter_,
delay_initialization_, use_external_buffer_, skip_not_found_url_)
: Parent(
std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
uri_,
credentials_,
method_,
out_stream_callback_,
buffer_size_,
settings_,
http_header_entries_,
read_range_,
remote_host_filter_,
delay_initialization_,
use_external_buffer_,
skip_not_found_url_)
{
}
};
class RangedReadWriteBufferFromHTTPFactory : public ParallelReadBuffer::ReadBufferFactory
{
using OutStreamCallback = ReadWriteBufferFromHTTP::OutStreamCallback;
public:
RangedReadWriteBufferFromHTTPFactory(
size_t total_object_size_,
size_t range_step_,
Poco::URI uri_,
std::string method_,
OutStreamCallback out_stream_callback_,
ConnectionTimeouts timeouts_,
const Poco::Net::HTTPBasicCredentials & credentials_,
UInt64 max_redirects_ = 0,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
ReadSettings settings_ = {},
ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries_ = {},
const RemoteHostFilter * remote_host_filter_ = nullptr,
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false)
: range_generator(total_object_size_, range_step_)
, total_object_size(total_object_size_)
, range_step(range_step_)
, uri(uri_)
, method(std::move(method_))
, out_stream_callback(out_stream_callback_)
, timeouts(std::move(timeouts_))
, credentials(credentials_)
, max_redirects(max_redirects_)
, buffer_size(buffer_size_)
, settings(std::move(settings_))
, http_header_entries(std::move(http_header_entries_))
, remote_host_filter(remote_host_filter_)
, delay_initialization(delay_initialization_)
, use_external_buffer(use_external_buffer_)
, skip_not_found_url(skip_not_found_url_)
{
}
SeekableReadBufferPtr getReader() override
{
const auto next_range = range_generator.nextRange();
if (!next_range)
{
return nullptr;
}
return std::make_shared<ReadWriteBufferFromHTTP>(
uri,
method,
out_stream_callback,
timeouts,
credentials,
max_redirects,
buffer_size,
settings,
http_header_entries,
// HTTP Range has inclusive bounds, i.e. [from, to]
ReadWriteBufferFromHTTP::Range{next_range->first, next_range->second - 1},
remote_host_filter,
delay_initialization,
use_external_buffer,
skip_not_found_url);
}
off_t seek(off_t off, [[maybe_unused]] int whence) override
{
range_generator = RangeGenerator{total_object_size, range_step, static_cast<size_t>(off)};
return off;
}
std::optional<size_t> getTotalSize() override { return total_object_size; }
private:
RangeGenerator range_generator;
size_t total_object_size;
size_t range_step;
Poco::URI uri;
std::string method;
OutStreamCallback out_stream_callback;
ConnectionTimeouts timeouts;
const Poco::Net::HTTPBasicCredentials & credentials;
UInt64 max_redirects;
size_t buffer_size;
ReadSettings settings;
ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries;
const RemoteHostFilter * remote_host_filter;
bool delay_initialization;
bool use_external_buffer;
bool skip_not_found_url;
};
class UpdatablePooledSession : public UpdatableSessionBase<PooledHTTPSessionPtr>
{
using Parent = UpdatableSessionBase<PooledHTTPSessionPtr>;
@ -662,20 +811,14 @@ private:
size_t per_endpoint_pool_size;
public:
explicit UpdatablePooledSession(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const UInt64 max_redirects_,
size_t per_endpoint_pool_size_)
: Parent(uri, timeouts_, max_redirects_)
, per_endpoint_pool_size { per_endpoint_pool_size_ }
explicit UpdatablePooledSession(
const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_, size_t per_endpoint_pool_size_)
: Parent(uri, timeouts_, max_redirects_), per_endpoint_pool_size{per_endpoint_pool_size_}
{
session = makePooledHTTPSession(initial_uri, timeouts, per_endpoint_pool_size);
}
void buildNewSession(const Poco::URI & uri) override
{
session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size);
}
void buildNewSession(const Poco::URI & uri) override { session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); }
};
class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>
@ -683,7 +826,8 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>;
public:
explicit PooledReadWriteBufferFromHTTP(Poco::URI uri_,
explicit PooledReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback_ = {},
const ConnectionTimeouts & timeouts_ = {},
@ -691,12 +835,13 @@ public:
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const UInt64 max_redirects = 0,
size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT)
: Parent(std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
uri_,
credentials_,
method_,
out_stream_callback_,
buffer_size_)
: Parent(
std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
uri_,
credentials_,
method_,
out_stream_callback_,
buffer_size_)
{
}
};
@ -359,6 +359,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
table_lock.reset();
|
||||
table_id = StorageID::createEmpty();
|
||||
metadata_snapshot = nullptr;
|
||||
storage_snapshot = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,57 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class StorageWithComment : public IAST
|
||||
{
|
||||
public:
|
||||
ASTPtr storage;
|
||||
ASTPtr comment;
|
||||
|
||||
String getID(char) const override { return "Storage with comment definition"; }
|
||||
|
||||
ASTPtr clone() const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method clone is not supported");
|
||||
}
|
||||
|
||||
void formatImpl(const FormatSettings &, FormatState &, FormatStateStacked) const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported");
|
||||
}
|
||||
};
|
||||
|
||||
class ParserStorageWithComment : public IParserBase
|
||||
{
|
||||
protected:
|
||||
const char * getName() const override { return "storage definition with comment"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
|
||||
{
|
||||
ParserStorage storage_p;
|
||||
ASTPtr storage;
|
||||
|
||||
if (!storage_p.parse(pos, storage, expected))
|
||||
return false;
|
||||
|
||||
ParserKeyword s_comment("COMMENT");
|
||||
ParserStringLiteral string_literal_parser;
|
||||
ASTPtr comment;
|
||||
|
||||
if (s_comment.ignore(pos, expected))
|
||||
string_literal_parser.parse(pos, comment, expected);
|
||||
|
||||
auto storage_with_comment = std::make_shared<StorageWithComment>();
|
||||
storage_with_comment->storage = std::move(storage);
|
||||
storage_with_comment->comment = std::move(comment);
|
||||
|
||||
node = storage_with_comment;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
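Both call sites below replace ParserStorage with this ParserStorageWithComment, so the engine definition assembled for a system log table (ENGINE = MergeTree ... ORDER BY (event_date, event_time), optionally followed by COMMENT '<text>') is split into a storage AST and an optional comment literal. A rough Python sketch of that split, purely illustrative and not the real ClickHouse parser:

import re

# Purely illustrative: real validation goes through ClickHouse's parser; this only
# shows how a trailing COMMENT '<literal>' would be peeled off the engine string.
def split_storage_and_comment(engine_def: str):
    m = re.search(r"\s+COMMENT\s+'((?:[^'\\]|\\.)*)'\s*$", engine_def)
    if m is None:
        return engine_def, None
    return engine_def[: m.start()], m.group(1)


storage, comment = split_storage_and_comment(
    "ENGINE = MergeTree ORDER BY (event_date, event_time) COMMENT 'system log table'"
)
print(storage)  # ENGINE = MergeTree ORDER BY (event_date, event_time)
print(comment)  # system log table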
|
||||
|
||||
namespace
|
||||
@ -102,8 +153,9 @@ std::shared_ptr<TSystemLog> createSystemLog(
|
||||
engine += " TTL " + ttl;
|
||||
engine += " ORDER BY (event_date, event_time)";
|
||||
}
|
||||
|
||||
    // Validate engine definition syntactically to prevent some configuration errors
|
||||
ParserStorage storage_parser;
|
||||
ParserStorageWithComment storage_parser;
|
||||
parseQuery(storage_parser, engine.data(), engine.data() + engine.size(),
|
||||
"Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
|
||||
@ -450,7 +502,6 @@ void SystemLog<LogElement>::prepareTable()
|
||||
is_prepared = true;
|
||||
}
|
||||
|
||||
|
||||
template <typename LogElement>
|
||||
ASTPtr SystemLog<LogElement>::getCreateTableQuery()
|
||||
{
|
||||
@ -465,11 +516,16 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
|
||||
new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns));
|
||||
create->set(create->columns_list, new_columns_list);
|
||||
|
||||
ParserStorage storage_parser;
|
||||
ASTPtr storage_ast = parseQuery(
|
||||
ParserStorageWithComment storage_parser;
|
||||
|
||||
ASTPtr storage_with_comment_ast = parseQuery(
|
||||
storage_parser, storage_def.data(), storage_def.data() + storage_def.size(),
|
||||
"Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
create->set(create->storage, storage_ast);
|
||||
|
||||
StorageWithComment & storage_with_comment = storage_with_comment_ast->as<StorageWithComment &>();
|
||||
|
||||
create->set(create->storage, storage_with_comment.storage);
|
||||
create->set(create->comment, storage_with_comment.comment);
|
||||
|
||||
    /// Write additional (default) settings for MergeTree engine to make it possible to compare ASTs
|
||||
/// and recreate tables on settings changes.
|
||||
|
@ -7,16 +7,14 @@ import string
|
||||
TOKEN_TEXT = 1
|
||||
TOKEN_VAR = 2
|
||||
|
||||
TOKEN_COLON = ':'
|
||||
TOKEN_SEMI = ';'
|
||||
TOKEN_OR = '|'
|
||||
TOKEN_QUESTIONMARK = '?'
|
||||
TOKEN_ROUND_BRACKET_OPEN = '('
|
||||
TOKEN_ROUND_BRACKET_CLOSE = ')'
|
||||
TOKEN_ASTERISK = '*'
|
||||
TOKEN_SLASH = '/'
|
||||
|
||||
|
||||
TOKEN_COLON = ":"
|
||||
TOKEN_SEMI = ";"
|
||||
TOKEN_OR = "|"
|
||||
TOKEN_QUESTIONMARK = "?"
|
||||
TOKEN_ROUND_BRACKET_OPEN = "("
|
||||
TOKEN_ROUND_BRACKET_CLOSE = ")"
|
||||
TOKEN_ASTERISK = "*"
|
||||
TOKEN_SLASH = "/"
|
||||
|
||||
|
||||
class TextValue:
|
||||
@ -27,9 +25,9 @@ class TextValue:
|
||||
def get_slug(self):
|
||||
if self.slug is not None:
|
||||
return self.slug
|
||||
slug = ''
|
||||
slug = ""
|
||||
for c in self.t:
|
||||
slug += c if c in string.ascii_letters else '_'
|
||||
slug += c if c in string.ascii_letters else "_"
|
||||
self.slug = slug
|
||||
return slug
|
||||
|
||||
@ -37,12 +35,12 @@ class TextValue:
|
||||
return f"TextValue_{self.get_slug()}"
|
||||
|
||||
def __repr__(self):
|
||||
return f"TextValue(\"{self.t}\")"
|
||||
return f'TextValue("{self.t}")'
|
||||
|
||||
|
||||
class Var:
|
||||
def __init__(self, id_):
|
||||
self.id_ = id_
|
||||
self.id_ = id_
|
||||
|
||||
def __repr__(self):
|
||||
return f"Var({self.id_})"
|
||||
@ -59,8 +57,8 @@ class Parser:
|
||||
self.cur_tok = None
|
||||
self.includes = []
|
||||
|
||||
self.proto = ''
|
||||
self.cpp = ''
|
||||
self.proto = ""
|
||||
self.cpp = ""
|
||||
|
||||
def parse_file(self, filename):
|
||||
with open(filename) as f:
|
||||
@ -81,7 +79,7 @@ class Parser:
|
||||
if self.text[0] == '"':
|
||||
return self.parse_txt_value()
|
||||
|
||||
if self.text[0] == '$':
|
||||
if self.text[0] == "$":
|
||||
return self.parse_var_value()
|
||||
|
||||
c, self.text = self.text[0], self.text[1:]
|
||||
@ -89,9 +87,9 @@ class Parser:
|
||||
return c
|
||||
|
||||
def parse_var_value(self):
|
||||
i = self.text.find(' ')
|
||||
i = self.text.find(" ")
|
||||
|
||||
id_, self.text = self.text[1:i], self.text[i+1:]
|
||||
id_, self.text = self.text[1:i], self.text[i + 1 :]
|
||||
self.var_id = int(id_)
|
||||
self.cur_tok = TOKEN_VAR
|
||||
return TOKEN_VAR
|
||||
@ -100,12 +98,12 @@ class Parser:
|
||||
if self.text[0] != '"':
|
||||
raise Exception("parse_txt_value: expected quote at the start")
|
||||
|
||||
self.t = ''
|
||||
self.t = ""
|
||||
self.text = self.text[1:]
|
||||
|
||||
while self.text[0] != '"':
|
||||
if self.text[0] == '\\':
|
||||
if self.text[1] == 'x':
|
||||
if self.text[0] == "\\":
|
||||
if self.text[1] == "x":
|
||||
self.t += self.text[:4]
|
||||
self.text = self.text[4:]
|
||||
elif self.text[1] in 'nt\\"':
|
||||
@ -123,7 +121,7 @@ class Parser:
|
||||
|
||||
def skip_ws(self):
|
||||
while self.text and self.text[0] in string.whitespace:
|
||||
if self.text[0] == '\n':
|
||||
if self.text[0] == "\n":
|
||||
self.line += 1
|
||||
self.col = 0
|
||||
self.text = self.text[1:]
|
||||
@ -134,10 +132,9 @@ class Parser:
|
||||
|
||||
def skip_line(self):
|
||||
self.line += 1
|
||||
index = self.text.find('\n')
|
||||
index = self.text.find("\n")
|
||||
self.text = self.text[index:]
|
||||
|
||||
|
||||
def parse_statement(self):
|
||||
if self.skip_ws() is None:
|
||||
return None
|
||||
@ -164,52 +161,54 @@ class Parser:
|
||||
|
||||
def generate(self):
|
||||
self.proto = 'syntax = "proto3";\n\n'
|
||||
self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n'
|
||||
self.cpp = "#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n"
|
||||
|
||||
for incl_file in self.includes:
|
||||
self.cpp += f'#include "{incl_file}"\n'
|
||||
self.cpp += '\n'
|
||||
self.cpp += "\n"
|
||||
|
||||
self.proto += 'message Word {\n'
|
||||
self.proto += '\tenum Value {\n'
|
||||
self.proto += "message Word {\n"
|
||||
self.proto += "\tenum Value {\n"
|
||||
|
||||
self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n'
|
||||
self.cpp += "void GenerateWord(const Word&, std::string&, int);\n\n"
|
||||
|
||||
self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n'
|
||||
self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n'
|
||||
self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n'
|
||||
self.cpp += '\t}\n'
|
||||
self.cpp += '}\n'
|
||||
self.cpp += (
|
||||
"void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n"
|
||||
)
|
||||
self.cpp += "\tfor (int i = 0; i < stc.words_size(); i++ ) {\n"
|
||||
self.cpp += "\t\tGenerateWord(stc.words(i), s, ++depth);\n"
|
||||
self.cpp += "\t}\n"
|
||||
self.cpp += "}\n"
|
||||
|
||||
self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n'
|
||||
self.cpp += "void GenerateWord(const Word& word, std::string &s, int depth) {\n"
|
||||
|
||||
self.cpp += '\tif (depth > 5) return;\n\n'
|
||||
self.cpp += '\tswitch (word.value()) {\n'
|
||||
self.cpp += "\tif (depth > 5) return;\n\n"
|
||||
self.cpp += "\tswitch (word.value()) {\n"
|
||||
|
||||
for idx, chain in enumerate(self.chains):
|
||||
self.proto += f'\t\tvalue_{idx} = {idx};\n'
|
||||
self.proto += f"\t\tvalue_{idx} = {idx};\n"
|
||||
|
||||
self.cpp += f'\t\tcase {idx}: {{\n'
|
||||
self.cpp += f"\t\tcase {idx}: {{\n"
|
||||
num_var = 0
|
||||
for item in chain:
|
||||
if isinstance(item, TextValue):
|
||||
self.cpp += f'\t\t\ts += "{item.t}";\n'
|
||||
elif isinstance(item, Var):
|
||||
self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n'
|
||||
self.cpp += f"\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n"
|
||||
num_var += 1
|
||||
else:
|
||||
raise Exception("unknown token met during generation")
|
||||
self.cpp += '\t\t\tbreak;\n\t\t}\n'
|
||||
self.cpp += '\t\tdefault: break;\n'
|
||||
self.cpp += "\t\t\tbreak;\n\t\t}\n"
|
||||
self.cpp += "\t\tdefault: break;\n"
|
||||
|
||||
self.cpp += '\t}\n'
|
||||
self.cpp += "\t}\n"
|
||||
|
||||
self.proto += '\t}\n'
|
||||
self.proto += '\tValue value = 1;\n'
|
||||
self.proto += '\tSentence inner = 2;\n'
|
||||
self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}'
|
||||
self.proto += "\t}\n"
|
||||
self.proto += "\tValue value = 1;\n"
|
||||
self.proto += "\tSentence inner = 2;\n"
|
||||
self.proto += "}\nmessage Sentence {\n\trepeated Word words = 1;\n}"
|
||||
|
||||
self.cpp += '}\n'
|
||||
self.cpp += "}\n"
|
||||
return self.cpp, self.proto
|
||||
|
||||
def fatal_parsing_error(self, msg):
|
||||
@ -220,7 +219,7 @@ class Parser:
|
||||
def main(args):
|
||||
input_file, outfile_cpp, outfile_proto = args
|
||||
|
||||
if not outfile_proto.endswith('.proto'):
|
||||
if not outfile_proto.endswith(".proto"):
|
||||
raise Exception("outfile_proto (argv[3]) should end with `.proto`")
|
||||
|
||||
include_filename = outfile_proto[:-6] + ".pb.h"
|
||||
@ -231,17 +230,17 @@ def main(args):
|
||||
|
||||
cpp, proto = p.generate()
|
||||
|
||||
proto = proto.replace('\t', ' ' * 4)
|
||||
cpp = cpp.replace('\t', ' ' * 4)
|
||||
proto = proto.replace("\t", " " * 4)
|
||||
cpp = cpp.replace("\t", " " * 4)
|
||||
|
||||
with open(outfile_cpp, 'w') as f:
|
||||
with open(outfile_cpp, "w") as f:
|
||||
f.write(cpp)
|
||||
|
||||
with open(outfile_proto, 'w') as f:
|
||||
with open(outfile_proto, "w") as f:
|
||||
f.write(proto)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 3:
|
||||
print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>")
|
||||
sys.exit(1)
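This script is a small grammar-to-libFuzzer generator: it reads a grammar file and emits a .proto message plus C++ that turns fuzzer-generated protobuf messages back into query text. Per its own usage check it is run as python3 <script> <input_file> <outfile.cpp> <outfile.proto>, and the .proto path must end in ".proto" so the matching ".pb.h" include can be derived. The slug logic that embeds grammar text into identifiers is worth a standalone sketch:

import string

# Mirrors TextValue.get_slug() above: every character that is not an ASCII letter
# becomes an underscore, so arbitrary grammar text can appear inside an identifier.
def slug(text: str) -> str:
    return "".join(c if c in string.ascii_letters else "_" for c in text)


print(slug("SELECT 1;"))  # SELECT___
print(slug("a-b c"))      # a_b_c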
|
||||
|
@ -32,14 +32,14 @@ public:
|
||||
KeeperTCPHandlerFactory(
|
||||
ConfigGetter config_getter_,
|
||||
std::shared_ptr<KeeperDispatcher> keeper_dispatcher_,
|
||||
Poco::Timespan receive_timeout_,
|
||||
Poco::Timespan send_timeout_,
|
||||
uint64_t receive_timeout_seconds,
|
||||
uint64_t send_timeout_seconds,
|
||||
bool secure)
|
||||
: config_getter(config_getter_)
|
||||
, keeper_dispatcher(keeper_dispatcher_)
|
||||
, log(&Poco::Logger::get(std::string{"KeeperTCP"} + (secure ? "S" : "") + "HandlerFactory"))
|
||||
, receive_timeout(receive_timeout_)
|
||||
, send_timeout(send_timeout_)
|
||||
, receive_timeout(/* seconds = */ receive_timeout_seconds, /* microseconds = */ 0)
|
||||
, send_timeout(/* seconds = */ send_timeout_seconds, /* microseconds = */ 0)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -124,7 +124,8 @@ public:
|
||||
/// Initialize to_read_block, which is used to read data from HDFS.
|
||||
for (const auto & name_type : source_info->partition_name_types)
|
||||
{
|
||||
to_read_block.erase(name_type.name);
|
||||
if (to_read_block.has(name_type.name))
|
||||
to_read_block.erase(name_type.name);
|
||||
}
|
||||
|
||||
/// Initialize format settings
|
||||
|
@ -179,8 +179,9 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
|
||||
method = chooseCompressionMethod(current_path, compression_method);
|
||||
}
|
||||
|
||||
/// For clickhouse-local add progress callback to display progress bar.
|
||||
if (context->getApplicationType() == Context::ApplicationType::LOCAL)
|
||||
/// For clickhouse-local and clickhouse-client add progress callback to display progress bar.
|
||||
if (context->getApplicationType() == Context::ApplicationType::LOCAL
|
||||
|| context->getApplicationType() == Context::ApplicationType::CLIENT)
|
||||
{
|
||||
auto & in = static_cast<ReadBufferFromFileDescriptor &>(*nested_buffer);
|
||||
in.setProgressCallback(context);
|
||||
@ -643,7 +644,9 @@ Pipe StorageFile::read(
|
||||
|
||||
/// Set total number of bytes to process. For progress bar.
|
||||
auto progress_callback = context->getFileProgressCallback();
|
||||
if (context->getApplicationType() == Context::ApplicationType::LOCAL && progress_callback)
|
||||
if ((context->getApplicationType() == Context::ApplicationType::LOCAL
|
||||
|| context->getApplicationType() == Context::ApplicationType::CLIENT)
|
||||
&& progress_callback)
|
||||
progress_callback(FileProgress(0, total_bytes_to_read));
|
||||
|
||||
for (size_t i = 0; i < num_streams; ++i)
|
||||
|
@ -3,30 +3,35 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
#include <IO/IOThreadPool.h>
|
||||
#include <IO/ParallelReadBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromHTTP.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Formats/ReadSchemaUtils.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
#include <Processors/Formats/IOutputFormat.h>
|
||||
|
||||
#include <Common/parseRemoteDescription.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
#include <Storages/PartitionedSink.h>
|
||||
#include "Common/ThreadStatus.h"
|
||||
#include <Common/parseRemoteDescription.h>
|
||||
#include "IO/HTTPCommon.h"
|
||||
#include "IO/ReadWriteBufferFromHTTP.h"
|
||||
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
#include <algorithm>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <Processors/Sources/SourceWithProgress.h>
|
||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <base/logger_useful.h>
|
||||
#include <algorithm>
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -43,8 +48,7 @@ namespace ErrorCodes
|
||||
|
||||
static bool urlWithGlobs(const String & uri)
|
||||
{
|
||||
return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos)
|
||||
|| uri.find('|') != std::string::npos;
|
||||
return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos;
|
||||
}
|
||||
|
||||
|
||||
@ -88,8 +92,7 @@ IStorageURLBase::IStorageURLBase(
|
||||
|
||||
namespace
|
||||
{
|
||||
ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders(
|
||||
const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_)
|
||||
ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders(const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_)
|
||||
{
|
||||
ReadWriteBufferFromHTTP::HTTPHeaderEntries headers(headers_.begin(), headers_.end());
|
||||
// Propagate OpenTelemetry trace context, if any, downstream.
|
||||
@ -98,13 +101,11 @@ namespace
|
||||
const auto & thread_trace_context = CurrentThread::get().thread_trace_context;
|
||||
if (thread_trace_context.trace_id != UUID())
|
||||
{
|
||||
headers.emplace_back("traceparent",
|
||||
thread_trace_context.composeTraceparentHeader());
|
||||
headers.emplace_back("traceparent", thread_trace_context.composeTraceparentHeader());
|
||||
|
||||
if (!thread_trace_context.tracestate.empty())
|
||||
{
|
||||
headers.emplace_back("tracestate",
|
||||
thread_trace_context.tracestate);
|
||||
headers.emplace_back("tracestate", thread_trace_context.tracestate);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -114,8 +115,7 @@ namespace
|
||||
|
||||
class StorageURLSource : public SourceWithProgress
|
||||
{
|
||||
|
||||
using URIParams = std::vector<std::pair<String, String>>;
|
||||
using URIParams = std::vector<std::pair<String, String>>;
|
||||
|
||||
public:
|
||||
struct URIInfo
|
||||
@ -160,11 +160,11 @@ namespace
|
||||
UInt64 max_block_size,
|
||||
const ConnectionTimeouts & timeouts,
|
||||
const String & compression_method,
|
||||
size_t download_threads,
|
||||
const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {},
|
||||
const URIParams & params = {},
|
||||
bool glob_url = false)
|
||||
: SourceWithProgress(sample_block), name(std::move(name_))
|
||||
, uri_info(uri_info_)
|
||||
: SourceWithProgress(sample_block), name(std::move(name_)), uri_info(uri_info_)
|
||||
{
|
||||
auto headers = getHeaders(headers_);
|
||||
|
||||
@ -176,33 +176,40 @@ namespace
|
||||
|
||||
auto first_option = uri_options.begin();
|
||||
read_buf = getFirstAvailableURLReadBuffer(
|
||||
first_option, uri_options.end(), context, params, http_method,
|
||||
callback, timeouts, compression_method, credentials, headers, glob_url, uri_options.size() == 1);
|
||||
first_option,
|
||||
uri_options.end(),
|
||||
context,
|
||||
params,
|
||||
http_method,
|
||||
callback,
|
||||
timeouts,
|
||||
compression_method,
|
||||
credentials,
|
||||
headers,
|
||||
glob_url,
|
||||
uri_options.size() == 1,
|
||||
download_threads);
|
||||
|
||||
auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings);
|
||||
auto input_format
|
||||
= FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings);
|
||||
QueryPipelineBuilder builder;
|
||||
builder.init(Pipe(input_format));
|
||||
|
||||
builder.addSimpleTransform([&](const Block & cur_header)
|
||||
{
|
||||
return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *input_format, context);
|
||||
});
|
||||
builder.addSimpleTransform(
|
||||
[&](const Block & cur_header)
|
||||
{ return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *input_format, context); });
|
||||
|
||||
pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
|
||||
reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
|
||||
};
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
String getName() const override { return name; }
|
||||
|
||||
Chunk generate() override
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
|
||||
if (!reader)
|
||||
{
|
||||
auto current_uri_pos = uri_info->next_uri_to_read.fetch_add(1);
|
||||
@ -239,7 +246,8 @@ namespace
|
||||
Poco::Net::HTTPBasicCredentials & credentials,
|
||||
const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers,
|
||||
bool glob_url,
|
||||
bool delay_initialization)
|
||||
bool delay_initialization,
|
||||
size_t download_threads)
|
||||
{
|
||||
String first_exception_message;
|
||||
ReadSettings read_settings = context->getReadSettings();
|
||||
@ -255,8 +263,137 @@ namespace
|
||||
|
||||
setCredentials(credentials, request_uri);
|
||||
|
||||
const auto settings = context->getSettings();
|
||||
try
|
||||
{
|
||||
if (download_threads > 1)
|
||||
{
|
||||
try
|
||||
{
|
||||
ReadWriteBufferFromHTTP buffer(
|
||||
request_uri,
|
||||
Poco::Net::HTTPRequest::HTTP_HEAD,
|
||||
callback,
|
||||
timeouts,
|
||||
credentials,
|
||||
settings.max_http_get_redirects,
|
||||
DBMS_DEFAULT_BUFFER_SIZE,
|
||||
read_settings,
|
||||
headers,
|
||||
ReadWriteBufferFromHTTP::Range{0, std::nullopt},
|
||||
&context->getRemoteHostFilter(),
|
||||
true,
|
||||
/* use_external_buffer */ false,
|
||||
/* skip_url_not_found_error */ skip_url_not_found_error);
|
||||
|
||||
Poco::Net::HTTPResponse res;
|
||||
|
||||
for (size_t i = 0; i < settings.http_max_tries; ++i)
|
||||
{
|
||||
try
|
||||
{
|
||||
buffer.callWithRedirects(res, Poco::Net::HTTPRequest::HTTP_HEAD, true);
|
||||
break;
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
LOG_TRACE(
|
||||
&Poco::Logger::get("StorageURLSource"),
|
||||
"HTTP HEAD request to `{}` failed at try {}/{}. "
|
||||
"Error: {}.",
|
||||
request_uri.toString(),
|
||||
i + 1,
|
||||
settings.http_max_tries,
|
||||
e.displayText());
|
||||
if (!ReadWriteBufferFromHTTP::isRetriableError(res.getStatus()))
|
||||
{
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
                    // To check whether the Range header is supported, we need to send a request with it set
|
||||
const bool supports_ranges = (res.has("Accept-Ranges") && res.get("Accept-Ranges") == "bytes")
|
||||
|| (res.has("Content-Range") && res.get("Content-Range").starts_with("bytes"));
|
||||
LOG_TRACE(
|
||||
&Poco::Logger::get("StorageURLSource"),
|
||||
fmt::runtime(supports_ranges ? "HTTP Range is supported" : "HTTP Range is not supported"));
|
||||
|
||||
|
||||
if (supports_ranges && res.getStatus() == Poco::Net::HTTPResponse::HTTP_PARTIAL_CONTENT
|
||||
&& res.hasContentLength())
|
||||
{
|
||||
LOG_TRACE(
|
||||
&Poco::Logger::get("StorageURLSource"),
|
||||
"Using ParallelReadBuffer with {} workers with chunks of {} bytes",
|
||||
download_threads,
|
||||
settings.max_download_buffer_size);
|
||||
|
||||
auto read_buffer_factory = std::make_unique<RangedReadWriteBufferFromHTTPFactory>(
|
||||
res.getContentLength(),
|
||||
settings.max_download_buffer_size,
|
||||
request_uri,
|
||||
http_method,
|
||||
callback,
|
||||
timeouts,
|
||||
credentials,
|
||||
settings.max_http_get_redirects,
|
||||
DBMS_DEFAULT_BUFFER_SIZE,
|
||||
read_settings,
|
||||
headers,
|
||||
&context->getRemoteHostFilter(),
|
||||
delay_initialization,
|
||||
/* use_external_buffer */ false,
|
||||
/* skip_url_not_found_error */ skip_url_not_found_error);
|
||||
|
||||
ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()
|
||||
? CurrentThread::get().getThreadGroup()
|
||||
: MainThreadStatus::getInstance().getThreadGroup();
|
||||
|
||||
ContextPtr query_context
|
||||
= CurrentThread::isInitialized() ? CurrentThread::get().getQueryContext() : nullptr;
|
||||
|
||||
auto worker_cleanup = [has_running_group = running_group == nullptr](ThreadStatus & thread_status)
|
||||
{
|
||||
if (has_running_group)
|
||||
thread_status.detachQuery(false);
|
||||
};
|
||||
|
||||
auto worker_setup = [query_context = std::move(query_context),
|
||||
running_group = std::move(running_group)](ThreadStatus & thread_status)
|
||||
{
|
||||
/// Save query context if any, because cache implementation needs it.
|
||||
if (query_context)
|
||||
thread_status.attachQueryContext(query_context);
|
||||
|
||||
/// To be able to pass ProfileEvents.
|
||||
if (running_group)
|
||||
thread_status.attachQuery(running_group);
|
||||
};
|
||||
|
||||
|
||||
return wrapReadBufferWithCompressionMethod(
|
||||
std::make_unique<ParallelReadBuffer>(
|
||||
std::move(read_buffer_factory),
|
||||
&IOThreadPool::get(),
|
||||
download_threads,
|
||||
std::move(worker_setup),
|
||||
std::move(worker_cleanup)),
|
||||
chooseCompressionMethod(request_uri.getPath(), compression_method));
|
||||
}
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
LOG_TRACE(
|
||||
&Poco::Logger::get("StorageURLSource"),
|
||||
"Failed to setup ParallelReadBuffer because of an exception:\n{}.\nFalling back to the single-threaded "
|
||||
"buffer",
|
||||
e.displayText());
|
||||
}
|
||||
}
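The branch above only takes the parallel path when the server demonstrably supports byte ranges: a HEAD request (retried up to http_max_tries times) must either advertise Accept-Ranges: bytes or answer a ranged request with a Content-Range header, a 206 status and a known content length; otherwise the code falls back to the single-threaded buffer below. A rough Python sketch of the same probe using the requests library, offered as an illustration only and not how ClickHouse issues the request:

import requests  # assumption: requests is available; this is not ClickHouse code

def supports_byte_ranges(url: str, timeout: float = 10.0) -> bool:
    # Send a ranged HEAD request and look for the same signals as the code above:
    # Accept-Ranges: bytes, or a Content-Range answer with a 206 status.
    resp = requests.head(url, headers={"Range": "bytes=0-0"}, allow_redirects=True, timeout=timeout)
    if resp.headers.get("Accept-Ranges") == "bytes":
        return True
    return resp.status_code == 206 and resp.headers.get("Content-Range", "").startswith("bytes")

# If the probe succeeds, the download can be split into ranges fetched from a thread
# pool; if not, a single sequential read is the safe fallback.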
|
||||
|
||||
LOG_TRACE(&Poco::Logger::get("StorageURLSource"), "Using single-threaded read buffer");
|
||||
|
||||
return wrapReadBufferWithCompressionMethod(
|
||||
std::make_unique<ReadWriteBufferFromHTTP>(
|
||||
request_uri,
|
||||
@ -264,15 +401,15 @@ namespace
|
||||
callback,
|
||||
timeouts,
|
||||
credentials,
|
||||
context->getSettingsRef().max_http_get_redirects,
|
||||
settings.max_http_get_redirects,
|
||||
DBMS_DEFAULT_BUFFER_SIZE,
|
||||
read_settings,
|
||||
headers,
|
||||
ReadWriteBufferFromHTTP::Range{},
|
||||
&context->getRemoteHostFilter(),
|
||||
delay_initialization,
|
||||
/* use_external_buffer */false,
|
||||
/* skip_url_not_found_error */skip_url_not_found_error),
|
||||
/* use_external_buffer */ false,
|
||||
/* skip_url_not_found_error */ skip_url_not_found_error),
|
||||
chooseCompressionMethod(request_uri.getPath(), compression_method));
|
||||
}
|
||||
catch (...)
|
||||
@ -323,10 +460,10 @@ StorageURLSink::StorageURLSink(
|
||||
std::string content_encoding = toContentEncodingName(compression_method);
|
||||
|
||||
write_buf = wrapWriteBufferWithCompressionMethod(
|
||||
std::make_unique<WriteBufferFromHTTP>(Poco::URI(uri), http_method, content_type, content_encoding, timeouts),
|
||||
compression_method, 3);
|
||||
writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block,
|
||||
context, {} /* write callback */, format_settings);
|
||||
std::make_unique<WriteBufferFromHTTP>(Poco::URI(uri), http_method, content_type, content_encoding, timeouts),
|
||||
compression_method,
|
||||
3);
|
||||
writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, {} /* write callback */, format_settings);
|
||||
}
|
||||
|
||||
|
||||
@ -355,15 +492,15 @@ public:
|
||||
const ConnectionTimeouts & timeouts_,
|
||||
const CompressionMethod compression_method_,
|
||||
const String & http_method_)
|
||||
: PartitionedSink(partition_by, context_, sample_block_)
|
||||
, uri(uri_)
|
||||
, format(format_)
|
||||
, format_settings(format_settings_)
|
||||
, sample_block(sample_block_)
|
||||
, context(context_)
|
||||
, timeouts(timeouts_)
|
||||
, compression_method(compression_method_)
|
||||
, http_method(http_method_)
|
||||
: PartitionedSink(partition_by, context_, sample_block_)
|
||||
, uri(uri_)
|
||||
, format(format_)
|
||||
, format_settings(format_settings_)
|
||||
, sample_block(sample_block_)
|
||||
, context(context_)
|
||||
, timeouts(timeouts_)
|
||||
, compression_method(compression_method_)
|
||||
, http_method(http_method_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -371,8 +508,8 @@ public:
|
||||
{
|
||||
auto partition_path = PartitionedSink::replaceWildcards(uri, partition_id);
|
||||
context->getRemoteHostFilter().checkURL(Poco::URI(partition_path));
|
||||
return std::make_shared<StorageURLSink>(partition_path, format,
|
||||
format_settings, sample_block, context, timeouts, compression_method, http_method);
|
||||
return std::make_shared<StorageURLSink>(
|
||||
partition_path, format, format_settings, sample_block, context, timeouts, compression_method, http_method);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -462,7 +599,8 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData(
|
||||
credentials,
|
||||
headers,
|
||||
false,
|
||||
false);
|
||||
false,
|
||||
context->getSettingsRef().max_download_threads);
|
||||
};
|
||||
|
||||
try
|
||||
@ -479,7 +617,10 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData(
|
||||
|
||||
} while (++option < urls_to_check.end());
|
||||
|
||||
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from urls failed. Errors:\n{}", exception_messages);
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"All attempts to extract table structure from urls failed. Errors:\n{}",
|
||||
exception_messages);
|
||||
}
|
||||
|
||||
bool IStorageURLBase::isColumnOriented() const
|
||||
@ -512,6 +653,8 @@ Pipe IStorageURLBase::read(
|
||||
block_for_format = storage_snapshot->metadata->getSampleBlock();
|
||||
}
|
||||
|
||||
size_t max_download_threads = local_context->getSettingsRef().max_download_threads;
|
||||
|
||||
if (urlWithGlobs(uri))
|
||||
{
|
||||
size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements;
|
||||
@ -528,14 +671,13 @@ Pipe IStorageURLBase::read(
|
||||
Pipes pipes;
|
||||
pipes.reserve(num_streams);
|
||||
|
||||
size_t download_threads = num_streams >= max_download_threads ? 1 : (max_download_threads / num_streams);
|
||||
for (size_t i = 0; i < num_streams; ++i)
|
||||
{
|
||||
pipes.emplace_back(std::make_shared<StorageURLSource>(
|
||||
uri_info,
|
||||
getReadMethod(),
|
||||
getReadPOSTDataCallback(
|
||||
column_names, columns_description, query_info,
|
||||
local_context, processed_stage, max_block_size),
|
||||
getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size),
|
||||
format_name,
|
||||
format_settings,
|
||||
getName(),
|
||||
@ -544,7 +686,11 @@ Pipe IStorageURLBase::read(
|
||||
columns_description,
|
||||
max_block_size,
|
||||
ConnectionTimeouts::getHTTPTimeouts(local_context),
|
||||
compression_method, headers, params, /* glob_url */true));
|
||||
compression_method,
|
||||
download_threads,
|
||||
headers,
|
||||
params,
|
||||
/* glob_url */ true));
|
||||
}
|
||||
return Pipe::unitePipes(std::move(pipes));
|
||||
}
|
||||
@ -555,9 +701,7 @@ Pipe IStorageURLBase::read(
|
||||
return Pipe(std::make_shared<StorageURLSource>(
|
||||
uri_info,
|
||||
getReadMethod(),
|
||||
getReadPOSTDataCallback(
|
||||
column_names, columns_description, query_info,
|
||||
local_context, processed_stage, max_block_size),
|
||||
getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size),
|
||||
format_name,
|
||||
format_settings,
|
||||
getName(),
|
||||
@ -566,7 +710,10 @@ Pipe IStorageURLBase::read(
|
||||
columns_description,
|
||||
max_block_size,
|
||||
ConnectionTimeouts::getHTTPTimeouts(local_context),
|
||||
compression_method, headers, params));
|
||||
compression_method,
|
||||
max_download_threads,
|
||||
headers,
|
||||
params));
|
||||
}
|
||||
}
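In the glob branch above, max_download_threads is shared out across the num_streams pipes: each stream gets max_download_threads / num_streams download threads, with a floor of one once streams outnumber the available threads (the single-URI branch simply passes max_download_threads through). A tiny sketch of that arithmetic:

# Same rule as: download_threads = num_streams >= max_download_threads
#                                  ? 1 : max_download_threads / num_streams
def threads_per_stream(max_download_threads: int, num_streams: int) -> int:
    if num_streams >= max_download_threads:
        return 1
    return max_download_threads // num_streams


assert threads_per_stream(16, 4) == 4  # 4 streams, 4 download threads each
assert threads_per_stream(2, 8) == 1   # never less than one thread per stream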
|
||||
|
||||
@ -598,12 +745,10 @@ Pipe StorageURLWithFailover::read(
|
||||
|
||||
auto uri_info = std::make_shared<StorageURLSource::URIInfo>();
|
||||
uri_info->uri_list_to_read.emplace_back(uri_options);
|
||||
auto pipe = Pipe(std::make_shared<StorageURLSource>(
|
||||
auto pipe = Pipe(std::make_shared<StorageURLSource>(
|
||||
uri_info,
|
||||
getReadMethod(),
|
||||
getReadPOSTDataCallback(
|
||||
column_names, columns_description, query_info,
|
||||
local_context, processed_stage, max_block_size),
|
||||
getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size),
|
||||
format_name,
|
||||
format_settings,
|
||||
getName(),
|
||||
@ -612,7 +757,10 @@ Pipe StorageURLWithFailover::read(
|
||||
columns_description,
|
||||
max_block_size,
|
||||
ConnectionTimeouts::getHTTPTimeouts(local_context),
|
||||
compression_method, headers, params));
|
||||
compression_method,
|
||||
local_context->getSettingsRef().max_download_threads,
|
||||
headers,
|
||||
params));
|
||||
std::shuffle(uri_options.begin(), uri_options.end(), thread_local_rng);
|
||||
return pipe;
|
||||
}
|
||||
@ -632,17 +780,26 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad
|
||||
{
|
||||
return std::make_shared<PartitionedStorageURLSink>(
|
||||
partition_by_ast,
|
||||
uri, format_name,
|
||||
format_settings, metadata_snapshot->getSampleBlock(), context,
|
||||
uri,
|
||||
format_name,
|
||||
format_settings,
|
||||
metadata_snapshot->getSampleBlock(),
|
||||
context,
|
||||
ConnectionTimeouts::getHTTPTimeouts(context),
|
||||
chooseCompressionMethod(uri, compression_method), http_method);
|
||||
chooseCompressionMethod(uri, compression_method),
|
||||
http_method);
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::make_shared<StorageURLSink>(uri, format_name,
|
||||
format_settings, metadata_snapshot->getSampleBlock(), context,
|
||||
return std::make_shared<StorageURLSink>(
|
||||
uri,
|
||||
format_name,
|
||||
format_settings,
|
||||
metadata_snapshot->getSampleBlock(),
|
||||
context,
|
||||
ConnectionTimeouts::getHTTPTimeouts(context),
|
||||
chooseCompressionMethod(uri, compression_method), http_method);
|
||||
chooseCompressionMethod(uri, compression_method),
|
||||
http_method);
|
||||
}
|
||||
}
|
||||
|
||||
@ -659,8 +816,19 @@ StorageURL::StorageURL(
|
||||
const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_,
|
||||
const String & http_method_,
|
||||
ASTPtr partition_by_)
|
||||
: IStorageURLBase(uri_, context_, table_id_, format_name_, format_settings_,
|
||||
columns_, constraints_, comment, compression_method_, headers_, http_method_, partition_by_)
|
||||
: IStorageURLBase(
|
||||
uri_,
|
||||
context_,
|
||||
table_id_,
|
||||
format_name_,
|
||||
format_settings_,
|
||||
columns_,
|
||||
constraints_,
|
||||
comment,
|
||||
compression_method_,
|
||||
headers_,
|
||||
http_method_,
|
||||
partition_by_)
|
||||
{
|
||||
context_->getRemoteHostFilter().checkURL(Poco::URI(uri));
|
||||
}
|
||||
@ -711,8 +879,7 @@ FormatSettings StorageURL::getFormatSettingsFromArgs(const StorageFactory::Argum
|
||||
// Apply changes from SETTINGS clause, with validation.
|
||||
user_format_settings.applyChanges(args.storage_def->settings->changes);
|
||||
|
||||
format_settings = getFormatSettings(args.getContext(),
|
||||
user_format_settings);
|
||||
format_settings = getFormatSettings(args.getContext(), user_format_settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -731,12 +898,12 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex
|
||||
auto [common_configuration, storage_specific_args] = named_collection.value();
|
||||
configuration.set(common_configuration);
|
||||
|
||||
if (!configuration.http_method.empty()
|
||||
&& configuration.http_method != Poco::Net::HTTPRequest::HTTP_POST
|
||||
if (!configuration.http_method.empty() && configuration.http_method != Poco::Net::HTTPRequest::HTTP_POST
|
||||
&& configuration.http_method != Poco::Net::HTTPRequest::HTTP_PUT)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Http method can be POST or PUT (current: {}). For insert default is POST, for select GET",
|
||||
configuration.http_method);
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Http method can be POST or PUT (current: {}). For insert default is POST, for select GET",
|
||||
configuration.http_method);
|
||||
|
||||
if (!storage_specific_args.empty())
|
||||
{
|
||||
@ -754,7 +921,8 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex
|
||||
{
|
||||
if (args.empty() || args.size() > 3)
|
||||
throw Exception(
|
||||
"Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.",
|
||||
"Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional "
|
||||
"compression method.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
for (auto & arg : args)
|
||||
@ -776,43 +944,45 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex
|
||||
|
||||
void registerStorageURL(StorageFactory & factory)
|
||||
{
|
||||
factory.registerStorage("URL", [](const StorageFactory::Arguments & args)
|
||||
{
|
||||
ASTs & engine_args = args.engine_args;
|
||||
auto configuration = StorageURL::getConfiguration(engine_args, args.getLocalContext());
|
||||
auto format_settings = StorageURL::getFormatSettingsFromArgs(args);
|
||||
|
||||
ReadWriteBufferFromHTTP::HTTPHeaderEntries headers;
|
||||
for (const auto & [header, value] : configuration.headers)
|
||||
factory.registerStorage(
|
||||
"URL",
|
||||
[](const StorageFactory::Arguments & args)
|
||||
{
|
||||
auto value_literal = value.safeGet<String>();
|
||||
if (header == "Range")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed");
|
||||
headers.emplace_back(std::make_pair(header, value_literal));
|
||||
}
|
||||
ASTs & engine_args = args.engine_args;
|
||||
auto configuration = StorageURL::getConfiguration(engine_args, args.getLocalContext());
|
||||
auto format_settings = StorageURL::getFormatSettingsFromArgs(args);
|
||||
|
||||
ASTPtr partition_by;
|
||||
if (args.storage_def->partition_by)
|
||||
partition_by = args.storage_def->partition_by->clone();
|
||||
ReadWriteBufferFromHTTP::HTTPHeaderEntries headers;
|
||||
for (const auto & [header, value] : configuration.headers)
|
||||
{
|
||||
auto value_literal = value.safeGet<String>();
|
||||
if (header == "Range")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed");
|
||||
headers.emplace_back(std::make_pair(header, value_literal));
|
||||
}
|
||||
|
||||
return StorageURL::create(
|
||||
configuration.url,
|
||||
args.table_id,
|
||||
configuration.format,
|
||||
format_settings,
|
||||
args.columns,
|
||||
args.constraints,
|
||||
args.comment,
|
||||
args.getContext(),
|
||||
configuration.compression_method,
|
||||
headers,
|
||||
configuration.http_method,
|
||||
partition_by);
|
||||
},
|
||||
{
|
||||
.supports_settings = true,
|
||||
.supports_schema_inference = true,
|
||||
.source_access_type = AccessType::URL,
|
||||
});
|
||||
ASTPtr partition_by;
|
||||
if (args.storage_def->partition_by)
|
||||
partition_by = args.storage_def->partition_by->clone();
|
||||
|
||||
return StorageURL::create(
|
||||
configuration.url,
|
||||
args.table_id,
|
||||
configuration.format,
|
||||
format_settings,
|
||||
args.columns,
|
||||
args.constraints,
|
||||
args.comment,
|
||||
args.getContext(),
|
||||
configuration.compression_method,
|
||||
headers,
|
||||
configuration.http_method,
|
||||
partition_by);
|
||||
},
|
||||
{
|
||||
.supports_settings = true,
|
||||
.supports_schema_inference = true,
|
||||
.source_access_type = AccessType::URL,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
@ -64,7 +65,11 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_
|
||||
{"serialization_kind", std::make_shared<DataTypeString>()},
|
||||
{"subcolumns.names", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
||||
{"subcolumns.types", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
||||
{"subcolumns.serializations", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}
|
||||
{"subcolumns.serializations", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
||||
{"subcolumns.bytes_on_disk", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
{"subcolumns.data_compressed_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
{"subcolumns.data_uncompressed_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
{"subcolumns.marks_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
}
|
||||
)
|
||||
{
|
||||
@ -228,13 +233,43 @@ void StorageSystemPartsColumns::processNextStorage(
|
||||
|
||||
Array subcolumn_names;
|
||||
Array subcolumn_types;
|
||||
Array subcolumn_sers;
|
||||
Array subcolumn_serializations;
|
||||
Array subcolumn_bytes_on_disk;
|
||||
Array subcolumn_data_compressed_bytes;
|
||||
Array subcolumn_data_uncompressed_bytes;
|
||||
Array subcolumn_marks_bytes;
|
||||
|
||||
IDataType::forEachSubcolumn([&](const auto &, const auto & name, const auto & data)
|
||||
IDataType::forEachSubcolumn([&](const auto & subpath, const auto & name, const auto & data)
|
||||
{
|
||||
/// We count only final subcolumns, which are represented by files on disk
|
||||
                /// and skip intermediate subcolumns of types Tuple and Nested.
|
||||
if (isTuple(data.type) || isNested(data.type))
|
||||
return;
|
||||
|
||||
subcolumn_names.push_back(name);
|
||||
subcolumn_types.push_back(data.type->getName());
|
||||
subcolumn_sers.push_back(ISerialization::kindToString(data.serialization->getKind()));
|
||||
subcolumn_serializations.push_back(ISerialization::kindToString(data.serialization->getKind()));
|
||||
|
||||
ColumnSize size;
|
||||
NameAndTypePair subcolumn(column.name, name, column.type, data.type);
|
||||
String file_name = ISerialization::getFileNameForStream(subcolumn, subpath);
|
||||
|
||||
auto bin_checksum = part->checksums.files.find(file_name + ".bin");
|
||||
if (bin_checksum != part->checksums.files.end())
|
||||
{
|
||||
size.data_compressed += bin_checksum->second.file_size;
|
||||
size.data_uncompressed += bin_checksum->second.uncompressed_size;
|
||||
}
|
||||
|
||||
auto mrk_checksum = part->checksums.files.find(file_name + part->index_granularity_info.marks_file_extension);
|
||||
if (mrk_checksum != part->checksums.files.end())
|
||||
size.marks += mrk_checksum->second.file_size;
|
||||
|
||||
subcolumn_bytes_on_disk.push_back(size.data_compressed + size.marks);
|
||||
subcolumn_data_compressed_bytes.push_back(size.data_compressed);
|
||||
subcolumn_data_uncompressed_bytes.push_back(size.data_uncompressed);
|
||||
subcolumn_marks_bytes.push_back(size.marks);
|
||||
|
||||
}, { serialization, column.type, nullptr, nullptr });
|
||||
|
||||
if (columns_mask[src_index++])
|
||||
@ -242,7 +277,15 @@ void StorageSystemPartsColumns::processNextStorage(
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_types);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_sers);
|
||||
columns[res_index++]->insert(subcolumn_serializations);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_bytes_on_disk);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_data_compressed_bytes);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_data_uncompressed_bytes);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_marks_bytes);
|
||||
|
||||
if (has_state_column)
|
||||
columns[res_index++]->insert(part->stateString());
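The new subcolumn size columns are filled from the part's checksum map: for each final subcolumn stream, compressed and uncompressed sizes come from the .bin entry, the marks size from the marks-file entry, and bytes_on_disk is the sum of compressed data and marks. A small Python sketch of that accounting; the stream name, file extensions and checksum layout here are simplified assumptions:

# Simplified sketch mirroring the lookups above; names and layout are assumptions.
def subcolumn_sizes(checksums: dict, stream_name: str, marks_extension: str = ".mrk2") -> dict:
    bin_entry = checksums.get(stream_name + ".bin", {})
    mrk_entry = checksums.get(stream_name + marks_extension, {})
    compressed = bin_entry.get("file_size", 0)
    uncompressed = bin_entry.get("uncompressed_size", 0)
    marks = mrk_entry.get("file_size", 0)
    return {
        "bytes_on_disk": compressed + marks,
        "data_compressed_bytes": compressed,
        "data_uncompressed_bytes": uncompressed,
        "marks_bytes": marks,
    }


print(subcolumn_sizes(
    {"col.size0.bin": {"file_size": 120, "uncompressed_size": 400},
     "col.size0.mrk2": {"file_size": 16}},
    "col.size0",
))  # {'bytes_on_disk': 136, 'data_compressed_bytes': 120, 'data_uncompressed_bytes': 400, 'marks_bytes': 16}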
|
||||
|
@ -32,6 +32,8 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
|
||||
|
||||
auto & ast_create = ast->as<ASTCreateQuery &>();
|
||||
assert(view_name == ast_create.getTable());
|
||||
ast_create.attach = false;
|
||||
ast_create.setDatabase(database.getDatabaseName());
|
||||
if (is_uppercase)
|
||||
ast_create.setTable(Poco::toUpper(view_name));
|
||||
|
||||
|
@ -9,7 +9,9 @@ import re
|
||||
|
||||
parts = {}
|
||||
for s in sys.stdin.read().split():
|
||||
m = re.match('^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$', s)
|
||||
m = re.match(
|
||||
"^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", s
|
||||
)
|
||||
if m == None:
|
||||
continue
|
||||
m1 = m.group(1)
|
||||
@ -18,7 +20,7 @@ for s in sys.stdin.read().split():
|
||||
i2 = int(m.group(4))
|
||||
l = int(m.group(5))
|
||||
if m1 != m2:
|
||||
raise Exception('not in single month: ' + s)
|
||||
raise Exception("not in single month: " + s)
|
||||
if m1 not in parts:
|
||||
parts[m1] = []
|
||||
parts[m1].append((i1, i2, l, s))
|
||||
@ -27,13 +29,13 @@ for m, ps in sorted(parts.items()):
|
||||
ps.sort(key=lambda i1_i2_l_s: (i1_i2_l_s[0], -i1_i2_l_s[1], -i1_i2_l_s[2]))
|
||||
(x2, y2, l2, s2) = (-1, -1, -1, -1)
|
||||
for x1, y1, l1, s1 in ps:
|
||||
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
|
||||
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
|
||||
pass
|
||||
elif x1 > y2: # 1 is to the right of 2
|
||||
elif x1 > y2: # 1 is to the right of 2
|
||||
if x1 != y2 + 1 and y2 != -1:
|
||||
print() # to see the missing numbers
|
||||
print() # to see the missing numbers
|
||||
(x2, y2, l2, s2) = (x1, y1, l1, s1)
|
||||
print(s1)
|
||||
else:
|
||||
raise Exception('invalid parts intersection: ' + s1 + ' and ' + s2)
|
||||
raise Exception("invalid parts intersection: " + s1 + " and " + s2)
|
||||
print()
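The checker above reads MergeTree part names of the form <min_date>_<max_date>_<min_block>_<max_block>_<level>, groups them by month (the first six digits of each date), sorts so that covering parts come first, and then verifies that any two parts in a month either nest (one contains the other at a lower level) or are disjoint. A quick illustration of the name shape the regex accepts (the name itself is made up):

import re

name = "20220301_20220315_1_5_2"  # min date, max date, min block, max block, level
m = re.match(r"^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", name)
print(m.groups())  # ('202203', '202203', '1', '5', '2') -- both dates fall in 2022-03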
|
||||
|
@ -7,7 +7,7 @@ else ()
|
||||
include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake)
|
||||
endif ()
|
||||
|
||||
option (ENABLE_CLICKHOUSE_TEST "Install clickhouse-test script and relevant tests scenarios" ON)
|
||||
option (ENABLE_CLICKHOUSE_TEST "Install clickhouse-test script and relevant tests scenarios" OFF)
|
||||
|
||||
if (ENABLE_CLICKHOUSE_TEST)
|
||||
install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
|
@ -7,8 +7,14 @@ import sys
|
||||
|
||||
from github import Github
|
||||
|
||||
from env_helper import GITHUB_REPOSITORY, TEMP_PATH, REPO_COPY, REPORTS_PATH, GITHUB_SERVER_URL, \
|
||||
GITHUB_RUN_ID
|
||||
from env_helper import (
|
||||
GITHUB_REPOSITORY,
|
||||
TEMP_PATH,
|
||||
REPO_COPY,
|
||||
REPORTS_PATH,
|
||||
GITHUB_SERVER_URL,
|
||||
GITHUB_RUN_ID,
|
||||
)
|
||||
from s3_helper import S3Helper
|
||||
from get_robot_token import get_best_robot_token
|
||||
from pr_info import PRInfo
|
||||
@ -19,19 +25,24 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickh
|
||||
from stopwatch import Stopwatch
|
||||
from rerun_helper import RerunHelper
|
||||
|
||||
IMAGE_NAME = 'clickhouse/fuzzer'
|
||||
IMAGE_NAME = "clickhouse/fuzzer"
|
||||
|
||||
|
||||
def get_run_command(pr_number, sha, download_url, workspace_path, image):
|
||||
return f'docker run --network=host --volume={workspace_path}:/workspace ' \
|
||||
'--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE ' \
|
||||
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\
|
||||
f'{image}'
|
||||
return (
|
||||
f"docker run --network=host --volume={workspace_path}:/workspace "
|
||||
"--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
|
||||
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '
|
||||
f"{image}"
|
||||
)
|
||||
|
||||
|
||||
def get_commit(gh, commit_sha):
|
||||
repo = gh.get_repo(GITHUB_REPOSITORY)
|
||||
commit = repo.get_commit(commit_sha)
|
||||
return commit
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
@ -64,7 +75,7 @@ if __name__ == "__main__":
|
||||
raise Exception("No build URLs found")
|
||||
|
||||
for url in urls:
|
||||
if url.endswith('/clickhouse'):
|
||||
if url.endswith("/clickhouse"):
|
||||
build_url = url
|
||||
break
|
||||
else:
|
||||
@ -72,16 +83,20 @@ if __name__ == "__main__":
|
||||
|
||||
logging.info("Got build url %s", build_url)
|
||||
|
||||
workspace_path = os.path.join(temp_path, 'workspace')
|
||||
workspace_path = os.path.join(temp_path, "workspace")
|
||||
if not os.path.exists(workspace_path):
|
||||
os.makedirs(workspace_path)
|
||||
|
||||
run_command = get_run_command(pr_info.number, pr_info.sha, build_url, workspace_path, docker_image)
|
||||
run_command = get_run_command(
|
||||
pr_info.number, pr_info.sha, build_url, workspace_path, docker_image
|
||||
)
|
||||
logging.info("Going to run %s", run_command)
|
||||
|
||||
run_log_path = os.path.join(temp_path, "runlog.log")
|
||||
with open(run_log_path, 'w', encoding='utf-8') as log:
|
||||
with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process:
|
||||
with open(run_log_path, "w", encoding="utf-8") as log:
|
||||
with subprocess.Popen(
|
||||
run_command, shell=True, stderr=log, stdout=log
|
||||
) as process:
|
||||
retcode = process.wait()
|
||||
if retcode == 0:
|
||||
logging.info("Run successfully")
|
||||
@ -90,56 +105,70 @@ if __name__ == "__main__":
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
|
||||
check_name_lower = check_name.lower().replace('(', '').replace(')', '').replace(' ', '')
|
||||
s3_prefix = f'{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/'
|
||||
check_name_lower = (
|
||||
check_name.lower().replace("(", "").replace(")", "").replace(" ", "")
|
||||
)
|
||||
s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/"
|
||||
paths = {
|
||||
'runlog.log': run_log_path,
|
||||
'main.log': os.path.join(workspace_path, 'main.log'),
|
||||
'server.log': os.path.join(workspace_path, 'server.log'),
|
||||
'fuzzer.log': os.path.join(workspace_path, 'fuzzer.log'),
|
||||
'report.html': os.path.join(workspace_path, 'report.html'),
|
||||
'core.gz': os.path.join(workspace_path, 'core.gz'),
|
||||
"runlog.log": run_log_path,
|
||||
"main.log": os.path.join(workspace_path, "main.log"),
|
||||
"server.log": os.path.join(workspace_path, "server.log"),
|
||||
"fuzzer.log": os.path.join(workspace_path, "fuzzer.log"),
|
||||
"report.html": os.path.join(workspace_path, "report.html"),
|
||||
"core.gz": os.path.join(workspace_path, "core.gz"),
|
||||
}
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
for f in paths:
|
||||
try:
|
||||
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + '/' + f)
|
||||
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + "/" + f)
|
||||
except Exception as ex:
|
||||
logging.info("Exception uploading file %s text %s", f, ex)
|
||||
paths[f] = ''
|
||||
paths[f] = ""
|
||||
|
||||
report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
|
||||
if paths['runlog.log']:
|
||||
report_url = paths['runlog.log']
|
||||
if paths['main.log']:
|
||||
report_url = paths['main.log']
|
||||
if paths['server.log']:
|
||||
report_url = paths['server.log']
|
||||
if paths['fuzzer.log']:
|
||||
report_url = paths['fuzzer.log']
|
||||
if paths['report.html']:
|
||||
report_url = paths['report.html']
|
||||
if paths["runlog.log"]:
|
||||
report_url = paths["runlog.log"]
|
||||
if paths["main.log"]:
|
||||
report_url = paths["main.log"]
|
||||
if paths["server.log"]:
|
||||
report_url = paths["server.log"]
|
||||
if paths["fuzzer.log"]:
|
||||
report_url = paths["fuzzer.log"]
|
||||
if paths["report.html"]:
|
||||
report_url = paths["report.html"]
|
||||
|
||||
# Try to get status message saved by the fuzzer
|
||||
try:
|
||||
with open(os.path.join(workspace_path, 'status.txt'), 'r', encoding='utf-8') as status_f:
|
||||
status = status_f.readline().rstrip('\n')
|
||||
with open(
|
||||
os.path.join(workspace_path, "status.txt"), "r", encoding="utf-8"
|
||||
) as status_f:
|
||||
status = status_f.readline().rstrip("\n")
|
||||
|
||||
with open(os.path.join(workspace_path, 'description.txt'), 'r', encoding='utf-8') as desc_f:
|
||||
description = desc_f.readline().rstrip('\n')[:140]
|
||||
with open(
|
||||
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
|
||||
) as desc_f:
|
||||
description = desc_f.readline().rstrip("\n")[:140]
|
||||
except:
|
||||
status = 'failure'
|
||||
description = 'Task failed: $?=' + str(retcode)
|
||||
status = "failure"
|
||||
description = "Task failed: $?=" + str(retcode)
|
||||
|
||||
if 'fail' in status:
|
||||
test_result = [(description, 'FAIL')]
|
||||
if "fail" in status:
|
||||
test_result = [(description, "FAIL")]
|
||||
else:
|
||||
test_result = [(description, 'OK')]
|
||||
test_result = [(description, "OK")]
|
||||
|
||||
ch_helper = ClickHouseHelper()
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_result,
|
||||
status,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
check_name,
|
||||
)
|
||||
|
||||
logging.info("Result: '%s', '%s', '%s'", status, description, report_url)
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
|
48
tests/ci/bugfix_validate_check.py
Normal file
48
tests/ci/bugfix_validate_check.py
Normal file
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import itertools
|
||||
import os
|
||||
import sys
|
||||
|
||||
NO_CHANGES_MSG = "Nothing to run"
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("report1")
|
||||
parser.add_argument("report2")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def post_commit_status_from_file(file_path):
|
||||
res = []
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
fin = csv.reader(f, delimiter="\t")
|
||||
res = list(itertools.islice(fin, 1))
|
||||
if len(res) < 1:
|
||||
raise Exception(f'Can\'t read from "{file_path}"')
|
||||
if len(res[0]) != 3:
|
||||
raise Exception(f'Can\'t read from "{file_path}"')
|
||||
return res[0]
|
||||
|
||||
|
||||
def process_results(file_path):
|
||||
state, report_url, description = post_commit_status_from_file(file_path)
|
||||
prefix = os.path.basename(os.path.dirname(file_path))
|
||||
print(
|
||||
f"::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}"
|
||||
)
|
||||
return state == "success"
|
||||
|
||||
|
||||
def main(args):
|
||||
is_ok = False
|
||||
is_ok = process_results(args.report1) or is_ok
|
||||
is_ok = process_results(args.report2) or is_ok
|
||||
sys.exit(0 if is_ok else 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(parse_args())
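The new check consumes two status files, one per sub-check, and exits 0 as soon as either of them reports success. Each file must contain a single tab-separated row, which process_results() unpacks as state, report URL and description. A sketch of producing such a file; the path and values are illustrative, and the field order is inferred from the parsing code above:

# Illustrative only: field order (state, report_url, description) inferred from
# post_commit_status_from_file()/process_results() above; path and values made up.
with open("post_commit_status.tsv", "w", encoding="utf-8") as f:
    f.write("success\thttps://example.invalid/report.html\tAll checks passed\n")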
|
@ -21,6 +21,8 @@ from ci_config import CI_CONFIG, BuildConfig
from docker_pull_helper import get_image_with_version
from tee_popen import TeePopen

IMAGE_NAME = "clickhouse/binary-builder"


def get_build_config(build_check_name: str, build_name: str) -> BuildConfig:
    if build_check_name == "ClickHouse build check (actions)":
@ -52,7 +54,6 @@ def get_packager_cmd(
    build_version: str,
    image_version: str,
    ccache_path: str,
    pr_info: PRInfo,
) -> str:
    package_type = build_config["package_type"]
    comp = build_config["compiler"]
@ -73,9 +74,8 @@ def get_packager_cmd(
    cmd += " --cache=ccache"
    cmd += " --ccache_dir={}".format(ccache_path)

    if "alien_pkgs" in build_config and build_config["alien_pkgs"]:
        if pr_info.number == 0 or "release" in pr_info.labels:
            cmd += " --alien-pkgs rpm tgz"
    if "additional_pkgs" in build_config and build_config["additional_pkgs"]:
        cmd += " --additional-pkgs"

    cmd += " --docker-image-version={}".format(image_version)
    cmd += " --version={}".format(build_version)
@ -86,13 +86,6 @@ def get_packager_cmd(
    return cmd


def get_image_name(build_config: BuildConfig) -> str:
    if build_config["package_type"] != "deb":
        return "clickhouse/binary-builder"
    else:
        return "clickhouse/deb-builder"


def build_clickhouse(
    packager_cmd: str, logs_path: str, build_output_path: str
) -> Tuple[str, bool]:
@ -256,8 +249,7 @@ def main():
    else:
        sys.exit(0)

    image_name = get_image_name(build_config)
    docker_image = get_image_with_version(IMAGES_PATH, image_name)
    docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME)
    image_version = docker_image.version

    logging.info("Got version from repo %s", version.string)
@ -298,7 +290,6 @@ def main():
        version.string,
        image_version,
        ccache_path,
        pr_info,
    )
    logging.info("Going to run packager with %s", packager_cmd)
@ -6,7 +6,13 @@ import os
import sys
from github import Github

from env_helper import REPORTS_PATH, TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID
from env_helper import (
    REPORTS_PATH,
    TEMP_PATH,
    GITHUB_REPOSITORY,
    GITHUB_SERVER_URL,
    GITHUB_RUN_ID,
)
from report import create_build_html_report
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
@ -15,8 +21,19 @@ from commit_status_helper import get_commit
from ci_config import CI_CONFIG
from rerun_helper import RerunHelper

class BuildResult():
    def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage):

class BuildResult:
    def __init__(
        self,
        compiler,
        build_type,
        sanitizer,
        bundled,
        splitted,
        status,
        elapsed_seconds,
        with_coverage,
    ):
        self.compiler = compiler
        self.build_type = build_type
        self.sanitizer = sanitizer
@ -26,54 +43,72 @@ class BuildResult():
        self.elapsed_seconds = elapsed_seconds
        self.with_coverage = with_coverage


def group_by_artifacts(build_urls):
    groups = {'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'performance': []}
    groups = {
        "apk": [],
        "deb": [],
        "binary": [],
        "tgz": [],
        "rpm": [],
        "performance": [],
    }
    for url in build_urls:
        if url.endswith('performance.tgz'):
            groups['performance'].append(url)
        elif url.endswith('.deb') or url.endswith('.buildinfo') or url.endswith('.changes') or url.endswith('.tar.gz'):
            groups['deb'].append(url)
        elif url.endswith('.rpm'):
            groups['rpm'].append(url)
        elif url.endswith('.tgz'):
            groups['tgz'].append(url)
        if url.endswith("performance.tgz"):
            groups["performance"].append(url)
        elif (
            url.endswith(".deb")
            or url.endswith(".buildinfo")
            or url.endswith(".changes")
            or url.endswith(".tar.gz")
        ):
            groups["deb"].append(url)
        elif url.endswith(".apk"):
            groups["apk"].append(url)
        elif url.endswith(".rpm"):
            groups["rpm"].append(url)
        elif url.endswith(".tgz"):
            groups["tgz"].append(url)
        else:
            groups['binary'].append(url)
            groups["binary"].append(url)
    return groups
def process_report(build_report):
    build_config = build_report['build_config']
    build_config = build_report["build_config"]
    build_result = BuildResult(
        compiler=build_config['compiler'],
        build_type=build_config['build_type'],
        sanitizer=build_config['sanitizer'],
        bundled=build_config['bundled'],
        splitted=build_config['splitted'],
        status="success" if build_report['status'] else "failure",
        elapsed_seconds=build_report['elapsed_seconds'],
        with_coverage=False
        compiler=build_config["compiler"],
        build_type=build_config["build_type"],
        sanitizer=build_config["sanitizer"],
        bundled=build_config["bundled"],
        splitted=build_config["splitted"],
        status="success" if build_report["status"] else "failure",
        elapsed_seconds=build_report["elapsed_seconds"],
        with_coverage=False,
    )
    build_results = []
    build_urls = []
    build_logs_urls = []
    urls_groups = group_by_artifacts(build_report['build_urls'])
    urls_groups = group_by_artifacts(build_report["build_urls"])
    found_group = False
    for _, group_urls in urls_groups.items():
        if group_urls:
            build_results.append(build_result)
            build_urls.append(group_urls)
            build_logs_urls.append(build_report['log_url'])
            build_logs_urls.append(build_report["log_url"])
            found_group = True

    if not found_group:
        build_results.append(build_result)
        build_urls.append([""])
        build_logs_urls.append(build_report['log_url'])
        build_logs_urls.append(build_report["log_url"])

    return build_results, build_urls, build_logs_urls
def get_build_name_from_file_name(file_name):
    return file_name.replace('build_urls_', '').replace('.json', '')
    return file_name.replace("build_urls_", "").replace(".json", "")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
@ -99,17 +134,25 @@ if __name__ == "__main__":
    build_reports_map = {}
    for root, dirs, files in os.walk(reports_path):
        for f in files:
            if f.startswith("build_urls_") and f.endswith('.json'):
            if f.startswith("build_urls_") and f.endswith(".json"):
                logging.info("Found build report json %s", f)
                build_name = get_build_name_from_file_name(f)
                if build_name in reports_order:
                    with open(os.path.join(root, f), 'r') as file_handler:
                    with open(os.path.join(root, f), "r") as file_handler:
                        build_report = json.load(file_handler)
                        build_reports_map[build_name] = build_report
                else:
                    logging.info("Skipping report %s for build %s, it's not in our reports list", f, build_name)
                    logging.info(
                        "Skipping report %s for build %s, it's not in our reports list",
                        f,
                        build_name,
                    )

    build_reports = [build_reports_map[build_name] for build_name in reports_order if build_name in build_reports_map]
    build_reports = [
        build_reports_map[build_name]
        for build_name in reports_order
        if build_name in build_reports_map
    ]

    build_results = []
    build_artifacts = []
@ -127,7 +170,7 @@ if __name__ == "__main__":
        logging.info("No builds, failing check")
        sys.exit(1)

    s3_helper = S3Helper('https://s3.amazonaws.com')
    s3_helper = S3Helper("https://s3.amazonaws.com")

    pr_info = PRInfo()

@ -137,7 +180,9 @@ if __name__ == "__main__":
        branch_name = "PR #{}".format(pr_info.number)
        branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}"
    commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}"
    task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
    task_url = (
        f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
    )
    report = create_build_html_report(
        build_check_name,
        build_results,
@ -146,18 +191,22 @@ if __name__ == "__main__":
        task_url,
        branch_url,
        branch_name,
        commit_url
        commit_url,
    )

    report_path = os.path.join(temp_path, 'report.html')
    with open(report_path, 'w') as f:
    report_path = os.path.join(temp_path, "report.html")
    with open(report_path, "w") as f:
        f.write(report)

    logging.info("Going to upload prepared report")
    context_name_for_path = build_check_name.lower().replace(' ', '_')
    s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
    context_name_for_path = build_check_name.lower().replace(" ", "_")
    s3_path_prefix = (
        str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
    )

    url = s3_helper.upload_build_file_to_s3(report_path, s3_path_prefix + "/report.html")
    url = s3_helper.upload_build_file_to_s3(
        report_path, s3_path_prefix + "/report.html"
    )
    logging.info("Report url %s", url)

    total_builds = len(build_results)
@ -180,4 +229,9 @@ if __name__ == "__main__":
    print("::notice ::Report url: {}".format(url))

    commit = get_commit(gh, pr_info.sha)
    commit.create_status(context=build_check_name, description=description, state=summary_status, target_url=url)
    commit.create_status(
        context=build_check_name,
        description=description,
        state=summary_status,
        target_url=url,
    )
@ -13,16 +13,19 @@ from compress_files import decompress_fast, compress_fast
|
||||
|
||||
DOWNLOAD_RETRIES_COUNT = 5
|
||||
|
||||
|
||||
def dowload_file_with_progress(url, path):
|
||||
logging.info("Downloading from %s to temp path %s", url, path)
|
||||
for i in range(DOWNLOAD_RETRIES_COUNT):
|
||||
try:
|
||||
with open(path, 'wb') as f:
|
||||
with open(path, "wb") as f:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
total_length = response.headers.get('content-length')
|
||||
total_length = response.headers.get("content-length")
|
||||
if total_length is None or int(total_length) == 0:
|
||||
logging.info("No content-length, will download file without progress")
|
||||
logging.info(
|
||||
"No content-length, will download file without progress"
|
||||
)
|
||||
f.write(response.content)
|
||||
else:
|
||||
dl = 0
|
||||
@ -34,8 +37,8 @@ def dowload_file_with_progress(url, path):
|
||||
if sys.stdout.isatty():
|
||||
done = int(50 * dl / total_length)
|
||||
percent = int(100 * float(dl) / total_length)
|
||||
eq_str = '=' * done
|
||||
space_str = ' ' * (50 - done)
|
||||
eq_str = "=" * done
|
||||
space_str = " " * (50 - done)
|
||||
sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%")
|
||||
sys.stdout.flush()
|
||||
break
|
||||
@ -52,7 +55,9 @@ def dowload_file_with_progress(url, path):
|
||||
logging.info("Downloading finished")
|
||||
|
||||
|
||||
def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
|
||||
def get_ccache_if_not_exists(
|
||||
path_to_ccache_dir, s3_helper, current_pr_number, temp_path
|
||||
):
|
||||
ccache_name = os.path.basename(path_to_ccache_dir)
|
||||
cache_found = False
|
||||
prs_to_check = [current_pr_number]
|
||||
@ -93,13 +98,16 @@ def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, t
|
||||
else:
|
||||
logging.info("ccache downloaded")
|
||||
|
||||
|
||||
def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
|
||||
logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number)
|
||||
ccache_name = os.path.basename(path_to_ccache_dir)
|
||||
compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.gz")
|
||||
compress_fast(path_to_ccache_dir, compressed_cache_path)
|
||||
|
||||
s3_path = str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
|
||||
s3_path = (
|
||||
str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
|
||||
)
|
||||
logging.info("Will upload %s to path %s", compressed_cache_path, s3_path)
|
||||
s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path)
|
||||
logging.info("Upload finished")
|
||||
|
@ -20,21 +20,29 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
|
||||
sys.path.append(os.path.join(repo_path, "utils/github"))
|
||||
|
||||
|
||||
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
|
||||
token = get_parameter_from_ssm("github_robot_token_1")
|
||||
|
||||
bp = Backport(token, os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"), os.environ.get("REPO_TEAM"))
|
||||
bp = Backport(
|
||||
token,
|
||||
os.environ.get("REPO_OWNER"),
|
||||
os.environ.get("REPO_NAME"),
|
||||
os.environ.get("REPO_TEAM"),
|
||||
)
|
||||
|
||||
def cherrypick_run(token, pr, branch):
|
||||
return CherryPick(token,
|
||||
os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"),
|
||||
os.environ.get("REPO_TEAM"), pr, branch
|
||||
).execute(repo_path, False)
|
||||
return CherryPick(
|
||||
token,
|
||||
os.environ.get("REPO_OWNER"),
|
||||
os.environ.get("REPO_NAME"),
|
||||
os.environ.get("REPO_TEAM"),
|
||||
pr,
|
||||
branch,
|
||||
).execute(repo_path, False)
|
||||
|
||||
try:
|
||||
bp.execute(repo_path, 'origin', None, cherrypick_run)
|
||||
bp.execute(repo_path, "origin", None, cherrypick_run)
|
||||
except subprocess.CalledProcessError as e:
|
||||
logging.error(e.output)
|
||||
|
@ -17,7 +17,9 @@ import sys
|
||||
|
||||
class Backport:
|
||||
def __init__(self, token, owner, name, team):
|
||||
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7)
|
||||
self._gh = RemoteRepo(
|
||||
token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
|
||||
)
|
||||
self._token = token
|
||||
self.default_branch_name = self._gh.default_branch
|
||||
self.ssh_url = self._gh.ssh_url
|
||||
@ -28,7 +30,7 @@ class Backport:
|
||||
def getBranchesWithRelease(self):
|
||||
branches = set()
|
||||
for pull_request in self._gh.find_pull_requests("release"):
|
||||
branches.add(pull_request['headRefName'])
|
||||
branches.add(pull_request["headRefName"])
|
||||
return branches
|
||||
|
||||
def execute(self, repo, upstream, until_commit, run_cherrypick):
|
||||
@ -44,11 +46,11 @@ class Backport:
|
||||
branches.append(branch)
|
||||
|
||||
if not branches:
|
||||
logging.info('No release branches found!')
|
||||
logging.info("No release branches found!")
|
||||
return
|
||||
|
||||
for branch in branches:
|
||||
logging.info('Found release branch: %s', branch[0])
|
||||
logging.info("Found release branch: %s", branch[0])
|
||||
|
||||
if not until_commit:
|
||||
until_commit = branches[0][1]
|
||||
@ -56,73 +58,128 @@ class Backport:
|
||||
|
||||
backport_map = {}
|
||||
|
||||
RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$')
|
||||
RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')
|
||||
RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$')
|
||||
RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
|
||||
RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
|
||||
RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")
|
||||
|
||||
# pull-requests are sorted by ancestry from the most recent.
|
||||
for pr in pull_requests:
|
||||
while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']):
|
||||
logging.info("PR #{} is already inside {}. Dropping this branch for further PRs".format(pr['number'], branches[-1][0]))
|
||||
while repo.comparator(branches[-1][1]) >= repo.comparator(
|
||||
pr["mergeCommit"]["oid"]
|
||||
):
|
||||
logging.info(
|
||||
"PR #{} is already inside {}. Dropping this branch for further PRs".format(
|
||||
pr["number"], branches[-1][0]
|
||||
)
|
||||
)
|
||||
branches.pop()
|
||||
|
||||
logging.info("Processing PR #{}".format(pr['number']))
|
||||
logging.info("Processing PR #{}".format(pr["number"]))
|
||||
|
||||
assert len(branches)
|
||||
|
||||
branch_set = set([branch[0] for branch in branches])
|
||||
|
||||
# First pass. Find all must-backports
|
||||
for label in pr['labels']['nodes']:
|
||||
if label['name'] == 'pr-must-backport':
|
||||
backport_map[pr['number']] = branch_set.copy()
|
||||
for label in pr["labels"]["nodes"]:
|
||||
if label["name"] == "pr-must-backport":
|
||||
backport_map[pr["number"]] = branch_set.copy()
|
||||
continue
|
||||
matched = RE_MUST_BACKPORT.match(label['name'])
|
||||
matched = RE_MUST_BACKPORT.match(label["name"])
|
||||
if matched:
|
||||
if pr['number'] not in backport_map:
|
||||
backport_map[pr['number']] = set()
|
||||
backport_map[pr['number']].add(matched.group(1))
|
||||
if pr["number"] not in backport_map:
|
||||
backport_map[pr["number"]] = set()
|
||||
backport_map[pr["number"]].add(matched.group(1))
|
||||
|
||||
# Second pass. Find all no-backports
|
||||
for label in pr['labels']['nodes']:
|
||||
if label['name'] == 'pr-no-backport' and pr['number'] in backport_map:
|
||||
del backport_map[pr['number']]
|
||||
for label in pr["labels"]["nodes"]:
|
||||
if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
|
||||
del backport_map[pr["number"]]
|
||||
break
|
||||
matched_no_backport = RE_NO_BACKPORT.match(label['name'])
|
||||
matched_backported = RE_BACKPORTED.match(label['name'])
|
||||
if matched_no_backport and pr['number'] in backport_map and matched_no_backport.group(1) in backport_map[pr['number']]:
|
||||
backport_map[pr['number']].remove(matched_no_backport.group(1))
|
||||
logging.info('\tskipping %s because of forced no-backport', matched_no_backport.group(1))
|
||||
elif matched_backported and pr['number'] in backport_map and matched_backported.group(1) in backport_map[pr['number']]:
|
||||
backport_map[pr['number']].remove(matched_backported.group(1))
|
||||
logging.info('\tskipping %s because it\'s already backported manually', matched_backported.group(1))
|
||||
matched_no_backport = RE_NO_BACKPORT.match(label["name"])
|
||||
matched_backported = RE_BACKPORTED.match(label["name"])
|
||||
if (
|
||||
matched_no_backport
|
||||
and pr["number"] in backport_map
|
||||
and matched_no_backport.group(1) in backport_map[pr["number"]]
|
||||
):
|
||||
backport_map[pr["number"]].remove(matched_no_backport.group(1))
|
||||
logging.info(
|
||||
"\tskipping %s because of forced no-backport",
|
||||
matched_no_backport.group(1),
|
||||
)
|
||||
elif (
|
||||
matched_backported
|
||||
and pr["number"] in backport_map
|
||||
and matched_backported.group(1) in backport_map[pr["number"]]
|
||||
):
|
||||
backport_map[pr["number"]].remove(matched_backported.group(1))
|
||||
logging.info(
|
||||
"\tskipping %s because it's already backported manually",
|
||||
matched_backported.group(1),
|
||||
)
|
||||
|
||||
for pr, branches in list(backport_map.items()):
|
||||
logging.info('PR #%s needs to be backported to:', pr)
|
||||
logging.info("PR #%s needs to be backported to:", pr)
|
||||
for branch in branches:
|
||||
logging.info('\t%s, and the status is: %s', branch, run_cherrypick(self._token, pr, branch))
|
||||
logging.info(
|
||||
"\t%s, and the status is: %s",
|
||||
branch,
|
||||
run_cherrypick(self._token, pr, branch),
|
||||
)
|
||||
|
||||
# print API costs
|
||||
logging.info('\nGitHub API total costs per query:')
|
||||
logging.info("\nGitHub API total costs per query:")
|
||||
for name, value in list(self._gh.api_costs.items()):
|
||||
logging.info('%s : %s', name, value)
|
||||
logging.info("%s : %s", name, value)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--token', type=str, required=True, help='token for Github access')
|
||||
parser.add_argument('--repo', type=str, required=True, help='path to full repository', metavar='PATH')
|
||||
parser.add_argument('--til', type=str, help='check PRs from HEAD til this commit', metavar='COMMIT')
|
||||
parser.add_argument('--dry-run', action='store_true', help='do not create or merge any PRs', default=False)
|
||||
parser.add_argument('--verbose', '-v', action='store_true', help='more verbose output', default=False)
|
||||
parser.add_argument('--upstream', '-u', type=str, help='remote name of upstream in repository', default='origin')
|
||||
parser.add_argument(
|
||||
"--token", type=str, required=True, help="token for Github access"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--repo",
|
||||
type=str,
|
||||
required=True,
|
||||
help="path to full repository",
|
||||
metavar="PATH",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="do not create or merge any PRs",
|
||||
default=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="store_true",
|
||||
help="more verbose output",
|
||||
default=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--upstream",
|
||||
"-u",
|
||||
type=str,
|
||||
help="remote name of upstream in repository",
|
||||
default="origin",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)
|
||||
logging.basicConfig(
|
||||
format="%(message)s", stream=sys.stdout, level=logging.DEBUG
|
||||
)
|
||||
else:
|
||||
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.INFO)
|
||||
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
|
||||
|
||||
cherrypick_run = lambda token, pr, branch: CherryPick(token, 'ClickHouse', 'ClickHouse', 'core', pr, branch).execute(args.repo, args.dry_run)
|
||||
bp = Backport(args.token, 'ClickHouse', 'ClickHouse', 'core')
|
||||
cherrypick_run = lambda token, pr, branch: CherryPick(
|
||||
token, "ClickHouse", "ClickHouse", "core", pr, branch
|
||||
).execute(args.repo, args.dry_run)
|
||||
bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
|
||||
bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
"""
|
||||
Backports changes from PR to release branch.
|
||||
Requires multiple separate runs as part of the implementation.
|
||||
|
||||
@ -12,7 +12,7 @@ First run should do the following:
|
||||
Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.
|
||||
|
||||
Third run creates PR from backport branch (with merged previous PR) to release branch.
|
||||
'''
|
||||
"""
|
||||
|
||||
try:
|
||||
from clickhouse.utils.github.query import Query as RemoteRepo
|
||||
@ -29,13 +29,13 @@ import sys
|
||||
|
||||
class CherryPick:
|
||||
class Status(Enum):
|
||||
DISCARDED = 'discarded'
|
||||
NOT_INITIATED = 'not started'
|
||||
FIRST_MERGEABLE = 'waiting for 1st stage'
|
||||
FIRST_CONFLICTS = 'conflicts on 1st stage'
|
||||
SECOND_MERGEABLE = 'waiting for 2nd stage'
|
||||
SECOND_CONFLICTS = 'conflicts on 2nd stage'
|
||||
MERGED = 'backported'
|
||||
DISCARDED = "discarded"
|
||||
NOT_INITIATED = "not started"
|
||||
FIRST_MERGEABLE = "waiting for 1st stage"
|
||||
FIRST_CONFLICTS = "conflicts on 1st stage"
|
||||
SECOND_MERGEABLE = "waiting for 2nd stage"
|
||||
SECOND_CONFLICTS = "conflicts on 2nd stage"
|
||||
MERGED = "backported"
|
||||
|
||||
def _run(self, args):
|
||||
out = subprocess.check_output(args).rstrip()
|
||||
@ -50,51 +50,90 @@ class CherryPick:
|
||||
|
||||
# TODO: check if pull-request is merged.
|
||||
|
||||
self.merge_commit_oid = self._pr['mergeCommit']['oid']
|
||||
self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
|
||||
|
||||
self.target_branch = target_branch
|
||||
self.backport_branch = 'backport/{branch}/{pr}'.format(branch=target_branch, pr=pr_number)
|
||||
self.cherrypick_branch = 'cherrypick/{branch}/{oid}'.format(branch=target_branch, oid=self.merge_commit_oid)
|
||||
self.backport_branch = "backport/{branch}/{pr}".format(
|
||||
branch=target_branch, pr=pr_number
|
||||
)
|
||||
self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
|
||||
branch=target_branch, oid=self.merge_commit_oid
|
||||
)
|
||||
|
||||
def getCherryPickPullRequest(self):
|
||||
return self._gh.find_pull_request(base=self.backport_branch, head=self.cherrypick_branch)
|
||||
return self._gh.find_pull_request(
|
||||
base=self.backport_branch, head=self.cherrypick_branch
|
||||
)
|
||||
|
||||
def createCherryPickPullRequest(self, repo_path):
|
||||
DESCRIPTION = (
|
||||
'This pull-request is a first step of an automated backporting.\n'
|
||||
'It contains changes like after calling a local command `git cherry-pick`.\n'
|
||||
'If you intend to continue backporting this changes, then resolve all conflicts if any.\n'
|
||||
'Otherwise, if you do not want to backport them, then just close this pull-request.\n'
|
||||
'\n'
|
||||
'The check results does not matter at this step - you can safely ignore them.\n'
|
||||
'Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n'
|
||||
"This pull-request is a first step of an automated backporting.\n"
|
||||
"It contains changes like after calling a local command `git cherry-pick`.\n"
|
||||
"If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
|
||||
"Otherwise, if you do not want to backport them, then just close this pull-request.\n"
|
||||
"\n"
|
||||
"The check results does not matter at this step - you can safely ignore them.\n"
|
||||
"Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
|
||||
)
|
||||
|
||||
# FIXME: replace with something better than os.system()
|
||||
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@yandex-team.ru', '-c', 'user.name=robot-clickhouse']
|
||||
base_commit_oid = self._pr['mergeCommit']['parents']['nodes'][0]['oid']
|
||||
git_prefix = [
|
||||
"git",
|
||||
"-C",
|
||||
repo_path,
|
||||
"-c",
|
||||
"user.email=robot-clickhouse@yandex-team.ru",
|
||||
"-c",
|
||||
"user.name=robot-clickhouse",
|
||||
]
|
||||
base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]
|
||||
|
||||
# Create separate branch for backporting, and make it look like real cherry-pick.
|
||||
self._run(git_prefix + ['checkout', '-f', self.target_branch])
|
||||
self._run(git_prefix + ['checkout', '-B', self.backport_branch])
|
||||
self._run(git_prefix + ['merge', '-s', 'ours', '--no-edit', base_commit_oid])
|
||||
self._run(git_prefix + ["checkout", "-f", self.target_branch])
|
||||
self._run(git_prefix + ["checkout", "-B", self.backport_branch])
|
||||
self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])
|
||||
|
||||
# Create secondary branch to allow pull request with cherry-picked commit.
|
||||
self._run(git_prefix + ['branch', '-f', self.cherrypick_branch, self.merge_commit_oid])
|
||||
self._run(
|
||||
git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
|
||||
)
|
||||
|
||||
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
|
||||
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.cherrypick_branch)])
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"push",
|
||||
"-f",
|
||||
"origin",
|
||||
"{branch}:{branch}".format(branch=self.backport_branch),
|
||||
]
|
||||
)
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"push",
|
||||
"-f",
|
||||
"origin",
|
||||
"{branch}:{branch}".format(branch=self.cherrypick_branch),
|
||||
]
|
||||
)
|
||||
|
||||
# Create pull-request like a local cherry-pick
|
||||
pr = self._gh.create_pull_request(source=self.cherrypick_branch, target=self.backport_branch,
|
||||
title='Cherry pick #{number} to {target}: {title}'.format(
|
||||
number=self._pr['number'], target=self.target_branch,
|
||||
title=self._pr['title'].replace('"', '\\"')),
|
||||
description='Original pull-request #{}\n\n{}'.format(self._pr['number'], DESCRIPTION))
|
||||
pr = self._gh.create_pull_request(
|
||||
source=self.cherrypick_branch,
|
||||
target=self.backport_branch,
|
||||
title="Cherry pick #{number} to {target}: {title}".format(
|
||||
number=self._pr["number"],
|
||||
target=self.target_branch,
|
||||
title=self._pr["title"].replace('"', '\\"'),
|
||||
),
|
||||
description="Original pull-request #{}\n\n{}".format(
|
||||
self._pr["number"], DESCRIPTION
|
||||
),
|
||||
)
|
||||
|
||||
# FIXME: use `team` to leave a single eligible assignee.
|
||||
self._gh.add_assignee(pr, self._pr['author'])
|
||||
self._gh.add_assignee(pr, self._pr['mergedBy'])
|
||||
self._gh.add_assignee(pr, self._pr["author"])
|
||||
self._gh.add_assignee(pr, self._pr["mergedBy"])
|
||||
|
||||
self._gh.set_label(pr, "do not test")
|
||||
self._gh.set_label(pr, "pr-cherrypick")
|
||||
@ -102,36 +141,76 @@ class CherryPick:
|
||||
return pr
|
||||
|
||||
def mergeCherryPickPullRequest(self, cherrypick_pr):
|
||||
return self._gh.merge_pull_request(cherrypick_pr['id'])
|
||||
return self._gh.merge_pull_request(cherrypick_pr["id"])
|
||||
|
||||
def getBackportPullRequest(self):
|
||||
return self._gh.find_pull_request(base=self.target_branch, head=self.backport_branch)
|
||||
return self._gh.find_pull_request(
|
||||
base=self.target_branch, head=self.backport_branch
|
||||
)
|
||||
|
||||
def createBackportPullRequest(self, cherrypick_pr, repo_path):
|
||||
DESCRIPTION = (
|
||||
'This pull-request is a last step of an automated backporting.\n'
|
||||
'Treat it as a standard pull-request: look at the checks and resolve conflicts.\n'
|
||||
'Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n'
|
||||
"This pull-request is a last step of an automated backporting.\n"
|
||||
"Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
|
||||
"Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
|
||||
)
|
||||
|
||||
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@clickhouse.com', '-c', 'user.name=robot-clickhouse']
|
||||
git_prefix = [
|
||||
"git",
|
||||
"-C",
|
||||
repo_path,
|
||||
"-c",
|
||||
"user.email=robot-clickhouse@clickhouse.com",
|
||||
"-c",
|
||||
"user.name=robot-clickhouse",
|
||||
]
|
||||
|
||||
pr_title = 'Backport #{number} to {target}: {title}'.format(
|
||||
number=self._pr['number'], target=self.target_branch,
|
||||
title=self._pr['title'].replace('"', '\\"'))
|
||||
pr_title = "Backport #{number} to {target}: {title}".format(
|
||||
number=self._pr["number"],
|
||||
target=self.target_branch,
|
||||
title=self._pr["title"].replace('"', '\\"'),
|
||||
)
|
||||
|
||||
self._run(git_prefix + ['checkout', '-f', self.backport_branch])
|
||||
self._run(git_prefix + ['pull', '--ff-only', 'origin', self.backport_branch])
|
||||
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', 'origin/' + self.target_branch, self.backport_branch])])
|
||||
self._run(git_prefix + ['commit', '-a', '--allow-empty', '-m', pr_title])
|
||||
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
|
||||
self._run(git_prefix + ["checkout", "-f", self.backport_branch])
|
||||
self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"reset",
|
||||
"--soft",
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"merge-base",
|
||||
"origin/" + self.target_branch,
|
||||
self.backport_branch,
|
||||
]
|
||||
),
|
||||
]
|
||||
)
|
||||
self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"push",
|
||||
"-f",
|
||||
"origin",
|
||||
"{branch}:{branch}".format(branch=self.backport_branch),
|
||||
]
|
||||
)
|
||||
|
||||
pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch, title=pr_title,
|
||||
description='Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}'.format(self._pr['number'], cherrypick_pr['number'], DESCRIPTION))
|
||||
pr = self._gh.create_pull_request(
|
||||
source=self.backport_branch,
|
||||
target=self.target_branch,
|
||||
title=pr_title,
|
||||
description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
|
||||
self._pr["number"], cherrypick_pr["number"], DESCRIPTION
|
||||
),
|
||||
)
|
||||
|
||||
# FIXME: use `team` to leave a single eligible assignee.
|
||||
self._gh.add_assignee(pr, self._pr['author'])
|
||||
self._gh.add_assignee(pr, self._pr['mergedBy'])
|
||||
self._gh.add_assignee(pr, self._pr["author"])
|
||||
self._gh.add_assignee(pr, self._pr["mergedBy"])
|
||||
|
||||
self._gh.set_label(pr, "pr-backport")
|
||||
|
||||
@ -142,23 +221,43 @@ class CherryPick:
|
||||
if not pr1:
|
||||
if not dry_run:
|
||||
pr1 = self.createCherryPickPullRequest(repo_path)
|
||||
logging.debug('Created PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
|
||||
logging.debug(
|
||||
"Created PR with cherry-pick of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr1["url"],
|
||||
)
|
||||
else:
|
||||
return CherryPick.Status.NOT_INITIATED
|
||||
else:
|
||||
logging.debug('Found PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
|
||||
logging.debug(
|
||||
"Found PR with cherry-pick of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr1["url"],
|
||||
)
|
||||
|
||||
if not pr1['merged'] and pr1['mergeable'] == 'MERGEABLE' and not pr1['closed']:
|
||||
if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
|
||||
if not dry_run:
|
||||
pr1 = self.mergeCherryPickPullRequest(pr1)
|
||||
logging.debug('Merged PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
|
||||
logging.debug(
|
||||
"Merged PR with cherry-pick of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr1["url"],
|
||||
)
|
||||
|
||||
if not pr1['merged']:
|
||||
logging.debug('Waiting for PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
|
||||
if not pr1["merged"]:
|
||||
logging.debug(
|
||||
"Waiting for PR with cherry-pick of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr1["url"],
|
||||
)
|
||||
|
||||
if pr1['closed']:
|
||||
if pr1["closed"]:
|
||||
return CherryPick.Status.DISCARDED
|
||||
elif pr1['mergeable'] == 'CONFLICTING':
|
||||
elif pr1["mergeable"] == "CONFLICTING":
|
||||
return CherryPick.Status.FIRST_CONFLICTS
|
||||
else:
|
||||
return CherryPick.Status.FIRST_MERGEABLE
|
||||
@ -167,31 +266,58 @@ class CherryPick:
|
||||
if not pr2:
|
||||
if not dry_run:
|
||||
pr2 = self.createBackportPullRequest(pr1, repo_path)
|
||||
logging.debug('Created PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url'])
|
||||
logging.debug(
|
||||
"Created PR with backport of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr2["url"],
|
||||
)
|
||||
else:
|
||||
return CherryPick.Status.FIRST_MERGEABLE
|
||||
else:
|
||||
logging.debug('Found PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url'])
|
||||
logging.debug(
|
||||
"Found PR with backport of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr2["url"],
|
||||
)
|
||||
|
||||
if pr2['merged']:
|
||||
if pr2["merged"]:
|
||||
return CherryPick.Status.MERGED
|
||||
elif pr2['closed']:
|
||||
elif pr2["closed"]:
|
||||
return CherryPick.Status.DISCARDED
|
||||
elif pr2['mergeable'] == 'CONFLICTING':
|
||||
elif pr2["mergeable"] == "CONFLICTING":
|
||||
return CherryPick.Status.SECOND_CONFLICTS
|
||||
else:
|
||||
return CherryPick.Status.SECOND_MERGEABLE
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)
|
||||
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--token', '-t', type=str, required=True, help='token for Github access')
|
||||
parser.add_argument('--pr', type=str, required=True, help='PR# to cherry-pick')
|
||||
parser.add_argument('--branch', '-b', type=str, required=True, help='target branch name for cherry-pick')
|
||||
parser.add_argument('--repo', '-r', type=str, required=True, help='path to full repository', metavar='PATH')
|
||||
parser.add_argument(
|
||||
"--token", "-t", type=str, required=True, help="token for Github access"
|
||||
)
|
||||
parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
|
||||
parser.add_argument(
|
||||
"--branch",
|
||||
"-b",
|
||||
type=str,
|
||||
required=True,
|
||||
help="target branch name for cherry-pick",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--repo",
|
||||
"-r",
|
||||
type=str,
|
||||
required=True,
|
||||
help="path to full repository",
|
||||
metavar="PATH",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
cp = CherryPick(args.token, 'ClickHouse', 'ClickHouse', 'core', args.pr, args.branch)
|
||||
cp = CherryPick(
|
||||
args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
|
||||
)
|
||||
cp.execute(args.repo)
|
||||
|
@ -20,13 +20,14 @@ class RepositoryBase:
|
||||
return -1
|
||||
else:
|
||||
return 1
|
||||
|
||||
self.comparator = functools.cmp_to_key(cmp)
|
||||
|
||||
def get_head_commit(self):
|
||||
return self._repo.commit(self._default)
|
||||
|
||||
def iterate(self, begin, end):
|
||||
rev_range = '{}...{}'.format(begin, end)
|
||||
rev_range = "{}...{}".format(begin, end)
|
||||
for commit in self._repo.iter_commits(rev_range, first_parent=True):
|
||||
yield commit
|
||||
|
||||
@ -39,27 +40,35 @@ class Repository(RepositoryBase):
|
||||
self._default = self._remote.refs[default_branch_name]
|
||||
|
||||
def get_release_branches(self):
|
||||
'''
|
||||
"""
|
||||
Returns sorted list of tuples:
|
||||
* remote branch (git.refs.remote.RemoteReference),
|
||||
* base commit (git.Commit),
|
||||
* head (git.Commit)).
|
||||
List is sorted by commits in ascending order.
|
||||
'''
|
||||
"""
|
||||
release_branches = []
|
||||
|
||||
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
|
||||
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
|
||||
|
||||
for branch in [r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)]:
|
||||
for branch in [
|
||||
r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
|
||||
]:
|
||||
base = self._repo.merge_base(self._default, self._repo.commit(branch))
|
||||
if not base:
|
||||
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
|
||||
logging.info(
|
||||
"Branch %s is not based on branch %s. Ignoring.",
|
||||
branch.path,
|
||||
self._default,
|
||||
)
|
||||
elif len(base) > 1:
|
||||
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
|
||||
logging.info(
|
||||
"Branch %s has more than one base commit. Ignoring.", branch.path
|
||||
)
|
||||
else:
|
||||
release_branches.append((os.path.basename(branch.name), base[0]))
|
||||
|
||||
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
|
||||
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
|
||||
|
||||
|
||||
class BareRepository(RepositoryBase):
|
||||
@ -68,24 +77,32 @@ class BareRepository(RepositoryBase):
|
||||
self._default = self._repo.branches[default_branch_name]
|
||||
|
||||
def get_release_branches(self):
|
||||
'''
|
||||
"""
|
||||
Returns sorted list of tuples:
|
||||
* branch (git.refs.head?),
|
||||
* base commit (git.Commit),
|
||||
* head (git.Commit)).
|
||||
List is sorted by commits in ascending order.
|
||||
'''
|
||||
"""
|
||||
release_branches = []
|
||||
|
||||
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/heads/\d+\.\d+$')
|
||||
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")
|
||||
|
||||
for branch in [r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)]:
|
||||
for branch in [
|
||||
r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
|
||||
]:
|
||||
base = self._repo.merge_base(self._default, self._repo.commit(branch))
|
||||
if not base:
|
||||
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
|
||||
logging.info(
|
||||
"Branch %s is not based on branch %s. Ignoring.",
|
||||
branch.path,
|
||||
self._default,
|
||||
)
|
||||
elif len(base) > 1:
|
||||
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
|
||||
logging.info(
|
||||
"Branch %s has more than one base commit. Ignoring.", branch.path
|
||||
)
|
||||
else:
|
||||
release_branches.append((os.path.basename(branch.name), base[0]))
|
||||
|
||||
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
|
||||
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
|
||||
|
@ -1,19 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
class Description:
|
||||
'''Parsed description representation
|
||||
'''
|
||||
"""Parsed description representation"""
|
||||
|
||||
MAP_CATEGORY_TO_LABEL = {
|
||||
'New Feature': 'pr-feature',
|
||||
'Bug Fix': 'pr-bugfix',
|
||||
'Improvement': 'pr-improvement',
|
||||
'Performance Improvement': 'pr-performance',
|
||||
"New Feature": "pr-feature",
|
||||
"Bug Fix": "pr-bugfix",
|
||||
"Improvement": "pr-improvement",
|
||||
"Performance Improvement": "pr-performance",
|
||||
# 'Backward Incompatible Change': doesn't match anything
|
||||
'Build/Testing/Packaging Improvement': 'pr-build',
|
||||
'Non-significant (changelog entry is not needed)': 'pr-non-significant',
|
||||
'Non-significant (changelog entry is not required)': 'pr-non-significant',
|
||||
'Non-significant': 'pr-non-significant',
|
||||
'Documentation (changelog entry is not required)': 'pr-documentation',
|
||||
"Build/Testing/Packaging Improvement": "pr-build",
|
||||
"Non-significant (changelog entry is not needed)": "pr-non-significant",
|
||||
"Non-significant (changelog entry is not required)": "pr-non-significant",
|
||||
"Non-significant": "pr-non-significant",
|
||||
"Documentation (changelog entry is not required)": "pr-documentation",
|
||||
# 'Other': doesn't match anything
|
||||
}
|
||||
|
||||
@ -21,7 +22,7 @@ class Description:
|
||||
self.label_name = str()
|
||||
self.legal = False
|
||||
|
||||
self._parse(pull_request['bodyText'])
|
||||
self._parse(pull_request["bodyText"])
|
||||
|
||||
def _parse(self, text):
|
||||
lines = text.splitlines()
|
||||
@ -38,14 +39,17 @@ class Description:
|
||||
category = stripped
|
||||
next_category = False
|
||||
|
||||
if stripped == 'I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en':
|
||||
if (
|
||||
stripped
|
||||
== "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
|
||||
):
|
||||
self.legal = True
|
||||
|
||||
category_headers = (
|
||||
'Category (leave one):',
|
||||
'Changelog category (leave one):',
|
||||
'Changelog category:',
|
||||
'Category:'
|
||||
"Category (leave one):",
|
||||
"Changelog category (leave one):",
|
||||
"Changelog category:",
|
||||
"Category:",
|
||||
)
|
||||
|
||||
if stripped in category_headers:
|
||||
@ -55,6 +59,6 @@ class Description:
|
||||
self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
|
||||
else:
|
||||
if not category:
|
||||
print('Cannot find category in pr description')
|
||||
print("Cannot find category in pr description")
|
||||
else:
|
||||
print(('Unknown category: ' + category))
|
||||
print(("Unknown category: " + category))
|
||||
|
@ -5,11 +5,11 @@ import time
|
||||
|
||||
|
||||
class Query:
|
||||
'''
|
||||
"""
|
||||
Implements queries to the Github API using GraphQL
|
||||
'''
|
||||
"""
|
||||
|
||||
_PULL_REQUEST = '''
|
||||
_PULL_REQUEST = """
|
||||
author {{
|
||||
... on User {{
|
||||
id
|
||||
@ -47,7 +47,7 @@ class Query:
|
||||
number
|
||||
title
|
||||
url
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
|
||||
self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
|
||||
@ -63,14 +63,14 @@ class Query:
|
||||
self.api_costs = {}
|
||||
|
||||
repo = self.get_repository()
|
||||
self._id = repo['id']
|
||||
self.ssh_url = repo['sshUrl']
|
||||
self.default_branch = repo['defaultBranchRef']['name']
|
||||
self._id = repo["id"]
|
||||
self.ssh_url = repo["sshUrl"]
|
||||
self.default_branch = repo["defaultBranchRef"]["name"]
|
||||
|
||||
self.members = set(self.get_members())
|
||||
|
||||
def get_repository(self):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
defaultBranchRef {{
|
||||
name
|
||||
@ -78,19 +78,19 @@ class Query:
|
||||
id
|
||||
sshUrl
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(owner=self._owner, name=self._name)
|
||||
return self._run(query)['repository']
|
||||
return self._run(query)["repository"]
|
||||
|
||||
def get_members(self):
|
||||
'''Get all team members for organization
|
||||
"""Get all team members for organization
|
||||
|
||||
Returns:
|
||||
members: a map of members' logins to ids
|
||||
'''
|
||||
"""
|
||||
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
organization(login: "{organization}") {{
|
||||
team(slug: "{team}") {{
|
||||
members(first: {max_page_size} {next}) {{
|
||||
@ -105,43 +105,54 @@ class Query:
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
members = {}
|
||||
not_end = True
|
||||
query = _QUERY.format(organization=self._owner, team=self._team,
|
||||
max_page_size=self._max_page_size,
|
||||
next='')
|
||||
query = _QUERY.format(
|
||||
organization=self._owner,
|
||||
team=self._team,
|
||||
max_page_size=self._max_page_size,
|
||||
next="",
|
||||
)
|
||||
|
||||
while not_end:
|
||||
result = self._run(query)['organization']['team']
|
||||
result = self._run(query)["organization"]["team"]
|
||||
if result is None:
|
||||
break
|
||||
result = result['members']
|
||||
not_end = result['pageInfo']['hasNextPage']
|
||||
query = _QUERY.format(organization=self._owner, team=self._team,
|
||||
max_page_size=self._max_page_size,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
|
||||
result = result["members"]
|
||||
not_end = result["pageInfo"]["hasNextPage"]
|
||||
query = _QUERY.format(
|
||||
organization=self._owner,
|
||||
team=self._team,
|
||||
max_page_size=self._max_page_size,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
||||
)
|
||||
|
||||
members += dict([(node['login'], node['id']) for node in result['nodes']])
|
||||
members += dict([(node["login"], node["id"]) for node in result["nodes"]])
|
||||
|
||||
return members
|
||||
|
||||
def get_pull_request(self, number):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
pullRequest(number: {number}) {{
|
||||
{pull_request_data}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(owner=self._owner, name=self._name, number=number,
|
||||
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
|
||||
return self._run(query)['repository']['pullRequest']
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
number=number,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
min_page_size=self._min_page_size,
|
||||
)
|
||||
return self._run(query)["repository"]["pullRequest"]
|
||||
|
||||
def find_pull_request(self, base, head):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
|
||||
nodes {{
|
||||
@ -150,21 +161,27 @@ class Query:
|
||||
totalCount
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(owner=self._owner, name=self._name, base=base, head=head,
|
||||
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
|
||||
result = self._run(query)['repository']['pullRequests']
|
||||
if result['totalCount'] > 0:
|
||||
return result['nodes'][0]
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
base=base,
|
||||
head=head,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
min_page_size=self._min_page_size,
|
||||
)
|
||||
result = self._run(query)["repository"]["pullRequests"]
|
||||
if result["totalCount"] > 0:
|
||||
return result["nodes"][0]
|
||||
else:
|
||||
return {}
|
||||
|
||||
def find_pull_requests(self, label_name):
|
||||
'''
|
||||
"""
|
||||
Get all pull-requests filtered by label name
|
||||
'''
|
||||
_QUERY = '''
|
||||
"""
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
|
||||
nodes {{
|
||||
@ -172,18 +189,23 @@ class Query:
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(owner=self._owner, name=self._name, label_name=label_name,
|
||||
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
|
||||
return self._run(query)['repository']['pullRequests']['nodes']
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
label_name=label_name,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
min_page_size=self._min_page_size,
|
||||
)
|
||||
return self._run(query)["repository"]["pullRequests"]["nodes"]
|
||||
|
||||
def get_pull_requests(self, before_commit):
|
||||
'''
|
||||
"""
|
||||
Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
|
||||
'''
|
||||
"""
|
||||
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
defaultBranchRef {{
|
||||
target {{
|
||||
@ -221,44 +243,60 @@ class Query:
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
pull_requests = []
|
||||
not_end = True
|
||||
query = _QUERY.format(owner=self._owner, name=self._name,
|
||||
max_page_size=self._max_page_size,
|
||||
min_page_size=self._min_page_size,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
next='')
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
max_page_size=self._max_page_size,
|
||||
min_page_size=self._min_page_size,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
next="",
|
||||
)
|
||||
|
||||
while not_end:
|
||||
result = self._run(query)['repository']['defaultBranchRef']['target']['history']
|
||||
not_end = result['pageInfo']['hasNextPage']
|
||||
query = _QUERY.format(owner=self._owner, name=self._name,
|
||||
max_page_size=self._max_page_size,
|
||||
min_page_size=self._min_page_size,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
|
||||
result = self._run(query)["repository"]["defaultBranchRef"]["target"][
|
||||
"history"
|
||||
]
|
||||
not_end = result["pageInfo"]["hasNextPage"]
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
max_page_size=self._max_page_size,
|
||||
min_page_size=self._min_page_size,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
||||
)
|
||||
|
||||
for commit in result['nodes']:
|
||||
for commit in result["nodes"]:
|
||||
# FIXME: maybe include `before_commit`?
|
||||
if str(commit['oid']) == str(before_commit):
|
||||
if str(commit["oid"]) == str(before_commit):
|
||||
not_end = False
|
||||
break
|
||||
|
||||
# TODO: fetch all pull-requests that were merged in a single commit.
|
||||
assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size
|
||||
assert (
|
||||
commit["associatedPullRequests"]["totalCount"]
|
||||
<= self._min_page_size
|
||||
)
|
||||
|
||||
for pull_request in commit['associatedPullRequests']['nodes']:
|
||||
if(pull_request['baseRepository']['nameWithOwner'] == '{}/{}'.format(self._owner, self._name) and
|
||||
pull_request['baseRefName'] == self.default_branch and
|
||||
pull_request['mergeCommit']['oid'] == commit['oid']):
|
||||
for pull_request in commit["associatedPullRequests"]["nodes"]:
|
||||
if (
|
||||
pull_request["baseRepository"]["nameWithOwner"]
|
||||
== "{}/{}".format(self._owner, self._name)
|
||||
and pull_request["baseRefName"] == self.default_branch
|
||||
and pull_request["mergeCommit"]["oid"] == commit["oid"]
|
||||
):
|
||||
pull_requests.append(pull_request)
|
||||
|
||||
return pull_requests
|
||||
|
||||
def create_pull_request(self, source, target, title, description="", draft=False, can_modify=True):
|
||||
_QUERY = '''
|
||||
def create_pull_request(
|
||||
self, source, target, title, description="", draft=False, can_modify=True
|
||||
):
|
||||
_QUERY = """
|
||||
createPullRequest(input: {{
|
||||
baseRefName: "{target}",
|
||||
headRefName: "{source}",
|
||||
@ -272,15 +310,22 @@ class Query:
|
||||
{pull_request_data}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(target=target, source=source, id=self._id, title=title, body=description,
|
||||
draft="true" if draft else "false", modify="true" if can_modify else "false",
|
||||
pull_request_data=self._PULL_REQUEST)
|
||||
return self._run(query, is_mutation=True)['createPullRequest']['pullRequest']
|
||||
query = _QUERY.format(
|
||||
target=target,
|
||||
source=source,
|
||||
id=self._id,
|
||||
title=title,
|
||||
body=description,
|
||||
draft="true" if draft else "false",
|
||||
modify="true" if can_modify else "false",
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
)
|
||||
return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
|
||||
|
||||
def merge_pull_request(self, id):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
mergePullRequest(input: {{
|
||||
pullRequestId: "{id}"
|
||||
}}) {{
|
||||
@ -288,35 +333,35 @@ class Query:
|
||||
{pull_request_data}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
|
||||
return self._run(query, is_mutation=True)['mergePullRequest']['pullRequest']
|
||||
return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
|
||||
|
||||
# FIXME: figure out how to add more assignees at once
|
||||
def add_assignee(self, pr, assignee):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
addAssigneesToAssignable(input: {{
|
||||
assignableId: "{id1}",
|
||||
assigneeIds: "{id2}"
|
||||
}}) {{
|
||||
clientMutationId
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(id1=pr['id'], id2=assignee['id'])
|
||||
query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
|
||||
self._run(query, is_mutation=True)
|
||||
|
||||
def set_label(self, pull_request, label_name):
|
||||
'''
|
||||
"""
|
||||
Set label by name to the pull request
|
||||
|
||||
Args:
|
||||
pull_request: JSON object returned by `get_pull_requests()`
|
||||
label_name (string): label name
|
||||
'''
|
||||
"""
|
||||
|
||||
_GET_LABEL = '''
|
||||
_GET_LABEL = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
labels(first: {max_page_size} {next} query: "{label_name}") {{
|
||||
pageInfo {{
|
||||
@ -330,36 +375,44 @@ class Query:
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
_SET_LABEL = '''
|
||||
_SET_LABEL = """
|
||||
addLabelsToLabelable(input: {{
|
||||
labelableId: "{pr_id}",
|
||||
labelIds: "{label_id}"
|
||||
}}) {{
|
||||
clientMutationId
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
labels = []
|
||||
not_end = True
|
||||
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
|
||||
max_page_size=self._max_page_size,
|
||||
next='')
|
||||
query = _GET_LABEL.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
label_name=label_name,
|
||||
max_page_size=self._max_page_size,
|
||||
next="",
|
||||
)
|
||||
|
||||
while not_end:
|
||||
result = self._run(query)['repository']['labels']
|
||||
not_end = result['pageInfo']['hasNextPage']
|
||||
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
|
||||
max_page_size=self._max_page_size,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
|
||||
result = self._run(query)["repository"]["labels"]
|
||||
not_end = result["pageInfo"]["hasNextPage"]
|
||||
query = _GET_LABEL.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
label_name=label_name,
|
||||
max_page_size=self._max_page_size,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
||||
)
|
||||
|
||||
labels += [label for label in result['nodes']]
|
||||
labels += [label for label in result["nodes"]]
|
||||
|
||||
if not labels:
|
||||
return
|
||||
|
||||
query = _SET_LABEL.format(pr_id=pull_request['id'], label_id=labels[0]['id'])
|
||||
query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
|
||||
self._run(query, is_mutation=True)
|
||||
|
||||
def _run(self, query, is_mutation=False):
|
||||
@ -385,19 +438,21 @@ class Query:
status_forcelist=status_forcelist,
)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
session.mount("http://", adapter)
session.mount("https://", adapter)
return session

headers = {'Authorization': 'bearer {}'.format(self._token)}
headers = {"Authorization": "bearer {}".format(self._token)}
if is_mutation:
query = '''
query = """
mutation {{
{query}
}}
'''.format(query=query)
""".format(
query=query
)
else:
query = '''
query = """
query {{
{query}
rateLimit {{
@ -405,23 +460,38 @@ class Query:
remaining
}}
}}
'''.format(query=query)
""".format(
query=query
)

while True:
request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers)
request = requests_retry_session().post(
"https://api.github.com/graphql", json={"query": query}, headers=headers
)
if request.status_code == 200:
result = request.json()
if 'errors' in result:
raise Exception('Errors occurred: {}\nOriginal query: {}'.format(result["errors"], query))
if "errors" in result:
raise Exception(
"Errors occurred: {}\nOriginal query: {}".format(
result["errors"], query
)
)

if not is_mutation:
import inspect

caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
if caller not in list(self.api_costs.keys()):
self.api_costs[caller] = 0
self.api_costs[caller] += result['data']['rateLimit']['cost']
self.api_costs[caller] += result["data"]["rateLimit"]["cost"]

return result['data']
return result["data"]
else:
import json
raise Exception('Query failed with code {code}:\n{json}'.format(code=request.status_code, json=json.dumps(request.json(), indent=4)))

raise Exception(
"Query failed with code {code}:\n{json}".format(
code=request.status_code,
json=json.dumps(request.json(), indent=4),
)
)
@ -14,7 +14,7 @@ CI_CONFIG = {
"package_type": "deb",
"bundled": "bundled",
"splitted": "unsplitted",
"alien_pkgs": True,
"additional_pkgs": True,
"tidy": "disable",
"with_coverage": False,
},
@ -45,7 +45,7 @@ CI_CONFIG = {
"package_type": "deb",
"bundled": "bundled",
"splitted": "unsplitted",
"alien_pkgs": True,
"additional_pkgs": True,
"tidy": "disable",
"with_coverage": False,
},
@ -349,6 +349,9 @@ CI_CONFIG = {
"Stateless tests flaky check (address, actions)": {
"required_build": "package_asan",
},
"Stateless tests bugfix validate check (address, actions)": {
"required_build": "package_asan",
},
"ClickHouse Keeper Jepsen (actions)": {
"required_build": "binary_release",
},
@ -6,6 +6,7 @@ import json
|
||||
import requests # type: ignore
|
||||
from get_robot_token import get_parameter_from_ssm
|
||||
|
||||
|
||||
class ClickHouseHelper:
|
||||
def __init__(self, url=None, user=None, password=None):
|
||||
self.url2 = None
|
||||
@ -15,27 +16,35 @@ class ClickHouseHelper:
|
||||
url = get_parameter_from_ssm("clickhouse-test-stat-url")
|
||||
self.url2 = get_parameter_from_ssm("clickhouse-test-stat-url2")
|
||||
self.auth2 = {
|
||||
'X-ClickHouse-User': get_parameter_from_ssm("clickhouse-test-stat-login2"),
|
||||
'X-ClickHouse-Key': ''
|
||||
"X-ClickHouse-User": get_parameter_from_ssm(
|
||||
"clickhouse-test-stat-login2"
|
||||
),
|
||||
"X-ClickHouse-Key": "",
|
||||
}
|
||||
|
||||
self.url = url
|
||||
self.auth = {
|
||||
'X-ClickHouse-User': user if user is not None else get_parameter_from_ssm("clickhouse-test-stat-login"),
|
||||
'X-ClickHouse-Key': password if password is not None else get_parameter_from_ssm("clickhouse-test-stat-password")
|
||||
"X-ClickHouse-User": user
|
||||
if user is not None
|
||||
else get_parameter_from_ssm("clickhouse-test-stat-login"),
|
||||
"X-ClickHouse-Key": password
|
||||
if password is not None
|
||||
else get_parameter_from_ssm("clickhouse-test-stat-password"),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _insert_json_str_info_impl(url, auth, db, table, json_str):
|
||||
params = {
|
||||
'database': db,
|
||||
'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table),
|
||||
'date_time_input_format': 'best_effort',
|
||||
'send_logs_level': 'warning',
|
||||
"database": db,
|
||||
"query": "INSERT INTO {table} FORMAT JSONEachRow".format(table=table),
|
||||
"date_time_input_format": "best_effort",
|
||||
"send_logs_level": "warning",
|
||||
}
|
||||
|
||||
for i in range(5):
|
||||
response = requests.post(url, params=params, data=json_str, headers=auth, verify=False)
|
||||
response = requests.post(
|
||||
url, params=params, data=json_str, headers=auth, verify=False
|
||||
)
|
||||
|
||||
logging.info("Response content '%s'", response.content)
|
||||
|
||||
@ -43,16 +52,25 @@ class ClickHouseHelper:
|
||||
break
|
||||
|
||||
error = (
|
||||
"Cannot insert data into clickhouse at try " + str(i)
|
||||
+ ": HTTP code " + str(response.status_code) + ": '"
|
||||
+ str(response.text) + "'")
|
||||
"Cannot insert data into clickhouse at try "
|
||||
+ str(i)
|
||||
+ ": HTTP code "
|
||||
+ str(response.status_code)
|
||||
+ ": '"
|
||||
+ str(response.text)
|
||||
+ "'"
|
||||
)
|
||||
|
||||
if response.status_code >= 500:
|
||||
# A retriable error
|
||||
time.sleep(1)
|
||||
continue
|
||||
|
||||
logging.info("Request headers '%s', body '%s'", response.request.headers, response.request.body)
|
||||
logging.info(
|
||||
"Request headers '%s', body '%s'",
|
||||
response.request.headers,
|
||||
response.request.body,
|
||||
)
|
||||
|
||||
raise Exception(error)
|
||||
else:
|
||||
@ -72,18 +90,20 @@ class ClickHouseHelper:
|
||||
for event in events:
|
||||
jsons.append(json.dumps(event))
|
||||
|
||||
self._insert_json_str_info(db, table, ','.join(jsons))
|
||||
self._insert_json_str_info(db, table, ",".join(jsons))
|
||||
|
||||
def _select_and_get_json_each_row(self, db, query):
|
||||
params = {
|
||||
'database': db,
|
||||
'query': query,
|
||||
'default_format': 'JSONEachRow',
|
||||
"database": db,
|
||||
"query": query,
|
||||
"default_format": "JSONEachRow",
|
||||
}
|
||||
for i in range(5):
|
||||
response = None
|
||||
try:
|
||||
response = requests.get(self.url, params=params, headers=self.auth, verify=False)
|
||||
response = requests.get(
|
||||
self.url, params=params, headers=self.auth, verify=False
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.text
|
||||
except Exception as ex:
|
||||
@ -97,15 +117,21 @@ class ClickHouseHelper:
|
||||
def select_json_each_row(self, db, query):
|
||||
text = self._select_and_get_json_each_row(db, query)
|
||||
result = []
|
||||
for line in text.split('\n'):
|
||||
for line in text.split("\n"):
|
||||
if line:
|
||||
result.append(json.loads(line))
|
||||
return result
|
||||
|
||||
|
||||
def prepare_tests_results_for_clickhouse(
|
||||
pr_info, test_results,
|
||||
check_status, check_duration, check_start_time,
|
||||
report_url, check_name):
|
||||
pr_info,
|
||||
test_results,
|
||||
check_status,
|
||||
check_duration,
|
||||
check_start_time,
|
||||
report_url,
|
||||
check_name,
|
||||
):
|
||||
|
||||
pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
|
||||
base_ref = "master"
|
||||
@ -147,13 +173,14 @@ def prepare_tests_results_for_clickhouse(
|
||||
test_time = 0
|
||||
if len(test_result) > 2 and test_result[2]:
|
||||
test_time = test_result[2]
|
||||
current_row['test_duration_ms'] = int(float(test_time) * 1000)
|
||||
current_row['test_name'] = test_name
|
||||
current_row['test_status'] = test_status
|
||||
current_row["test_duration_ms"] = int(float(test_time) * 1000)
|
||||
current_row["test_name"] = test_name
|
||||
current_row["test_status"] = test_status
|
||||
result.append(current_row)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def mark_flaky_tests(clickhouse_helper, check_name, test_results):
|
||||
try:
|
||||
query = """
|
||||
@ -164,14 +191,16 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results):
|
||||
AND check_name = '{check_name}'
|
||||
AND (test_status = 'FAIL' OR test_status = 'FLAKY')
|
||||
AND pull_request_number = 0
|
||||
""".format(check_name=check_name)
|
||||
""".format(
|
||||
check_name=check_name
|
||||
)
|
||||
|
||||
tests_data = clickhouse_helper.select_json_each_row('gh-data', query)
|
||||
master_failed_tests = {row['test_name'] for row in tests_data}
|
||||
logging.info("Found flaky tests: %s", ', '.join(master_failed_tests))
|
||||
tests_data = clickhouse_helper.select_json_each_row("gh-data", query)
|
||||
master_failed_tests = {row["test_name"] for row in tests_data}
|
||||
logging.info("Found flaky tests: %s", ", ".join(master_failed_tests))
|
||||
|
||||
for test_result in test_results:
|
||||
if test_result[1] == 'FAIL' and test_result[0] in master_failed_tests:
|
||||
test_result[1] = 'FLAKY'
|
||||
if test_result[1] == "FAIL" and test_result[0] in master_failed_tests:
|
||||
test_result[1] = "FLAKY"
|
||||
except Exception as ex:
|
||||
logging.info("Exception happened during flaky tests fetch %s", ex)
|
||||
|
@ -18,13 +18,16 @@ from tee_popen import TeePopen
|
||||
|
||||
NAME = "Woboq Build (actions)"
|
||||
|
||||
|
||||
def get_run_command(repo_path, output_path, image):
|
||||
cmd = "docker run " + \
|
||||
f"--volume={repo_path}:/repo_folder " \
|
||||
f"--volume={output_path}:/test_output " \
|
||||
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
|
||||
cmd = (
|
||||
"docker run " + f"--volume={repo_path}:/repo_folder "
|
||||
f"--volume={output_path}:/test_output "
|
||||
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
|
||||
)
|
||||
return cmd
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
@ -37,8 +40,8 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
docker_image = get_image_with_version(IMAGES_PATH, 'clickhouse/codebrowser')
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser")
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
|
||||
result_path = os.path.join(temp_path, "result_path")
|
||||
if not os.path.exists(result_path):
|
||||
@ -62,14 +65,20 @@ if __name__ == "__main__":
|
||||
report_path = os.path.join(result_path, "html_report")
|
||||
logging.info("Report path %s", report_path)
|
||||
s3_path_prefix = "codebrowser"
|
||||
html_urls = s3_helper.fast_parallel_upload_dir(report_path, s3_path_prefix, 'clickhouse-test-reports')
|
||||
html_urls = s3_helper.fast_parallel_upload_dir(
|
||||
report_path, s3_path_prefix, "clickhouse-test-reports"
|
||||
)
|
||||
|
||||
index_html = '<a href="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/index.html">HTML report</a>'
|
||||
|
||||
test_results = [(index_html, "Look at the report")]
|
||||
|
||||
report_url = upload_results(s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME)
|
||||
report_url = upload_results(
|
||||
s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME
|
||||
)
|
||||
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
|
||||
post_commit_status(gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url)
|
||||
post_commit_status(
|
||||
gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url
|
||||
)
|
||||
|
@ -1,15 +1,23 @@
#!/usr/bin/env python3

import time
import os
import csv
from env_helper import GITHUB_REPOSITORY
from ci_config import CI_CONFIG

RETRY = 5


def override_status(status, check_name):
if CI_CONFIG["tests_config"][check_name].get("force_tests", False):
def override_status(status, check_name, invert=False):
if CI_CONFIG["tests_config"].get(check_name, {}).get("force_tests", False):
return "success"

if invert:
if status == "success":
return "error"
return "success"

return status


@ -43,3 +51,11 @@ def post_commit_status(gh, sha, check_name, description, state, report_url):
if i == RETRY - 1:
raise ex
time.sleep(i)


def post_commit_status_to_file(file_path, description, state, report_url):
if os.path.exists(file_path):
raise Exception(f'File "{file_path}" already exists!')
with open(file_path, "w", encoding="utf-8") as f:
out = csv.writer(f, delimiter="\t")
out.writerow([state, report_url, description])
@ -16,34 +16,40 @@ from build_download_helper import download_builds_filter
|
||||
from upload_result_helper import upload_results
|
||||
from docker_pull_helper import get_images_with_versions
|
||||
from commit_status_helper import post_commit_status
|
||||
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
|
||||
from clickhouse_helper import (
|
||||
ClickHouseHelper,
|
||||
mark_flaky_tests,
|
||||
prepare_tests_results_for_clickhouse,
|
||||
)
|
||||
from stopwatch import Stopwatch
|
||||
from rerun_helper import RerunHelper
|
||||
|
||||
IMAGE_UBUNTU = "clickhouse/test-old-ubuntu"
|
||||
IMAGE_CENTOS = "clickhouse/test-old-centos"
|
||||
MAX_GLIBC_VERSION = '2.4'
|
||||
MAX_GLIBC_VERSION = "2.4"
|
||||
DOWNLOAD_RETRIES_COUNT = 5
|
||||
CHECK_NAME = "Compatibility check (actions)"
|
||||
|
||||
|
||||
def process_os_check(log_path):
|
||||
name = os.path.basename(log_path)
|
||||
with open(log_path, 'r') as log:
|
||||
line = log.read().split('\n')[0].strip()
|
||||
if line != 'OK':
|
||||
with open(log_path, "r") as log:
|
||||
line = log.read().split("\n")[0].strip()
|
||||
if line != "OK":
|
||||
return (name, "FAIL")
|
||||
else:
|
||||
return (name, "OK")
|
||||
|
||||
|
||||
def process_glibc_check(log_path):
|
||||
bad_lines = []
|
||||
with open(log_path, 'r') as log:
|
||||
with open(log_path, "r") as log:
|
||||
for line in log:
|
||||
if line.strip():
|
||||
columns = line.strip().split(' ')
|
||||
columns = line.strip().split(" ")
|
||||
symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5
|
||||
_, version = symbol_with_glibc.split('@GLIBC_')
|
||||
if version == 'PRIVATE':
|
||||
_, version = symbol_with_glibc.split("@GLIBC_")
|
||||
if version == "PRIVATE":
|
||||
bad_lines.append((symbol_with_glibc, "FAIL"))
|
||||
elif StrictVersion(version) > MAX_GLIBC_VERSION:
|
||||
bad_lines.append((symbol_with_glibc, "FAIL"))
|
||||
@ -51,6 +57,7 @@ def process_glibc_check(log_path):
|
||||
bad_lines.append(("glibc check", "OK"))
|
||||
return bad_lines
|
||||
|
||||
|
||||
def process_result(result_folder, server_log_folder):
|
||||
summary = process_glibc_check(os.path.join(result_folder, "glibc.log"))
|
||||
|
||||
@ -86,16 +93,18 @@ def process_result(result_folder, server_log_folder):
|
||||
return status, description, summary, result_logs
|
||||
|
||||
|
||||
def get_run_commands(build_path, result_folder, server_log_folder, image_centos, image_ubuntu):
|
||||
def get_run_commands(
|
||||
build_path, result_folder, server_log_folder, image_centos, image_ubuntu
|
||||
):
|
||||
return [
|
||||
f"readelf -s {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log",
|
||||
f"readelf -s {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log",
|
||||
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \
|
||||
f"--volume={build_path}/etc/clickhouse-server:/config " \
|
||||
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
|
||||
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \
|
||||
f"--volume={build_path}/etc/clickhouse-server:/config " \
|
||||
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
|
||||
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
|
||||
f"--volume={build_path}/etc/clickhouse-server:/config "
|
||||
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
|
||||
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
|
||||
f"--volume={build_path}/etc/clickhouse-server:/config "
|
||||
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
|
||||
]
|
||||
|
||||
|
||||
@ -124,14 +133,18 @@ if __name__ == "__main__":
|
||||
os.makedirs(packages_path)
|
||||
|
||||
def url_filter(url):
|
||||
return url.endswith('.deb') and ('clickhouse-common-static_' in url or 'clickhouse-server_' in url)
|
||||
return url.endswith(".deb") and (
|
||||
"clickhouse-common-static_" in url or "clickhouse-server_" in url
|
||||
)
|
||||
|
||||
download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter)
|
||||
|
||||
for f in os.listdir(packages_path):
|
||||
if '.deb' in f:
|
||||
if ".deb" in f:
|
||||
full_path = os.path.join(packages_path, f)
|
||||
subprocess.check_call(f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True)
|
||||
subprocess.check_call(
|
||||
f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True
|
||||
)
|
||||
|
||||
server_log_path = os.path.join(temp_path, "server_log")
|
||||
if not os.path.exists(server_log_path):
|
||||
@ -141,7 +154,9 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(result_path):
|
||||
os.makedirs(result_path)
|
||||
|
||||
run_commands = get_run_commands(packages_path, result_path, server_log_path, docker_images[0], docker_images[1])
|
||||
run_commands = get_run_commands(
|
||||
packages_path, result_path, server_log_path, docker_images[0], docker_images[1]
|
||||
)
|
||||
|
||||
state = "success"
|
||||
for run_command in run_commands:
|
||||
@ -154,15 +169,32 @@ if __name__ == "__main__":
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
state, description, test_results, additional_logs = process_result(result_path, server_log_path)
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
state, description, test_results, additional_logs = process_result(
|
||||
result_path, server_log_path
|
||||
)
|
||||
|
||||
ch_helper = ClickHouseHelper()
|
||||
mark_flaky_tests(ch_helper, CHECK_NAME, test_results)
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME)
|
||||
report_url = upload_results(
|
||||
s3_helper,
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
test_results,
|
||||
additional_logs,
|
||||
CHECK_NAME,
|
||||
)
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_results,
|
||||
state,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
CHECK_NAME,
|
||||
)
|
||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
||||
|
@ -3,20 +3,21 @@ import subprocess
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
def compress_file_fast(path, archive_path):
|
||||
if os.path.exists('/usr/bin/pigz'):
|
||||
if os.path.exists("/usr/bin/pigz"):
|
||||
subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True)
|
||||
else:
|
||||
subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True)
|
||||
|
||||
|
||||
def compress_fast(path, archive_path, exclude=None):
|
||||
pigz_part = ''
|
||||
if os.path.exists('/usr/bin/pigz'):
|
||||
pigz_part = ""
|
||||
if os.path.exists("/usr/bin/pigz"):
|
||||
logging.info("pigz found, will compress and decompress faster")
|
||||
pigz_part = "--use-compress-program='pigz'"
|
||||
else:
|
||||
pigz_part = '-z'
|
||||
pigz_part = "-z"
|
||||
logging.info("no pigz, compressing with default tar")
|
||||
|
||||
if exclude is None:
|
||||
@ -31,21 +32,36 @@ def compress_fast(path, archive_path, exclude=None):
|
||||
path = os.path.dirname(path)
|
||||
else:
|
||||
path += "/.."
|
||||
cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname)
|
||||
cmd = "tar {} {} -cf {} -C {} {}".format(
|
||||
pigz_part, exclude_part, archive_path, path, fname
|
||||
)
|
||||
logging.debug("compress_fast cmd: %s", cmd)
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
|
||||
def decompress_fast(archive_path, result_path=None):
|
||||
pigz_part = ''
|
||||
if os.path.exists('/usr/bin/pigz'):
|
||||
logging.info("pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, result_path)
|
||||
pigz_part = ""
|
||||
if os.path.exists("/usr/bin/pigz"):
|
||||
logging.info(
|
||||
"pigz found, will compress and decompress faster ('%s' -> '%s')",
|
||||
archive_path,
|
||||
result_path,
|
||||
)
|
||||
pigz_part = "--use-compress-program='pigz'"
|
||||
else:
|
||||
pigz_part = '-z'
|
||||
logging.info("no pigz, decompressing with default tar ('%s' -> '%s')", archive_path, result_path)
|
||||
pigz_part = "-z"
|
||||
logging.info(
|
||||
"no pigz, decompressing with default tar ('%s' -> '%s')",
|
||||
archive_path,
|
||||
result_path,
|
||||
)
|
||||
|
||||
if result_path is None:
|
||||
subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True)
|
||||
subprocess.check_call(
|
||||
"tar {} -xf {}".format(pigz_part, archive_path), shell=True
|
||||
)
|
||||
else:
|
||||
subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True)
|
||||
subprocess.check_call(
|
||||
"tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path),
|
||||
shell=True,
|
||||
)
|
||||
|
@ -6,23 +6,29 @@ import time
|
||||
import subprocess
|
||||
import logging
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class DockerImage:
|
||||
def __init__(self, name, version=None):
|
||||
def __init__(self, name, version: Optional[str] = None):
|
||||
self.name = name
|
||||
if version is None:
|
||||
self.version = 'latest'
|
||||
self.version = "latest"
|
||||
else:
|
||||
self.version = version
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}:{self.version}"
|
||||
|
||||
def get_images_with_versions(reports_path, required_image, pull=True):
|
||||
|
||||
def get_images_with_versions(
|
||||
reports_path, required_image, pull=True, version: Optional[str] = None
|
||||
):
|
||||
images_path = None
|
||||
for root, _, files in os.walk(reports_path):
|
||||
for f in files:
|
||||
if f == 'changed_images.json':
|
||||
images_path = os.path.join(root, 'changed_images.json')
|
||||
if f == "changed_images.json":
|
||||
images_path = os.path.join(root, "changed_images.json")
|
||||
break
|
||||
|
||||
if not images_path:
|
||||
@ -32,7 +38,7 @@ def get_images_with_versions(reports_path, required_image, pull=True):
|
||||
|
||||
if images_path is not None and os.path.exists(images_path):
|
||||
logging.info("Images file exists")
|
||||
with open(images_path, 'r', encoding='utf-8') as images_fd:
|
||||
with open(images_path, "r", encoding="utf-8") as images_fd:
|
||||
images = json.load(images_fd)
|
||||
logging.info("Got images %s", images)
|
||||
else:
|
||||
@ -40,7 +46,7 @@ def get_images_with_versions(reports_path, required_image, pull=True):
|
||||
|
||||
docker_images = []
|
||||
for image_name in required_image:
|
||||
docker_image = DockerImage(image_name)
|
||||
docker_image = DockerImage(image_name, version)
|
||||
if image_name in images:
|
||||
docker_image.version = images[image_name]
|
||||
docker_images.append(docker_image)
|
||||
@ -50,15 +56,22 @@ def get_images_with_versions(reports_path, required_image, pull=True):
|
||||
for i in range(10):
|
||||
try:
|
||||
logging.info("Pulling image %s", docker_image)
|
||||
latest_error = subprocess.check_output(f"docker pull {docker_image}", stderr=subprocess.STDOUT, shell=True)
|
||||
latest_error = subprocess.check_output(
|
||||
f"docker pull {docker_image}",
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True,
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
time.sleep(i * 3)
|
||||
logging.info("Got execption pulling docker %s", ex)
|
||||
else:
|
||||
raise Exception(f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}")
|
||||
raise Exception(
|
||||
f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}"
|
||||
)
|
||||
|
||||
return docker_images
|
||||
|
||||
def get_image_with_version(reports_path, image, pull=True):
|
||||
return get_images_with_versions(reports_path, [image], pull)[0]
|
||||
|
||||
def get_image_with_version(reports_path, image, pull=True, version=None):
|
||||
return get_images_with_versions(reports_path, [image], pull, version=version)[0]
|
||||
|
@ -40,7 +40,9 @@ if __name__ == "__main__":
|
||||
if not pr_info.has_changes_in_documentation():
|
||||
logging.info("No changes in documentation")
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(context=NAME, description="No changes in docs", state="success")
|
||||
commit.create_status(
|
||||
context=NAME, description="No changes in docs", state="success"
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
logging.info("Has changes in docs")
|
||||
@ -48,15 +50,15 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-check')
|
||||
docker_image = get_image_with_version(temp_path, "clickhouse/docs-check")
|
||||
|
||||
test_output = os.path.join(temp_path, 'docs_check_log')
|
||||
test_output = os.path.join(temp_path, "docs_check_log")
|
||||
if not os.path.exists(test_output):
|
||||
os.makedirs(test_output)
|
||||
|
||||
cmd = f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
|
||||
|
||||
run_log_path = os.path.join(test_output, 'runlog.log')
|
||||
run_log_path = os.path.join(test_output, "runlog.log")
|
||||
logging.info("Running command: '%s'", cmd)
|
||||
|
||||
with TeePopen(cmd, run_log_path) as process:
|
||||
@ -82,10 +84,10 @@ if __name__ == "__main__":
|
||||
for f in files:
|
||||
path = os.path.join(test_output, f)
|
||||
additional_files.append(path)
|
||||
with open(path, 'r', encoding='utf-8') as check_file:
|
||||
with open(path, "r", encoding="utf-8") as check_file:
|
||||
for line in check_file:
|
||||
if "ERROR" in line:
|
||||
lines.append((line.split(':')[-1], "FAIL"))
|
||||
lines.append((line.split(":")[-1], "FAIL"))
|
||||
if lines:
|
||||
status = "failure"
|
||||
description = "Found errors in docs"
|
||||
@ -94,12 +96,22 @@ if __name__ == "__main__":
|
||||
else:
|
||||
lines.append(("Non zero exit code", "FAIL"))
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
ch_helper = ClickHouseHelper()
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
|
||||
report_url = upload_results(
|
||||
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
|
||||
)
|
||||
print("::notice ::Report url: {report_url}")
|
||||
post_commit_status(gh, pr_info.sha, NAME, description, status, report_url)
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, lines, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
lines,
|
||||
status,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
NAME,
|
||||
)
|
||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
||||
|
@ -34,19 +34,23 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-release')
|
||||
docker_image = get_image_with_version(temp_path, "clickhouse/docs-release")
|
||||
|
||||
test_output = os.path.join(temp_path, 'docs_release_log')
|
||||
test_output = os.path.join(temp_path, "docs_release_log")
|
||||
if not os.path.exists(test_output):
|
||||
os.makedirs(test_output)
|
||||
|
||||
token = CLOUDFLARE_TOKEN
|
||||
cmd = "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " \
|
||||
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
|
||||
cmd = (
|
||||
"docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent "
|
||||
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
|
||||
)
|
||||
|
||||
run_log_path = os.path.join(test_output, 'runlog.log')
|
||||
run_log_path = os.path.join(test_output, "runlog.log")
|
||||
|
||||
with open(run_log_path, 'w', encoding='utf-8') as log, SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
|
||||
with open(run_log_path, "w", encoding="utf-8") as log, SSHKey(
|
||||
"ROBOT_CLICKHOUSE_SSH_KEY"
|
||||
):
|
||||
with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process:
|
||||
retcode = process.wait()
|
||||
if retcode == 0:
|
||||
@ -70,10 +74,10 @@ if __name__ == "__main__":
|
||||
for f in files:
|
||||
path = os.path.join(test_output, f)
|
||||
additional_files.append(path)
|
||||
with open(path, 'r', encoding='utf-8') as check_file:
|
||||
with open(path, "r", encoding="utf-8") as check_file:
|
||||
for line in check_file:
|
||||
if "ERROR" in line:
|
||||
lines.append((line.split(':')[-1], "FAIL"))
|
||||
lines.append((line.split(":")[-1], "FAIL"))
|
||||
if lines:
|
||||
status = "failure"
|
||||
description = "Found errors in docs"
|
||||
@ -82,9 +86,13 @@ if __name__ == "__main__":
|
||||
else:
|
||||
lines.append(("Non zero exit code", "FAIL"))
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
|
||||
report_url = upload_results(
|
||||
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
|
||||
)
|
||||
print("::notice ::Report url: {report_url}")
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(context=NAME, description=description, state=status, target_url=report_url)
|
||||
commit.create_status(
|
||||
context=NAME, description=description, state=status, target_url=report_url
|
||||
)
|
||||
|
165
tests/ci/download_previous_release.py
Normal file
@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
###########################################################################
|
||||
# #
|
||||
# TODO (@vdimir, @Avogar) #
|
||||
# Merge with one from https://github.com/ClickHouse/ClickHouse/pull/27928 #
|
||||
# #
|
||||
###########################################################################
|
||||
|
||||
import re
|
||||
import os
|
||||
import logging
|
||||
|
||||
import requests
|
||||
|
||||
CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags"
|
||||
|
||||
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb"
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static-dbg_{version}_amd64.deb"
|
||||
CLICKHOUSE_SERVER_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-server_{version}_all.deb"
|
||||
CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-client_{version}_all.deb"
|
||||
|
||||
|
||||
CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb"
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = (
|
||||
"clickhouse-common-static-dbg_{version}_amd64.deb"
|
||||
)
|
||||
CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb"
|
||||
CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb"
|
||||
|
||||
PACKETS_DIR = "previous_release_package_folder/"
|
||||
VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)"
|
||||
|
||||
|
||||
class Version:
|
||||
def __init__(self, version):
|
||||
self.version = version
|
||||
|
||||
def __lt__(self, other):
|
||||
return list(map(int, self.version.split("."))) < list(
|
||||
map(int, other.version.split("."))
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
return self.version
|
||||
|
||||
|
||||
class ReleaseInfo:
|
||||
def __init__(self, version, release_type):
|
||||
self.version = version
|
||||
self.type = release_type
|
||||
|
||||
def __repr__(self):
|
||||
return f"ReleaseInfo: {self.version}-{self.type}"
|
||||
|
||||
|
||||
def find_previous_release(server_version, releases):
|
||||
releases.sort(key=lambda x: x.version, reverse=True)
|
||||
|
||||
if server_version is None:
|
||||
return True, releases[0]
|
||||
|
||||
for release in releases:
|
||||
if release.version < server_version:
|
||||
return True, release
|
||||
|
||||
return False, None
|
||||
|
||||
|
||||
def get_previous_release(server_version=None):
|
||||
page = 1
|
||||
found = False
|
||||
while not found:
|
||||
response = requests.get(CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100})
|
||||
if not response.ok:
|
||||
raise Exception(
|
||||
"Cannot load the list of tags from github: " + response.reason
|
||||
)
|
||||
|
||||
releases_str = set(re.findall(VERSION_PATTERN, response.text))
|
||||
if len(releases_str) == 0:
|
||||
raise Exception(
|
||||
"Cannot find previous release for "
|
||||
+ str(server_version)
|
||||
+ " server version"
|
||||
)
|
||||
|
||||
releases = list(
|
||||
map(
|
||||
lambda x: ReleaseInfo(Version(x.split("-")[0]), x.split("-")[1]),
|
||||
releases_str,
|
||||
)
|
||||
)
|
||||
found, previous_release = find_previous_release(server_version, releases)
|
||||
page += 1
|
||||
|
||||
return previous_release
|
||||
|
||||
|
||||
def download_packet(url, out_path):
|
||||
"""
|
||||
TODO: use dowload_build_with_progress from build_download_helper.py
|
||||
"""
|
||||
|
||||
response = requests.get(url)
|
||||
logging.info("Downloading %s", url)
|
||||
if response.ok:
|
||||
open(out_path, "wb").write(response.content)
|
||||
|
||||
|
||||
def download_packets(release, dest_path=PACKETS_DIR):
|
||||
if not os.path.exists(dest_path):
|
||||
os.makedirs(dest_path)
|
||||
|
||||
logging.info("Will download %s", release)
|
||||
|
||||
download_packet(
|
||||
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(
|
||||
version=release.version, type=release.type
|
||||
),
|
||||
out_path=os.path.join(
|
||||
dest_path,
|
||||
CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version),
|
||||
),
|
||||
)
|
||||
|
||||
download_packet(
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(
|
||||
version=release.version, type=release.type
|
||||
),
|
||||
out_path=os.path.join(
|
||||
dest_path,
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version),
|
||||
),
|
||||
)
|
||||
|
||||
download_packet(
|
||||
CLICKHOUSE_SERVER_DOWNLOAD_URL.format(
|
||||
version=release.version, type=release.type
|
||||
),
|
||||
out_path=os.path.join(
|
||||
dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)
|
||||
),
|
||||
)
|
||||
|
||||
download_packet(
|
||||
CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(
|
||||
version=release.version, type=release.type
|
||||
),
|
||||
out_path=os.path.join(
|
||||
dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def download_previous_release(dest_path):
|
||||
current_release = get_previous_release(None)
|
||||
download_packets(current_release, dest_path=dest_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
server_version = Version(input())
|
||||
previous_release = get_previous_release(server_version)
|
||||
download_packets(previous_release)
|
@ -7,7 +7,7 @@ from pr_info import PRInfo
from get_robot_token import get_best_robot_token
from commit_status_helper import get_commit

NAME = 'Run Check (actions)'
NAME = "Run Check (actions)"


def filter_statuses(statuses):
@ -36,4 +36,9 @@ if __name__ == "__main__":
url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
statuses = filter_statuses(list(commit.get_statuses()))
if NAME in statuses and statuses[NAME].state == "pending":
commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url)
commit.create_status(
context=NAME,
description="All checks finished",
state="success",
target_url=url,
)
@ -1,9 +1,10 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import logging
|
||||
import subprocess
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from github import Github
|
||||
@ -13,25 +14,38 @@ from s3_helper import S3Helper
|
||||
from get_robot_token import get_best_robot_token
|
||||
from pr_info import PRInfo
|
||||
from build_download_helper import download_all_deb_packages
|
||||
from download_previous_release import download_previous_release
|
||||
from upload_result_helper import upload_results
|
||||
from docker_pull_helper import get_image_with_version
|
||||
from commit_status_helper import post_commit_status, get_commit, override_status
|
||||
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
|
||||
from commit_status_helper import (
|
||||
post_commit_status,
|
||||
get_commit,
|
||||
override_status,
|
||||
post_commit_status_to_file,
|
||||
)
|
||||
from clickhouse_helper import (
|
||||
ClickHouseHelper,
|
||||
mark_flaky_tests,
|
||||
prepare_tests_results_for_clickhouse,
|
||||
)
|
||||
from stopwatch import Stopwatch
|
||||
from rerun_helper import RerunHelper
|
||||
from tee_popen import TeePopen
|
||||
|
||||
NO_CHANGES_MSG = "Nothing to run"
|
||||
|
||||
|
||||
def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
|
||||
result = []
|
||||
if 'DatabaseReplicated' in check_name:
|
||||
if "DatabaseReplicated" in check_name:
|
||||
result.append("USE_DATABASE_REPLICATED=1")
|
||||
if 'DatabaseOrdinary' in check_name:
|
||||
if "DatabaseOrdinary" in check_name:
|
||||
result.append("USE_DATABASE_ORDINARY=1")
|
||||
if 'wide parts enabled' in check_name:
|
||||
if "wide parts enabled" in check_name:
|
||||
result.append("USE_POLYMORPHIC_PARTS=1")
|
||||
|
||||
#temporary
|
||||
if 's3 storage' in check_name:
|
||||
# temporary
|
||||
if "s3 storage" in check_name:
|
||||
result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1")
|
||||
|
||||
if run_by_hash_total != 0:
|
||||
@ -40,36 +54,55 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_image_name(check_name):
|
||||
if 'stateless' in check_name.lower():
|
||||
return 'clickhouse/stateless-test'
|
||||
if 'stateful' in check_name.lower():
|
||||
return 'clickhouse/stateful-test'
|
||||
if "stateless" in check_name.lower():
|
||||
return "clickhouse/stateless-test"
|
||||
if "stateful" in check_name.lower():
|
||||
return "clickhouse/stateful-test"
|
||||
else:
|
||||
raise Exception(f"Cannot deduce image name based on check name {check_name}")
|
||||
|
||||
def get_run_command(builds_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, image, flaky_check, tests_to_run):
|
||||
additional_options = ['--hung-check']
|
||||
additional_options.append('--print-time')
|
||||
|
||||
def get_run_command(
|
||||
builds_path,
|
||||
repo_tests_path,
|
||||
result_path,
|
||||
server_log_path,
|
||||
kill_timeout,
|
||||
additional_envs,
|
||||
image,
|
||||
flaky_check,
|
||||
tests_to_run,
|
||||
):
|
||||
additional_options = ["--hung-check"]
|
||||
additional_options.append("--print-time")
|
||||
|
||||
if tests_to_run:
|
||||
additional_options += tests_to_run
|
||||
|
||||
additional_options_str = '-e ADDITIONAL_OPTIONS="' + ' '.join(additional_options) + '"'
|
||||
additional_options_str = (
|
||||
'-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
|
||||
)
|
||||
|
||||
envs = [f'-e MAX_RUN_TIME={int(0.9 * kill_timeout)}', '-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"']
|
||||
envs = [
|
||||
f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}",
|
||||
'-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"',
|
||||
]
|
||||
|
||||
if flaky_check:
|
||||
envs += ['-e NUM_TRIES=100', '-e MAX_RUN_TIME=1800']
|
||||
envs += ["-e NUM_TRIES=100", "-e MAX_RUN_TIME=1800"]
|
||||
|
||||
envs += [f'-e {e}' for e in additional_envs]
|
||||
envs += [f"-e {e}" for e in additional_envs]
|
||||
|
||||
env_str = ' '.join(envs)
|
||||
env_str = " ".join(envs)
|
||||
|
||||
return f"docker run --volume={builds_path}:/package_folder " \
|
||||
f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \
|
||||
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " \
|
||||
return (
|
||||
f"docker run --volume={builds_path}:/package_folder "
|
||||
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
|
||||
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server "
|
||||
f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
|
||||
)
|
||||
|
||||
|
||||
def get_tests_to_run(pr_info):
|
||||
@ -79,32 +112,43 @@ def get_tests_to_run(pr_info):
|
||||
return []
|
||||
|
||||
for fpath in pr_info.changed_files:
|
||||
if 'tests/queries/0_stateless/0' in fpath:
|
||||
logging.info('File %s changed and seems like stateless test', fpath)
|
||||
fname = fpath.split('/')[3]
|
||||
if "tests/queries/0_stateless/0" in fpath:
|
||||
logging.info("File %s changed and seems like stateless test", fpath)
|
||||
fname = fpath.split("/")[3]
|
||||
fname_without_ext = os.path.splitext(fname)[0]
|
||||
result.add(fname_without_ext + '.')
|
||||
result.add(fname_without_ext + ".")
|
||||
return list(result)
|
||||
|
||||
|
||||
def process_results(result_folder, server_log_path):
|
||||
test_results = []
|
||||
additional_files = []
|
||||
# Just upload all files from result_folder.
|
||||
# If task provides processed results, then it's responsible for content of result_folder.
|
||||
if os.path.exists(result_folder):
|
||||
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
|
||||
test_files = [
|
||||
f
|
||||
for f in os.listdir(result_folder)
|
||||
if os.path.isfile(os.path.join(result_folder, f))
|
||||
]
|
||||
additional_files = [os.path.join(result_folder, f) for f in test_files]
|
||||
|
||||
if os.path.exists(server_log_path):
|
||||
server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))]
|
||||
additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files]
|
||||
server_log_files = [
|
||||
f
|
||||
for f in os.listdir(server_log_path)
|
||||
if os.path.isfile(os.path.join(server_log_path, f))
|
||||
]
|
||||
additional_files = additional_files + [
|
||||
os.path.join(server_log_path, f) for f in server_log_files
|
||||
]
|
||||
|
||||
status = []
|
||||
status_path = os.path.join(result_folder, "check_status.tsv")
|
||||
if os.path.exists(status_path):
|
||||
logging.info("Found test_results.tsv")
|
||||
with open(status_path, 'r', encoding='utf-8') as status_file:
|
||||
status = list(csv.reader(status_file, delimiter='\t'))
|
||||
with open(status_path, "r", encoding="utf-8") as status_file:
|
||||
status = list(csv.reader(status_file, delimiter="\t"))
|
||||
|
||||
if len(status) != 1 or len(status[0]) != 2:
|
||||
logging.info("Files in result folder %s", os.listdir(result_folder))
|
||||
@ -119,14 +163,32 @@ def process_results(result_folder, server_log_path):
|
||||
logging.info("Files in result folder %s", os.listdir(result_folder))
|
||||
return "error", "Not found test_results.tsv", test_results, additional_files
|
||||
|
||||
with open(results_path, 'r', encoding='utf-8') as results_file:
|
||||
test_results = list(csv.reader(results_file, delimiter='\t'))
|
||||
with open(results_path, "r", encoding="utf-8") as results_file:
|
||||
test_results = list(csv.reader(results_file, delimiter="\t"))
|
||||
if len(test_results) == 0:
|
||||
return "error", "Empty test_results.tsv", test_results, additional_files
|
||||
|
||||
return state, description, test_results, additional_files
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("check_name")
|
||||
parser.add_argument("kill_timeout", type=int)
|
||||
parser.add_argument(
|
||||
"--validate-bugfix",
|
||||
action="store_true",
|
||||
help="Check that added tests failed on latest stable",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--post-commit-status",
|
||||
default="commit_status",
|
||||
choices=["commit_status", "file"],
|
||||
help="Where to public post commit status",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
@ -136,18 +198,38 @@ if __name__ == "__main__":
|
||||
repo_path = REPO_COPY
|
||||
reports_path = REPORTS_PATH
|
||||
|
||||
check_name = sys.argv[1]
|
||||
kill_timeout = int(sys.argv[2])
|
||||
args = parse_args()
|
||||
check_name = args.check_name
|
||||
kill_timeout = args.kill_timeout
|
||||
validate_bugix_check = args.validate_bugfix
|
||||
|
||||
flaky_check = 'flaky' in check_name.lower()
|
||||
flaky_check = "flaky" in check_name.lower()
|
||||
|
||||
run_changed_tests = flaky_check or validate_bugix_check
|
||||
gh = Github(get_best_robot_token())
|
||||
|
||||
pr_info = PRInfo(need_changed_files=flaky_check)
|
||||
pr_info = PRInfo(need_changed_files=run_changed_tests)
|
||||
|
||||
if 'RUN_BY_HASH_NUM' in os.environ:
|
||||
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM'))
|
||||
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL'))
|
||||
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]'
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
if validate_bugix_check and "pr-bugfix" not in pr_info.labels:
|
||||
if args.post_commit_status == "file":
|
||||
post_commit_status_to_file(
|
||||
os.path.join(temp_path, "post_commit_status.tsv"),
|
||||
"Skipped (no pr-bugfix)",
|
||||
"success",
|
||||
"null",
|
||||
)
|
||||
logging.info("Skipping '%s' (no pr-bugfix)", check_name)
|
||||
sys.exit(0)
|
||||
|
||||
if "RUN_BY_HASH_NUM" in os.environ:
|
||||
run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
|
||||
run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
|
||||
check_name_with_group = (
|
||||
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
|
||||
)
|
||||
else:
|
||||
run_by_hash_num = 0
|
||||
run_by_hash_total = 0
|
||||
@ -158,15 +240,23 @@ if __name__ == "__main__":
|
||||
logging.info("Check is already finished according to github status, exiting")
|
||||
sys.exit(0)
|
||||
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
tests_to_run = []
|
||||
if flaky_check:
|
||||
if run_changed_tests:
|
||||
tests_to_run = get_tests_to_run(pr_info)
|
||||
if not tests_to_run:
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(context=check_name_with_group, description='Not found changed stateless tests', state='success')
|
||||
state = override_status("success", check_name, validate_bugix_check)
|
||||
if args.post_commit_status == "commit_status":
|
||||
commit.create_status(
|
||||
context=check_name_with_group,
|
||||
description=NO_CHANGES_MSG,
|
||||
state=state,
|
||||
)
|
||||
elif args.post_commit_status == "file":
|
||||
fpath = os.path.join(temp_path, "post_commit_status.tsv")
|
||||
post_commit_status_to_file(
|
||||
fpath, description=NO_CHANGES_MSG, state=state, report_url="null"
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
image_name = get_image_name(check_name)
|
||||
@ -178,7 +268,10 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(packages_path):
|
||||
os.makedirs(packages_path)
|
||||
|
||||
download_all_deb_packages(check_name, reports_path, packages_path)
|
||||
if validate_bugix_check:
|
||||
download_previous_release(packages_path)
|
||||
else:
|
||||
download_all_deb_packages(check_name, reports_path, packages_path)
|
||||
|
||||
server_log_path = os.path.join(temp_path, "server_log")
|
||||
if not os.path.exists(server_log_path):
|
||||
@ -190,8 +283,23 @@ if __name__ == "__main__":
|
||||
|
||||
run_log_path = os.path.join(result_path, "runlog.log")
|
||||
|
||||
additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total)
|
||||
run_command = get_run_command(packages_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run)
|
||||
additional_envs = get_additional_envs(
|
||||
check_name, run_by_hash_num, run_by_hash_total
|
||||
)
|
||||
if validate_bugix_check:
|
||||
additional_envs.append("GLOBAL_TAGS=no-random-settings")
|
||||
|
||||
run_command = get_run_command(
|
||||
packages_path,
|
||||
repo_tests_path,
|
||||
result_path,
|
||||
server_log_path,
|
||||
kill_timeout,
|
||||
additional_envs,
|
||||
docker_image,
|
||||
flaky_check,
|
||||
tests_to_run,
|
||||
)
|
||||
logging.info("Going to run func tests: %s", run_command)
|
||||
|
||||
with TeePopen(run_command, run_log_path) as process:
|
||||
@ -203,24 +311,55 @@ if __name__ == "__main__":
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
|
||||
state, description, test_results, additional_logs = process_results(result_path, server_log_path)
|
||||
state = override_status(state, check_name)
|
||||
state, description, test_results, additional_logs = process_results(
|
||||
result_path, server_log_path
|
||||
)
|
||||
state = override_status(state, check_name, validate_bugix_check)
|
||||
|
||||
ch_helper = ClickHouseHelper()
|
||||
mark_flaky_tests(ch_helper, check_name, test_results)
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group)
|
||||
report_url = upload_results(
|
||||
s3_helper,
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
test_results,
|
||||
[run_log_path] + additional_logs,
|
||||
check_name_with_group,
|
||||
)
|
||||
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url)
|
||||
print(f"::notice:: {check_name} Report url: {report_url}")
|
||||
if args.post_commit_status == "commit_status":
|
||||
post_commit_status(
|
||||
gh, pr_info.sha, check_name_with_group, description, state, report_url
|
||||
)
|
||||
elif args.post_commit_status == "file":
|
||||
post_commit_status_to_file(
|
||||
os.path.join(temp_path, "post_commit_status.tsv"),
|
||||
description,
|
||||
state,
|
||||
report_url,
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
f'Unknown post_commit_status option "{args.post_commit_status}"'
|
||||
)
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_results,
|
||||
state,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
check_name_with_group,
|
||||
)
|
||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
||||
|
||||
if state != 'success':
|
||||
if 'force-tests' in pr_info.labels:
|
||||
if state != "success":
|
||||
if "force-tests" in pr_info.labels:
|
||||
print("'force-tests' enabled, will report success")
|
||||
else:
|
||||
sys.exit(1)
|
||||
|
@ -2,13 +2,15 @@
import boto3 # type: ignore
from github import Github # type: ignore


def get_parameter_from_ssm(name, decrypt=True, client=None):
if not client:
client = boto3.client('ssm', region_name='us-east-1')
return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value']
client = boto3.client("ssm", region_name="us-east-1")
return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]


def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4):
client = boto3.client('ssm', region_name='us-east-1')
client = boto3.client("ssm", region_name="us-east-1")
tokens = {}
for i in range(1, total_tokens + 1):
token_name = token_prefix_env_name + str(i)
Some files were not shown because too many files have changed in this diff.