Merge branch 'master' into fix_assertion_ddl_worker

This commit is contained in:
mergify[bot] 2022-03-23 17:06:33 +00:00 committed by GitHub
commit 13bc93c171
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
814 changed files with 97004 additions and 45420 deletions

View File

@ -7,6 +7,7 @@ env:
"on":
schedule:
- cron: '13 3 * * *'
workflow_dispatch:
jobs:
DockerHubPushAarch64:

View File

@ -1733,6 +1733,51 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
TestsBugfixCheck:
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/tests_bugfix_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Tests bugfix validate check (actions)
KILL_TIMEOUT=3600
REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Bugfix test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
TEMP_PATH="${TEMP_PATH}/integration" \
REPORTS_PATH="${REPORTS_PATH}/integration" \
python3 integration_test_check.py "Integration tests bugfix validate check" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
TEMP_PATH="${TEMP_PATH}/stateless" \
REPORTS_PATH="${REPORTS_PATH}/stateless" \
python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
############################ FUNCTIONAl STATEFUL TESTS #######################################
##############################################################################################

View File

@ -267,7 +267,10 @@ endif ()
# Allows to build stripped binary in a separate directory
if (OBJCOPY_PATH AND READELF_PATH)
set(BUILD_STRIPPED_BINARIES_PREFIX "" CACHE STRING "Build stripped binaries with debug info in separate directory")
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
if (INSTALL_STRIPPED_BINARIES)
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
endif()
endif()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

View File

@ -4,11 +4,12 @@
import sys
import json
def parse_block(block=[], options=[]):
#print('block is here', block)
#show_query = False
#show_query = options.show_query
# print('block is here', block)
# show_query = False
# show_query = options.show_query
result = []
query = block[0].strip()
if len(block) > 4:
@ -20,9 +21,9 @@ def parse_block(block=[], options=[]):
timing2 = block[2].strip().split()[1]
timing3 = block[3].strip().split()[1]
if options.show_queries:
result.append( query )
result.append(query)
if not options.show_first_timings:
result += [ timing1 , timing2, timing3 ]
result += [timing1, timing2, timing3]
else:
result.append(timing1)
return result
@ -37,12 +38,12 @@ def read_stats_file(options, fname):
for line in f.readlines():
if 'SELECT' in line:
if "SELECT" in line:
if len(block) > 1:
result.append( parse_block(block, options) )
block = [ line ]
elif 'Time:' in line:
block.append( line )
result.append(parse_block(block, options))
block = [line]
elif "Time:" in line:
block.append(line)
return result
@ -50,7 +51,7 @@ def read_stats_file(options, fname):
def compare_stats_files(options, arguments):
result = []
file_output = []
pyplot_colors = ['y', 'b', 'g', 'r']
pyplot_colors = ["y", "b", "g", "r"]
for fname in arguments[1:]:
file_output.append((read_stats_file(options, fname)))
if len(file_output[0]) > 0:
@ -58,65 +59,92 @@ def compare_stats_files(options, arguments):
for idx, data_set in enumerate(file_output):
int_result = []
for timing in data_set:
int_result.append(float(timing[0])) #y values
result.append([[x for x in range(0, len(int_result)) ], int_result,
pyplot_colors[idx] + '^' ] )
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )
int_result.append(float(timing[0])) # y values
result.append(
[
[x for x in range(0, len(int_result))],
int_result,
pyplot_colors[idx] + "^",
]
)
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )
return result
def parse_args():
from optparse import OptionParser
parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries")
parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings")
parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode")
parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
parser.add_option(
"-q",
"--show-queries",
help="Show statements along with timings",
action="store_true",
dest="show_queries",
)
parser.add_option(
"-f",
"--show-first-timings",
help="Show only first tries timings",
action="store_true",
dest="show_first_timings",
)
parser.add_option(
"-c",
"--compare-mode",
help="Prepare output for pyplot comparing result files.",
action="store",
dest="compare_mode",
)
(options, arguments) = parser.parse_args(sys.argv)
if len(arguments) < 2:
parser.print_usage()
sys.exit(1)
return ( options, arguments )
return (options, arguments)
def gen_pyplot_code(options, arguments):
result = ''
result = ""
data_sets = compare_stats_files(options, arguments)
for idx, data_set in enumerate(data_sets, start=0):
x_values, y_values, line_style = data_set
result += '\nplt.plot('
result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style )
result += ', label=\'%s try\')' % idx
print('import matplotlib.pyplot as plt')
result += "\nplt.plot("
result += "%s, %s, '%s'" % (x_values, y_values, line_style)
result += ", label='%s try')" % idx
print("import matplotlib.pyplot as plt")
print(result)
print( 'plt.xlabel(\'Try number\')' )
print( 'plt.ylabel(\'Timing\')' )
print( 'plt.title(\'Benchmark query timings\')' )
print('plt.legend()')
print('plt.show()')
print("plt.xlabel('Try number')")
print("plt.ylabel('Timing')")
print("plt.title('Benchmark query timings')")
print("plt.legend()")
print("plt.show()")
def gen_html_json(options, arguments):
tuples = read_stats_file(options, arguments[1])
print('{')
print("{")
print('"system: GreenPlum(x2),')
print(('"version": "%s",' % '4.3.9.1'))
print(('"version": "%s",' % "4.3.9.1"))
print('"data_size": 10000000,')
print('"time": "",')
print('"comments": "",')
print('"result":')
print('[')
print("[")
for s in tuples:
print(s)
print(']')
print('}')
print("]")
print("}")
def main():
( options, arguments ) = parse_args()
(options, arguments) = parse_args()
if len(arguments) > 2:
gen_pyplot_code(options, arguments)
else:
gen_html_json(options, arguments)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@ -1,15 +1,14 @@
#!/usr/bin/env bash
BINARY_PATH=$1
BINARY_NAME=$(basename $BINARY_PATH)
BINARY_NAME=$(basename "$BINARY_PATH")
DESTINATION_STRIPPED_DIR=$2
OBJCOPY_PATH=${3:objcopy}
READELF_PATH=${4:readelf}
BUILD_ID=$($READELF_PATH -n $1 | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID_PREFIX=${BUILD_ID:0:2}
BUILD_ID_SUFFIX=${BUILD_ID:2}
TEMP_BINARY_PATH="${BINARY_PATH}_temp"
DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
@ -17,9 +16,13 @@ DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"
$OBJCOPY_PATH --only-keep-debug "$BINARY_PATH" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
touch "$TEMP_BINARY_PATH"
$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$BINARY_PATH" "$TEMP_BINARY_PATH"
$OBJCOPY_PATH --strip-all "$TEMP_BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
rm -f "$TEMP_BINARY_PATH"
cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

View File

@ -95,6 +95,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
&& apt-get install gcc-11 g++-11 --yes \
&& apt-get clean
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
ARG NFPM_VERSION=2.15.0
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \
&& dpkg -i /tmp/nfpm.deb \
&& rm /tmp/nfpm.deb
COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1 | ts"]
CMD ["bash", "-c", "/build.sh 2>&1"]

View File

@ -1,7 +1,13 @@
#!/usr/bin/env bash
exec &> >(ts)
set -x -e
cache_status () {
ccache --show-config ||:
ccache --show-stats ||:
}
mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64
@ -19,15 +25,23 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
env
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
ccache --show-config ||:
ccache --show-stats ||:
cache_status
# clear cache stats
ccache --zero-stats ||:
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086
ninja $NINJA_FLAGS clickhouse-bundle
ccache --show-config ||:
ccache --show-stats ||:
cache_status
if [ -n "$MAKE_DEB" ]; then
rm -rf /build/packages/root
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086
DESTDIR=/build/packages/root ninja $NINJA_FLAGS install
bash -x /build/packages/build
fi
mv ./programs/clickhouse* /output
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
@ -84,8 +98,7 @@ fi
# ../docker/packager/other/fuzzer.sh
# fi
ccache --show-config ||:
ccache --show-stats ||:
cache_status
if [ "${CCACHE_DEBUG:-}" == "1" ]
then

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import subprocess
import os
import argparse
@ -8,36 +8,39 @@ import sys
SCRIPT_PATH = os.path.realpath(__file__)
IMAGE_MAP = {
"deb": "clickhouse/deb-builder",
"binary": "clickhouse/binary-builder",
}
def check_image_exists_locally(image_name):
try:
output = subprocess.check_output("docker images -q {} 2> /dev/null".format(image_name), shell=True)
output = subprocess.check_output(
f"docker images -q {image_name} 2> /dev/null", shell=True
)
return output != ""
except subprocess.CalledProcessError as ex:
except subprocess.CalledProcessError:
return False
def pull_image(image_name):
try:
subprocess.check_call("docker pull {}".format(image_name), shell=True)
subprocess.check_call(f"docker pull {image_name}", shell=True)
return True
except subprocess.CalledProcessError as ex:
logging.info("Cannot pull image {}".format(image_name))
except subprocess.CalledProcessError:
logging.info(f"Cannot pull image {image_name}".format())
return False
def build_image(image_name, filepath):
context = os.path.dirname(filepath)
build_cmd = "docker build --network=host -t {} -f {} {}".format(image_name, filepath, context)
logging.info("Will build image with cmd: '{}'".format(build_cmd))
build_cmd = f"docker build --network=host -t {image_name} -f {filepath} {context}"
logging.info("Will build image with cmd: '%s'", build_cmd)
subprocess.check_call(
build_cmd,
shell=True,
)
def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version):
def run_docker_image_with_env(
image_name, output, env_variables, ch_root, ccache_dir, docker_image_version
):
env_part = " -e ".join(env_variables)
if env_part:
env_part = " -e " + env_part
@ -47,28 +50,52 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
else:
interactive = ""
cmd = "docker run --network=host --rm --volume={output_path}:/output --volume={ch_root}:/build --volume={ccache_dir}:/ccache {env} {interactive} {img_name}".format(
output_path=output,
ch_root=ch_root,
ccache_dir=ccache_dir,
env=env_part,
img_name=image_name + ":" + docker_image_version,
interactive=interactive
cmd = (
f"docker run --network=host --rm --volume={output}:/output "
f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} "
f"{interactive} {image_name}:{docker_image_version}"
)
logging.info("Will build ClickHouse pkg with cmd: '{}'".format(cmd))
logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd)
subprocess.check_call(cmd, shell=True)
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
def is_release_build(build_type, package_type, sanitizer, split_binary):
return (
build_type == ""
and package_type == "deb"
and sanitizer == ""
and not split_binary
)
def parse_env_variables(
build_type,
compiler,
sanitizer,
package_type,
image_type,
cache,
distcc_hosts,
split_binary,
clang_tidy,
version,
author,
official,
additional_pkgs,
with_coverage,
with_binaries,
):
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
ARM_SUFFIX = "-aarch64"
FREEBSD_SUFFIX = "-freebsd"
PPC_SUFFIX = '-ppc64le'
PPC_SUFFIX = "-ppc64le"
result = []
cmake_flags = ['$CMAKE_FLAGS']
result.append("OUTPUT_DIR=/output")
cmake_flags = ["$CMAKE_FLAGS"]
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
@ -77,46 +104,72 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
if is_cross_darwin:
cc = compiler[:-len(DARWIN_SUFFIX)]
cc = compiler[: -len(DARWIN_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/x86_64-apple-darwin-ar")
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/x86_64-apple-darwin-install_name_tool")
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib")
cmake_flags.append(
"-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/"
"x86_64-apple-darwin-install_name_tool"
)
cmake_flags.append(
"-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib"
)
cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake")
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake"
)
elif is_cross_darwin_arm:
cc = compiler[:-len(DARWIN_ARM_SUFFIX)]
cc = compiler[: -len(DARWIN_ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool")
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib")
cmake_flags.append(
"-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/"
"aarch64-apple-darwin-install_name_tool"
)
cmake_flags.append(
"-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib"
)
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake")
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake"
)
elif is_cross_arm:
cc = compiler[:-len(ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
result.append("DEB_ARCH_FLAG=-aarm64")
cc = compiler[: -len(ARM_SUFFIX)]
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake"
)
result.append("DEB_ARCH=arm64")
elif is_cross_freebsd:
cc = compiler[:-len(FREEBSD_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake")
cc = compiler[: -len(FREEBSD_SUFFIX)]
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake"
)
elif is_cross_ppc:
cc = compiler[:-len(PPC_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake")
cc = compiler[: -len(PPC_SUFFIX)]
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
)
else:
cc = compiler
result.append("DEB_ARCH_FLAG=-aamd64")
result.append("DEB_ARCH=amd64")
cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++')
cxx = cc.replace("gcc", "g++").replace("clang", "clang++")
if image_type == "deb":
result.append("DEB_CC={}".format(cc))
result.append("DEB_CXX={}".format(cxx))
# For building fuzzers
result.append("CC={}".format(cc))
result.append("CXX={}".format(cxx))
elif image_type == "binary":
result.append("CC={}".format(cc))
result.append("CXX={}".format(cxx))
cmake_flags.append('-DCMAKE_C_COMPILER=`which {}`'.format(cc))
cmake_flags.append('-DCMAKE_CXX_COMPILER=`which {}`'.format(cxx))
result.append("MAKE_DEB=true")
cmake_flags.append("-DENABLE_TESTS=0")
cmake_flags.append("-DENABLE_UTILS=0")
cmake_flags.append("-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON")
cmake_flags.append("-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON")
cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON")
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
if is_release_build(build_type, package_type, sanitizer, split_binary):
cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")
result.append(f"CC={cc}")
result.append(f"CXX={cxx}")
cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}")
cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")
# Create combined output archive for split build and for performance tests.
if package_type == "performance":
@ -126,12 +179,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
result.append("COMBINED_OUTPUT=shared_build")
if sanitizer:
result.append("SANITIZER={}".format(sanitizer))
result.append(f"SANITIZER={sanitizer}")
if build_type:
result.append("BUILD_TYPE={}".format(build_type))
result.append(f"BUILD_TYPE={build_type.capitalize()}")
else:
result.append("BUILD_TYPE=None")
if cache == 'distcc':
result.append("CCACHE_PREFIX={}".format(cache))
if cache == "distcc":
result.append(f"CCACHE_PREFIX={cache}")
if cache:
result.append("CCACHE_DIR=/ccache")
@ -142,109 +197,188 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
# result.append("CCACHE_UMASK=777")
if distcc_hosts:
hosts_with_params = ["{}/24,lzo".format(host) for host in distcc_hosts] + ["localhost/`nproc`"]
result.append('DISTCC_HOSTS="{}"'.format(" ".join(hosts_with_params)))
hosts_with_params = [f"{host}/24,lzo" for host in distcc_hosts] + [
"localhost/`nproc`"
]
result.append('DISTCC_HOSTS="' + " ".join(hosts_with_params) + '"')
elif cache == "distcc":
result.append('DISTCC_HOSTS="{}"'.format("localhost/`nproc`"))
result.append('DISTCC_HOSTS="localhost/`nproc`"')
if alien_pkgs:
result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'")
if additional_pkgs:
result.append("MAKE_APK=true")
result.append("MAKE_RPM=true")
result.append("MAKE_TGZ=true")
if with_binaries == "programs":
result.append('BINARY_OUTPUT=programs')
result.append("BINARY_OUTPUT=programs")
elif with_binaries == "tests":
result.append('ENABLE_TESTS=1')
result.append('BINARY_OUTPUT=tests')
cmake_flags.append('-DENABLE_TESTS=1')
result.append("ENABLE_TESTS=1")
result.append("BINARY_OUTPUT=tests")
cmake_flags.append("-DENABLE_TESTS=1")
if split_binary:
cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1')
cmake_flags.append(
"-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 "
"-DCLICKHOUSE_SPLIT_BINARY=1"
)
# We can't always build utils because it requires too much space, but
# we have to build them at least in some way in CI. The split build is
# probably the least heavy disk-wise.
cmake_flags.append('-DENABLE_UTILS=1')
cmake_flags.append("-DENABLE_UTILS=1")
if clang_tidy:
cmake_flags.append('-DENABLE_CLANG_TIDY=1')
cmake_flags.append('-DENABLE_UTILS=1')
cmake_flags.append('-DENABLE_TESTS=1')
cmake_flags.append('-DENABLE_EXAMPLES=1')
cmake_flags.append("-DENABLE_CLANG_TIDY=1")
cmake_flags.append("-DENABLE_UTILS=1")
cmake_flags.append("-DENABLE_TESTS=1")
cmake_flags.append("-DENABLE_EXAMPLES=1")
# Don't stop on first error to find more clang-tidy errors in one run.
result.append('NINJA_FLAGS=-k0')
result.append("NINJA_FLAGS=-k0")
if with_coverage:
cmake_flags.append('-DWITH_COVERAGE=1')
cmake_flags.append("-DWITH_COVERAGE=1")
if version:
result.append("VERSION_STRING='{}'".format(version))
result.append(f"VERSION_STRING='{version}'")
if author:
result.append("AUTHOR='{}'".format(author))
result.append(f"AUTHOR='{author}'")
if official:
cmake_flags.append('-DYANDEX_OFFICIAL_BUILD=1')
cmake_flags.append("-DYANDEX_OFFICIAL_BUILD=1")
result.append('CMAKE_FLAGS="' + ' '.join(cmake_flags) + '"')
result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')
return result
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse building script using prebuilt Docker image")
# 'performance' creates a combined .tgz with server and configs to be used for performance test.
parser.add_argument("--package-type", choices=['deb', 'binary', 'performance'], required=True)
parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="ClickHouse building script using prebuilt Docker image",
)
# 'performance' creates a combined .tgz with server
# and configs to be used for performance test.
parser.add_argument(
"--package-type",
choices=("deb", "binary", "performance"),
required=True,
help="a build type",
)
parser.add_argument(
"--clickhouse-repo-path",
default=os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir
),
help="ClickHouse git repository",
)
parser.add_argument("--output-dir", required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
"clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64",
"clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", "clang-13-ppc64le",
"clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13")
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
parser.add_argument(
"--compiler",
choices=(
"clang-11",
"clang-11-darwin",
"clang-11-darwin-aarch64",
"clang-11-aarch64",
"clang-12",
"clang-12-darwin",
"clang-12-darwin-aarch64",
"clang-12-aarch64",
"clang-13",
"clang-13-darwin",
"clang-13-darwin-aarch64",
"clang-13-aarch64",
"clang-13-ppc64le",
"clang-11-freebsd",
"clang-12-freebsd",
"clang-13-freebsd",
"gcc-11",
),
default="clang-13",
help="a compiler to use",
)
parser.add_argument(
"--sanitizer",
choices=("address", "thread", "memory", "undefined", ""),
default="",
)
parser.add_argument("--split-binary", action="store_true")
parser.add_argument("--clang-tidy", action="store_true")
parser.add_argument("--cache", choices=("", "ccache", "distcc"), default="")
parser.add_argument("--ccache_dir", default= os.getenv("HOME", "") + '/.ccache')
parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="")
parser.add_argument(
"--ccache_dir",
default=os.getenv("HOME", "") + "/.ccache",
help="a directory with ccache",
)
parser.add_argument("--distcc-hosts", nargs="+")
parser.add_argument("--force-build-image", action="store_true")
parser.add_argument("--version")
parser.add_argument("--author", default="clickhouse")
parser.add_argument("--author", default="clickhouse", help="a package author")
parser.add_argument("--official", action="store_true")
parser.add_argument("--alien-pkgs", nargs='+', default=[])
parser.add_argument("--additional-pkgs", action="store_true")
parser.add_argument("--with-coverage", action="store_true")
parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="")
parser.add_argument("--docker-image-version", default="latest")
parser.add_argument(
"--with-binaries", choices=("programs", "tests", ""), default=""
)
parser.add_argument(
"--docker-image-version", default="latest", help="docker image tag to use"
)
args = parser.parse_args()
if not os.path.isabs(args.output_dir):
args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir))
image_type = 'binary' if args.package_type == 'performance' else args.package_type
image_name = IMAGE_MAP[image_type]
image_type = "binary" if args.package_type == "performance" else args.package_type
image_name = "clickhouse/binary-builder"
if not os.path.isabs(args.clickhouse_repo_path):
ch_root = os.path.abspath(os.path.join(os.getcwd(), args.clickhouse_repo_path))
else:
ch_root = args.clickhouse_repo_path
if args.alien_pkgs and not image_type == "deb":
raise Exception("Can add alien packages only in deb build")
if args.additional_pkgs and image_type != "deb":
raise Exception("Can build additional packages only in deb build")
if args.with_binaries != "" and not image_type == "deb":
if args.with_binaries != "" and image_type != "deb":
raise Exception("Can add additional binaries only in deb build")
if args.with_binaries != "" and image_type == "deb":
logging.info("Should place {} to output".format(args.with_binaries))
logging.info("Should place %s to output", args.with_binaries)
dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile")
image_with_version = image_name + ":" + args.docker_image_version
if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image:
if (
image_type != "freebsd"
and not check_image_exists_locally(image_name)
or args.force_build_image
):
if not pull_image(image_with_version) or args.force_build_image:
build_image(image_with_version, dockerfile)
env_prepared = parse_env_variables(
args.build_type, args.compiler, args.sanitizer, args.package_type, image_type,
args.cache, args.distcc_hosts, args.split_binary, args.clang_tidy,
args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries)
args.build_type,
args.compiler,
args.sanitizer,
args.package_type,
image_type,
args.cache,
args.distcc_hosts,
args.split_binary,
args.clang_tidy,
args.version,
args.author,
args.official,
args.additional_pkgs,
args.with_coverage,
args.with_binaries,
)
run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir, args.docker_image_version)
logging.info("Output placed into {}".format(args.output_dir))
run_docker_image_with_env(
image_name,
args.output_dir,
env_prepared,
ch_root,
args.ccache_dir,
args.docker_image_version,
)
logging.info("Output placed into %s", args.output_dir)

View File

@ -11,7 +11,7 @@ def removesuffix(text, suffix):
https://www.python.org/dev/peps/pep-0616/
"""
if suffix and text.endswith(suffix):
return text[:-len(suffix)]
return text[: -len(suffix)]
else:
return text[:]

View File

@ -3,55 +3,55 @@ import subprocess
import datetime
from flask import Flask, flash, request, redirect, url_for
def run_command(command, wait=False):
print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
lines = []
p = subprocess.Popen(command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
shell=True)
p = subprocess.Popen(
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
)
if wait:
for l in iter(p.stdout.readline, b''):
for l in iter(p.stdout.readline, b""):
lines.append(l)
p.poll()
return (lines, p.returncode)
else:
return(iter(p.stdout.readline, b''), 0)
return (iter(p.stdout.readline, b""), 0)
UPLOAD_FOLDER = './'
ALLOWED_EXTENSIONS = {'txt', 'sh'}
UPLOAD_FOLDER = "./"
ALLOWED_EXTENSIONS = {"txt", "sh"}
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
@app.route('/')
@app.route("/")
def hello_world():
return 'Hello World'
return "Hello World"
def allowed_file(filename):
return '.' in filename and \
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
@app.route('/upload', methods=['GET', 'POST'])
@app.route("/upload", methods=["GET", "POST"])
def upload_file():
if request.method == 'POST':
if request.method == "POST":
# check if the post request has the file part
if 'file' not in request.files:
flash('No file part')
if "file" not in request.files:
flash("No file part")
return redirect(request.url)
file = request.files['file']
file = request.files["file"]
# If the user does not select a file, the browser submits an
# empty file without a filename.
if file.filename == '':
flash('No selected file')
if file.filename == "":
flash("No selected file")
return redirect(request.url)
if file and allowed_file(file.filename):
filename = file.filename
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
return redirect(url_for('upload_file', name=filename))
return '''
file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
return redirect(url_for("upload_file", name=filename))
return """
<!doctype html>
<title>Upload new File</title>
<h1>Upload new File</h1>
@ -59,12 +59,15 @@ def upload_file():
<input type=file name=file>
<input type=submit value=Upload>
</form>
'''
@app.route('/run', methods=['GET', 'POST'])
"""
@app.route("/run", methods=["GET", "POST"])
def parse_request():
data = request.data # data is empty
run_command(data, wait=True)
return 'Ok'
return "Ok"
if __name__ == '__main__':
app.run(port=5011)
if __name__ == "__main__":
app.run(port=5011)

View File

@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
from threading import Thread
from scipy import stats
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
logging.basicConfig(
format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
)
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds
current = time.perf_counter()
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
print(
f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
)
stage_start_seconds = current
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
return (
s.replace("\\", "\\\\")
.replace("\t", "\\t")
.replace("\n", "\\n")
.replace("\r", "")
)
parser = argparse.ArgumentParser(description='Run performance test.')
parser = argparse.ArgumentParser(description="Run performance test.")
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
parser.add_argument(
"file",
metavar="FILE",
type=argparse.FileType("r", encoding="utf-8"),
nargs=1,
help="test description file",
)
parser.add_argument(
"--host",
nargs="*",
default=["localhost"],
help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
)
parser.add_argument(
"--port",
nargs="*",
default=[9000],
help="Space-separated list of server port(s). Corresponds to '--host' options.",
)
parser.add_argument(
"--runs", type=int, default=1, help="Number of query runs per server."
)
parser.add_argument(
"--max-queries",
type=int,
default=None,
help="Test no more than this number of queries, chosen at random.",
)
parser.add_argument(
"--queries-to-run",
nargs="*",
type=int,
default=None,
help="Space-separated list of indexes of queries to test.",
)
parser.add_argument(
"--max-query-seconds",
type=int,
default=15,
help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--prewarm-max-query-seconds",
type=int,
default=180,
help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--profile-seconds",
type=int,
default=0,
help="For how many seconds to profile a query for which the performance has changed.",
)
parser.add_argument(
"--long", action="store_true", help="Do not skip the tests tagged as long."
)
parser.add_argument(
"--print-queries", action="store_true", help="Print test queries and exit."
)
parser.add_argument(
"--print-settings", action="store_true", help="Print test settings and exit."
)
parser.add_argument(
"--keep-created-tables",
action="store_true",
help="Don't drop the created tables after the test.",
)
parser.add_argument(
"--use-existing-tables",
action="store_true",
help="Don't create or drop the tables, use the existing ones instead.",
)
args = parser.parse_args()
reportStageEnd('start')
reportStageEnd("start")
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
reportStageEnd('parse')
reportStageEnd("parse")
# Process query parameters
subst_elems = root.findall('substitutions/substitution')
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
subst_elems = root.findall("substitutions/substitution")
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
name = e.find('name').text
values = [v.text for v in e.findall('values/value')]
name = e.find("name").text
values = [v.text for v in e.findall("values/value")]
if not values:
raise Exception(f'No values given for substitution {{{name}}}')
raise Exception(f"No values given for substitution {{{name}}}")
available_parameters[name] = values
@ -78,7 +146,7 @@ for e in subst_elems:
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates, other_templates = []):
def substitute_parameters(query_templates, other_templates=[]):
query_results = []
other_results = [[]] * (len(other_templates))
for i, q in enumerate(query_templates):
@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
# and reporting the queries marked as short.
test_queries = []
is_short = []
for e in root.findall('query'):
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]])
for e in root.findall("query"):
new_queries, [new_is_short] = substitute_parameters(
[e.text], [[e.attrib.get("short", "0")]]
)
test_queries += new_queries
is_short += [eval(s) for s in new_is_short]
assert(len(test_queries) == len(is_short))
assert len(test_queries) == len(is_short)
# If we're given a list of queries to run, check that it makes sense.
for i in args.queries_to_run or []:
if i < 0 or i >= len(test_queries):
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
print(
f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
)
exit(1)
# If we're only asked to print the queries, do that and exit.
@ -125,60 +197,65 @@ if args.print_queries:
# Print short queries
for i, s in enumerate(is_short):
if s:
print(f'short\t{i}')
print(f"short\t{i}")
# If we're only asked to print the settings, do that and exit. These are settings
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
# '--max_memory_usage=10000000'.
if args.print_settings:
for s in root.findall('settings/*'):
print(f'--{s.tag}={s.text}')
for s in root.findall("settings/*"):
print(f"--{s.tag}={s.text}")
exit(0)
# Skip long tests
if not args.long:
for tag in root.findall('.//tag'):
if tag.text == 'long':
print('skipped\tTest is tagged as long.')
for tag in root.findall(".//tag"):
if tag.text == "long":
print("skipped\tTest is tagged as long.")
sys.exit(0)
# Print report threshold for the test if it is set.
ignored_relative_change = 0.05
if 'max_ignored_relative_change' in root.attrib:
if "max_ignored_relative_change" in root.attrib:
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
print(f'report-threshold\t{ignored_relative_change}')
print(f"report-threshold\t{ignored_relative_change}")
reportStageEnd('before-connect')
reportStageEnd("before-connect")
# Open connections
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
servers = [
{"host": host or args.host[0], "port": port or args.port[0]}
for (host, port) in itertools.zip_longest(args.host, args.port)
]
# Force settings_is_important to fail queries on unknown settings.
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
all_connections = [
clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
]
for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
reportStageEnd('connect')
reportStageEnd("connect")
if not args.use_existing_tables:
# Run drop queries, ignoring errors. Do this before all other activity,
# because clickhouse_driver disconnects on error (this is not configurable),
# and the new connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_query_templates = [q.text for q in root.findall("drop_query")]
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
except:
pass
reportStageEnd('drop-1')
reportStageEnd("drop-1")
# Apply settings.
settings = root.findall('settings/*')
settings = root.findall("settings/*")
for conn_index, c in enumerate(all_connections):
for s in settings:
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
# the test, which is wrong.
c.execute("select 1")
reportStageEnd('settings')
reportStageEnd("settings")
# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
tables = [e.text for e in root.findall("preconditions/table_exists")]
for t in tables:
for c in all_connections:
try:
res = c.execute("select 1 from {} limit 1".format(t))
except:
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2])
print(f'skipped\t{tsv_escape(skipped_message)}')
skipped_message = " ".join(exception_message.split("\n")[:2])
print(f"skipped\t{tsv_escape(skipped_message)}")
sys.exit(0)
reportStageEnd('preconditions')
reportStageEnd("preconditions")
if not args.use_existing_tables:
# Run create and fill queries. We will run them simultaneously for both
# servers, to save time. The weird XML search + filter is because we want to
# keep the relative order of elements, and etree doesn't support the
# appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*')
if q.tag in ('create_query', 'fill_query')]
create_query_templates = [
q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
]
create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on
# errors, and temporary tables are destroyed. We want to be able to continue
# after some errors.
for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'",
file = sys.stderr)
if re.search("create temporary table", q, flags=re.IGNORECASE):
print(
f"Temporary tables are not allowed in performance tests: '{q}'",
file=sys.stderr,
)
sys.exit(1)
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")
threads = [
Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
Thread(target=do_create, args=(connection, index, create_queries))
for index, connection in enumerate(all_connections)
]
for t in threads:
t.start()
@ -238,14 +319,16 @@ if not args.use_existing_tables:
for t in threads:
t.join()
reportStageEnd('create')
reportStageEnd("create")
# By default, test all queries.
queries_to_run = range(0, len(test_queries))
if args.max_queries:
# If specified, test a limited number of queries chosen at random.
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
queries_to_run = random.sample(
range(0, len(test_queries)), min(len(test_queries), args.max_queries)
)
if args.queries_to_run:
# Run the specified queries.
@ -255,16 +338,16 @@ if args.queries_to_run:
profile_total_seconds = 0
for query_index in queries_to_run:
q = test_queries[query_index]
query_prefix = f'{test_name}.query{query_index}'
query_prefix = f"{test_name}.query{query_index}"
# We have some crazy long queries (about 100kB), so trim them to a sane
# length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index.
query_display_name = q
if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({query_index})'
query_display_name = f"{query_display_name[:1000]}...({query_index})"
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")
# Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc.
@ -272,10 +355,10 @@ for query_index in queries_to_run:
# new one. We want to run them on the new server only, so that the PR author
# can ensure that the test works properly. Remember the errors we had on
# each server.
query_error_on_connection = [None] * len(all_connections);
query_error_on_connection = [None] * len(all_connections)
for conn_index, c in enumerate(all_connections):
try:
prewarm_id = f'{query_prefix}.prewarm0'
prewarm_id = f"{query_prefix}.prewarm0"
try:
# During the warmup runs, we will also:
@ -283,25 +366,30 @@ for query_index in queries_to_run:
# * collect profiler traces, which might be helpful for analyzing
# test coverage. We disable profiler for normal runs because
# it makes the results unstable.
res = c.execute(q, query_id = prewarm_id,
settings = {
'max_execution_time': args.prewarm_max_query_seconds,
'query_profiler_real_time_period_ns': 10000000,
'memory_profiler_step': '4Mi',
})
res = c.execute(
q,
query_id=prewarm_id,
settings={
"max_execution_time": args.prewarm_max_query_seconds,
"query_profiler_real_time_period_ns": 10000000,
"memory_profiler_step": "4Mi",
},
)
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message
e.message = prewarm_id + ": " + e.message
raise
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
print(
f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except KeyboardInterrupt:
raise
except:
# FIXME the driver reconnects on error and we lose settings, so this
# might lead to further errors or unexpected behavior.
query_error_on_connection[conn_index] = traceback.format_exc();
query_error_on_connection[conn_index] = traceback.format_exc()
continue
# Report all errors that ocurred during prewarm and decide what to do next.
@ -311,14 +399,14 @@ for query_index in queries_to_run:
no_errors = []
for i, e in enumerate(query_error_on_connection):
if e:
print(e, file = sys.stderr)
print(e, file=sys.stderr)
else:
no_errors.append(i)
if len(no_errors) == 0:
continue
elif len(no_errors) < len(all_connections):
print(f'partial\t{query_index}\t{no_errors}')
print(f"partial\t{query_index}\t{no_errors}")
this_query_connections = [all_connections[index] for index in no_errors]
@ -337,27 +425,34 @@ for query_index in queries_to_run:
all_server_times.append([])
while True:
run_id = f'{query_prefix}.run{run}'
run_id = f"{query_prefix}.run{run}"
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
res = c.execute(
q,
query_id=run_id,
settings={"max_execution_time": args.max_query_seconds},
)
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
e.message = run_id + ": " + e.message
raise
elapsed = c.last_query.elapsed
all_server_times[conn_index].append(elapsed)
server_seconds += elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")
if elapsed > args.max_query_seconds:
# Do not stop processing pathologically slow queries,
# since this may hide errors in other queries.
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
print(
f"The query no. {query_index} is taking too long to run ({elapsed} s)",
file=sys.stderr,
)
# Be careful with the counter, after this line it's the next iteration
# already.
@ -386,7 +481,7 @@ for query_index in queries_to_run:
break
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much
@ -397,13 +492,15 @@ for query_index in queries_to_run:
# Don't fail if for some reason there are not enough measurements.
continue
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
pvalue = stats.ttest_ind(
all_server_times[0], all_server_times[1], equal_var=False
).pvalue
median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
continue
@ -412,25 +509,31 @@ for query_index in queries_to_run:
profile_start_seconds = time.perf_counter()
run = 0
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
run_id = f'{query_prefix}.profile{run}'
run_id = f"{query_prefix}.profile{run}"
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
res = c.execute(
q,
query_id=run_id,
settings={"query_profiler_real_time_period_ns": 10000000},
)
print(
f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
e.message = run_id + ": " + e.message
raise
run += 1
profile_total_seconds += time.perf_counter() - profile_start_seconds
print(f'profile-total\t{profile_total_seconds}')
print(f"profile-total\t{profile_total_seconds}")
reportStageEnd('run')
reportStageEnd("run")
# Run drop queries
if not args.keep_created_tables and not args.use_existing_tables:
@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
reportStageEnd('drop-2')
reportStageEnd("drop-2")

View File

@ -12,9 +12,13 @@ import pprint
import sys
import traceback
parser = argparse.ArgumentParser(description='Create performance test report')
parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
help='Which report to build')
parser = argparse.ArgumentParser(description="Create performance test report")
parser.add_argument(
"--report",
default="main",
choices=["main", "all-queries"],
help="Which report to build",
)
args = parser.parse_args()
tables = []
@ -31,8 +35,8 @@ unstable_partial_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
color_bad='#ffb0c0'
color_good='#b0d050'
color_bad = "#ffb0c0"
color_good = "#b0d050"
header_template = """
<!DOCTYPE html>
@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
table_anchor = 0
row_anchor = 0
def currentTableAnchor():
global table_anchor
return f'{table_anchor}'
return f"{table_anchor}"
def newTableAnchor():
global table_anchor
table_anchor += 1
return currentTableAnchor()
def currentRowAnchor():
global row_anchor
global table_anchor
return f'{table_anchor}.{row_anchor}'
return f"{table_anchor}.{row_anchor}"
def nextRowAnchor():
global row_anchor
global table_anchor
return f'{table_anchor}.{row_anchor + 1}'
return f"{table_anchor}.{row_anchor + 1}"
def advanceRowAnchor():
global row_anchor
@ -178,43 +187,58 @@ def advanceRowAnchor():
def tr(x, anchor=None):
#return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
# return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
anchor = anchor if anchor else advanceRowAnchor()
return f'<tr id={anchor}>{x}</tr>'
return f"<tr id={anchor}>{x}</tr>"
def td(value, cell_attributes = ''):
return '<td {cell_attributes}>{value}</td>'.format(
cell_attributes = cell_attributes,
value = value)
def th(value, cell_attributes = ''):
return '<th {cell_attributes}>{value}</th>'.format(
cell_attributes = cell_attributes,
value = value)
def td(value, cell_attributes=""):
return "<td {cell_attributes}>{value}</td>".format(
cell_attributes=cell_attributes, value=value
)
def tableRow(cell_values, cell_attributes = [], anchor=None):
def th(value, cell_attributes=""):
return "<th {cell_attributes}>{value}</th>".format(
cell_attributes=cell_attributes, value=value
)
def tableRow(cell_values, cell_attributes=[], anchor=None):
return tr(
''.join([td(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes,
fillvalue = '')
if a is not None and v is not None]),
anchor)
"".join(
[
td(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
),
anchor,
)
def tableHeader(cell_values, cell_attributes = []):
def tableHeader(cell_values, cell_attributes=[]):
return tr(
''.join([th(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes,
fillvalue = '')
if a is not None and v is not None]))
"".join(
[
th(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
)
)
def tableStart(title):
cls = '-'.join(title.lower().split(' ')[:3]);
cls = "-".join(title.lower().split(" ")[:3])
global table_anchor
table_anchor = cls
anchor = currentTableAnchor()
help_anchor = '-'.join(title.lower().split(' '));
help_anchor = "-".join(title.lower().split(" "))
return f"""
<h2 id="{anchor}">
<a class="cancela" href="#{anchor}">{title}</a>
@ -223,12 +247,14 @@ def tableStart(title):
<table class="{cls}">
"""
def tableEnd():
return '</table>'
return "</table>"
def tsvRows(n):
try:
with open(n, encoding='utf-8') as fd:
with open(n, encoding="utf-8") as fd:
result = []
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
new_row = []
@ -237,27 +263,32 @@ def tsvRows(n):
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
# 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'.
new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8'))
new_row.append(
e.encode("utf-8")
.decode("unicode-escape")
.encode("latin1")
.decode("utf-8")
)
result.append(new_row)
return result
except:
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
return []
def htmlRows(n):
rawRows = tsvRows(n)
result = ''
result = ""
for row in rawRows:
result += tableRow(row)
return result
def addSimpleTable(caption, columns, rows, pos=None):
global tables
text = ''
text = ""
if not rows:
return
@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None):
text += tableEnd()
tables.insert(pos if pos else len(tables), text)
def add_tested_commits():
global report_errors
try:
addSimpleTable('Tested Commits', ['Old', 'New'],
[['<pre>{}</pre>'.format(x) for x in
[open('left-commit.txt').read(),
open('right-commit.txt').read()]]])
addSimpleTable(
"Tested Commits",
["Old", "New"],
[
[
"<pre>{}</pre>".format(x)
for x in [
open("left-commit.txt").read(),
open("right-commit.txt").read(),
]
]
],
)
except:
# Don't fail if no commit info -- maybe it's a manual run.
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
def add_report_errors():
global tables
global report_errors
# Add the errors reported by various steps of comparison script
try:
report_errors += [l.strip() for l in open('report/errors.log')]
report_errors += [l.strip() for l in open("report/errors.log")]
except:
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
if not report_errors:
return
text = tableStart('Errors while Building the Report')
text += tableHeader(['Error'])
text = tableStart("Errors while Building the Report")
text += tableHeader(["Error"])
for x in report_errors:
text += tableRow([x])
text += tableEnd()
# Insert after Tested Commits
tables.insert(1, text)
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']);
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
]
)
def add_errors_explained():
if not errors_explained:
return
text = '<a name="fail1"/>'
text += tableStart('Error Summary')
text += tableHeader(['Description'])
text += tableStart("Error Summary")
text += tableHeader(["Description"])
for row in errors_explained:
text += tableRow(row)
text += tableEnd()
@ -321,59 +364,81 @@ def add_errors_explained():
tables.insert(1, text)
if args.report == 'main':
if args.report == "main":
print((header_template.format()))
add_tested_commits()
run_error_rows = tsvRows('run-errors.tsv')
run_error_rows = tsvRows("run-errors.tsv")
error_tests += len(run_error_rows)
addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows)
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
if run_error_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']);
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
]
)
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
error_tests += len(slow_on_client_rows)
addSimpleTable('Slow on Client',
['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'],
slow_on_client_rows)
addSimpleTable(
"Slow on Client",
["Client time,&nbsp;s", "Server time,&nbsp;s", "Ratio", "Test", "Query"],
slow_on_client_rows,
)
if slow_on_client_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']);
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
]
)
unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv')
unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
error_tests += len(unmarked_short_rows)
addSimpleTable('Unexpected Query Duration',
['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'],
unmarked_short_rows)
addSimpleTable(
"Unexpected Query Duration",
["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
unmarked_short_rows,
)
if unmarked_short_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>']);
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
]
)
def add_partial():
rows = tsvRows('report/partial-queries-report.tsv')
rows = tsvRows("report/partial-queries-report.tsv")
if not rows:
return
global unstable_partial_queries, slow_average_tests, tables
text = tableStart('Partial Queries')
columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
text = tableStart("Partial Queries")
columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
text += tableHeader(columns)
attrs = ['' for c in columns]
attrs = ["" for c in columns]
for row in rows:
anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}'
anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
if float(row[1]) > 0.10:
attrs[1] = f'style="background: {color_bad}"'
unstable_partial_queries += 1
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>'])
errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
]
)
else:
attrs[1] = ''
attrs[1] = ""
if float(row[0]) > allowed_single_run_time:
attrs[0] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'])
errors_explained.append(
[
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
]
)
slow_average_tests += 1
else:
attrs[0] = ''
attrs[0] = ""
text += tableRow(row, attrs, anchor)
text += tableEnd()
tables.append(text)
@ -381,41 +446,45 @@ if args.report == 'main':
add_partial()
def add_changes():
rows = tsvRows('report/changed-perf.tsv')
rows = tsvRows("report/changed-perf.tsv")
if not rows:
return
global faster_queries, slower_queries, tables
text = tableStart('Changes in Performance')
text = tableStart("Changes in Performance")
columns = [
'Old,&nbsp;s', # 0
'New,&nbsp;s', # 1
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', # 2
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 3
'p&nbsp;<&nbsp;0.01 threshold', # 4
'', # Failed # 5
'Test', # 6
'#', # 7
'Query', # 8
]
attrs = ['' for c in columns]
"Old,&nbsp;s", # 0
"New,&nbsp;s", # 1
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 2
"Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 3
"p&nbsp;<&nbsp;0.01 threshold", # 4
"", # Failed # 5
"Test", # 6
"#", # 7
"Query", # 8
]
attrs = ["" for c in columns]
attrs[5] = None
text += tableHeader(columns, attrs)
for row in rows:
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}'
anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
if int(row[5]):
if float(row[3]) < 0.:
if float(row[3]) < 0.0:
faster_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_good}"'
else:
slower_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>'])
errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
]
)
else:
attrs[2] = attrs[3] = ''
attrs[2] = attrs[3] = ""
text += tableRow(row, attrs, anchor)
@ -427,35 +496,35 @@ if args.report == 'main':
def add_unstable_queries():
global unstable_queries, very_unstable_queries, tables
unstable_rows = tsvRows('report/unstable-queries.tsv')
unstable_rows = tsvRows("report/unstable-queries.tsv")
if not unstable_rows:
return
unstable_queries += len(unstable_rows)
columns = [
'Old,&nbsp;s', #0
'New,&nbsp;s', #1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2
'p&nbsp;&lt;&nbsp;0.01 threshold', #3
'', # Failed #4
'Test', #5
'#', #6
'Query' #7
"Old,&nbsp;s", # 0
"New,&nbsp;s", # 1
"Relative difference (new&nbsp;-&nbsp;old)/old", # 2
"p&nbsp;&lt;&nbsp;0.01 threshold", # 3
"", # Failed #4
"Test", # 5
"#", # 6
"Query", # 7
]
attrs = ['' for c in columns]
attrs = ["" for c in columns]
attrs[4] = None
text = tableStart('Unstable Queries')
text = tableStart("Unstable Queries")
text += tableHeader(columns, attrs)
for r in unstable_rows:
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}'
anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
if int(r[4]):
very_unstable_queries += 1
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[3] = ''
attrs[3] = ""
# Just don't add the slightly unstable queries we don't consider
# errors. It's not clear what the user should do with them.
continue
@ -470,53 +539,70 @@ if args.report == 'main':
add_unstable_queries()
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows)
skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
addSimpleTable('Test Performance Changes',
['Test', 'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'],
tsvRows('report/test-perf-changes.tsv'))
addSimpleTable(
"Test Performance Changes",
[
"Test",
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)",
"Queries",
"Total not OK",
"Changed perf",
"Unstable",
],
tsvRows("report/test-perf-changes.tsv"),
)
def add_test_times():
global slow_average_tests, tables
rows = tsvRows('report/test-times.tsv')
rows = tsvRows("report/test-times.tsv")
if not rows:
return
columns = [
'Test', #0
'Wall clock time, entire test,&nbsp;s', #1
'Total client time for measured query runs,&nbsp;s', #2
'Queries', #3
'Longest query, total for measured runs,&nbsp;s', #4
'Wall clock time per query,&nbsp;s', #5
'Shortest query, total for measured runs,&nbsp;s', #6
'', # Runs #7
]
attrs = ['' for c in columns]
"Test", # 0
"Wall clock time, entire test,&nbsp;s", # 1
"Total client time for measured query runs,&nbsp;s", # 2
"Queries", # 3
"Longest query, total for measured runs,&nbsp;s", # 4
"Wall clock time per query,&nbsp;s", # 5
"Shortest query, total for measured runs,&nbsp;s", # 6
"", # Runs #7
]
attrs = ["" for c in columns]
attrs[7] = None
text = tableStart('Test Times')
text = tableStart("Test Times")
text += tableHeader(columns, attrs)
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}'
anchor = f"{currentTableAnchor()}.{r[0]}"
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs:
if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[5] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
errors_explained.append(
[
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
]
)
else:
attrs[5] = ''
attrs[5] = ""
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs:
if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[4] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
errors_explained.append(
[
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
]
)
else:
attrs[4] = ''
attrs[4] = ""
text += tableRow(r, attrs, anchor)
@ -525,10 +611,17 @@ if args.report == 'main':
add_test_times()
addSimpleTable('Metric Changes',
['Metric', 'Old median value', 'New median value',
'Relative difference', 'Times difference'],
tsvRows('metrics/changes.tsv'))
addSimpleTable(
"Metric Changes",
[
"Metric",
"Old median value",
"New median value",
"Relative difference",
"Times difference",
],
tsvRows("metrics/changes.tsv"),
)
add_report_errors()
add_errors_explained()
@ -536,7 +629,8 @@ if args.report == 'main':
for t in tables:
print(t)
print(f"""
print(
f"""
</div>
<p class="links">
<a href="all-queries.html">All queries</a>
@ -546,104 +640,111 @@ if args.report == 'main':
</p>
</body>
</html>
""")
"""
)
status = 'success'
message = 'See the report'
status = "success"
message = "See the report"
message_array = []
if slow_average_tests:
status = 'failure'
message_array.append(str(slow_average_tests) + ' too long')
status = "failure"
message_array.append(str(slow_average_tests) + " too long")
if faster_queries:
message_array.append(str(faster_queries) + ' faster')
message_array.append(str(faster_queries) + " faster")
if slower_queries:
if slower_queries > 3:
status = 'failure'
message_array.append(str(slower_queries) + ' slower')
status = "failure"
message_array.append(str(slower_queries) + " slower")
if unstable_partial_queries:
very_unstable_queries += unstable_partial_queries
status = 'failure'
status = "failure"
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
if very_unstable_queries > 5:
error_tests += very_unstable_queries
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')
status = "failure"
message_array.append(str(very_unstable_queries) + " unstable")
error_tests += slow_average_tests
if error_tests:
status = 'failure'
message_array.insert(0, str(error_tests) + ' errors')
status = "failure"
message_array.insert(0, str(error_tests) + " errors")
if message_array:
message = ', '.join(message_array)
message = ", ".join(message_array)
if report_errors:
status = 'failure'
message = 'Errors while building the report.'
status = "failure"
message = "Errors while building the report."
print(("""
print(
(
"""
<!--status: {status}-->
<!--message: {message}-->
""".format(status=status, message=message)))
""".format(
status=status, message=message
)
)
)
elif args.report == 'all-queries':
elif args.report == "all-queries":
print((header_template.format()))
add_tested_commits()
def add_all_queries():
rows = tsvRows('report/all-queries.tsv')
rows = tsvRows("report/all-queries.tsv")
if not rows:
return
columns = [
'', # Changed #0
'', # Unstable #1
'Old,&nbsp;s', #2
'New,&nbsp;s', #3
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', #4
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #5
'p&nbsp;&lt;&nbsp;0.01 threshold', #6
'Test', #7
'#', #8
'Query', #9
]
attrs = ['' for c in columns]
"", # Changed #0
"", # Unstable #1
"Old,&nbsp;s", # 2
"New,&nbsp;s", # 3
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 4
"Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 5
"p&nbsp;&lt;&nbsp;0.01 threshold", # 6
"Test", # 7
"#", # 8
"Query", # 9
]
attrs = ["" for c in columns]
attrs[0] = None
attrs[1] = None
text = tableStart('All Query Times')
text = tableStart("All Query Times")
text += tableHeader(columns, attrs)
for r in rows:
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}'
anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
if int(r[1]):
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ''
attrs[6] = ""
if int(r[0]):
if float(r[5]) > 0.:
if float(r[5]) > 0.0:
attrs[4] = attrs[5] = f'style="background: {color_bad}"'
else:
attrs[4] = attrs[5] = f'style="background: {color_good}"'
else:
attrs[4] = attrs[5] = ''
attrs[4] = attrs[5] = ""
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = f'style="background: {color_bad}"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
attrs[3] = ''
attrs[2] = ""
attrs[3] = ""
text += tableRow(r, attrs, anchor)
@ -655,7 +756,8 @@ elif args.report == 'all-queries':
for t in tables:
print(t)
print(f"""
print(
f"""
</div>
<p class="links">
<a href="report.html">Main report</a>
@ -665,4 +767,5 @@ elif args.report == 'all-queries':
</p>
</body>
</html>
""")
"""
)

View File

@ -7,18 +7,19 @@ import csv
RESULT_LOG_NAME = "run.log"
def process_result(result_folder):
status = "success"
description = 'Server started and responded'
description = "Server started and responded"
summary = [("Smoke test", "OK")]
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log:
lines = run_log.read().split('\n')
if not lines or lines[0].strip() != 'OK':
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
lines = run_log.read().split("\n")
if not lines or lines[0].strip() != "OK":
status = "failure"
logging.info("Lines is not ok: %s", str('\n'.join(lines)))
logging.info("Lines is not ok: %s", str("\n".join(lines)))
summary = [("Smoke test", "FAIL")]
description = 'Server failed to respond, see result in logs'
description = "Server failed to respond, see result in logs"
result_logs = []
server_log_path = os.path.join(result_folder, "clickhouse-server.log")
@ -38,20 +39,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of split build smoke test"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)

View File

@ -10,11 +10,18 @@ def process_result(result_folder):
status = "success"
summary = []
paths = []
tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"]
tests = [
"TLPWhere",
"TLPGroupBy",
"TLPHaving",
"TLPWhereGroupBy",
"TLPDistinct",
"TLPAggregate",
]
for test in tests:
err_path = '{}/{}.err'.format(result_folder, test)
out_path = '{}/{}.out'.format(result_folder, test)
err_path = "{}/{}.err".format(result_folder, test)
out_path = "{}/{}.out".format(result_folder, test)
if not os.path.exists(err_path):
logging.info("No output err on path %s", err_path)
summary.append((test, "SKIPPED"))
@ -23,24 +30,24 @@ def process_result(result_folder):
else:
paths.append(err_path)
paths.append(out_path)
with open(err_path, 'r') as f:
if 'AssertionError' in f.read():
with open(err_path, "r") as f:
if "AssertionError" in f.read():
summary.append((test, "FAIL"))
status = 'failure'
status = "failure"
else:
summary.append((test, "OK"))
logs_path = '{}/logs.tar.gz'.format(result_folder)
logs_path = "{}/logs.tar.gz".format(result_folder)
if not os.path.exists(logs_path):
logging.info("No logs tar on path %s", logs_path)
else:
paths.append(logs_path)
stdout_path = '{}/stdout.log'.format(result_folder)
stdout_path = "{}/stdout.log".format(result_folder)
if not os.path.exists(stdout_path):
logging.info("No stdout log on path %s", stdout_path)
else:
paths.append(stdout_path)
stderr_path = '{}/stderr.log'.format(result_folder)
stderr_path = "{}/stderr.log".format(result_folder)
if not os.path.exists(stderr_path):
logging.info("No stderr log on path %s", stderr_path)
else:
@ -52,20 +59,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of sqlancer test"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)

View File

@ -4,6 +4,9 @@ import requests
import re
import os
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags"
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb"
@ -66,8 +69,18 @@ def get_previous_release(server_version):
return previous_release
def download_packet(url, local_file_name):
response = requests.get(url)
def download_packet(url, local_file_name, retries=10, backoff_factor=0.3):
session = requests.Session()
retry = Retry(
total=retries,
read=retries,
connect=retries,
backoff_factor=backoff_factor,
)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
response = session.get(url)
print(url)
if response.ok:
open(PACKETS_DIR + local_file_name, 'wb').write(response.content)

View File

@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
&& pip3 install codespell PyGithub boto3 unidiff dohq-artifactory
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -14,6 +14,7 @@ def process_result(result_folder):
("header duplicates", "duplicate_output.txt"),
("shellcheck", "shellcheck_output.txt"),
("style", "style_output.txt"),
("black", "black_output.txt"),
("typos", "typos_output.txt"),
("whitespaces", "whitespaces_output.txt"),
("workflows", "workflows_output.txt"),

View File

@ -7,11 +7,13 @@ echo "Check duplicates" | ts
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
echo "Check style" | ts
./check-style -n |& tee /test_output/style_output.txt
echo "Check python formatting with black" | ts
./check-black -n |& tee /test_output/black_output.txt
echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt
echo "Check whitespaces" | ts
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
echo "Check sorkflows" | ts
echo "Check workflows" | ts
./check-workflows |& tee /test_output/workflows_output.txt
echo "Check shell scripts with shellcheck" | ts
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt

View File

@ -22,9 +22,9 @@ def process_result(result_folder):
total_other = 0
test_results = []
for test in results["tests"]:
test_name = test['test']['test_name']
test_result = test['result']['result_type'].upper()
test_time = str(test['result']['message_rtime'])
test_name = test["test"]["test_name"]
test_result = test["result"]["result_type"].upper()
test_time = str(test["result"]["message_rtime"])
total_tests += 1
if test_result == "OK":
total_ok += 1
@ -39,24 +39,29 @@ def process_result(result_folder):
else:
status = "success"
description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other)
description = "failed: {}, passed: {}, other: {}".format(
total_fail, total_ok, total_other
)
return status, description, test_results, [json_path, test_binary_log]
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests")
parser.add_argument("--in-results-dir", default='./')
parser.add_argument("--out-results-file", default='./test_results.tsv')
parser.add_argument("--out-status-file", default='./check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of Testflows tests"
)
parser.add_argument("--in-results-dir", default="./")
parser.add_argument("--out-results-file", default="./test_results.tsv")
parser.add_argument("--out-status-file", default="./check_status.tsv")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)
@ -64,4 +69,3 @@ if __name__ == "__main__":
status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written")

View File

@ -5,24 +5,26 @@ import logging
import argparse
import csv
OK_SIGN = 'OK ]'
FAILED_SIGN = 'FAILED ]'
SEGFAULT = 'Segmentation fault'
SIGNAL = 'received signal SIG'
PASSED = 'PASSED'
OK_SIGN = "OK ]"
FAILED_SIGN = "FAILED ]"
SEGFAULT = "Segmentation fault"
SIGNAL = "received signal SIG"
PASSED = "PASSED"
def get_test_name(line):
elements = reversed(line.split(' '))
elements = reversed(line.split(" "))
for element in elements:
if '(' not in element and ')' not in element:
if "(" not in element and ")" not in element:
return element
raise Exception("No test name in line '{}'".format(line))
def process_result(result_folder):
summary = []
total_counter = 0
failed_counter = 0
result_log_path = '{}/test_result.txt'.format(result_folder)
result_log_path = "{}/test_result.txt".format(result_folder)
if not os.path.exists(result_log_path):
logging.info("No output log on path %s", result_log_path)
return "exception", "No output log", []
@ -30,7 +32,7 @@ def process_result(result_folder):
status = "success"
description = ""
passed = False
with open(result_log_path, 'r') as test_result:
with open(result_log_path, "r") as test_result:
for line in test_result:
if OK_SIGN in line:
logging.info("Found ok line: '%s'", line)
@ -38,7 +40,7 @@ def process_result(result_folder):
logging.info("Test name: '%s'", test_name)
summary.append((test_name, "OK"))
total_counter += 1
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line:
elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
logging.info("Found fail line: '%s'", line)
test_name = get_test_name(line.strip())
logging.info("Test name: '%s'", test_name)
@ -67,25 +69,30 @@ def process_result(result_folder):
status = "failure"
if not description:
description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter)
description += "fail: {}, passed: {}".format(
failed_counter, total_counter - failed_counter
)
return status, description, summary
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of unit tests"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir)
@ -93,4 +100,3 @@ if __name__ == "__main__":
status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written")

View File

@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
RETRIES_SIGN = "Some tests were restarted"
def process_test_log(log_path):
total = 0
skipped = 0
@ -26,7 +27,7 @@ def process_test_log(log_path):
retries = False
task_timeout = True
test_results = []
with open(log_path, 'r') as test_file:
with open(log_path, "r") as test_file:
for line in test_file:
original_line = line
line = line.strip()
@ -36,12 +37,15 @@ def process_test_log(log_path):
hung = True
if RETRIES_SIGN in line:
retries = True
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
test_name = line.split(' ')[2].split(':')[0]
if any(
sign in line
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
):
test_name = line.split(" ")[2].split(":")[0]
test_time = ''
test_time = ""
try:
time_token = line.split(']')[1].strip().split()[0]
time_token = line.split("]")[1].strip().split()[0]
float(time_token)
test_time = time_token
except:
@ -66,9 +70,22 @@ def process_test_log(log_path):
elif len(test_results) > 0 and test_results[-1][1] == "FAIL":
test_results[-1][3].append(original_line)
test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results]
test_results = [
(test[0], test[1], test[2], "".join(test[3])) for test in test_results
]
return (
total,
skipped,
unknown,
failed,
success,
hung,
task_timeout,
retries,
test_results,
)
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
def process_result(result_path):
test_results = []
@ -76,16 +93,26 @@ def process_result(result_path):
description = ""
files = os.listdir(result_path)
if files:
logging.info("Find files in result folder %s", ','.join(files))
result_path = os.path.join(result_path, 'test_result.txt')
logging.info("Find files in result folder %s", ",".join(files))
result_path = os.path.join(result_path, "test_result.txt")
else:
result_path = None
description = "No output log"
state = "error"
if result_path and os.path.exists(result_path):
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
(
total,
skipped,
unknown,
failed,
success,
hung,
task_timeout,
retries,
test_results,
) = process_test_log(result_path)
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
logging.info("Is flacky check: %s", is_flacky_check)
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
@ -120,20 +147,22 @@ def process_result(result_path):
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of functional tests"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir)

View File

@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).
Python code is checked with [black](https://github.com/psf/black/).
### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).

View File

@ -5,7 +5,7 @@ toc_title: Caches
# Cache Types {#cache-types}
When performing queries, ClichHouse uses different caches.
When performing queries, ClickHouse uses different caches.
Main cache types:

View File

@ -15,24 +15,24 @@ import website
def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
src_path = root
src_index = os.path.join(src_path, 'index.html')
src_index = os.path.join(src_path, "index.html")
rel_path = os.path.relpath(src_path, site_temp)
dst_path = os.path.join(main_site_dir, rel_path, 'amp')
dst_index = os.path.join(dst_path, 'index.html')
dst_path = os.path.join(main_site_dir, rel_path, "amp")
dst_index = os.path.join(dst_path, "index.html")
logging.debug(f'Generating AMP version for {rel_path} ({lang})')
logging.debug(f"Generating AMP version for {rel_path} ({lang})")
os.makedirs(dst_path)
with open(src_index, 'r') as f:
with open(src_index, "r") as f:
content = f.read()
css_in = ' '.join(website.get_css_in(args))
css_in = " ".join(website.get_css_in(args))
command = f"purifycss --min {css_in} '{src_index}'"
logging.debug(command)
inline_css = subprocess.check_output(command, shell=True).decode('utf-8')
inline_css = inline_css.replace('!important', '').replace('/*!', '/*')
inline_css = subprocess.check_output(command, shell=True).decode("utf-8")
inline_css = inline_css.replace("!important", "").replace("/*!", "/*")
inline_css = cssmin.cssmin(inline_css)
content = content.replace('CUSTOM_CSS_PLACEHOLDER', inline_css)
content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css)
with open(dst_index, 'w') as f:
with open(dst_index, "w") as f:
f.write(content)
return dst_index
@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
def build_amp(lang, args, cfg):
# AMP docs: https://amp.dev/documentation/
logging.info(f'Building AMP version for {lang}')
logging.info(f"Building AMP version for {lang}")
with util.temp_dir() as site_temp:
extra = cfg.data['extra']
main_site_dir = cfg.data['site_dir']
extra['is_amp'] = True
cfg.load_dict({
'site_dir': site_temp,
'extra': extra
})
extra = cfg.data["extra"]
main_site_dir = cfg.data["site_dir"]
extra["is_amp"] = True
cfg.load_dict({"site_dir": site_temp, "extra": extra})
try:
mkdocs.commands.build.build(cfg)
@ -60,50 +57,49 @@ def build_amp(lang, args, cfg):
paths = []
for root, _, filenames in os.walk(site_temp):
if 'index.html' in filenames:
paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir))
logging.info(f'Finished building AMP version for {lang}')
if "index.html" in filenames:
paths.append(
prepare_amp_html(lang, args, root, site_temp, main_site_dir)
)
logging.info(f"Finished building AMP version for {lang}")
def html_to_amp(content):
soup = bs4.BeautifulSoup(
content,
features='html.parser'
)
soup = bs4.BeautifulSoup(content, features="html.parser")
for tag in soup.find_all():
if tag.attrs.get('id') == 'tostring':
tag.attrs['id'] = '_tostring'
if tag.name == 'img':
tag.name = 'amp-img'
tag.attrs['layout'] = 'responsive'
src = tag.attrs['src']
if not (src.startswith('/') or src.startswith('http')):
tag.attrs['src'] = f'../{src}'
if not tag.attrs.get('width'):
tag.attrs['width'] = '640'
if not tag.attrs.get('height'):
tag.attrs['height'] = '320'
if tag.name == 'iframe':
tag.name = 'amp-iframe'
tag.attrs['layout'] = 'responsive'
del tag.attrs['alt']
del tag.attrs['allowfullscreen']
if not tag.attrs.get('width'):
tag.attrs['width'] = '640'
if not tag.attrs.get('height'):
tag.attrs['height'] = '320'
elif tag.name == 'a':
href = tag.attrs.get('href')
if tag.attrs.get("id") == "tostring":
tag.attrs["id"] = "_tostring"
if tag.name == "img":
tag.name = "amp-img"
tag.attrs["layout"] = "responsive"
src = tag.attrs["src"]
if not (src.startswith("/") or src.startswith("http")):
tag.attrs["src"] = f"../{src}"
if not tag.attrs.get("width"):
tag.attrs["width"] = "640"
if not tag.attrs.get("height"):
tag.attrs["height"] = "320"
if tag.name == "iframe":
tag.name = "amp-iframe"
tag.attrs["layout"] = "responsive"
del tag.attrs["alt"]
del tag.attrs["allowfullscreen"]
if not tag.attrs.get("width"):
tag.attrs["width"] = "640"
if not tag.attrs.get("height"):
tag.attrs["height"] = "320"
elif tag.name == "a":
href = tag.attrs.get("href")
if href:
if not (href.startswith('/') or href.startswith('http')):
if '#' in href:
href, anchor = href.split('#')
if not (href.startswith("/") or href.startswith("http")):
if "#" in href:
href, anchor = href.split("#")
else:
anchor = None
href = f'../{href}amp/'
href = f"../{href}amp/"
if anchor:
href = f'{href}#{anchor}'
tag.attrs['href'] = href
href = f"{href}#{anchor}"
tag.attrs["href"] = href
content = str(soup)
return website.minify_html(content)

View File

@ -17,54 +17,52 @@ import util
def build_for_lang(lang, args):
logging.info(f'Building {lang} blog')
logging.info(f"Building {lang} blog")
try:
theme_cfg = {
'name': None,
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
'language': lang,
'direction': 'ltr',
'static_templates': ['404.html'],
'extra': {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
}
"name": None,
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
"language": lang,
"direction": "ltr",
"static_templates": ["404.html"],
"extra": {
"now": int(
time.mktime(datetime.datetime.now().timetuple())
) # TODO better way to avoid caching
},
}
# the following list of languages is sorted according to
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
languages = {
'en': 'English'
}
languages = {"en": "English"}
site_names = {
'en': 'ClickHouse Blog'
}
site_names = {"en": "ClickHouse Blog"}
assert len(site_names) == len(languages)
site_dir = os.path.join(args.blog_output_dir, lang)
plugins = ['macros']
plugins = ["macros"]
if args.htmlproofer:
plugins.append('htmlproofer')
plugins.append("htmlproofer")
website_url = 'https://clickhouse.com'
site_name = site_names.get(lang, site_names['en'])
website_url = "https://clickhouse.com"
site_name = site_names.get(lang, site_names["en"])
blog_nav, post_meta = nav.build_blog_nav(lang, args)
raw_config = dict(
site_name=site_name,
site_url=f'{website_url}/blog/{lang}/',
site_url=f"{website_url}/blog/{lang}/",
docs_dir=os.path.join(args.blog_dir, lang),
site_dir=site_dir,
strict=True,
theme=theme_cfg,
nav=blog_nav,
copyright='©20162022 ClickHouse, Inc.',
copyright="©20162022 ClickHouse, Inc.",
use_directory_urls=True,
repo_name='ClickHouse/ClickHouse',
repo_url='https://github.com/ClickHouse/ClickHouse/',
edit_uri=f'edit/master/website/blog/{lang}',
repo_name="ClickHouse/ClickHouse",
repo_url="https://github.com/ClickHouse/ClickHouse/",
edit_uri=f"edit/master/website/blog/{lang}",
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
plugins=plugins,
extra=dict(
@ -75,12 +73,12 @@ def build_for_lang(lang, args):
website_url=website_url,
events=args.events,
languages=languages,
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
is_amp=False,
is_blog=True,
post_meta=post_meta,
today=datetime.date.today().isoformat()
)
today=datetime.date.today().isoformat(),
),
)
cfg = config.load_config(**raw_config)
@ -89,21 +87,28 @@ def build_for_lang(lang, args):
redirects.build_blog_redirects(args)
env = util.init_jinja2_env(args)
with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f:
rss_template_string = f.read().decode('utf-8').strip()
with open(
os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb"
) as f:
rss_template_string = f.read().decode("utf-8").strip()
rss_template = env.from_string(rss_template_string)
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f:
f.write(rss_template.render({'config': raw_config}))
with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f:
f.write(rss_template.render({"config": raw_config}))
logging.info(f'Finished building {lang} blog')
logging.info(f"Finished building {lang} blog")
except exceptions.ConfigurationError as e:
raise SystemExit('\n' + str(e))
raise SystemExit("\n" + str(e))
def build_blog(args):
tasks = []
for lang in args.blog_lang.split(','):
for lang in args.blog_lang.split(","):
if lang:
tasks.append((lang, args,))
tasks.append(
(
lang,
args,
)
)
util.run_function_in_parallel(build_for_lang, tasks, threads=False)

View File

@ -30,76 +30,76 @@ import website
from cmake_in_clickhouse_generator import generate_cmake_flags_files
class ClickHouseMarkdown(markdown.extensions.Extension):
class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines):
for line in lines:
if '<!--hide-->' not in line:
if "<!--hide-->" not in line:
yield line
def extendMarkdown(self, md):
md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31)
md.preprocessors.register(
self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31
)
markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown
def build_for_lang(lang, args):
logging.info(f'Building {lang} docs')
os.environ['SINGLE_PAGE'] = '0'
logging.info(f"Building {lang} docs")
os.environ["SINGLE_PAGE"] = "0"
try:
theme_cfg = {
'name': None,
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
'language': lang,
'direction': 'rtl' if lang == 'fa' else 'ltr',
'static_templates': ['404.html'],
'extra': {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
}
"name": None,
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
"language": lang,
"direction": "rtl" if lang == "fa" else "ltr",
"static_templates": ["404.html"],
"extra": {
"now": int(
time.mktime(datetime.datetime.now().timetuple())
) # TODO better way to avoid caching
},
}
# the following list of languages is sorted according to
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
languages = {
'en': 'English',
'zh': '中文',
'ru': 'Русский',
'ja': '日本語'
}
languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"}
site_names = {
'en': 'ClickHouse %s Documentation',
'zh': 'ClickHouse文档 %s',
'ru': 'Документация ClickHouse %s',
'ja': 'ClickHouseドキュメント %s'
"en": "ClickHouse %s Documentation",
"zh": "ClickHouse文档 %s",
"ru": "Документация ClickHouse %s",
"ja": "ClickHouseドキュメント %s",
}
assert len(site_names) == len(languages)
site_dir = os.path.join(args.docs_output_dir, lang)
plugins = ['macros']
plugins = ["macros"]
if args.htmlproofer:
plugins.append('htmlproofer')
plugins.append("htmlproofer")
website_url = 'https://clickhouse.com'
site_name = site_names.get(lang, site_names['en']) % ''
site_name = site_name.replace(' ', ' ')
website_url = "https://clickhouse.com"
site_name = site_names.get(lang, site_names["en"]) % ""
site_name = site_name.replace(" ", " ")
raw_config = dict(
site_name=site_name,
site_url=f'{website_url}/docs/{lang}/',
site_url=f"{website_url}/docs/{lang}/",
docs_dir=os.path.join(args.docs_dir, lang),
site_dir=site_dir,
strict=True,
theme=theme_cfg,
copyright='©20162022 ClickHouse, Inc.',
copyright="©20162022 ClickHouse, Inc.",
use_directory_urls=True,
repo_name='ClickHouse/ClickHouse',
repo_url='https://github.com/ClickHouse/ClickHouse/',
edit_uri=f'edit/master/docs/{lang}',
repo_name="ClickHouse/ClickHouse",
repo_url="https://github.com/ClickHouse/ClickHouse/",
edit_uri=f"edit/master/docs/{lang}",
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
plugins=plugins,
extra=dict(
@ -111,16 +111,16 @@ def build_for_lang(lang, args):
website_url=website_url,
events=args.events,
languages=languages,
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
is_amp=False,
is_blog=False
)
is_blog=False,
),
)
# Clean to be safe if last build finished abnormally
single_page.remove_temporary_files(lang, args)
raw_config['nav'] = nav.build_docs_nav(lang, args)
raw_config["nav"] = nav.build_docs_nav(lang, args)
cfg = config.load_config(**raw_config)
@ -131,21 +131,28 @@ def build_for_lang(lang, args):
amp.build_amp(lang, args, cfg)
if not args.skip_single_page:
single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg)
single_page.build_single_page_version(
lang, args, raw_config.get("nav"), cfg
)
mdx_clickhouse.PatchedMacrosPlugin.disabled = False
logging.info(f'Finished building {lang} docs')
logging.info(f"Finished building {lang} docs")
except exceptions.ConfigurationError as e:
raise SystemExit('\n' + str(e))
raise SystemExit("\n" + str(e))
def build_docs(args):
tasks = []
for lang in args.lang.split(','):
for lang in args.lang.split(","):
if lang:
tasks.append((lang, args,))
tasks.append(
(
lang,
args,
)
)
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
redirects.build_docs_redirects(args)
@ -171,56 +178,64 @@ def build(args):
redirects.build_static_redirects(args)
if __name__ == '__main__':
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
if __name__ == "__main__":
os.chdir(os.path.join(os.path.dirname(__file__), ".."))
# A root path to ClickHouse source code.
src_dir = '..'
src_dir = ".."
website_dir = os.path.join(src_dir, 'website')
website_dir = os.path.join(src_dir, "website")
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--lang', default='en,ru,zh,ja')
arg_parser.add_argument('--blog-lang', default='en')
arg_parser.add_argument('--docs-dir', default='.')
arg_parser.add_argument('--theme-dir', default=website_dir)
arg_parser.add_argument('--website-dir', default=website_dir)
arg_parser.add_argument('--src-dir', default=src_dir)
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog'))
arg_parser.add_argument('--output-dir', default='build')
arg_parser.add_argument('--nav-limit', type=int, default='0')
arg_parser.add_argument('--skip-multi-page', action='store_true')
arg_parser.add_argument('--skip-single-page', action='store_true')
arg_parser.add_argument('--skip-amp', action='store_true')
arg_parser.add_argument('--skip-website', action='store_true')
arg_parser.add_argument('--skip-blog', action='store_true')
arg_parser.add_argument('--skip-git-log', action='store_true')
arg_parser.add_argument('--skip-docs', action='store_true')
arg_parser.add_argument('--test-only', action='store_true')
arg_parser.add_argument('--minify', action='store_true')
arg_parser.add_argument('--htmlproofer', action='store_true')
arg_parser.add_argument('--no-docs-macros', action='store_true')
arg_parser.add_argument('--save-raw-single-page', type=str)
arg_parser.add_argument('--livereload', type=int, default='0')
arg_parser.add_argument('--verbose', action='store_true')
arg_parser.add_argument("--lang", default="en,ru,zh,ja")
arg_parser.add_argument("--blog-lang", default="en")
arg_parser.add_argument("--docs-dir", default=".")
arg_parser.add_argument("--theme-dir", default=website_dir)
arg_parser.add_argument("--website-dir", default=website_dir)
arg_parser.add_argument("--src-dir", default=src_dir)
arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog"))
arg_parser.add_argument("--output-dir", default="build")
arg_parser.add_argument("--nav-limit", type=int, default="0")
arg_parser.add_argument("--skip-multi-page", action="store_true")
arg_parser.add_argument("--skip-single-page", action="store_true")
arg_parser.add_argument("--skip-amp", action="store_true")
arg_parser.add_argument("--skip-website", action="store_true")
arg_parser.add_argument("--skip-blog", action="store_true")
arg_parser.add_argument("--skip-git-log", action="store_true")
arg_parser.add_argument("--skip-docs", action="store_true")
arg_parser.add_argument("--test-only", action="store_true")
arg_parser.add_argument("--minify", action="store_true")
arg_parser.add_argument("--htmlproofer", action="store_true")
arg_parser.add_argument("--no-docs-macros", action="store_true")
arg_parser.add_argument("--save-raw-single-page", type=str)
arg_parser.add_argument("--livereload", type=int, default="0")
arg_parser.add_argument("--verbose", action="store_true")
args = arg_parser.parse_args()
args.minify = False # TODO remove
logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO,
stream=sys.stderr
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
)
logging.getLogger('MARKDOWN').setLevel(logging.INFO)
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs')
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog')
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs")
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog")
from github import get_events
args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip()
args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}'
args.rev = (
subprocess.check_output("git rev-parse HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_short = (
subprocess.check_output("git rev-parse --short HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
args.events = get_events(args)
if args.test_only:
@ -233,18 +248,20 @@ if __name__ == '__main__':
mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True
from build import build
build(args)
if args.livereload:
new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')]
new_args = sys.executable + ' ' + ' '.join(new_args)
new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
new_args = sys.executable + " " + " ".join(new_args)
server = livereload.Server()
server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
server.serve(
root=args.output_dir,
host='0.0.0.0',
port=args.livereload
server.watch(
args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True)
)
server.watch(
args.website_dir + "**/*",
livereload.shell(new_args, cwd="tools", shell=True),
)
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
sys.exit(0)

View File

@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict
Entity = Tuple[str, str, str]
# https://regex101.com/r/R6iogw/12
cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
cmake_option_regex: str = (
r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
)
ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/"
name_str: str = "<a name=\"{anchor}\"></a>[`{name}`](" + ch_master_url + "{path}#L{line})"
name_str: str = '<a name="{anchor}"></a>[`{name}`](' + ch_master_url + "{path}#L{line})"
default_anchor_str: str = "[`{name}`](#{anchor})"
comment_var_regex: str = r"\${(.+)}"
@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {}
def make_anchor(t: str) -> str:
return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"])
return "".join(
["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]
)
def process_comment(comment: str) -> str:
return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE)
def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None:
(line, comment) = line_comment
(name, description, default) = entity
@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
formatted_default: str = "`" + default + "`"
formatted_name: str = name_str.format(
anchor=make_anchor(name),
name=name,
path=path,
line=line)
anchor=make_anchor(name), name=name, path=path, line=line
)
formatted_description: str = "".join(description.split("\n"))
formatted_comment: str = process_comment(comment)
formatted_entity: str = "| {} | {} | {} | {} |".format(
formatted_name, formatted_default, formatted_description, formatted_comment)
formatted_name, formatted_default, formatted_description, formatted_comment
)
entities[name] = path, formatted_entity
def process_file(root_path: str, file_path: str, file_name: str) -> None:
with open(os.path.join(file_path, file_name), 'r') as cmake_file:
with open(os.path.join(file_path, file_name), "r") as cmake_file:
contents: str = cmake_file.read()
def get_line_and_comment(target: str) -> Tuple[int, str]:
@ -70,10 +76,10 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
comment: str = ""
for n, line in enumerate(contents_list):
if 'option' not in line.lower() or target not in line:
if "option" not in line.lower() or target not in line:
continue
for maybe_comment_line in contents_list[n - 1::-1]:
for maybe_comment_line in contents_list[n - 1 :: -1]:
if not re.match("\s*#\s*", maybe_comment_line):
break
@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
# line numbering starts with 1
return n + 1, comment
matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE)
matches: Optional[List[Entity]] = re.findall(
cmake_option_regex, contents, re.MULTILINE
)
file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name)
if file_rel_path_with_name.startswith('/'):
file_rel_path_with_name: str = os.path.join(
file_path[len(root_path) :], file_name
)
if file_rel_path_with_name.startswith("/"):
file_rel_path_with_name = file_rel_path_with_name[1:]
if matches:
for entity in matches:
build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0]))
build_entity(
file_rel_path_with_name, entity, get_line_and_comment(entity[0])
)
def process_folder(root_path: str, name: str) -> None:
for root, _, files in os.walk(os.path.join(root_path, name)):
@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None:
if f == "CMakeLists.txt" or ".cmake" in f:
process_file(root_path, root, f)
def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), '..', '..')
output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md")
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md")
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), "..", "..")
output_file_name: str = os.path.join(
root_path, "docs/en/development/cmake-in-clickhouse.md"
)
header_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_header.md"
)
footer_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_footer.md"
)
process_file(root_path, root_path, "CMakeLists.txt")
process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt")
@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None:
f.write(entities[k][1] + "\n")
ignored_keys.append(k)
f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" +
table_header)
f.write(
"\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n"
+ table_header
)
for k in sorted_keys:
if k.startswith("ENABLE_") and ".cmake" in entities[k][0]:
@ -143,15 +164,18 @@ def generate_cmake_flags_files() -> None:
with open(footer_file_name, "r") as footer:
f.write(footer.read())
other_languages = ["docs/ja/development/cmake-in-clickhouse.md",
"docs/zh/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md"]
other_languages = [
"docs/ja/development/cmake-in-clickhouse.md",
"docs/zh/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md",
]
for lang in other_languages:
other_file_name = os.path.join(root_path, lang)
if os.path.exists(other_file_name):
os.unlink(other_file_name)
os.unlink(other_file_name)
os.symlink(output_file_name, other_file_name)
if __name__ == '__main__':
if __name__ == "__main__":
generate_cmake_flags_files()

View File

@ -8,7 +8,7 @@ import contextlib
from git import cmd
from tempfile import NamedTemporaryFile
SCRIPT_DESCRIPTION = '''
SCRIPT_DESCRIPTION = """
usage: ./easy_diff.py language/document path
Show the difference between a language document and an English document.
@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = '''
OPTIONS:
-h, --help show this help message and exit
--no-pager use stdout as difference result output
'''
"""
SCRIPT_PATH = os.path.abspath(__file__)
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..')
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..")
SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME)
SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False)
SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None)
SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False)
SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False)
SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None)
SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False)
SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False)
def execute(commands):
@ -70,19 +70,41 @@ def execute(commands):
def get_hash(file_name):
return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name])
return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name])
def diff_file(reference_file, working_file, out):
if not os.path.exists(reference_file):
raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.')
raise RuntimeError(
"reference file [" + os.path.abspath(reference_file) + "] is not exists."
)
if os.path.islink(working_file):
out.writelines(["Need translate document:" + os.path.abspath(reference_file)])
elif not os.path.exists(working_file):
out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)])
out.writelines(
[
"Need link document "
+ os.path.abspath(reference_file)
+ " to "
+ os.path.abspath(working_file)
]
)
elif get_hash(working_file) != get_hash(reference_file):
out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))])
out.writelines(
[
(
execute(
[
"git",
"diff",
get_hash(working_file).strip('"'),
reference_file,
]
).encode("utf-8")
)
]
)
return 0
@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out):
for list_item in os.listdir(reference_directory):
working_item = os.path.join(working_directory, list_item)
reference_item = os.path.join(reference_directory, list_item)
if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0:
if (
diff_file(reference_item, working_item, out)
if os.path.isfile(reference_item)
else diff_directory(reference_item, working_item, out) != 0
):
return 1
return 0
def find_language_doc(custom_document, other_language='en', children=[]):
def find_language_doc(custom_document, other_language="en", children=[]):
if len(custom_document) == 0:
raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.")
raise RuntimeError(
"The "
+ os.path.join(custom_document, *children)
+ " is not in docs directory."
)
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document):
return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:])
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document):
return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:])
children.insert(0, os.path.split(custom_document)[1])
return find_language_doc(os.path.split(custom_document)[0], other_language, children)
return find_language_doc(
os.path.split(custom_document)[0], other_language, children
)
class ToPager:
@ -119,7 +151,7 @@ class ToPager:
def close(self):
self.temp_named_file.flush()
git_pager = execute(['git', 'var', 'GIT_PAGER'])
git_pager = execute(["git", "var", "GIT_PAGER"])
subprocess.check_call([git_pager, self.temp_named_file.name])
self.temp_named_file.close()
@ -135,12 +167,20 @@ class ToStdOut:
self.system_stdout_stream = system_stdout_stream
if __name__ == '__main__':
if __name__ == "__main__":
arguments = SCRIPT_COMMAND_PARSER.parse_args()
if arguments.help or not arguments.path:
sys.stdout.write(SCRIPT_DESCRIPTION)
sys.exit(0)
working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path)
with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer:
exit(diff_directory(find_language_doc(working_language), working_language, writer))
working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path)
with contextlib.closing(
ToStdOut(sys.stdout)
if arguments.no_pager
else ToPager(NamedTemporaryFile("r+"))
) as writer:
exit(
diff_directory(
find_language_doc(working_language), working_language, writer
)
)

View File

@ -16,27 +16,26 @@ import util
def get_events(args):
events = []
skip = True
with open(os.path.join(args.docs_dir, '..', 'README.md')) as f:
with open(os.path.join(args.docs_dir, "..", "README.md")) as f:
for line in f:
if skip:
if 'Upcoming Events' in line:
if "Upcoming Events" in line:
skip = False
else:
if not line:
continue
line = line.strip().split('](')
line = line.strip().split("](")
if len(line) == 2:
tail = line[1].split(') ')
events.append({
'signup_link': tail[0],
'event_name': line[0].replace('* [', ''),
'event_date': tail[1].replace('on ', '').replace('.', '')
})
tail = line[1].split(") ")
events.append(
{
"signup_link": tail[0],
"event_name": line[0].replace("* [", ""),
"event_date": tail[1].replace("on ", "").replace(".", ""),
}
)
return events
if __name__ == '__main__':
logging.basicConfig(
level=logging.DEBUG,
stream=sys.stderr
)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)

View File

@ -16,74 +16,79 @@ import slugify as slugify_impl
def slugify(value, separator):
return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True)
return slugify_impl.slugify(
value, separator=separator, word_boundary=True, save_order=True
)
MARKDOWN_EXTENSIONS = [
'mdx_clickhouse',
'admonition',
'attr_list',
'def_list',
'codehilite',
'nl2br',
'sane_lists',
'pymdownx.details',
'pymdownx.magiclink',
'pymdownx.superfences',
'extra',
{
'toc': {
'permalink': True,
'slugify': slugify
}
}
"mdx_clickhouse",
"admonition",
"attr_list",
"def_list",
"codehilite",
"nl2br",
"sane_lists",
"pymdownx.details",
"pymdownx.magiclink",
"pymdownx.superfences",
"extra",
{"toc": {"permalink": True, "slugify": slugify}},
]
class ClickHouseLinkMixin(object):
def handleMatch(self, m, data):
single_page = (os.environ.get('SINGLE_PAGE') == '1')
single_page = os.environ.get("SINGLE_PAGE") == "1"
try:
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
except IndexError:
return
if el is not None:
href = el.get('href') or ''
is_external = href.startswith('http:') or href.startswith('https:')
href = el.get("href") or ""
is_external = href.startswith("http:") or href.startswith("https:")
if is_external:
if not href.startswith('https://clickhouse.com'):
el.set('rel', 'external nofollow noreferrer')
if not href.startswith("https://clickhouse.com"):
el.set("rel", "external nofollow noreferrer")
elif single_page:
if '#' in href:
el.set('href', '#' + href.split('#', 1)[1])
if "#" in href:
el.set("href", "#" + href.split("#", 1)[1])
else:
el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/'))
el.set(
"href", "#" + href.replace("/index.md", "/").replace(".md", "/")
)
return el, start, end
class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor):
class ClickHouseAutolinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
):
pass
class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor):
class ClickHouseLinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
):
pass
class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines):
for line in lines:
if '<!--hide-->' not in line:
if "<!--hide-->" not in line:
yield line
class ClickHouseMarkdown(markdown.extensions.Extension):
def extendMarkdown(self, md, md_globals):
md.preprocessors['clickhouse'] = ClickHousePreprocessor()
md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md)
md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md)
md.preprocessors["clickhouse"] = ClickHousePreprocessor()
md.inlinePatterns["link"] = ClickHouseLinkPattern(
markdown.inlinepatterns.LINK_RE, md
)
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
markdown.inlinepatterns.AUTOLINK_RE, md
)
def makeExtension(**kwargs):
@ -92,10 +97,8 @@ def makeExtension(**kwargs):
def get_translations(dirname, lang):
import babel.support
return babel.support.Translations.load(
dirname=dirname,
locales=[lang, 'en']
)
return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
def on_config(self, config):
super(PatchedMacrosPlugin, self).on_config(config)
self.env.comment_start_string = '{##'
self.env.comment_end_string = '##}'
self.env.loader = jinja2.FileSystemLoader([
os.path.join(config.data['site_dir']),
os.path.join(config.data['extra']['includes_dir'])
])
self.env.comment_start_string = "{##"
self.env.comment_end_string = "##}"
self.env.loader = jinja2.FileSystemLoader(
[
os.path.join(config.data["site_dir"]),
os.path.join(config.data["extra"]["includes_dir"]),
]
)
def on_env(self, env, config, files):
import util
env.add_extension('jinja2.ext.i18n')
dirname = os.path.join(config.data['theme'].dirs[0], 'locale')
lang = config.data['theme']['language']
env.install_gettext_translations(
get_translations(dirname, lang),
newstyle=True
)
env.add_extension("jinja2.ext.i18n")
dirname = os.path.join(config.data["theme"].dirs[0], "locale")
lang = config.data["theme"]["language"]
env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
util.init_jinja2_filters(env)
return env
@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
return markdown
def on_page_markdown(self, markdown, page, config, files):
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files)
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
markdown, page, config, files
)
if os.path.islink(page.file.abs_src_path):
lang = config.data['theme']['language']
page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1)
lang = config.data["theme"]["language"]
page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'):
if config.data["extra"].get("version_prefix") or config.data["extra"].get(
"single_page"
):
return markdown
if self.skip_git_log:
return markdown

View File

@ -10,57 +10,59 @@ import util
def find_first_header(content):
for line in content.split('\n'):
if line.startswith('#'):
no_hash = line.lstrip('#')
return no_hash.split('{', 1)[0].strip()
for line in content.split("\n"):
if line.startswith("#"):
no_hash = line.lstrip("#")
return no_hash.split("{", 1)[0].strip()
def build_nav_entry(root, args):
if root.endswith('images'):
if root.endswith("images"):
return None, None, None
result_items = []
index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md'))
current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title'))
current_title = current_title or index_meta.get('title', find_first_header(index_content))
index_meta, index_content = util.read_md_file(os.path.join(root, "index.md"))
current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title"))
current_title = current_title or index_meta.get(
"title", find_first_header(index_content)
)
for filename in os.listdir(root):
path = os.path.join(root, filename)
if os.path.isdir(path):
prio, title, payload = build_nav_entry(path, args)
if title and payload:
result_items.append((prio, title, payload))
elif filename.endswith('.md'):
elif filename.endswith(".md"):
path = os.path.join(root, filename)
meta = ''
content = ''
meta = ""
content = ""
try:
meta, content = util.read_md_file(path)
except:
print('Error in file: {}'.format(path))
print("Error in file: {}".format(path))
raise
path = path.split('/', 2)[-1]
title = meta.get('toc_title', find_first_header(content))
path = path.split("/", 2)[-1]
title = meta.get("toc_title", find_first_header(content))
if title:
title = title.strip().rstrip('.')
title = title.strip().rstrip(".")
else:
title = meta.get('toc_folder_title', 'hidden')
prio = meta.get('toc_priority', 9999)
logging.debug(f'Nav entry: {prio}, {title}, {path}')
if meta.get('toc_hidden') or not content.strip():
title = 'hidden'
if title == 'hidden':
title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest()
title = meta.get("toc_folder_title", "hidden")
prio = meta.get("toc_priority", 9999)
logging.debug(f"Nav entry: {prio}, {title}, {path}")
if meta.get("toc_hidden") or not content.strip():
title = "hidden"
if title == "hidden":
title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest()
if args.nav_limit and len(result_items) >= args.nav_limit:
break
result_items.append((prio, title, path))
result_items = sorted(result_items, key=lambda x: (x[0], x[1]))
result = collections.OrderedDict([(item[1], item[2]) for item in result_items])
if index_meta.get('toc_hidden_folder'):
current_title += '|hidden-folder'
return index_meta.get('toc_priority', 10000), current_title, result
if index_meta.get("toc_hidden_folder"):
current_title += "|hidden-folder"
return index_meta.get("toc_priority", 10000), current_title, result
def build_docs_nav(lang, args):
@ -70,7 +72,7 @@ def build_docs_nav(lang, args):
index_key = None
for key, value in list(nav.items()):
if key and value:
if value == 'index.md':
if value == "index.md":
index_key = key
continue
result.append({key: value})
@ -78,7 +80,7 @@ def build_docs_nav(lang, args):
break
if index_key:
key = list(result[0].keys())[0]
result[0][key][index_key] = 'index.md'
result[0][key][index_key] = "index.md"
result[0][key].move_to_end(index_key, last=False)
return result
@ -86,7 +88,7 @@ def build_docs_nav(lang, args):
def build_blog_nav(lang, args):
blog_dir = os.path.join(args.blog_dir, lang)
years = sorted(os.listdir(blog_dir), reverse=True)
result_nav = [{'hidden': 'index.md'}]
result_nav = [{"hidden": "index.md"}]
post_meta = collections.OrderedDict()
for year in years:
year_dir = os.path.join(blog_dir, year)
@ -97,38 +99,53 @@ def build_blog_nav(lang, args):
post_meta_items = []
for post in os.listdir(year_dir):
post_path = os.path.join(year_dir, post)
if not post.endswith('.md'):
raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}')
if not post.endswith(".md"):
raise RuntimeError(
f"Unexpected non-md file in posts folder: {post_path}"
)
meta, _ = util.read_md_file(post_path)
post_date = meta['date']
post_title = meta['title']
post_date = meta["date"]
post_title = meta["title"]
if datetime.date.fromisoformat(post_date) > datetime.date.today():
continue
posts.append(
(post_date, post_title, os.path.join(year, post),)
(
post_date,
post_title,
os.path.join(year, post),
)
)
if post_title in post_meta:
raise RuntimeError(f'Duplicate post title: {post_title}')
if not post_date.startswith(f'{year}-'):
raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}')
post_url_part = post.replace('.md', '')
post_meta_items.append((post_date, {
'date': post_date,
'title': post_title,
'image': meta.get('image'),
'url': f'/blog/{lang}/{year}/{post_url_part}/'
},))
raise RuntimeError(f"Duplicate post title: {post_title}")
if not post_date.startswith(f"{year}-"):
raise RuntimeError(
f"Post date {post_date} doesn't match the folder year {year}: {post_title}"
)
post_url_part = post.replace(".md", "")
post_meta_items.append(
(
post_date,
{
"date": post_date,
"title": post_title,
"image": meta.get("image"),
"url": f"/blog/{lang}/{year}/{post_url_part}/",
},
)
)
for _, title, path in sorted(posts, reverse=True):
result_nav[-1][year][title] = path
for _, post_meta_item in sorted(post_meta_items,
reverse=True,
key=lambda item: item[0]):
post_meta[post_meta_item['title']] = post_meta_item
for _, post_meta_item in sorted(
post_meta_items, reverse=True, key=lambda item: item[0]
):
post_meta[post_meta_item["title"]] = post_meta_item
return result_nav, post_meta
def _custom_get_navigation(files, config):
nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages())
nav_config = config["nav"] or mkdocs.structure.nav.nest_paths(
f.src_path for f in files.documentation_pages()
)
items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config)
if not isinstance(items, list):
items = [items]
@ -138,19 +155,25 @@ def _custom_get_navigation(files, config):
mkdocs.structure.nav._add_previous_and_next_links(pages)
mkdocs.structure.nav._add_parent_links(items)
missing_from_config = [file for file in files.documentation_pages() if file.page is None]
missing_from_config = [
file for file in files.documentation_pages() if file.page is None
]
if missing_from_config:
files._files = [file for file in files._files if file not in missing_from_config]
files._files = [
file for file in files._files if file not in missing_from_config
]
links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link)
for link in links:
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url)
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(
link.url
)
if scheme or netloc:
mkdocs.structure.nav.log.debug(
"An external link to '{}' is included in "
"the 'nav' configuration.".format(link.url)
)
elif link.url.startswith('/'):
elif link.url.startswith("/"):
mkdocs.structure.nav.log.debug(
"An absolute path to '{}' is included in the 'nav' configuration, "
"which presumably points to an external resource.".format(link.url)

View File

@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url):
os.makedirs(out_dir)
except OSError:
pass
with open(out_path, 'w') as f:
f.write(f'''<!--[if IE 6]> Redirect: {to_url} <![endif]-->
with open(out_path, "w") as f:
f.write(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML>
<html lang="en-US">
<head>
@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url):
<body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body>
</html>''')
</html>"""
)
def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path):
out_path = os.path.join(
output_dir, lang,
from_path.replace('/index.md', '/index.html').replace('.md', '/index.html')
output_dir,
lang,
from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"),
)
target_path = to_path.replace('/index.md', '/').replace('.md', '/')
target_path = to_path.replace("/index.md", "/").replace(".md", "/")
if target_path[0:7] != 'http://' and target_path[0:8] != 'https://':
to_url = f'/{base_prefix}/{lang}/{target_path}'
if target_path[0:7] != "http://" and target_path[0:8] != "https://":
to_url = f"/{base_prefix}/{lang}/{target_path}"
else:
to_url = target_path
@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)
def build_docs_redirects(args):
with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f:
with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f:
for line in f:
for lang in args.lang.split(','):
from_path, to_path = line.split(' ', 1)
build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path)
for lang in args.lang.split(","):
from_path, to_path = line.split(" ", 1)
build_redirect_html(
args, "docs", lang, args.docs_output_dir, from_path, to_path
)
def build_blog_redirects(args):
for lang in args.blog_lang.split(','):
redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt')
for lang in args.blog_lang.split(","):
redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt")
if os.path.exists(redirects_path):
with open(redirects_path, 'r') as f:
with open(redirects_path, "r") as f:
for line in f:
from_path, to_path = line.split(' ', 1)
build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path)
from_path, to_path = line.split(" ", 1)
build_redirect_html(
args, "blog", lang, args.blog_output_dir, from_path, to_path
)
def build_static_redirects(args):
for static_redirect in [
('benchmark.html', '/benchmark/dbms/'),
('benchmark_hardware.html', '/benchmark/hardware/'),
('tutorial.html', '/docs/en/getting_started/tutorial/',),
('reference_en.html', '/docs/en/single/', ),
('reference_ru.html', '/docs/ru/single/',),
('docs/index.html', '/docs/en/',),
("benchmark.html", "/benchmark/dbms/"),
("benchmark_hardware.html", "/benchmark/hardware/"),
(
"tutorial.html",
"/docs/en/getting_started/tutorial/",
),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]:
write_redirect_html(
os.path.join(args.output_dir, static_redirect[0]),
static_redirect[1]
os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
)

View File

@ -12,7 +12,8 @@ import test
import util
import website
TEMPORARY_FILE_NAME = 'single.md'
TEMPORARY_FILE_NAME = "single.md"
def recursive_values(item):
if isinstance(item, dict):
@ -25,11 +26,14 @@ def recursive_values(item):
yield item
anchor_not_allowed_chars = re.compile(r'[^\w\-]')
def generate_anchor_from_path(path):
return re.sub(anchor_not_allowed_chars, '-', path)
anchor_not_allowed_chars = re.compile(r"[^\w\-]")
absolute_link = re.compile(r'^https?://')
def generate_anchor_from_path(path):
return re.sub(anchor_not_allowed_chars, "-", path)
absolute_link = re.compile(r"^https?://")
def replace_link(match, path):
@ -40,46 +44,55 @@ def replace_link(match, path):
if re.search(absolute_link, link):
return match.group(0)
if link.endswith('/'):
link = link[0:-1] + '.md'
if link.endswith("/"):
link = link[0:-1] + ".md"
return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link))))
return "{}(#{})".format(
title,
generate_anchor_from_path(
os.path.normpath(os.path.join(os.path.dirname(path), link))
),
)
# Concatenates Markdown files to a single file.
def concatenate(lang, docs_path, single_page_file, nav):
lang_path = os.path.join(docs_path, lang)
proj_config = f'{docs_path}/toc_{lang}.yml'
proj_config = f"{docs_path}/toc_{lang}.yml"
if os.path.exists(proj_config):
with open(proj_config) as cfg_file:
nav = yaml.full_load(cfg_file.read())['nav']
nav = yaml.full_load(cfg_file.read())["nav"]
files_to_concatenate = list(recursive_values(nav))
files_count = len(files_to_concatenate)
logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.')
logging.debug('Concatenating: ' + ', '.join(files_to_concatenate))
assert files_count > 0, f'Empty single-page for {lang}'
logging.info(
f"{files_count} files will be concatenated into single md-file for {lang}."
)
logging.debug("Concatenating: " + ", ".join(files_to_concatenate))
assert files_count > 0, f"Empty single-page for {lang}"
link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)')
link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")
for path in files_to_concatenate:
try:
with open(os.path.join(lang_path, path)) as f:
# Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file.
single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path))
single_page_file.write(
'\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)
)
in_metadata = False
for line in f:
# Skip YAML metadata.
if line == '---\n':
if line == "---\n":
in_metadata = not in_metadata
continue
if not in_metadata:
# Increase the level of headers.
if line.startswith('#'):
line = '#' + line
if line.startswith("#"):
line = "#" + line
# Replace links within the docs.
@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav):
line = re.sub(
link_regexp,
lambda match: replace_link(match, path),
line)
line,
)
# If failed to replace the relative link, print to log
# But with some exceptions:
# - "../src/" -- for cmake-in-clickhouse.md (link to sources)
# - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
if '../' in line and (not '../usr/share' in line) and (not '../src/' in line):
logging.info('Failed to resolve relative link:')
if (
"../" in line
and (not "../usr/share" in line)
and (not "../src/" in line)
):
logging.info("Failed to resolve relative link:")
logging.info(path)
logging.info(line)
@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav):
single_page_file.flush()
def get_temporary_file_name(lang, args):
return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME)
def remove_temporary_files(lang, args):
single_md_path = get_temporary_file_name(lang, args)
if os.path.exists(single_md_path):
@ -115,14 +135,14 @@ def remove_temporary_files(lang, args):
def build_single_page_version(lang, args, nav, cfg):
logging.info(f'Building single page version for {lang}')
os.environ['SINGLE_PAGE'] = '1'
extra = cfg.data['extra']
extra['single_page'] = True
extra['is_amp'] = False
logging.info(f"Building single page version for {lang}")
os.environ["SINGLE_PAGE"] = "1"
extra = cfg.data["extra"]
extra["single_page"] = True
extra["is_amp"] = False
single_md_path = get_temporary_file_name(lang, args)
with open(single_md_path, 'w') as single_md:
with open(single_md_path, "w") as single_md:
concatenate(lang, args.docs_dir, single_md, nav)
with util.temp_dir() as site_temp:
@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg):
shutil.copytree(docs_src_lang, docs_temp_lang)
for root, _, filenames in os.walk(docs_temp_lang):
for filename in filenames:
if filename != 'single.md' and filename.endswith('.md'):
if filename != "single.md" and filename.endswith(".md"):
os.unlink(os.path.join(root, filename))
cfg.load_dict({
'docs_dir': docs_temp_lang,
'site_dir': site_temp,
'extra': extra,
'nav': [
{cfg.data.get('site_name'): 'single.md'}
]
})
cfg.load_dict(
{
"docs_dir": docs_temp_lang,
"site_dir": site_temp,
"extra": extra,
"nav": [{cfg.data.get("site_name"): "single.md"}],
}
)
if not args.test_only:
mkdocs.commands.build.build(cfg)
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
single_page_output_path = os.path.join(
args.docs_dir, args.docs_output_dir, lang, "single"
)
if os.path.exists(single_page_output_path):
shutil.rmtree(single_page_output_path)
shutil.copytree(
os.path.join(site_temp, 'single'),
single_page_output_path
os.path.join(site_temp, "single"), single_page_output_path
)
single_page_index_html = os.path.join(single_page_output_path, 'index.html')
single_page_content_js = os.path.join(single_page_output_path, 'content.js')
single_page_index_html = os.path.join(
single_page_output_path, "index.html"
)
single_page_content_js = os.path.join(
single_page_output_path, "content.js"
)
with open(single_page_index_html, 'r') as f:
sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->')
with open(single_page_index_html, "r") as f:
sp_prefix, sp_js, sp_suffix = f.read().split("<!-- BREAK -->")
with open(single_page_index_html, 'w') as f:
with open(single_page_index_html, "w") as f:
f.write(sp_prefix)
f.write(sp_suffix)
with open(single_page_content_js, 'w') as f:
with open(single_page_content_js, "w") as f:
if args.minify:
import jsmin
sp_js = jsmin.jsmin(sp_js)
f.write(sp_js)
logging.info(f'Re-building single page for {lang} pdf/test')
logging.info(f"Re-building single page for {lang} pdf/test")
with util.temp_dir() as test_dir:
extra['single_page'] = False
cfg.load_dict({
'docs_dir': docs_temp_lang,
'site_dir': test_dir,
'extra': extra,
'nav': [
{cfg.data.get('site_name'): 'single.md'}
]
})
extra["single_page"] = False
cfg.load_dict(
{
"docs_dir": docs_temp_lang,
"site_dir": test_dir,
"extra": extra,
"nav": [{cfg.data.get("site_name"): "single.md"}],
}
)
mkdocs.commands.build.build(cfg)
css_in = ' '.join(website.get_css_in(args))
js_in = ' '.join(website.get_js_in(args))
subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)
css_in = " ".join(website.get_css_in(args))
js_in = " ".join(website.get_js_in(args))
subprocess.check_call(
f"cat {css_in} > {test_dir}/css/base.css", shell=True
)
subprocess.check_call(
f"cat {js_in} > {test_dir}/js/base.js", shell=True
)
if args.save_raw_single_page:
shutil.copytree(test_dir, args.save_raw_single_page)
logging.info(f'Running tests for {lang}')
logging.info(f"Running tests for {lang}")
test.test_single_page(
os.path.join(test_dir, 'single', 'index.html'), lang)
os.path.join(test_dir, "single", "index.html"), lang
)
logging.info(f'Finished building single page version for {lang}')
logging.info(f"Finished building single page version for {lang}")
remove_temporary_files(lang, args)

View File

@ -8,14 +8,11 @@ import subprocess
def test_single_page(input_path, lang):
if not (lang == 'en'):
if not (lang == "en"):
return
with open(input_path) as f:
soup = bs4.BeautifulSoup(
f,
features='html.parser'
)
soup = bs4.BeautifulSoup(f, features="html.parser")
anchor_points = set()
@ -23,30 +20,27 @@ def test_single_page(input_path, lang):
links_to_nowhere = 0
for tag in soup.find_all():
for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]:
for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]:
if anchor_point:
anchor_points.add(anchor_point)
for tag in soup.find_all():
href = tag.attrs.get('href')
if href and href.startswith('#') and href != '#':
href = tag.attrs.get("href")
if href and href.startswith("#") and href != "#":
if href[1:] not in anchor_points:
links_to_nowhere += 1
logging.info("Tag %s", tag)
logging.info('Link to nowhere: %s' % href)
logging.info("Link to nowhere: %s" % href)
if links_to_nowhere:
logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}')
logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}")
sys.exit(1)
if len(anchor_points) <= 10:
logging.error('Html parsing is probably broken')
logging.error("Html parsing is probably broken")
sys.exit(1)
if __name__ == '__main__':
logging.basicConfig(
level=logging.DEBUG,
stream=sys.stderr
)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
test_single_page(sys.argv[1], sys.argv[2])

View File

@ -15,7 +15,7 @@ import yaml
@contextlib.contextmanager
def temp_dir():
path = tempfile.mkdtemp(dir=os.environ.get('TEMP'))
path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
try:
yield path
finally:
@ -34,7 +34,7 @@ def cd(new_cwd):
def get_free_port():
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(('', 0))
s.bind(("", 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
return s.getsockname()[1]
@ -61,12 +61,12 @@ def read_md_file(path):
meta_text = []
content = []
if os.path.exists(path):
with open(path, 'r') as f:
with open(path, "r") as f:
for line in f:
if line.startswith('---'):
if line.startswith("---"):
if in_meta:
in_meta = False
meta = yaml.full_load(''.join(meta_text))
meta = yaml.full_load("".join(meta_text))
else:
in_meta = True
else:
@ -74,7 +74,7 @@ def read_md_file(path):
meta_text.append(line)
else:
content.append(line)
return meta, ''.join(content)
return meta, "".join(content)
def write_md_file(path, meta, content):
@ -82,13 +82,13 @@ def write_md_file(path, meta, content):
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(path, 'w') as f:
with open(path, "w") as f:
if meta:
print('---', file=f)
print("---", file=f)
yaml.dump(meta, f)
print('---', file=f)
if not content.startswith('\n'):
print('', file=f)
print("---", file=f)
if not content.startswith("\n"):
print("", file=f)
f.write(content)
@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data):
value.append((node_key, node_value))
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)
return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
yaml.add_representer(collections.OrderedDict, represent_ordereddict)
@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict)
def init_jinja2_filters(env):
import amp
import website
chunk_size = 10240
env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)]
env.filters['html_to_amp'] = amp.html_to_amp
env.filters['adjust_markdown_html'] = website.adjust_markdown_html
env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT')
env.filters["chunks"] = lambda line: [
line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
]
env.filters["html_to_amp"] = amp.html_to_amp
env.filters["adjust_markdown_html"] = website.adjust_markdown_html
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
d, "%Y-%m-%d"
).strftime("%a, %d %b %Y %H:%M:%S GMT")
def init_jinja2_env(args):
import mdx_clickhouse
env = jinja2.Environment(
loader=jinja2.FileSystemLoader([
args.website_dir,
os.path.join(args.docs_dir, '_includes')
]),
extensions=[
'jinja2.ext.i18n',
'jinja2_highlight.HighlightExtension'
]
loader=jinja2.FileSystemLoader(
[args.website_dir, os.path.join(args.docs_dir, "_includes")]
),
extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
)
env.extend(jinja2_highlight_cssclass='syntax p-3 my-3')
translations_dir = os.path.join(args.website_dir, 'locale')
env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
translations_dir = os.path.join(args.website_dir, "locale")
env.install_gettext_translations(
mdx_clickhouse.get_translations(translations_dir, 'en'),
newstyle=True
mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
)
init_jinja2_filters(env)
return env

View File

@ -17,108 +17,112 @@ import util
def handle_iframe(iframe, soup):
allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/']
allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"]
illegal_domain = True
iframe_src = iframe.attrs['src']
iframe_src = iframe.attrs["src"]
for domain in allowed_domains:
if iframe_src.startswith(domain):
illegal_domain = False
break
if illegal_domain:
raise RuntimeError(f'iframe from illegal domain: {iframe_src}')
wrapper = soup.new_tag('div')
wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9']
raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
wrapper = soup.new_tag("div")
wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
iframe.insert_before(wrapper)
iframe.extract()
wrapper.insert(0, iframe)
if 'width' in iframe.attrs:
del iframe.attrs['width']
if 'height' in iframe.attrs:
del iframe.attrs['height']
iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture'
iframe.attrs['class'] = 'embed-responsive-item'
iframe.attrs['frameborder'] = '0'
iframe.attrs['allowfullscreen'] = '1'
if "width" in iframe.attrs:
del iframe.attrs["width"]
if "height" in iframe.attrs:
del iframe.attrs["height"]
iframe.attrs[
"allow"
] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
iframe.attrs["class"] = "embed-responsive-item"
iframe.attrs["frameborder"] = "0"
iframe.attrs["allowfullscreen"] = "1"
def adjust_markdown_html(content):
soup = bs4.BeautifulSoup(
content,
features='html.parser'
)
soup = bs4.BeautifulSoup(content, features="html.parser")
for a in soup.find_all('a'):
a_class = a.attrs.get('class')
a_href = a.attrs.get('href')
if a_class and 'headerlink' in a_class:
a.string = '\xa0'
if a_href and a_href.startswith('http'):
a.attrs['target'] = '_blank'
for a in soup.find_all("a"):
a_class = a.attrs.get("class")
a_href = a.attrs.get("href")
if a_class and "headerlink" in a_class:
a.string = "\xa0"
if a_href and a_href.startswith("http"):
a.attrs["target"] = "_blank"
for code in soup.find_all('code'):
code_class = code.attrs.get('class')
for code in soup.find_all("code"):
code_class = code.attrs.get("class")
if code_class:
code.attrs['class'] = code_class + ['syntax']
code.attrs["class"] = code_class + ["syntax"]
else:
code.attrs['class'] = 'syntax'
code.attrs["class"] = "syntax"
for iframe in soup.find_all('iframe'):
for iframe in soup.find_all("iframe"):
handle_iframe(iframe, soup)
for img in soup.find_all('img'):
if img.attrs.get('alt') == 'iframe':
img.name = 'iframe'
img.string = ''
for img in soup.find_all("img"):
if img.attrs.get("alt") == "iframe":
img.name = "iframe"
img.string = ""
handle_iframe(img, soup)
continue
img_class = img.attrs.get('class')
img_class = img.attrs.get("class")
if img_class:
img.attrs['class'] = img_class + ['img-fluid']
img.attrs["class"] = img_class + ["img-fluid"]
else:
img.attrs['class'] = 'img-fluid'
img.attrs["class"] = "img-fluid"
for details in soup.find_all('details'):
for summary in details.find_all('summary'):
for details in soup.find_all("details"):
for summary in details.find_all("summary"):
if summary.parent != details:
summary.extract()
details.insert(0, summary)
for dd in soup.find_all('dd'):
dd_class = dd.attrs.get('class')
for dd in soup.find_all("dd"):
dd_class = dd.attrs.get("class")
if dd_class:
dd.attrs['class'] = dd_class + ['pl-3']
dd.attrs["class"] = dd_class + ["pl-3"]
else:
dd.attrs['class'] = 'pl-3'
dd.attrs["class"] = "pl-3"
for div in soup.find_all('div'):
div_class = div.attrs.get('class')
is_admonition = div_class and 'admonition' in div.attrs.get('class')
for div in soup.find_all("div"):
div_class = div.attrs.get("class")
is_admonition = div_class and "admonition" in div.attrs.get("class")
if is_admonition:
for a in div.find_all('a'):
a_class = a.attrs.get('class')
for a in div.find_all("a"):
a_class = a.attrs.get("class")
if a_class:
a.attrs['class'] = a_class + ['alert-link']
a.attrs["class"] = a_class + ["alert-link"]
else:
a.attrs['class'] = 'alert-link'
a.attrs["class"] = "alert-link"
for p in div.find_all('p'):
p_class = p.attrs.get('class')
if is_admonition and p_class and ('admonition-title' in p_class):
p.attrs['class'] = p_class + ['alert-heading', 'display-4', 'text-reset', 'mb-2']
for p in div.find_all("p"):
p_class = p.attrs.get("class")
if is_admonition and p_class and ("admonition-title" in p_class):
p.attrs["class"] = p_class + [
"alert-heading",
"display-4",
"text-reset",
"mb-2",
]
if is_admonition:
div.attrs['role'] = 'alert'
if ('info' in div_class) or ('note' in div_class):
mode = 'alert-primary'
elif ('attention' in div_class) or ('warning' in div_class):
mode = 'alert-warning'
elif 'important' in div_class:
mode = 'alert-danger'
elif 'tip' in div_class:
mode = 'alert-info'
div.attrs["role"] = "alert"
if ("info" in div_class) or ("note" in div_class):
mode = "alert-primary"
elif ("attention" in div_class) or ("warning" in div_class):
mode = "alert-warning"
elif "important" in div_class:
mode = "alert-danger"
elif "tip" in div_class:
mode = "alert-info"
else:
mode = 'alert-secondary'
div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode]
mode = "alert-secondary"
div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
return str(soup)
@ -128,61 +132,63 @@ def minify_html(content):
def build_website(args):
logging.info('Building website')
logging.info("Building website")
env = util.init_jinja2_env(args)
shutil.copytree(
args.website_dir,
args.output_dir,
ignore=shutil.ignore_patterns(
'*.md',
'*.sh',
'*.css',
'*.json',
'js/*.js',
'build',
'docs',
'public',
'node_modules',
'src',
'templates',
'locale',
'.gitkeep'
)
"*.md",
"*.sh",
"*.css",
"*.json",
"js/*.js",
"build",
"docs",
"public",
"node_modules",
"src",
"templates",
"locale",
".gitkeep",
),
)
shutil.copytree(
os.path.join(args.website_dir, 'images'),
os.path.join(args.output_dir, 'docs', 'images')
os.path.join(args.website_dir, "images"),
os.path.join(args.output_dir, "docs", "images"),
)
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'),
os.path.join(args.output_dir, 'data', 'version_date.tsv'))
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
os.path.join(args.output_dir, "data", "version_date.tsv"),
)
# This file can be requested to install ClickHouse.
shutil.copy2(
os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh'),
os.path.join(args.output_dir, 'data', 'install.sh'))
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
os.path.join(args.output_dir, "data", "install.sh"),
)
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
if filename == 'main.html':
if filename == "main.html":
continue
path = os.path.join(root, filename)
if not filename.endswith('.html'):
if not filename.endswith(".html"):
continue
logging.info('Processing %s', path)
with open(path, 'rb') as f:
content = f.read().decode('utf-8')
logging.info("Processing %s", path)
with open(path, "rb") as f:
content = f.read().decode("utf-8")
template = env.from_string(content)
content = template.render(args.__dict__)
with open(path, 'wb') as f:
f.write(content.encode('utf-8'))
with open(path, "wb") as f:
f.write(content.encode("utf-8"))
def get_css_in(args):
@ -193,7 +199,7 @@ def get_css_in(args):
f"'{args.website_dir}/css/blog.css'",
f"'{args.website_dir}/css/docs.css'",
f"'{args.website_dir}/css/highlight.css'",
f"'{args.website_dir}/css/main.css'"
f"'{args.website_dir}/css/main.css'",
]
@ -207,42 +213,41 @@ def get_js_in(args):
f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'",
f"'{args.website_dir}/js/main.js'"
f"'{args.website_dir}/js/main.js'",
]
def minify_file(path, css_digest, js_digest):
if not (
path.endswith('.html') or
path.endswith('.css')
):
if not (path.endswith(".html") or path.endswith(".css")):
return
logging.info('Minifying %s', path)
with open(path, 'rb') as f:
content = f.read().decode('utf-8')
if path.endswith('.html'):
logging.info("Minifying %s", path)
with open(path, "rb") as f:
content = f.read().decode("utf-8")
if path.endswith(".html"):
content = minify_html(content)
content = content.replace('base.css?css_digest', f'base.css?{css_digest}')
content = content.replace('base.js?js_digest', f'base.js?{js_digest}')
# TODO: restore cssmin
# elif path.endswith('.css'):
# content = cssmin.cssmin(content)
# TODO: restore jsmin
# elif path.endswith('.js'):
# content = jsmin.jsmin(content)
with open(path, 'wb') as f:
f.write(content.encode('utf-8'))
content = content.replace("base.css?css_digest", f"base.css?{css_digest}")
content = content.replace("base.js?js_digest", f"base.js?{js_digest}")
# TODO: restore cssmin
# elif path.endswith('.css'):
# content = cssmin.cssmin(content)
# TODO: restore jsmin
# elif path.endswith('.js'):
# content = jsmin.jsmin(content)
with open(path, "wb") as f:
f.write(content.encode("utf-8"))
def minify_website(args):
css_in = ' '.join(get_css_in(args))
css_out = f'{args.output_dir}/docs/css/base.css'
os.makedirs(f'{args.output_dir}/docs/css')
css_in = " ".join(get_css_in(args))
css_out = f"{args.output_dir}/docs/css/base.css"
os.makedirs(f"{args.output_dir}/docs/css")
if args.minify and False: # TODO: return closure
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \
command = (
f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' "
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
)
logging.info(css_in)
logging.info(command)
output = subprocess.check_output(command, shell=True)
@ -251,51 +256,60 @@ def minify_website(args):
else:
command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True)
with open(css_out, 'wb+') as f:
with open(css_out, "wb+") as f:
f.write(output)
with open(css_out, 'rb') as f:
with open(css_out, "rb") as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = ' '.join(get_js_in(args))
js_out = f'{args.output_dir}/docs/js/base.js'
os.makedirs(f'{args.output_dir}/docs/js')
js_in = " ".join(get_js_in(args))
js_out = f"{args.output_dir}/docs/js/base.js"
os.makedirs(f"{args.output_dir}/docs/js")
if args.minify and False: # TODO: return closure
js_in = [js[1:-1] for js in js_in]
closure_args = [
'--js', *js_in, '--js_output_file', js_out,
'--compilation_level', 'SIMPLE',
'--dependency_mode', 'NONE',
'--third_party', '--use_types_for_optimization',
'--isolation_mode', 'IIFE'
"--js",
*js_in,
"--js_output_file",
js_out,
"--compilation_level",
"SIMPLE",
"--dependency_mode",
"NONE",
"--third_party",
"--use_types_for_optimization",
"--isolation_mode",
"IIFE",
]
logging.info(closure_args)
if closure.run(*closure_args):
raise RuntimeError('failed to run closure compiler')
with open(js_out, 'r') as f:
raise RuntimeError("failed to run closure compiler")
with open(js_out, "r") as f:
js_content = jsmin.jsmin(f.read())
with open(js_out, 'w') as f:
with open(js_out, "w") as f:
f.write(js_content)
else:
command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True)
with open(js_out, 'wb+') as f:
with open(js_out, "wb+") as f:
f.write(output)
with open(js_out, 'rb') as f:
with open(js_out, "rb") as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest)
if args.minify:
logging.info('Minifying website')
logging.info("Minifying website")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
path = os.path.join(root, filename)
futures.append(executor.submit(minify_file, path, css_digest, js_digest))
futures.append(
executor.submit(minify_file, path, css_digest, js_digest)
)
for future in futures:
exc = future.exception()
if exc:
@ -304,24 +318,28 @@ def minify_website(args):
def process_benchmark_results(args):
benchmark_root = os.path.join(args.website_dir, 'benchmark')
benchmark_root = os.path.join(args.website_dir, "benchmark")
required_keys = {
'dbms': ['result'],
'hardware': ['result', 'system', 'system_full', 'kind']
"dbms": ["result"],
"hardware": ["result", "system", "system_full", "kind"],
}
for benchmark_kind in ['dbms', 'hardware']:
for benchmark_kind in ["dbms", "hardware"]:
results = []
results_root = os.path.join(benchmark_root, benchmark_kind, 'results')
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
for result in sorted(os.listdir(results_root)):
result_file = os.path.join(results_root, result)
logging.debug(f'Reading benchmark result from {result_file}')
with open(result_file, 'r') as f:
logging.debug(f"Reading benchmark result from {result_file}")
with open(result_file, "r") as f:
result = json.loads(f.read())
for item in result:
for required_key in required_keys[benchmark_kind]:
assert required_key in item, f'No "{required_key}" in {result_file}'
assert (
required_key in item
), f'No "{required_key}" in {result_file}'
results += result
results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js')
with open(results_js, 'w') as f:
results_js = os.path.join(
args.output_dir, "benchmark", benchmark_kind, "results.js"
)
with open(results_js, "w") as f:
data = json.dumps(results)
f.write(f'var results = {data};')
f.write(f"var results = {data};")

View File

@ -42,6 +42,8 @@ git push
使用`utils/check-style/check-style`二进制文件执行一些简单的基于正则表达式的代码样式检查(注意, 它可以在本地运行).
如果失败, 按照[代码样式指南](./style.md)修复样式错误.
使用 [black](https://github.com/psf/black/) 檢查 python 代碼.
### 报告详情 {#report-details}
- [状态页示例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `docs_output.txt`记录了查结果错误(无效表格等), 空白页表示没有错误. [成功结果案例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)

1
packages/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*/

156
packages/build Executable file
View File

@ -0,0 +1,156 @@
#!/usr/bin/env bash
set -e
# Avoid dependency on locale
LC_ALL=C
# Normalize output directory
if [ -n "$OUTPUT_DIR" ]; then
OUTPUT_DIR=$(realpath -m "$OUTPUT_DIR")
fi
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd "$CUR_DIR"
ROOT_DIR=$(readlink -f "$(git rev-parse --show-cdup)")
PKG_ROOT='root'
DEB_ARCH=${DEB_ARCH:-amd64}
OUTPUT_DIR=${OUTPUT_DIR:-$ROOT_DIR}
[ -d "${OUTPUT_DIR}" ] || mkdir -p "${OUTPUT_DIR}"
SANITIZER=${SANITIZER:-""}
SOURCE=${SOURCE:-$PKG_ROOT}
HELP="${0} [--test] [--rpm] [-h|--help]
--test - adds '+test' prefix to version
--apk - build APK packages
--rpm - build RPM packages
--tgz - build tarball package
--help - show this help and exit
Used envs:
DEB_ARCH='${DEB_ARCH}'
OUTPUT_DIR='${OUTPUT_DIR}' - where the artifact will be placed
SANITIZER='${SANITIZER}' - if any sanitizer is used, affects version string
SOURCE='${SOURCE}' - directory with sources tree
VERSION_STRING='${VERSION_STRING}' - the package version to overwrite
"
if [ -z "${VERSION_STRING}" ]; then
# Get CLICKHOUSE_VERSION_STRING from the current git repo
eval "$("$ROOT_DIR/tests/ci/version_helper.py" -e)"
else
CLICKHOUSE_VERSION_STRING=${VERSION_STRING}
fi
export CLICKHOUSE_VERSION_STRING
while [[ $1 == --* ]]
do
case "$1" in
--test )
VERSION_POSTFIX+='+test'
shift ;;
--apk )
MAKE_APK=1
shift ;;
--rpm )
MAKE_RPM=1
shift ;;
--tgz )
MAKE_TGZ=1
shift ;;
--help )
echo "$HELP"
exit ;;
* )
echo "Unknown option $1"
exit 2 ;;
esac
done
function deb2tgz {
local FILE PKG_NAME PKG_DIR PKG_PATH TARBALL
FILE=$1
PKG_NAME=${FILE##*/}; PKG_NAME=${PKG_NAME%%_*}
PKG_DIR="$PKG_NAME-$CLICKHOUSE_VERSION_STRING"
PKG_PATH="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING"
TARBALL="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING-$DEB_ARCH.tgz"
rm -rf "$PKG_PATH"
dpkg-deb -R "$FILE" "$PKG_PATH"
mkdir -p "$PKG_PATH/install"
cat > "$PKG_PATH/install/doinst.sh" << 'EOF'
#!/bin/sh
set -e
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
for filepath in `find $SCRIPTPATH/.. -type f -or -type l | grep -v "\.\./install/"`; do
destpath=${filepath##$SCRIPTPATH/..}
mkdir -p $(dirname "$destpath")
cp -r "$filepath" "$destpath"
done
EOF
chmod +x "$PKG_PATH/install/doinst.sh"
if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then
tail +2 "$PKG_PATH/DEBIAN/postinst" > "$PKG_PATH/install/doinst.sh"
fi
rm -rf "$PKG_PATH/DEBIAN"
if [ -f "/usr/bin/pigz" ]; then
tar --use-compress-program=pigz -cf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
else
tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
fi
rm -r "$PKG_PATH"
}
# Build options
if [ -n "$SANITIZER" ]; then
if [[ "$SANITIZER" == "address" ]]; then VERSION_POSTFIX+="+asan"
elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan"
elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan"
elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan"
else
echo "Unknown value of SANITIZER variable: $SANITIZER"
exit 3
fi
elif [[ $BUILD_TYPE == 'debug' ]]; then
VERSION_POSTFIX+="+debug"
fi
if [[ "$PKG_ROOT" != "$SOURCE" ]]; then
# packages are built only from PKG_SOURCE
rm -rf "./$PKG_ROOT"
ln -sf "$SOURCE" "$PKG_SOURCE"
fi
CLICKHOUSE_VERSION_STRING+=$VERSION_POSTFIX
echo -e "\nCurrent version is $CLICKHOUSE_VERSION_STRING"
for config in clickhouse*.yaml; do
echo "Building deb package for $config"
# Preserve package path
exec 9>&1
PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9)
PKG_PATH=${PKG_PATH##*created package: }
exec 9>&-
if [ -n "$MAKE_APK" ]; then
echo "Building apk package for $config"
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk
fi
if [ -n "$MAKE_RPM" ]; then
echo "Building rpm package for $config"
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm
fi
if [ -n "$MAKE_TGZ" ]; then
echo "Building tarball for $config"
deb2tgz "$PKG_PATH"
fi
done
# vim: ts=4: sw=4: sts=4: expandtab

View File

@ -0,0 +1,57 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-client"
arch: "all"
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
replaces:
- clickhouse-compressor
conflicts:
- clickhouse-compressor
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
description: |
Client binary for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
This package provides clickhouse-client , clickhouse-local and clickhouse-benchmark
overrides:
deb:
depends:
- clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING})
rpm:
depends:
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
contents:
- src: root/etc/clickhouse-client/config.xml
dst: /etc/clickhouse-client/config.xml
type: config
- src: root/usr/bin/clickhouse-benchmark
dst: /usr/bin/clickhouse-benchmark
- src: root/usr/bin/clickhouse-compressor
dst: /usr/bin/clickhouse-compressor
- src: root/usr/bin/clickhouse-format
dst: /usr/bin/clickhouse-format
- src: root/usr/bin/clickhouse-client
dst: /usr/bin/clickhouse-client
- src: root/usr/bin/clickhouse-local
dst: /usr/bin/clickhouse-local
- src: root/usr/bin/clickhouse-obfuscator
dst: /usr/bin/clickhouse-obfuscator
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-client/AUTHORS
- src: ../CHANGELOG.md
dst: /usr/share/doc/clickhouse-client/CHANGELOG.md
- src: ../LICENSE
dst: /usr/share/doc/clickhouse-client/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-client/README.md

View File

@ -0,0 +1,34 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-common-static-dbg"
arch: "${DEB_ARCH}" # amd64, arm64
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
replaces:
- clickhouse-common-dbg
conflicts:
- clickhouse-common-dbg
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
description: |
debugging symbols for clickhouse-common-static
This package contains the debugging symbols for clickhouse-common.
contents:
- src: root/usr/lib/debug
dst: /usr/lib/debug
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS
- src: ../CHANGELOG.md
dst: /usr/share/doc/clickhouse-common-static-dbg/CHANGELOG.md
- src: ../LICENSE
dst: /usr/share/doc/clickhouse-common-static-dbg/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-common-static-dbg/README.md

View File

@ -0,0 +1,48 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-common-static"
arch: "${DEB_ARCH}" # amd64, arm64
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
replaces:
- clickhouse-common
- clickhouse-server-base
provides:
- clickhouse-common
- clickhouse-server-base
suggests:
- clickhouse-common-static-dbg
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
description: |
Common files for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
This package provides common files for both clickhouse server and client
contents:
- src: root/usr/bin/clickhouse
dst: /usr/bin/clickhouse
- src: root/usr/bin/clickhouse-odbc-bridge
dst: /usr/bin/clickhouse-odbc-bridge
- src: root/usr/bin/clickhouse-library-bridge
dst: /usr/bin/clickhouse-library-bridge
- src: root/usr/bin/clickhouse-extract-from-config
dst: /usr/bin/clickhouse-extract-from-config
- src: root/usr/share/bash-completion/completions
dst: /usr/share/bash-completion/completions
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static/AUTHORS
- src: ../CHANGELOG.md
dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md
- src: ../LICENSE
dst: /usr/share/doc/clickhouse-common-static/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-common-static/README.md

227
packages/clickhouse-server.init Executable file
View File

@ -0,0 +1,227 @@
#!/bin/sh
### BEGIN INIT INFO
# Provides: clickhouse-server
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Should-Start: $time $network
# Should-Stop: $network
# Short-Description: clickhouse-server daemon
### END INIT INFO
#
# NOTES:
# - Should-* -- script can start if the listed facilities are missing, unlike Required-*
#
# For the documentation [1]:
#
# [1]: https://wiki.debian.org/LSBInitScripts
CLICKHOUSE_USER=clickhouse
CLICKHOUSE_GROUP=${CLICKHOUSE_USER}
SHELL=/bin/bash
PROGRAM=clickhouse-server
CLICKHOUSE_GENERIC_PROGRAM=clickhouse
CLICKHOUSE_PROGRAM_ENV=""
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFDIR=/etc/$PROGRAM
CLICKHOUSE_LOGDIR=/var/log/clickhouse-server
CLICKHOUSE_LOGDIR_USER=root
CLICKHOUSE_DATADIR=/var/lib/clickhouse
if [ -d "/var/lock" ]; then
LOCALSTATEDIR=/var/lock
else
LOCALSTATEDIR=/run/lock
fi
if [ ! -d "$LOCALSTATEDIR" ]; then
mkdir -p "$LOCALSTATEDIR"
fi
CLICKHOUSE_BINDIR=/usr/bin
CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
LOCKFILE=$LOCALSTATEDIR/$PROGRAM
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid"
# CLICKHOUSE_STOP_TIMEOUT=60 # Disabled by default. Place to /etc/default/clickhouse if you need.
# Some systems lack "flock"
command -v flock >/dev/null && FLOCK=flock
# Override defaults from optional config file
test -f /etc/default/clickhouse && . /etc/default/clickhouse
die()
{
echo $1 >&2
exit 1
}
# Check that configuration file is Ok.
check_config()
{
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure.";
fi
}
initdb()
{
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
start()
{
${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
stop()
{
${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}"
}
restart()
{
${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
forcestop()
{
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
}
service_or_func()
{
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
systemctl $1 $PROGRAM
else
$1
fi
}
forcerestart()
{
forcestop
# Should not use 'start' function if systemd active
service_or_func start
}
use_cron()
{
# 1. running systemd
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
return 1
fi
# 2. disabled by config
if [ -z "$CLICKHOUSE_CRONFILE" ]; then
return 2
fi
return 0
}
# returns false if cron disabled (with systemd)
enable_cron()
{
use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE"
}
# returns false if cron disabled (with systemd)
disable_cron()
{
use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE"
}
is_cron_disabled()
{
use_cron || return 0
# Assumes that either no lines are commented or all lines are commented.
# Also please note, that currently cron file for ClickHouse has only one line (but some time ago there was more).
grep -q -E '^#' "$CLICKHOUSE_CRONFILE";
}
main()
{
# See how we were called.
EXIT_STATUS=0
case "$1" in
start)
service_or_func start && enable_cron
;;
stop)
disable_cron
service_or_func stop
;;
restart)
service_or_func restart && enable_cron
;;
forcestop)
disable_cron
forcestop
;;
forcerestart)
forcerestart && enable_cron
;;
reload)
service_or_func restart
;;
condstart)
service_or_func start
;;
condstop)
service_or_func stop
;;
condrestart)
service_or_func restart
;;
condreload)
service_or_func restart
;;
initdb)
initdb
;;
enable_cron)
enable_cron
;;
disable_cron)
disable_cron
;;
*)
echo "Usage: $0 {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
exit 2
;;
esac
exit $EXIT_STATUS
}
status()
{
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
}
# Running commands without need of locking
case "$1" in
status)
status
exit 0
;;
esac
(
if $FLOCK -n 9; then
main "$@"
else
echo "Init script is already running" && exit 1
fi
) 9> $LOCKFILE

View File

@ -0,0 +1,47 @@
#!/bin/sh
set -e
# set -x
PROGRAM=clickhouse-server
CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
# Please note that we don't support paths with whitespaces. This is rather ignorant.
CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR:=/etc/clickhouse-server}
CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
if [ ! -f "/etc/debian_version" ]; then
not_deb_os=1
fi
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
# if old rc.d service present - remove it
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server remove
fi
/bin/systemctl daemon-reload
/bin/systemctl enable clickhouse-server
else
# If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server
if [ -x "/etc/init.d/clickhouse-server" ]; then
if [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
else
echo # Other OS
fi
fi
fi
fi

View File

@ -0,0 +1,27 @@
[Unit]
Description=ClickHouse Server (analytic DBMS for big data)
Requires=network-online.target
# NOTE: that After/Wants=time-sync.target is not enough, you need to ensure
# that the time was adjusted already, if you use systemd-timesyncd you are
# safe, but if you use ntp or some other daemon, you should configure it
# additionaly.
After=time-sync.target network-online.target
Wants=time-sync.target
[Service]
Type=simple
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).
WantedBy=multi-user.target

View File

@ -0,0 +1,68 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-server"
arch: "all"
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
conflicts:
- clickhouse-keeper
depends:
- adduser
replaces:
- clickhouse-server-common
- clickhouse-server-base
provides:
- clickhouse-server-common
recommends:
- libcap2-bin
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
description: |
Server binary for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
This package provides clickhouse common configuration files
overrides:
deb:
depends:
- clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING})
rpm:
depends:
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
contents:
- src: root/etc/clickhouse-server
dst: /etc/clickhouse-server
type: config
- src: clickhouse-server.init
dst: /etc/init.d/clickhouse-server
- src: clickhouse-server.service
dst: /lib/systemd/system/clickhouse-server.service
- src: root/usr/bin/clickhouse-copier
dst: /usr/bin/clickhouse-copier
- src: clickhouse
dst: /usr/bin/clickhouse-keeper
type: symlink
- src: root/usr/bin/clickhouse-report
dst: /usr/bin/clickhouse-report
- src: root/usr/bin/clickhouse-server
dst: /usr/bin/clickhouse-server
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-server/AUTHORS
- src: ../CHANGELOG.md
dst: /usr/share/doc/clickhouse-server/CHANGELOG.md
- src: ../LICENSE
dst: /usr/share/doc/clickhouse-server/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-server/README.md
scripts:
postinstall: ./clickhouse-server.postinstall

View File

@ -460,10 +460,6 @@ else ()
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE})
if (USE_GDB_ADD_INDEX)
@ -474,11 +470,19 @@ else ()
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM)
endif()
if (BUILD_STRIPPED_BINARIES_PREFIX)
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH clickhouse)
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
else()
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()
if (NOT INSTALL_STRIPPED_BINARIES)
# Install dunny debug directory
# TODO: move logic to every place where clickhouse_strip_binary is used
add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
endif()
if (ENABLE_TESTS)

View File

@ -792,9 +792,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
std::string command = fmt::format("command -v setcap >/dev/null"
" && command -v capsh >/dev/null"
" && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice+ep >/dev/null 2>&1"
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0}"
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
" && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep >/dev/null 2>&1"
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep' {0}"
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' or 'net_bind_service' capability for clickhouse binary."
" This is optional. Taskstats accounting will be disabled."
" To enable taskstats accounting you may add the required capability later manually.\"",
fs::canonical(main_bin_path).string());

View File

@ -24,10 +24,8 @@ target_link_libraries(clickhouse-library-bridge PRIVATE
set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
if (BUILD_STRIPPED_BINARIES_PREFIX)
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-library-bridge)
endif()
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
else()
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -39,11 +39,9 @@ if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)
endif()
if (BUILD_STRIPPED_BINARIES_PREFIX)
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-odbc-bridge)
endif()
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
else()
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -45,6 +45,7 @@
#include <Core/ServerUUID.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/IOThreadPool.h>
#include <IO/UseSSL.h>
#include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h>
@ -554,6 +555,10 @@ if (ThreadFuzzer::instance().isEffective())
config().getUInt("thread_pool_queue_size", 10000)
);
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
@ -1022,8 +1027,8 @@ if (ThreadFuzzer::instance().isEffective())
std::make_unique<TCPServer>(
new KeeperTCPHandlerFactory(
config_getter, global_context->getKeeperDispatcher(),
global_context->getSettingsRef().receive_timeout,
global_context->getSettingsRef().send_timeout,
global_context->getSettingsRef().receive_timeout.totalSeconds(),
global_context->getSettingsRef().send_timeout.totalSeconds(),
false), server_pool, socket));
});
@ -1045,8 +1050,8 @@ if (ThreadFuzzer::instance().isEffective())
std::make_unique<TCPServer>(
new KeeperTCPHandlerFactory(
config_getter, global_context->getKeeperDispatcher(),
global_context->getSettingsRef().receive_timeout,
global_context->getSettingsRef().send_timeout, true), server_pool, socket));
global_context->getSettingsRef().receive_timeout.totalSeconds(),
global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket));
#else
UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",

View File

@ -85,7 +85,9 @@ size_t extractMaskNumericImpl(
{
size_t ones_count = 0;
size_t data_index = 0;
for (size_t i = 0; i != mask.size(); ++i)
size_t mask_size = mask.size();
for (size_t i = 0; i != mask_size; ++i)
{
// Change mask only where value is 1.
if (!mask[i])

View File

@ -113,5 +113,35 @@ public:
}
};
class SynchronizedArenaWithFreeLists : private ArenaWithFreeLists
{
public:
explicit SynchronizedArenaWithFreeLists(
const size_t initial_size = 4096, const size_t growth_factor = 2,
const size_t linear_growth_threshold = 128 * 1024 * 1024)
: ArenaWithFreeLists{initial_size, growth_factor, linear_growth_threshold}
{}
char * alloc(const size_t size)
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::alloc(size);
}
void free(char * ptr, const size_t size)
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::free(ptr, size);
}
/// Size of the allocated pool in bytes
size_t size() const
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::size();
}
private:
mutable std::mutex mutex;
};
}

View File

@ -25,7 +25,6 @@
#include <Common/Dwarf.h>
#include <Common/Exception.h>
#define DW_CHILDREN_no 0
#define DW_FORM_addr 1
#define DW_FORM_block1 0x0a
@ -125,7 +124,7 @@ template <typename T>
requires std::is_trivial_v<T> && std::is_standard_layout_v<T>
T read(std::string_view & sp)
{
SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
SAFE_CHECK(sp.size() >= sizeof(T), fmt::format("underflow: expected bytes {}, got bytes {}", sizeof(T), sp.size()));
T x;
memcpy(&x, sp.data(), sizeof(T));
sp.remove_prefix(sizeof(T));
@ -690,7 +689,7 @@ bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uin
Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const
{
SAFE_CHECK(offset < info_.size(), "unexpected offset");
SAFE_CHECK(offset < info_.size(), fmt::format("unexpected offset {}, info size {}", offset, info_.size()));
Die die;
std::string_view sp{info_.data() + offset, cu.offset + cu.size - offset};
die.offset = offset;
@ -708,19 +707,6 @@ Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit & cu, uint64_t offset) co
return die;
}
Dwarf::Die Dwarf::findDefinitionDie(const CompilationUnit & cu, const Die & die) const
{
// Find the real definition instead of declaration.
// DW_AT_specification: Incomplete, non-defining, or separate declaration
// corresponding to a declaration
auto offset = getAttribute<uint64_t>(cu, die, DW_AT_specification);
if (!offset)
{
return die;
}
return getDieAtOffset(cu, cu.offset + offset.value());
}
/**
* Find the @locationInfo for @address in the compilation unit represented
* by the @sp .debug_info entry.
@ -861,7 +847,10 @@ bool Dwarf::findLocation(
SymbolizedFrame inline_frame;
inline_frame.found = true;
inline_frame.addr = address;
inline_frame.name = call_location.name.data();
if (!call_location.name.empty())
inline_frame.name = call_location.name.data();
else
inline_frame.name = nullptr;
inline_frame.location.has_file_and_line = true;
inline_frame.location.file = call_location.file;
inline_frame.location.line = call_location.line;
@ -1034,17 +1023,54 @@ void Dwarf::findInlinedSubroutineDieForAddress(
location.file = line_vm.getFullFileName(*call_file);
location.line = *call_line;
/// Something wrong with receiving debug info about inline.
/// If set to true we stop parsing DWARF.
bool die_for_inline_broken = false;
auto get_function_name = [&](const CompilationUnit & srcu, uint64_t die_offset)
{
auto decl_die = getDieAtOffset(srcu, die_offset);
Die decl_die = getDieAtOffset(srcu, die_offset);
auto & die_to_look_for_name = decl_die;
Die def_die;
// Jump to the actual function definition instead of declaration for name
// and line info.
auto def_die = findDefinitionDie(srcu, decl_die);
// DW_AT_specification: Incomplete, non-defining, or separate declaration
// corresponding to a declaration
auto offset = getAttribute<uint64_t>(srcu, decl_die, DW_AT_specification);
if (offset)
{
/// FIXME: actually it's a bug in our DWARF parser.
///
/// Most of the times compilation unit offset (srcu.offset) is some big number inside .debug_info (like 434782255).
/// Offset of DIE definition is some small relative number to srcu.offset (like 3518).
/// However in some unknown cases offset looks like global, non relative number (like 434672579) and in this
/// case we obviously doing something wrong parsing DWARF.
///
/// What is important -- this bug? reproduces only with -flto=thin in release mode.
/// Also llvm-dwarfdump --verify ./clickhouse says that our DWARF is ok, so it's another prove
/// that we just doing something wrong.
///
/// FIXME: Currently we just give up parsing DWARF for inlines when we got into this situation.
if (srcu.offset + offset.value() >= info_.size())
{
die_for_inline_broken = true;
}
else
{
def_die = getDieAtOffset(srcu, srcu.offset + offset.value());
die_to_look_for_name = def_die;
}
}
std::string_view name;
if (die_for_inline_broken)
return name;
// The file and line will be set in the next inline subroutine based on
// its DW_AT_call_file and DW_AT_call_line.
forEachAttribute(srcu, def_die, [&](const Attribute & attr)
forEachAttribute(srcu, die_to_look_for_name, [&](const Attribute & attr)
{
switch (attr.spec.name)
{
@ -1083,6 +1109,10 @@ void Dwarf::findInlinedSubroutineDieForAddress(
? get_function_name(cu, cu.offset + *abstract_origin)
: get_function_name(findCompilationUnit(info_, *abstract_origin), *abstract_origin);
/// FIXME: see comment above
if (die_for_inline_broken)
return false;
locations.push_back(location);
findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu, locations, max_size);

View File

@ -260,11 +260,6 @@ private:
/** cu must exist during the life cycle of created detail::Die. */
Die getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const;
/**
* Find the actual definition DIE instead of declaration for the given die.
*/
Die findDefinitionDie(const CompilationUnit & cu, const Die & die) const;
bool findLocation(
uintptr_t address,
LocationInfoMode mode,

View File

@ -127,7 +127,14 @@ PoolWithFailover::Entry PoolWithFailover::get()
/// If we cannot connect to some replica due to pool overflow, than we will wait and connect.
PoolPtr * full_pool = nullptr;
std::map<std::string, std::tuple<std::string, int>> error_detail;
struct ErrorDetail
{
int code;
std::string description;
};
std::unordered_map<std::string, ErrorDetail> replica_name_to_error_detail;
for (size_t try_no = 0; try_no < max_tries; ++try_no)
{
@ -161,15 +168,8 @@ PoolWithFailover::Entry PoolWithFailover::get()
}
app.logger().warning("Connection to " + pool->getDescription() + " failed: " + e.displayText());
//save all errors to error_detail
if (error_detail.contains(pool->getDescription()))
{
error_detail[pool->getDescription()] = {e.displayText(), e.code()};
}
else
{
error_detail.insert({pool->getDescription(), {e.displayText(), e.code()}});
}
replica_name_to_error_detail.insert_or_assign(pool->getDescription(), ErrorDetail{e.code(), e.displayText()});
continue;
}
@ -189,15 +189,19 @@ PoolWithFailover::Entry PoolWithFailover::get()
DB::WriteBufferFromOwnString message;
message << "Connections to all replicas failed: ";
for (auto it = replicas_by_priority.begin(); it != replicas_by_priority.end(); ++it)
{
for (auto jt = it->second.begin(); jt != it->second.end(); ++jt)
{
message << (it == replicas_by_priority.begin() && jt == it->second.begin() ? "" : ", ") << (*jt)->getDescription();
if (error_detail.contains((*jt)->getDescription()))
if (auto error_detail_it = replica_name_to_error_detail.find(((*jt)->getDescription()));
error_detail_it != replica_name_to_error_detail.end())
{
std::tuple<std::string, int> error_and_code = error_detail[(*jt)->getDescription()];
message << ", ERROR " << std::get<1>(error_and_code) << " : " << std::get<0>(error_and_code);
const auto & [code, description] = error_detail_it->second;
message << ", ERROR " << code << " : " << description;
}
}
}
throw Poco::Exception(message.str());
}

View File

@ -47,6 +47,8 @@ class IColumn;
M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \
M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \
M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \

View File

@ -75,7 +75,7 @@ private:
writeChar(':', out);
writeIntText(location.line, out);
if (frame)
if (frame && frame->name != nullptr)
{
writeChar(':', out);
int status = 0;

View File

@ -894,13 +894,20 @@ private:
/// If then is NULL, we create Nullable column with null mask OR-ed with condition.
if (then_is_null)
{
ColumnPtr arg_else_column;
/// In case when arg_else column type differs with result
/// column type we should cast it to result type.
if (removeNullable(arg_else.type)->getName() != removeNullable(result_type)->getName())
arg_else_column = castColumn(arg_else, result_type);
else
arg_else_column = arg_else.column;
if (cond_col)
{
auto arg_else_column = arg_else.column;
auto result_column = IColumn::mutate(std::move(arg_else_column));
if (else_is_short)
result_column->expand(cond_col->getData(), true);
if (isColumnNullable(*arg_else.column))
if (isColumnNullable(*result_column))
{
assert_cast<ColumnNullable &>(*result_column).applyNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column));
return result_column;
@ -913,7 +920,7 @@ private:
if (cond_const_col->getValue<UInt8>())
return result_type->createColumn()->cloneResized(input_rows_count);
else
return makeNullableColumnIfNot(arg_else.column);
return makeNullableColumnIfNot(arg_else_column);
}
else
throw Exception("Illegal column " + arg_cond.column->getName() + " of first argument of function " + getName()
@ -924,14 +931,21 @@ private:
/// If else is NULL, we create Nullable column with null mask OR-ed with negated condition.
if (else_is_null)
{
ColumnPtr arg_then_column;
/// In case when arg_then column type differs with result
/// column type we should cast it to result type.
if (removeNullable(arg_then.type)->getName() != removeNullable(result_type)->getName())
arg_then_column = castColumn(arg_then, result_type);
else
arg_then_column = arg_then.column;
if (cond_col)
{
auto arg_then_column = arg_then.column;
auto result_column = IColumn::mutate(std::move(arg_then_column));
if (then_is_short)
result_column->expand(cond_col->getData(), false);
if (isColumnNullable(*arg_then.column))
if (isColumnNullable(*result_column))
{
assert_cast<ColumnNullable &>(*result_column).applyNegatedNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column));
return result_column;
@ -954,7 +968,7 @@ private:
else if (cond_const_col)
{
if (cond_const_col->getValue<UInt8>())
return makeNullableColumnIfNot(arg_then.column);
return makeNullableColumnIfNot(arg_then_column);
else
return result_type->createColumn()->cloneResized(input_rows_count);
}

View File

@ -131,8 +131,10 @@ public:
message.value_or("Value passed to '" + getName() + "' function is non zero"));
}
size_t result_size = in_untyped->size();
/// We return non constant to avoid constant folding.
return ColumnUInt8::create(in_data.size(), 0);
return ColumnUInt8::create(result_size, 0);
}
return nullptr;

34
src/IO/IOThreadPool.cpp Normal file
View File

@ -0,0 +1,34 @@
#include <IO/IOThreadPool.h>
#include "Core/Field.h"
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
std::unique_ptr<ThreadPool> IOThreadPool::instance;
void IOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size)
{
if (instance)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is initialized twice");
}
instance = std::make_unique<ThreadPool>(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/);
}
ThreadPool & IOThreadPool::get()
{
if (!instance)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is not initialized");
}
return *instance;
}
}

20
src/IO/IOThreadPool.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <Common/ThreadPool.h>
namespace DB
{
/*
* ThreadPool used for the IO.
*/
class IOThreadPool
{
static std::unique_ptr<ThreadPool> instance;
public:
static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size);
static ThreadPool & get();
};
}

View File

@ -0,0 +1,290 @@
#include <IO/ParallelReadBuffer.h>
#include <base/logger_useful.h>
#include <Poco/Logger.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int SEEK_POSITION_OUT_OF_BOUND;
}
ParallelReadBuffer::ParallelReadBuffer(
std::unique_ptr<ReadBufferFactory> reader_factory_,
ThreadPool * pool_,
size_t max_working_readers_,
WorkerSetup worker_setup_,
WorkerCleanup worker_cleanup_)
: SeekableReadBufferWithSize(nullptr, 0)
, pool(pool_)
, max_working_readers(max_working_readers_)
, reader_factory(std::move(reader_factory_))
, worker_setup(std::move(worker_setup_))
, worker_cleanup(std::move(worker_cleanup_))
{
std::unique_lock<std::mutex> lock{mutex};
addReaders(lock);
}
bool ParallelReadBuffer::addReaderToPool(std::unique_lock<std::mutex> & /*buffer_lock*/)
{
auto reader = reader_factory->getReader();
if (!reader)
{
return false;
}
auto worker = read_workers.emplace_back(std::make_shared<ReadWorker>(std::move(reader)));
pool->scheduleOrThrow(
[&, this, worker = std::move(worker)]() mutable
{
ThreadStatus thread_status;
{
std::lock_guard lock{mutex};
++active_working_reader;
}
SCOPE_EXIT({
worker_cleanup(thread_status);
std::lock_guard lock{mutex};
--active_working_reader;
if (active_working_reader == 0)
{
readers_done.notify_all();
}
});
worker_setup(thread_status);
readerThreadFunction(std::move(worker));
});
return true;
}
void ParallelReadBuffer::addReaders(std::unique_lock<std::mutex> & buffer_lock)
{
while (read_workers.size() < max_working_readers && addReaderToPool(buffer_lock))
;
}
off_t ParallelReadBuffer::seek(off_t offset, int whence)
{
if (whence != SEEK_SET)
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
if (offset < 0)
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
if (!working_buffer.empty() && static_cast<size_t>(offset) >= current_position - working_buffer.size() && offset < current_position)
{
pos = working_buffer.end() - (current_position - offset);
assert(pos >= working_buffer.begin());
assert(pos <= working_buffer.end());
return offset;
}
std::unique_lock lock{mutex};
const auto offset_is_in_range
= [&](const auto & range) { return static_cast<size_t>(offset) >= range.left && static_cast<size_t>(offset) <= *range.right; };
while (!read_workers.empty() && (offset < current_position || !offset_is_in_range(read_workers.front()->range)))
{
read_workers.front()->cancel = true;
read_workers.pop_front();
}
if (!read_workers.empty())
{
auto & front_worker = read_workers.front();
auto & segments = front_worker->segments;
current_position = front_worker->range.left;
while (true)
{
next_condvar.wait(lock, [&] { return emergency_stop || !segments.empty(); });
if (emergency_stop)
handleEmergencyStop();
auto next_segment = front_worker->nextSegment();
if (static_cast<size_t>(offset) < current_position + next_segment.size())
{
current_segment = std::move(next_segment);
working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
current_position += current_segment.size();
pos = working_buffer.end() - (current_position - offset);
addReaders(lock);
return offset;
}
current_position += next_segment.size();
}
}
lock.unlock();
finishAndWait();
reader_factory->seek(offset, whence);
all_completed = false;
read_workers.clear();
current_position = offset;
resetWorkingBuffer();
emergency_stop = false;
lock.lock();
addReaders(lock);
return offset;
}
std::optional<size_t> ParallelReadBuffer::getTotalSize()
{
std::lock_guard lock{mutex};
return reader_factory->getTotalSize();
}
off_t ParallelReadBuffer::getPosition()
{
return current_position - available();
}
bool ParallelReadBuffer::currentWorkerReady() const
{
assert(!read_workers.empty());
return read_workers.front()->finished || !read_workers.front()->segments.empty();
}
bool ParallelReadBuffer::currentWorkerCompleted() const
{
assert(!read_workers.empty());
return read_workers.front()->finished && read_workers.front()->segments.empty();
}
void ParallelReadBuffer::handleEmergencyStop()
{
// this can only be called from the main thread when there is an exception
assert(background_exception);
if (background_exception)
std::rethrow_exception(background_exception);
}
bool ParallelReadBuffer::nextImpl()
{
if (all_completed)
return false;
while (true)
{
std::unique_lock lock(mutex);
next_condvar.wait(
lock,
[this]()
{
/// Check if no more readers left or current reader can be processed
return emergency_stop || currentWorkerReady();
});
bool worker_removed = false;
/// Remove completed units
while (!read_workers.empty() && currentWorkerCompleted() && !emergency_stop)
{
read_workers.pop_front();
worker_removed = true;
}
if (emergency_stop)
handleEmergencyStop();
if (worker_removed)
addReaders(lock);
/// All readers processed, stop
if (read_workers.empty())
{
all_completed = true;
return false;
}
auto & front_worker = read_workers.front();
/// Read data from first segment of the first reader
if (!front_worker->segments.empty())
{
current_segment = front_worker->nextSegment();
if (currentWorkerCompleted())
{
read_workers.pop_front();
all_completed = !addReaderToPool(lock) && read_workers.empty();
}
break;
}
}
working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
current_position += working_buffer.size();
return true;
}
void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
{
try
{
while (!emergency_stop && !read_worker->cancel)
{
if (!read_worker->reader->next())
throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
if (emergency_stop || read_worker->cancel)
break;
Buffer buffer = read_worker->reader->buffer();
size_t bytes_to_copy = std::min(buffer.size(), read_worker->bytes_left);
Segment new_segment(bytes_to_copy, &arena);
memcpy(new_segment.data(), buffer.begin(), bytes_to_copy);
read_worker->reader->ignore(bytes_to_copy);
read_worker->bytes_left -= bytes_to_copy;
{
/// New data ready to be read
std::lock_guard lock(mutex);
read_worker->segments.emplace_back(std::move(new_segment));
read_worker->finished = read_worker->bytes_left == 0;
next_condvar.notify_all();
}
if (read_worker->finished)
{
break;
}
}
}
catch (...)
{
onBackgroundException();
}
}
void ParallelReadBuffer::onBackgroundException()
{
std::lock_guard lock(mutex);
if (!background_exception)
{
background_exception = std::current_exception();
}
emergency_stop = true;
next_condvar.notify_all();
}
void ParallelReadBuffer::finishAndWait()
{
emergency_stop = true;
std::unique_lock lock{mutex};
readers_done.wait(lock, [&] { return active_working_reader == 0; });
}
}

174
src/IO/ParallelReadBuffer.h Normal file
View File

@ -0,0 +1,174 @@
#pragma once
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <IO/SeekableReadBuffer.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ThreadPool.h>
namespace DB
{
/**
* Reads from multiple ReadBuffers in parallel.
* Preserves order of readers obtained from ReadBufferFactory.
*
* It consumes multiple readers and yields data from them in order as it passed.
* Each working reader save segments of data to internal queue.
*
* ParallelReadBuffer in nextImpl method take first available segment from first reader in deque and fed it to user.
* When first reader finish reading, they will be removed from worker deque and data from next reader consumed.
*
* Number of working readers limited by max_working_readers.
*/
class ParallelReadBuffer : public SeekableReadBufferWithSize
{
private:
/// Blocks until data occurred in the first reader or this reader indicate finishing
/// Finished readers removed from queue and data from next readers processed
bool nextImpl() override;
class Segment : private boost::noncopyable
{
public:
Segment(size_t size_, SynchronizedArenaWithFreeLists * arena_) : arena(arena_), m_data(arena->alloc(size_)), m_size(size_) { }
Segment() = default;
Segment(Segment && other) noexcept : arena(other.arena)
{
std::swap(m_data, other.m_data);
std::swap(m_size, other.m_size);
}
Segment & operator=(Segment && other) noexcept
{
arena = other.arena;
std::swap(m_data, other.m_data);
std::swap(m_size, other.m_size);
return *this;
}
~Segment()
{
if (m_data)
{
arena->free(m_data, m_size);
}
}
auto data() const noexcept { return m_data; }
auto size() const noexcept { return m_size; }
private:
SynchronizedArenaWithFreeLists * arena{nullptr};
char * m_data{nullptr};
size_t m_size{0};
};
public:
class ReadBufferFactory
{
public:
virtual SeekableReadBufferPtr getReader() = 0;
virtual ~ReadBufferFactory() = default;
virtual off_t seek(off_t off, int whence) = 0;
virtual std::optional<size_t> getTotalSize() = 0;
};
using WorkerSetup = std::function<void(ThreadStatus &)>;
using WorkerCleanup = std::function<void(ThreadStatus &)>;
explicit ParallelReadBuffer(
std::unique_ptr<ReadBufferFactory> reader_factory_,
ThreadPool * pool,
size_t max_working_readers,
WorkerSetup worker_setup = {},
WorkerCleanup worker_cleanup = {});
~ParallelReadBuffer() override { finishAndWait(); }
off_t seek(off_t off, int whence) override;
std::optional<size_t> getTotalSize() override;
off_t getPosition() override;
private:
/// Reader in progress with a list of read segments
struct ReadWorker
{
explicit ReadWorker(SeekableReadBufferPtr reader_) : reader(std::move(reader_)), range(reader->getRemainingReadRange())
{
assert(range.right);
bytes_left = *range.right - range.left + 1;
}
Segment nextSegment()
{
assert(!segments.empty());
auto next_segment = std::move(segments.front());
segments.pop_front();
range.left += next_segment.size();
return next_segment;
}
SeekableReadBufferPtr reader;
std::deque<Segment> segments;
bool finished{false};
SeekableReadBuffer::Range range;
size_t bytes_left{0};
std::atomic_bool cancel{false};
};
using ReadWorkerPtr = std::shared_ptr<ReadWorker>;
/// First worker in deque have new data or processed all available amount
bool currentWorkerReady() const;
/// First worker in deque processed and flushed all data
bool currentWorkerCompleted() const;
void handleEmergencyStop();
void addReaders(std::unique_lock<std::mutex> & buffer_lock);
bool addReaderToPool(std::unique_lock<std::mutex> & buffer_lock);
/// Process read_worker, read data and save into internal segments queue
void readerThreadFunction(ReadWorkerPtr read_worker);
void onBackgroundException();
void finishAndWait();
SynchronizedArenaWithFreeLists arena;
Segment current_segment;
ThreadPool * pool;
size_t max_working_readers;
size_t active_working_reader{0};
// Triggered when all reader workers are done
std::condition_variable readers_done;
std::unique_ptr<ReadBufferFactory> reader_factory;
WorkerSetup worker_setup;
WorkerCleanup worker_cleanup;
/**
* FIFO queue of readers.
* Each worker contains reader itself and downloaded segments.
* When reader read all available data it will be removed from
* deque and data from next reader will be consumed to user.
*/
std::deque<ReadWorkerPtr> read_workers;
std::mutex mutex;
/// Triggered when new data available
std::condition_variable next_condvar;
std::exception_ptr background_exception = nullptr;
std::atomic_bool emergency_stop{false};
off_t current_position{0};
bool all_completed{false};
};
}

View File

@ -1,32 +1,33 @@
#pragma once
#include <functional>
#include <base/types.h>
#include <base/sleep.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <IO/ParallelReadBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferFromIStream.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadSettings.h>
#include <base/logger_useful.h>
#include <base/sleep.h>
#include <base/types.h>
#include <Poco/Any.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>
#include <Poco/URIStreamFactory.h>
#include <Poco/Version.h>
#include <Common/DNSResolver.h>
#include <Common/RemoteHostFilter.h>
#include <Common/config.h>
#include <Common/config_version.h>
#include <base/logger_useful.h>
#include <Poco/URIStreamFactory.h>
namespace ProfileEvents
{
extern const Event ReadBufferSeekCancelConnection;
extern const Event ReadBufferSeekCancelConnection;
}
namespace DB
@ -48,7 +49,7 @@ class UpdatableSessionBase
{
protected:
SessionPtr session;
UInt64 redirects { 0 };
UInt64 redirects{0};
Poco::URI initial_uri;
ConnectionTimeouts timeouts;
UInt64 max_redirects;
@ -56,19 +57,12 @@ protected:
public:
virtual void buildNewSession(const Poco::URI & uri) = 0;
explicit UpdatableSessionBase(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
UInt64 max_redirects_)
: initial_uri { uri }
, timeouts { timeouts_ }
, max_redirects { max_redirects_ }
explicit UpdatableSessionBase(const Poco::URI uri, const ConnectionTimeouts & timeouts_, UInt64 max_redirects_)
: initial_uri{uri}, timeouts{timeouts_}, max_redirects{max_redirects_}
{
}
SessionPtr getSession()
{
return session;
}
SessionPtr getSession() { return session; }
void updateSession(const Poco::URI & uri)
{
@ -99,7 +93,7 @@ namespace detail
/// HTTP range, including right bound [begin, end].
struct Range
{
size_t begin = 0;
std::optional<size_t> begin;
std::optional<size_t> end;
};
@ -144,10 +138,9 @@ namespace detail
return read_range.begin || read_range.end || retry_with_range_header;
}
size_t getOffset() const
{
return read_range.begin + offset_from_begin_pos;
}
size_t getRangeBegin() const { return read_range.begin.value_or(0); }
size_t getOffset() const { return getRangeBegin() + offset_from_begin_pos; }
std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
{
@ -161,7 +154,7 @@ namespace detail
if (out_stream_callback)
request.setChunkedTransferEncoding(true);
for (auto & http_header_entry: http_header_entries)
for (auto & http_header_entry : http_header_entries)
request.set(std::get<0>(http_header_entry), std::get<1>(http_header_entry));
if (withPartialContent())
@ -207,26 +200,14 @@ namespace detail
std::optional<size_t> getTotalSize() override
{
if (read_range.end)
return *read_range.end - read_range.begin;
return *read_range.end - getRangeBegin();
Poco::Net::HTTPResponse response;
for (size_t i = 0; i < 10; ++i)
{
try
{
call(response, Poco::Net::HTTPRequest::HTTP_HEAD);
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
session->updateSession(uri_redirect);
istr = callImpl(uri_redirect, response, method);
}
callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD);
break;
}
catch (const Poco::Exception & e)
@ -236,7 +217,7 @@ namespace detail
}
if (response.hasContentLength())
read_range.end = read_range.begin + response.getContentLength();
read_range.end = getRangeBegin() + response.getContentLength();
return read_range.end;
}
@ -252,6 +233,21 @@ namespace detail
InitializeError initialization_error = InitializeError::NONE;
private:
void setupExternalBuffer()
{
/**
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data,
* each nextImpl() call we can fill a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
}
public:
using NextCallback = std::function<void(size_t)>;
using OutStreamCallback = std::function<void(std::ostream &)>;
@ -276,7 +272,7 @@ namespace detail
, session {session_}
, out_stream_callback {out_stream_callback_}
, credentials {credentials_}
, http_header_entries {http_header_entries_}
, http_header_entries {std::move(http_header_entries_)}
, remote_host_filter {remote_host_filter_}
, buffer_size {buffer_size_}
, use_external_buffer {use_external_buffer_}
@ -287,18 +283,21 @@ namespace detail
{
if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0
|| settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Invalid setting for http backoff, "
"must be http_max_tries >= 1 (current is {}) and "
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms);
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Invalid setting for http backoff, "
"must be http_max_tries >= 1 (current is {}) and "
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
settings.http_max_tries,
settings.http_retry_initial_backoff_ms,
settings.http_retry_max_backoff_ms);
// Configure User-Agent if it not already set.
const std::string user_agent = "User-Agent";
auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), [&user_agent](const HTTPHeaderEntry & entry)
{
return std::get<0>(entry) == user_agent;
});
auto iter = std::find_if(
http_header_entries.begin(),
http_header_entries.end(),
[&user_agent](const HTTPHeaderEntry & entry) { return std::get<0>(entry) == user_agent; });
if (iter == http_header_entries.end())
{
@ -313,7 +312,36 @@ namespace detail
}
}
void call(Poco::Net::HTTPResponse & response, const String & method_)
static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept
{
constexpr std::array non_retriable_errors{
Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED};
return std::all_of(
non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; });
}
void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
{
call(response, method_, throw_on_all_errors);
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
session->updateSession(uri_redirect);
istr = callImpl(uri_redirect, response, method);
}
}
void call(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
{
try
{
@ -321,18 +349,18 @@ namespace detail
}
catch (...)
{
if (throw_on_all_errors)
{
throw;
}
auto http_status = response.getStatus();
if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
&& http_skip_not_found_url)
if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url)
{
initialization_error = InitializeError::SKIP_NOT_FOUND_URL;
}
else if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED)
else if (!isRetriableError(http_status))
{
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
exception = std::current_exception();
@ -372,12 +400,14 @@ namespace detail
if (withPartialContent() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
{
/// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0.
if (read_range.begin)
if (read_range.begin && *read_range.begin != 0)
{
if (!exception)
exception = std::make_exception_ptr(
Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
"Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-'));
exception = std::make_exception_ptr(Exception(
ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
"Cannot read with range: [{}, {}]",
*read_range.begin,
read_range.end ? *read_range.end : '-'));
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
return;
@ -386,12 +416,12 @@ namespace detail
{
/// We could have range.begin == 0 and range.end != 0 in case of DiskWeb and failing to read with partial content
/// will affect only performance, so a warning is enough.
LOG_WARNING(log, "Unable to read with range header: [{}, {}]", read_range.begin, *read_range.end);
LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end);
}
}
if (!offset_from_begin_pos && !read_range.end && response.hasContentLength())
read_range.end = read_range.begin + response.getContentLength();
read_range.end = getRangeBegin() + response.getContentLength();
try
{
@ -399,12 +429,7 @@ namespace detail
if (use_external_buffer)
{
/**
* See comment 30 lines below.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
}
catch (const Poco::Exception & e)
@ -426,23 +451,17 @@ namespace detail
if (next_callback)
next_callback(count());
if (read_range.end && getOffset() == read_range.end.value())
if (read_range.end && getOffset() > read_range.end.value())
{
assert(getOffset() == read_range.end.value() + 1);
return false;
}
if (impl)
{
if (use_external_buffer)
{
/**
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data,
* each nextImpl() call we can fill a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
else
{
@ -477,10 +496,7 @@ namespace detail
if (use_external_buffer)
{
/// See comment 40 lines above.
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
}
@ -498,13 +514,18 @@ namespace detail
if (!can_retry_request)
throw;
LOG_ERROR(log,
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
"Error: {}. (Current backoff wait is {}/{} ms)",
uri.toString(), i + 1, settings.http_max_tries,
getOffset(), read_range.end ? toString(*read_range.end) : "unknown",
e.displayText(),
milliseconds_to_wait, settings.http_retry_max_backoff_ms);
LOG_ERROR(
log,
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
"Error: {}. (Current backoff wait is {}/{} ms)",
uri.toString(),
i + 1,
settings.http_max_tries,
getOffset(),
read_range.end ? toString(*read_range.end) : "unknown",
e.displayText(),
milliseconds_to_wait,
settings.http_retry_max_backoff_ms);
retry_with_range_header = true;
exception = std::current_exception();
@ -529,10 +550,7 @@ namespace detail
return true;
}
off_t getPosition() override
{
return getOffset() - available();
}
off_t getPosition() override { return getOffset() - available(); }
off_t seek(off_t offset_, int whence) override
{
@ -540,12 +558,11 @@ namespace detail
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
if (offset_ < 0)
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
throw Exception(
"Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
off_t current_offset = getOffset();
if (!working_buffer.empty()
&& size_t(offset_) >= current_offset - working_buffer.size()
&& offset_ < current_offset)
if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset)
{
pos = working_buffer.end() - (current_offset - offset_);
assert(pos >= working_buffer.begin());
@ -567,7 +584,6 @@ namespace detail
if (impl)
{
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
impl.reset();
}
@ -580,6 +596,8 @@ namespace detail
return offset_;
}
SeekableReadBuffer::Range getRemainingReadRange() const override { return {getOffset(), read_range.end}; }
std::string getResponseCookie(const std::string & name, const std::string & def) const
{
for (const auto & cookie : cookies)
@ -599,10 +617,7 @@ namespace detail
next_callback(count());
}
const std::string & getCompressionMethod() const
{
return content_encoding;
}
const std::string & getCompressionMethod() const { return content_encoding; }
};
}
@ -611,19 +626,50 @@ class UpdatableSession : public UpdatableSessionBase<HTTPSessionPtr>
using Parent = UpdatableSessionBase<HTTPSessionPtr>;
public:
UpdatableSession(
const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const UInt64 max_redirects_)
UpdatableSession(const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_)
: Parent(uri, timeouts_, max_redirects_)
{
session = makeHTTPSession(initial_uri, timeouts);
}
void buildNewSession(const Poco::URI & uri) override
void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
};
class RangeGenerator
{
public:
explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
: from(range_start), range_step(range_step_), total_size(total_size_)
{
session = makeHTTPSession(uri, timeouts);
}
size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
using Range = std::pair<size_t, size_t>;
// return upper exclusive range of values, i.e. [from_range, to_range>
std::optional<Range> nextRange()
{
if (from >= total_size)
{
return std::nullopt;
}
auto to = from + range_step;
if (to >= total_size)
{
to = total_size;
}
Range range{from, to};
from = to;
return std::move(range);
}
private:
size_t from;
size_t range_step;
size_t total_size;
};
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
@ -631,7 +677,7 @@ class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
public:
ReadWriteBufferFromHTTP(
ReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_,
OutStreamCallback out_stream_callback_,
@ -646,14 +692,117 @@ public:
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false)
: Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
uri_, credentials_, method_, out_stream_callback_, buffer_size_,
settings_, http_header_entries_, read_range_, remote_host_filter_,
delay_initialization_, use_external_buffer_, skip_not_found_url_)
: Parent(
std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
uri_,
credentials_,
method_,
out_stream_callback_,
buffer_size_,
settings_,
http_header_entries_,
read_range_,
remote_host_filter_,
delay_initialization_,
use_external_buffer_,
skip_not_found_url_)
{
}
};
class RangedReadWriteBufferFromHTTPFactory : public ParallelReadBuffer::ReadBufferFactory
{
using OutStreamCallback = ReadWriteBufferFromHTTP::OutStreamCallback;
public:
RangedReadWriteBufferFromHTTPFactory(
size_t total_object_size_,
size_t range_step_,
Poco::URI uri_,
std::string method_,
OutStreamCallback out_stream_callback_,
ConnectionTimeouts timeouts_,
const Poco::Net::HTTPBasicCredentials & credentials_,
UInt64 max_redirects_ = 0,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
ReadSettings settings_ = {},
ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries_ = {},
const RemoteHostFilter * remote_host_filter_ = nullptr,
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false)
: range_generator(total_object_size_, range_step_)
, total_object_size(total_object_size_)
, range_step(range_step_)
, uri(uri_)
, method(std::move(method_))
, out_stream_callback(out_stream_callback_)
, timeouts(std::move(timeouts_))
, credentials(credentials_)
, max_redirects(max_redirects_)
, buffer_size(buffer_size_)
, settings(std::move(settings_))
, http_header_entries(std::move(http_header_entries_))
, remote_host_filter(remote_host_filter_)
, delay_initialization(delay_initialization_)
, use_external_buffer(use_external_buffer_)
, skip_not_found_url(skip_not_found_url_)
{
}
SeekableReadBufferPtr getReader() override
{
const auto next_range = range_generator.nextRange();
if (!next_range)
{
return nullptr;
}
return std::make_shared<ReadWriteBufferFromHTTP>(
uri,
method,
out_stream_callback,
timeouts,
credentials,
max_redirects,
buffer_size,
settings,
http_header_entries,
// HTTP Range has inclusive bounds, i.e. [from, to]
ReadWriteBufferFromHTTP::Range{next_range->first, next_range->second - 1},
remote_host_filter,
delay_initialization,
use_external_buffer,
skip_not_found_url);
}
off_t seek(off_t off, [[maybe_unused]] int whence) override
{
range_generator = RangeGenerator{total_object_size, range_step, static_cast<size_t>(off)};
return off;
}
std::optional<size_t> getTotalSize() override { return total_object_size; }
private:
RangeGenerator range_generator;
size_t total_object_size;
size_t range_step;
Poco::URI uri;
std::string method;
OutStreamCallback out_stream_callback;
ConnectionTimeouts timeouts;
const Poco::Net::HTTPBasicCredentials & credentials;
UInt64 max_redirects;
size_t buffer_size;
ReadSettings settings;
ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries;
const RemoteHostFilter * remote_host_filter;
bool delay_initialization;
bool use_external_buffer;
bool skip_not_found_url;
};
class UpdatablePooledSession : public UpdatableSessionBase<PooledHTTPSessionPtr>
{
using Parent = UpdatableSessionBase<PooledHTTPSessionPtr>;
@ -662,20 +811,14 @@ private:
size_t per_endpoint_pool_size;
public:
explicit UpdatablePooledSession(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const UInt64 max_redirects_,
size_t per_endpoint_pool_size_)
: Parent(uri, timeouts_, max_redirects_)
, per_endpoint_pool_size { per_endpoint_pool_size_ }
explicit UpdatablePooledSession(
const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_, size_t per_endpoint_pool_size_)
: Parent(uri, timeouts_, max_redirects_), per_endpoint_pool_size{per_endpoint_pool_size_}
{
session = makePooledHTTPSession(initial_uri, timeouts, per_endpoint_pool_size);
}
void buildNewSession(const Poco::URI & uri) override
{
session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size);
}
void buildNewSession(const Poco::URI & uri) override { session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); }
};
class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>
@ -683,7 +826,8 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>;
public:
explicit PooledReadWriteBufferFromHTTP(Poco::URI uri_,
explicit PooledReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback_ = {},
const ConnectionTimeouts & timeouts_ = {},
@ -691,12 +835,13 @@ public:
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const UInt64 max_redirects = 0,
size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT)
: Parent(std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
uri_,
credentials_,
method_,
out_stream_callback_,
buffer_size_)
: Parent(
std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
uri_,
credentials_,
method_,
out_stream_callback_,
buffer_size_)
{
}
};

View File

@ -359,6 +359,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
table_lock.reset();
table_id = StorageID::createEmpty();
metadata_snapshot = nullptr;
storage_snapshot = nullptr;
}
}

View File

@ -41,6 +41,57 @@ namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
namespace
{
class StorageWithComment : public IAST
{
public:
ASTPtr storage;
ASTPtr comment;
String getID(char) const override { return "Storage with comment definition"; }
ASTPtr clone() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method clone is not supported");
}
void formatImpl(const FormatSettings &, FormatState &, FormatStateStacked) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported");
}
};
class ParserStorageWithComment : public IParserBase
{
protected:
const char * getName() const override { return "storage definition with comment"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
{
ParserStorage storage_p;
ASTPtr storage;
if (!storage_p.parse(pos, storage, expected))
return false;
ParserKeyword s_comment("COMMENT");
ParserStringLiteral string_literal_parser;
ASTPtr comment;
if (s_comment.ignore(pos, expected))
string_literal_parser.parse(pos, comment, expected);
auto storage_with_comment = std::make_shared<StorageWithComment>();
storage_with_comment->storage = std::move(storage);
storage_with_comment->comment = std::move(comment);
node = storage_with_comment;
return true;
}
};
}
namespace
@ -102,8 +153,9 @@ std::shared_ptr<TSystemLog> createSystemLog(
engine += " TTL " + ttl;
engine += " ORDER BY (event_date, event_time)";
}
// Validate engine definition grammatically to prevent some configuration errors
ParserStorage storage_parser;
ParserStorageWithComment storage_parser;
parseQuery(storage_parser, engine.data(), engine.data() + engine.size(),
"Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
@ -450,7 +502,6 @@ void SystemLog<LogElement>::prepareTable()
is_prepared = true;
}
template <typename LogElement>
ASTPtr SystemLog<LogElement>::getCreateTableQuery()
{
@ -465,11 +516,16 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns));
create->set(create->columns_list, new_columns_list);
ParserStorage storage_parser;
ASTPtr storage_ast = parseQuery(
ParserStorageWithComment storage_parser;
ASTPtr storage_with_comment_ast = parseQuery(
storage_parser, storage_def.data(), storage_def.data() + storage_def.size(),
"Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
create->set(create->storage, storage_ast);
StorageWithComment & storage_with_comment = storage_with_comment_ast->as<StorageWithComment &>();
create->set(create->storage, storage_with_comment.storage);
create->set(create->comment, storage_with_comment.comment);
/// Write additional (default) settings for MergeTree engine to make it make it possible to compare ASTs
/// and recreate tables on settings changes.

View File

@ -7,16 +7,14 @@ import string
TOKEN_TEXT = 1
TOKEN_VAR = 2
TOKEN_COLON = ':'
TOKEN_SEMI = ';'
TOKEN_OR = '|'
TOKEN_QUESTIONMARK = '?'
TOKEN_ROUND_BRACKET_OPEN = '('
TOKEN_ROUND_BRACKET_CLOSE = ')'
TOKEN_ASTERISK = '*'
TOKEN_SLASH = '/'
TOKEN_COLON = ":"
TOKEN_SEMI = ";"
TOKEN_OR = "|"
TOKEN_QUESTIONMARK = "?"
TOKEN_ROUND_BRACKET_OPEN = "("
TOKEN_ROUND_BRACKET_CLOSE = ")"
TOKEN_ASTERISK = "*"
TOKEN_SLASH = "/"
class TextValue:
@ -27,9 +25,9 @@ class TextValue:
def get_slug(self):
if self.slug is not None:
return self.slug
slug = ''
slug = ""
for c in self.t:
slug += c if c in string.ascii_letters else '_'
slug += c if c in string.ascii_letters else "_"
self.slug = slug
return slug
@ -37,12 +35,12 @@ class TextValue:
return f"TextValue_{self.get_slug()}"
def __repr__(self):
return f"TextValue(\"{self.t}\")"
return f'TextValue("{self.t}")'
class Var:
def __init__(self, id_):
self.id_ = id_
self.id_ = id_
def __repr__(self):
return f"Var({self.id_})"
@ -59,8 +57,8 @@ class Parser:
self.cur_tok = None
self.includes = []
self.proto = ''
self.cpp = ''
self.proto = ""
self.cpp = ""
def parse_file(self, filename):
with open(filename) as f:
@ -81,7 +79,7 @@ class Parser:
if self.text[0] == '"':
return self.parse_txt_value()
if self.text[0] == '$':
if self.text[0] == "$":
return self.parse_var_value()
c, self.text = self.text[0], self.text[1:]
@ -89,9 +87,9 @@ class Parser:
return c
def parse_var_value(self):
i = self.text.find(' ')
i = self.text.find(" ")
id_, self.text = self.text[1:i], self.text[i+1:]
id_, self.text = self.text[1:i], self.text[i + 1 :]
self.var_id = int(id_)
self.cur_tok = TOKEN_VAR
return TOKEN_VAR
@ -100,12 +98,12 @@ class Parser:
if self.text[0] != '"':
raise Exception("parse_txt_value: expected quote at the start")
self.t = ''
self.t = ""
self.text = self.text[1:]
while self.text[0] != '"':
if self.text[0] == '\\':
if self.text[1] == 'x':
if self.text[0] == "\\":
if self.text[1] == "x":
self.t += self.text[:4]
self.text = self.text[4:]
elif self.text[1] in 'nt\\"':
@ -123,7 +121,7 @@ class Parser:
def skip_ws(self):
while self.text and self.text[0] in string.whitespace:
if self.text[0] == '\n':
if self.text[0] == "\n":
self.line += 1
self.col = 0
self.text = self.text[1:]
@ -134,10 +132,9 @@ class Parser:
def skip_line(self):
self.line += 1
index = self.text.find('\n')
index = self.text.find("\n")
self.text = self.text[index:]
def parse_statement(self):
if self.skip_ws() is None:
return None
@ -164,52 +161,54 @@ class Parser:
def generate(self):
self.proto = 'syntax = "proto3";\n\n'
self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n'
self.cpp = "#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n"
for incl_file in self.includes:
self.cpp += f'#include "{incl_file}"\n'
self.cpp += '\n'
self.cpp += "\n"
self.proto += 'message Word {\n'
self.proto += '\tenum Value {\n'
self.proto += "message Word {\n"
self.proto += "\tenum Value {\n"
self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n'
self.cpp += "void GenerateWord(const Word&, std::string&, int);\n\n"
self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n'
self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n'
self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n'
self.cpp += '\t}\n'
self.cpp += '}\n'
self.cpp += (
"void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n"
)
self.cpp += "\tfor (int i = 0; i < stc.words_size(); i++ ) {\n"
self.cpp += "\t\tGenerateWord(stc.words(i), s, ++depth);\n"
self.cpp += "\t}\n"
self.cpp += "}\n"
self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n'
self.cpp += "void GenerateWord(const Word& word, std::string &s, int depth) {\n"
self.cpp += '\tif (depth > 5) return;\n\n'
self.cpp += '\tswitch (word.value()) {\n'
self.cpp += "\tif (depth > 5) return;\n\n"
self.cpp += "\tswitch (word.value()) {\n"
for idx, chain in enumerate(self.chains):
self.proto += f'\t\tvalue_{idx} = {idx};\n'
self.proto += f"\t\tvalue_{idx} = {idx};\n"
self.cpp += f'\t\tcase {idx}: {{\n'
self.cpp += f"\t\tcase {idx}: {{\n"
num_var = 0
for item in chain:
if isinstance(item, TextValue):
self.cpp += f'\t\t\ts += "{item.t}";\n'
elif isinstance(item, Var):
self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n'
self.cpp += f"\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n"
num_var += 1
else:
raise Exception("unknown token met during generation")
self.cpp += '\t\t\tbreak;\n\t\t}\n'
self.cpp += '\t\tdefault: break;\n'
self.cpp += "\t\t\tbreak;\n\t\t}\n"
self.cpp += "\t\tdefault: break;\n"
self.cpp += '\t}\n'
self.cpp += "\t}\n"
self.proto += '\t}\n'
self.proto += '\tValue value = 1;\n'
self.proto += '\tSentence inner = 2;\n'
self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}'
self.proto += "\t}\n"
self.proto += "\tValue value = 1;\n"
self.proto += "\tSentence inner = 2;\n"
self.proto += "}\nmessage Sentence {\n\trepeated Word words = 1;\n}"
self.cpp += '}\n'
self.cpp += "}\n"
return self.cpp, self.proto
def fatal_parsing_error(self, msg):
@ -220,7 +219,7 @@ class Parser:
def main(args):
input_file, outfile_cpp, outfile_proto = args
if not outfile_proto.endswith('.proto'):
if not outfile_proto.endswith(".proto"):
raise Exception("outfile_proto (argv[3]) should end with `.proto`")
include_filename = outfile_proto[:-6] + ".pb.h"
@ -231,17 +230,17 @@ def main(args):
cpp, proto = p.generate()
proto = proto.replace('\t', ' ' * 4)
cpp = cpp.replace('\t', ' ' * 4)
proto = proto.replace("\t", " " * 4)
cpp = cpp.replace("\t", " " * 4)
with open(outfile_cpp, 'w') as f:
with open(outfile_cpp, "w") as f:
f.write(cpp)
with open(outfile_proto, 'w') as f:
with open(outfile_proto, "w") as f:
f.write(proto)
if __name__ == '__main__':
if __name__ == "__main__":
if len(sys.argv) < 3:
print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>")
sys.exit(1)

View File

@ -32,14 +32,14 @@ public:
KeeperTCPHandlerFactory(
ConfigGetter config_getter_,
std::shared_ptr<KeeperDispatcher> keeper_dispatcher_,
Poco::Timespan receive_timeout_,
Poco::Timespan send_timeout_,
uint64_t receive_timeout_seconds,
uint64_t send_timeout_seconds,
bool secure)
: config_getter(config_getter_)
, keeper_dispatcher(keeper_dispatcher_)
, log(&Poco::Logger::get(std::string{"KeeperTCP"} + (secure ? "S" : "") + "HandlerFactory"))
, receive_timeout(receive_timeout_)
, send_timeout(send_timeout_)
, receive_timeout(/* seconds = */ receive_timeout_seconds, /* microseconds = */ 0)
, send_timeout(/* seconds = */ send_timeout_seconds, /* microseconds = */ 0)
{
}

View File

@ -124,7 +124,8 @@ public:
/// Initialize to_read_block, which is used to read data from HDFS.
for (const auto & name_type : source_info->partition_name_types)
{
to_read_block.erase(name_type.name);
if (to_read_block.has(name_type.name))
to_read_block.erase(name_type.name);
}
/// Initialize format settings

View File

@ -179,8 +179,9 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
method = chooseCompressionMethod(current_path, compression_method);
}
/// For clickhouse-local add progress callback to display progress bar.
if (context->getApplicationType() == Context::ApplicationType::LOCAL)
/// For clickhouse-local and clickhouse-client add progress callback to display progress bar.
if (context->getApplicationType() == Context::ApplicationType::LOCAL
|| context->getApplicationType() == Context::ApplicationType::CLIENT)
{
auto & in = static_cast<ReadBufferFromFileDescriptor &>(*nested_buffer);
in.setProgressCallback(context);
@ -643,7 +644,9 @@ Pipe StorageFile::read(
/// Set total number of bytes to process. For progress bar.
auto progress_callback = context->getFileProgressCallback();
if (context->getApplicationType() == Context::ApplicationType::LOCAL && progress_callback)
if ((context->getApplicationType() == Context::ApplicationType::LOCAL
|| context->getApplicationType() == Context::ApplicationType::CLIENT)
&& progress_callback)
progress_callback(FileProgress(0, total_bytes_to_read));
for (size_t i = 0; i < num_streams; ++i)

View File

@ -3,30 +3,35 @@
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTLiteral.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/ConnectionTimeoutsContext.h>
#include <IO/IOThreadPool.h>
#include <IO/ParallelReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromHTTP.h>
#include <IO/WriteHelpers.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/ConnectionTimeoutsContext.h>
#include <Formats/FormatFactory.h>
#include <Formats/ReadSchemaUtils.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Common/parseRemoteDescription.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Storages/PartitionedSink.h>
#include "Common/ThreadStatus.h"
#include <Common/parseRemoteDescription.h>
#include "IO/HTTPCommon.h"
#include "IO/ReadWriteBufferFromHTTP.h"
#include <Poco/Net/HTTPRequest.h>
#include <algorithm>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Sources/SourceWithProgress.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <base/logger_useful.h>
#include <algorithm>
#include <Poco/Net/HTTPRequest.h>
namespace DB
@ -43,8 +48,7 @@ namespace ErrorCodes
static bool urlWithGlobs(const String & uri)
{
return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos)
|| uri.find('|') != std::string::npos;
return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos;
}
@ -88,8 +92,7 @@ IStorageURLBase::IStorageURLBase(
namespace
{
ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders(
const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_)
ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders(const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_)
{
ReadWriteBufferFromHTTP::HTTPHeaderEntries headers(headers_.begin(), headers_.end());
// Propagate OpenTelemetry trace context, if any, downstream.
@ -98,13 +101,11 @@ namespace
const auto & thread_trace_context = CurrentThread::get().thread_trace_context;
if (thread_trace_context.trace_id != UUID())
{
headers.emplace_back("traceparent",
thread_trace_context.composeTraceparentHeader());
headers.emplace_back("traceparent", thread_trace_context.composeTraceparentHeader());
if (!thread_trace_context.tracestate.empty())
{
headers.emplace_back("tracestate",
thread_trace_context.tracestate);
headers.emplace_back("tracestate", thread_trace_context.tracestate);
}
}
}
@ -114,8 +115,7 @@ namespace
class StorageURLSource : public SourceWithProgress
{
using URIParams = std::vector<std::pair<String, String>>;
using URIParams = std::vector<std::pair<String, String>>;
public:
struct URIInfo
@ -160,11 +160,11 @@ namespace
UInt64 max_block_size,
const ConnectionTimeouts & timeouts,
const String & compression_method,
size_t download_threads,
const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {},
const URIParams & params = {},
bool glob_url = false)
: SourceWithProgress(sample_block), name(std::move(name_))
, uri_info(uri_info_)
: SourceWithProgress(sample_block), name(std::move(name_)), uri_info(uri_info_)
{
auto headers = getHeaders(headers_);
@ -176,33 +176,40 @@ namespace
auto first_option = uri_options.begin();
read_buf = getFirstAvailableURLReadBuffer(
first_option, uri_options.end(), context, params, http_method,
callback, timeouts, compression_method, credentials, headers, glob_url, uri_options.size() == 1);
first_option,
uri_options.end(),
context,
params,
http_method,
callback,
timeouts,
compression_method,
credentials,
headers,
glob_url,
uri_options.size() == 1,
download_threads);
auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings);
auto input_format
= FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings);
QueryPipelineBuilder builder;
builder.init(Pipe(input_format));
builder.addSimpleTransform([&](const Block & cur_header)
{
return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *input_format, context);
});
builder.addSimpleTransform(
[&](const Block & cur_header)
{ return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *input_format, context); });
pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
};
}
String getName() const override
{
return name;
}
String getName() const override { return name; }
Chunk generate() override
{
while (true)
{
if (!reader)
{
auto current_uri_pos = uri_info->next_uri_to_read.fetch_add(1);
@ -239,7 +246,8 @@ namespace
Poco::Net::HTTPBasicCredentials & credentials,
const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers,
bool glob_url,
bool delay_initialization)
bool delay_initialization,
size_t download_threads)
{
String first_exception_message;
ReadSettings read_settings = context->getReadSettings();
@ -255,8 +263,137 @@ namespace
setCredentials(credentials, request_uri);
const auto settings = context->getSettings();
try
{
if (download_threads > 1)
{
try
{
ReadWriteBufferFromHTTP buffer(
request_uri,
Poco::Net::HTTPRequest::HTTP_HEAD,
callback,
timeouts,
credentials,
settings.max_http_get_redirects,
DBMS_DEFAULT_BUFFER_SIZE,
read_settings,
headers,
ReadWriteBufferFromHTTP::Range{0, std::nullopt},
&context->getRemoteHostFilter(),
true,
/* use_external_buffer */ false,
/* skip_url_not_found_error */ skip_url_not_found_error);
Poco::Net::HTTPResponse res;
for (size_t i = 0; i < settings.http_max_tries; ++i)
{
try
{
buffer.callWithRedirects(res, Poco::Net::HTTPRequest::HTTP_HEAD, true);
break;
}
catch (const Poco::Exception & e)
{
LOG_TRACE(
&Poco::Logger::get("StorageURLSource"),
"HTTP HEAD request to `{}` failed at try {}/{}. "
"Error: {}.",
request_uri.toString(),
i + 1,
settings.http_max_tries,
e.displayText());
if (!ReadWriteBufferFromHTTP::isRetriableError(res.getStatus()))
{
throw;
}
}
}
// to check if Range header is supported, we need to send a request with it set
const bool supports_ranges = (res.has("Accept-Ranges") && res.get("Accept-Ranges") == "bytes")
|| (res.has("Content-Range") && res.get("Content-Range").starts_with("bytes"));
LOG_TRACE(
&Poco::Logger::get("StorageURLSource"),
fmt::runtime(supports_ranges ? "HTTP Range is supported" : "HTTP Range is not supported"));
if (supports_ranges && res.getStatus() == Poco::Net::HTTPResponse::HTTP_PARTIAL_CONTENT
&& res.hasContentLength())
{
LOG_TRACE(
&Poco::Logger::get("StorageURLSource"),
"Using ParallelReadBuffer with {} workers with chunks of {} bytes",
download_threads,
settings.max_download_buffer_size);
auto read_buffer_factory = std::make_unique<RangedReadWriteBufferFromHTTPFactory>(
res.getContentLength(),
settings.max_download_buffer_size,
request_uri,
http_method,
callback,
timeouts,
credentials,
settings.max_http_get_redirects,
DBMS_DEFAULT_BUFFER_SIZE,
read_settings,
headers,
&context->getRemoteHostFilter(),
delay_initialization,
/* use_external_buffer */ false,
/* skip_url_not_found_error */ skip_url_not_found_error);
ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()
? CurrentThread::get().getThreadGroup()
: MainThreadStatus::getInstance().getThreadGroup();
ContextPtr query_context
= CurrentThread::isInitialized() ? CurrentThread::get().getQueryContext() : nullptr;
auto worker_cleanup = [has_running_group = running_group == nullptr](ThreadStatus & thread_status)
{
if (has_running_group)
thread_status.detachQuery(false);
};
auto worker_setup = [query_context = std::move(query_context),
running_group = std::move(running_group)](ThreadStatus & thread_status)
{
/// Save query context if any, because cache implementation needs it.
if (query_context)
thread_status.attachQueryContext(query_context);
/// To be able to pass ProfileEvents.
if (running_group)
thread_status.attachQuery(running_group);
};
return wrapReadBufferWithCompressionMethod(
std::make_unique<ParallelReadBuffer>(
std::move(read_buffer_factory),
&IOThreadPool::get(),
download_threads,
std::move(worker_setup),
std::move(worker_cleanup)),
chooseCompressionMethod(request_uri.getPath(), compression_method));
}
}
catch (const Poco::Exception & e)
{
LOG_TRACE(
&Poco::Logger::get("StorageURLSource"),
"Failed to setup ParallelReadBuffer because of an exception:\n{}.\nFalling back to the single-threaded "
"buffer",
e.displayText());
}
}
LOG_TRACE(&Poco::Logger::get("StorageURLSource"), "Using single-threaded read buffer");
return wrapReadBufferWithCompressionMethod(
std::make_unique<ReadWriteBufferFromHTTP>(
request_uri,
@ -264,15 +401,15 @@ namespace
callback,
timeouts,
credentials,
context->getSettingsRef().max_http_get_redirects,
settings.max_http_get_redirects,
DBMS_DEFAULT_BUFFER_SIZE,
read_settings,
headers,
ReadWriteBufferFromHTTP::Range{},
&context->getRemoteHostFilter(),
delay_initialization,
/* use_external_buffer */false,
/* skip_url_not_found_error */skip_url_not_found_error),
/* use_external_buffer */ false,
/* skip_url_not_found_error */ skip_url_not_found_error),
chooseCompressionMethod(request_uri.getPath(), compression_method));
}
catch (...)
@ -323,10 +460,10 @@ StorageURLSink::StorageURLSink(
std::string content_encoding = toContentEncodingName(compression_method);
write_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromHTTP>(Poco::URI(uri), http_method, content_type, content_encoding, timeouts),
compression_method, 3);
writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block,
context, {} /* write callback */, format_settings);
std::make_unique<WriteBufferFromHTTP>(Poco::URI(uri), http_method, content_type, content_encoding, timeouts),
compression_method,
3);
writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, {} /* write callback */, format_settings);
}
@ -355,15 +492,15 @@ public:
const ConnectionTimeouts & timeouts_,
const CompressionMethod compression_method_,
const String & http_method_)
: PartitionedSink(partition_by, context_, sample_block_)
, uri(uri_)
, format(format_)
, format_settings(format_settings_)
, sample_block(sample_block_)
, context(context_)
, timeouts(timeouts_)
, compression_method(compression_method_)
, http_method(http_method_)
: PartitionedSink(partition_by, context_, sample_block_)
, uri(uri_)
, format(format_)
, format_settings(format_settings_)
, sample_block(sample_block_)
, context(context_)
, timeouts(timeouts_)
, compression_method(compression_method_)
, http_method(http_method_)
{
}
@ -371,8 +508,8 @@ public:
{
auto partition_path = PartitionedSink::replaceWildcards(uri, partition_id);
context->getRemoteHostFilter().checkURL(Poco::URI(partition_path));
return std::make_shared<StorageURLSink>(partition_path, format,
format_settings, sample_block, context, timeouts, compression_method, http_method);
return std::make_shared<StorageURLSink>(
partition_path, format, format_settings, sample_block, context, timeouts, compression_method, http_method);
}
private:
@ -462,7 +599,8 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData(
credentials,
headers,
false,
false);
false,
context->getSettingsRef().max_download_threads);
};
try
@ -479,7 +617,10 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData(
} while (++option < urls_to_check.end());
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from urls failed. Errors:\n{}", exception_messages);
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"All attempts to extract table structure from urls failed. Errors:\n{}",
exception_messages);
}
bool IStorageURLBase::isColumnOriented() const
@ -512,6 +653,8 @@ Pipe IStorageURLBase::read(
block_for_format = storage_snapshot->metadata->getSampleBlock();
}
size_t max_download_threads = local_context->getSettingsRef().max_download_threads;
if (urlWithGlobs(uri))
{
size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements;
@ -528,14 +671,13 @@ Pipe IStorageURLBase::read(
Pipes pipes;
pipes.reserve(num_streams);
size_t download_threads = num_streams >= max_download_threads ? 1 : (max_download_threads / num_streams);
for (size_t i = 0; i < num_streams; ++i)
{
pipes.emplace_back(std::make_shared<StorageURLSource>(
uri_info,
getReadMethod(),
getReadPOSTDataCallback(
column_names, columns_description, query_info,
local_context, processed_stage, max_block_size),
getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size),
format_name,
format_settings,
getName(),
@ -544,7 +686,11 @@ Pipe IStorageURLBase::read(
columns_description,
max_block_size,
ConnectionTimeouts::getHTTPTimeouts(local_context),
compression_method, headers, params, /* glob_url */true));
compression_method,
download_threads,
headers,
params,
/* glob_url */ true));
}
return Pipe::unitePipes(std::move(pipes));
}
@ -555,9 +701,7 @@ Pipe IStorageURLBase::read(
return Pipe(std::make_shared<StorageURLSource>(
uri_info,
getReadMethod(),
getReadPOSTDataCallback(
column_names, columns_description, query_info,
local_context, processed_stage, max_block_size),
getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size),
format_name,
format_settings,
getName(),
@ -566,7 +710,10 @@ Pipe IStorageURLBase::read(
columns_description,
max_block_size,
ConnectionTimeouts::getHTTPTimeouts(local_context),
compression_method, headers, params));
compression_method,
max_download_threads,
headers,
params));
}
}
@ -598,12 +745,10 @@ Pipe StorageURLWithFailover::read(
auto uri_info = std::make_shared<StorageURLSource::URIInfo>();
uri_info->uri_list_to_read.emplace_back(uri_options);
auto pipe = Pipe(std::make_shared<StorageURLSource>(
auto pipe = Pipe(std::make_shared<StorageURLSource>(
uri_info,
getReadMethod(),
getReadPOSTDataCallback(
column_names, columns_description, query_info,
local_context, processed_stage, max_block_size),
getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size),
format_name,
format_settings,
getName(),
@ -612,7 +757,10 @@ Pipe StorageURLWithFailover::read(
columns_description,
max_block_size,
ConnectionTimeouts::getHTTPTimeouts(local_context),
compression_method, headers, params));
compression_method,
local_context->getSettingsRef().max_download_threads,
headers,
params));
std::shuffle(uri_options.begin(), uri_options.end(), thread_local_rng);
return pipe;
}
@ -632,17 +780,26 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad
{
return std::make_shared<PartitionedStorageURLSink>(
partition_by_ast,
uri, format_name,
format_settings, metadata_snapshot->getSampleBlock(), context,
uri,
format_name,
format_settings,
metadata_snapshot->getSampleBlock(),
context,
ConnectionTimeouts::getHTTPTimeouts(context),
chooseCompressionMethod(uri, compression_method), http_method);
chooseCompressionMethod(uri, compression_method),
http_method);
}
else
{
return std::make_shared<StorageURLSink>(uri, format_name,
format_settings, metadata_snapshot->getSampleBlock(), context,
return std::make_shared<StorageURLSink>(
uri,
format_name,
format_settings,
metadata_snapshot->getSampleBlock(),
context,
ConnectionTimeouts::getHTTPTimeouts(context),
chooseCompressionMethod(uri, compression_method), http_method);
chooseCompressionMethod(uri, compression_method),
http_method);
}
}
@ -659,8 +816,19 @@ StorageURL::StorageURL(
const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_,
const String & http_method_,
ASTPtr partition_by_)
: IStorageURLBase(uri_, context_, table_id_, format_name_, format_settings_,
columns_, constraints_, comment, compression_method_, headers_, http_method_, partition_by_)
: IStorageURLBase(
uri_,
context_,
table_id_,
format_name_,
format_settings_,
columns_,
constraints_,
comment,
compression_method_,
headers_,
http_method_,
partition_by_)
{
context_->getRemoteHostFilter().checkURL(Poco::URI(uri));
}
@ -711,8 +879,7 @@ FormatSettings StorageURL::getFormatSettingsFromArgs(const StorageFactory::Argum
// Apply changes from SETTINGS clause, with validation.
user_format_settings.applyChanges(args.storage_def->settings->changes);
format_settings = getFormatSettings(args.getContext(),
user_format_settings);
format_settings = getFormatSettings(args.getContext(), user_format_settings);
}
else
{
@ -731,12 +898,12 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex
auto [common_configuration, storage_specific_args] = named_collection.value();
configuration.set(common_configuration);
if (!configuration.http_method.empty()
&& configuration.http_method != Poco::Net::HTTPRequest::HTTP_POST
if (!configuration.http_method.empty() && configuration.http_method != Poco::Net::HTTPRequest::HTTP_POST
&& configuration.http_method != Poco::Net::HTTPRequest::HTTP_PUT)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Http method can be POST or PUT (current: {}). For insert default is POST, for select GET",
configuration.http_method);
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Http method can be POST or PUT (current: {}). For insert default is POST, for select GET",
configuration.http_method);
if (!storage_specific_args.empty())
{
@ -754,7 +921,8 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex
{
if (args.empty() || args.size() > 3)
throw Exception(
"Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.",
"Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional "
"compression method.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (auto & arg : args)
@ -776,43 +944,45 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex
void registerStorageURL(StorageFactory & factory)
{
factory.registerStorage("URL", [](const StorageFactory::Arguments & args)
{
ASTs & engine_args = args.engine_args;
auto configuration = StorageURL::getConfiguration(engine_args, args.getLocalContext());
auto format_settings = StorageURL::getFormatSettingsFromArgs(args);
ReadWriteBufferFromHTTP::HTTPHeaderEntries headers;
for (const auto & [header, value] : configuration.headers)
factory.registerStorage(
"URL",
[](const StorageFactory::Arguments & args)
{
auto value_literal = value.safeGet<String>();
if (header == "Range")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed");
headers.emplace_back(std::make_pair(header, value_literal));
}
ASTs & engine_args = args.engine_args;
auto configuration = StorageURL::getConfiguration(engine_args, args.getLocalContext());
auto format_settings = StorageURL::getFormatSettingsFromArgs(args);
ASTPtr partition_by;
if (args.storage_def->partition_by)
partition_by = args.storage_def->partition_by->clone();
ReadWriteBufferFromHTTP::HTTPHeaderEntries headers;
for (const auto & [header, value] : configuration.headers)
{
auto value_literal = value.safeGet<String>();
if (header == "Range")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed");
headers.emplace_back(std::make_pair(header, value_literal));
}
return StorageURL::create(
configuration.url,
args.table_id,
configuration.format,
format_settings,
args.columns,
args.constraints,
args.comment,
args.getContext(),
configuration.compression_method,
headers,
configuration.http_method,
partition_by);
},
{
.supports_settings = true,
.supports_schema_inference = true,
.source_access_type = AccessType::URL,
});
ASTPtr partition_by;
if (args.storage_def->partition_by)
partition_by = args.storage_def->partition_by->clone();
return StorageURL::create(
configuration.url,
args.table_id,
configuration.format,
format_settings,
args.columns,
args.constraints,
args.comment,
args.getContext(),
configuration.compression_method,
headers,
configuration.http_method,
partition_by);
},
{
.supports_settings = true,
.supports_schema_inference = true,
.source_access_type = AccessType::URL,
});
}
}

View File

@ -7,6 +7,7 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeUUID.h>
#include <Storages/VirtualColumnUtils.h>
@ -64,7 +65,11 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_
{"serialization_kind", std::make_shared<DataTypeString>()},
{"subcolumns.names", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"subcolumns.types", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"subcolumns.serializations", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}
{"subcolumns.serializations", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"subcolumns.bytes_on_disk", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"subcolumns.data_compressed_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"subcolumns.data_uncompressed_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"subcolumns.marks_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
}
)
{
@ -228,13 +233,43 @@ void StorageSystemPartsColumns::processNextStorage(
Array subcolumn_names;
Array subcolumn_types;
Array subcolumn_sers;
Array subcolumn_serializations;
Array subcolumn_bytes_on_disk;
Array subcolumn_data_compressed_bytes;
Array subcolumn_data_uncompressed_bytes;
Array subcolumn_marks_bytes;
IDataType::forEachSubcolumn([&](const auto &, const auto & name, const auto & data)
IDataType::forEachSubcolumn([&](const auto & subpath, const auto & name, const auto & data)
{
/// We count only final subcolumns, which are represented by files on disk
/// and skip intermediate suibcolumns of types Tuple and Nested.
if (isTuple(data.type) || isNested(data.type))
return;
subcolumn_names.push_back(name);
subcolumn_types.push_back(data.type->getName());
subcolumn_sers.push_back(ISerialization::kindToString(data.serialization->getKind()));
subcolumn_serializations.push_back(ISerialization::kindToString(data.serialization->getKind()));
ColumnSize size;
NameAndTypePair subcolumn(column.name, name, column.type, data.type);
String file_name = ISerialization::getFileNameForStream(subcolumn, subpath);
auto bin_checksum = part->checksums.files.find(file_name + ".bin");
if (bin_checksum != part->checksums.files.end())
{
size.data_compressed += bin_checksum->second.file_size;
size.data_uncompressed += bin_checksum->second.uncompressed_size;
}
auto mrk_checksum = part->checksums.files.find(file_name + part->index_granularity_info.marks_file_extension);
if (mrk_checksum != part->checksums.files.end())
size.marks += mrk_checksum->second.file_size;
subcolumn_bytes_on_disk.push_back(size.data_compressed + size.marks);
subcolumn_data_compressed_bytes.push_back(size.data_compressed);
subcolumn_data_uncompressed_bytes.push_back(size.data_uncompressed);
subcolumn_marks_bytes.push_back(size.marks);
}, { serialization, column.type, nullptr, nullptr });
if (columns_mask[src_index++])
@ -242,7 +277,15 @@ void StorageSystemPartsColumns::processNextStorage(
if (columns_mask[src_index++])
columns[res_index++]->insert(subcolumn_types);
if (columns_mask[src_index++])
columns[res_index++]->insert(subcolumn_sers);
columns[res_index++]->insert(subcolumn_serializations);
if (columns_mask[src_index++])
columns[res_index++]->insert(subcolumn_bytes_on_disk);
if (columns_mask[src_index++])
columns[res_index++]->insert(subcolumn_data_compressed_bytes);
if (columns_mask[src_index++])
columns[res_index++]->insert(subcolumn_data_uncompressed_bytes);
if (columns_mask[src_index++])
columns[res_index++]->insert(subcolumn_marks_bytes);
if (has_state_column)
columns[res_index++]->insert(part->stateString());

View File

@ -32,6 +32,8 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
auto & ast_create = ast->as<ASTCreateQuery &>();
assert(view_name == ast_create.getTable());
ast_create.attach = false;
ast_create.setDatabase(database.getDatabaseName());
if (is_uppercase)
ast_create.setTable(Poco::toUpper(view_name));

View File

@ -9,7 +9,9 @@ import re
parts = {}
for s in sys.stdin.read().split():
m = re.match('^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$', s)
m = re.match(
"^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", s
)
if m == None:
continue
m1 = m.group(1)
@ -18,7 +20,7 @@ for s in sys.stdin.read().split():
i2 = int(m.group(4))
l = int(m.group(5))
if m1 != m2:
raise Exception('not in single month: ' + s)
raise Exception("not in single month: " + s)
if m1 not in parts:
parts[m1] = []
parts[m1].append((i1, i2, l, s))
@ -27,13 +29,13 @@ for m, ps in sorted(parts.items()):
ps.sort(key=lambda i1_i2_l_s: (i1_i2_l_s[0], -i1_i2_l_s[1], -i1_i2_l_s[2]))
(x2, y2, l2, s2) = (-1, -1, -1, -1)
for x1, y1, l1, s1 in ps:
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
pass
elif x1 > y2: # 1 is to the right of 2
elif x1 > y2: # 1 is to the right of 2
if x1 != y2 + 1 and y2 != -1:
print() # to see the missing numbers
print() # to see the missing numbers
(x2, y2, l2, s2) = (x1, y1, l1, s1)
print(s1)
else:
raise Exception('invalid parts intersection: ' + s1 + ' and ' + s2)
raise Exception("invalid parts intersection: " + s1 + " and " + s2)
print()

View File

@ -7,7 +7,7 @@ else ()
include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake)
endif ()
option (ENABLE_CLICKHOUSE_TEST "Install clickhouse-test script and relevant tests scenarios" ON)
option (ENABLE_CLICKHOUSE_TEST "Install clickhouse-test script and relevant tests scenarios" OFF)
if (ENABLE_CLICKHOUSE_TEST)
install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -7,8 +7,14 @@ import sys
from github import Github
from env_helper import GITHUB_REPOSITORY, TEMP_PATH, REPO_COPY, REPORTS_PATH, GITHUB_SERVER_URL, \
GITHUB_RUN_ID
from env_helper import (
GITHUB_REPOSITORY,
TEMP_PATH,
REPO_COPY,
REPORTS_PATH,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
)
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
@ -19,19 +25,24 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickh
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
IMAGE_NAME = 'clickhouse/fuzzer'
IMAGE_NAME = "clickhouse/fuzzer"
def get_run_command(pr_number, sha, download_url, workspace_path, image):
return f'docker run --network=host --volume={workspace_path}:/workspace ' \
'--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE ' \
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\
f'{image}'
return (
f"docker run --network=host --volume={workspace_path}:/workspace "
"--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '
f"{image}"
)
def get_commit(gh, commit_sha):
repo = gh.get_repo(GITHUB_REPOSITORY)
commit = repo.get_commit(commit_sha)
return commit
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@ -64,7 +75,7 @@ if __name__ == "__main__":
raise Exception("No build URLs found")
for url in urls:
if url.endswith('/clickhouse'):
if url.endswith("/clickhouse"):
build_url = url
break
else:
@ -72,16 +83,20 @@ if __name__ == "__main__":
logging.info("Got build url %s", build_url)
workspace_path = os.path.join(temp_path, 'workspace')
workspace_path = os.path.join(temp_path, "workspace")
if not os.path.exists(workspace_path):
os.makedirs(workspace_path)
run_command = get_run_command(pr_info.number, pr_info.sha, build_url, workspace_path, docker_image)
run_command = get_run_command(
pr_info.number, pr_info.sha, build_url, workspace_path, docker_image
)
logging.info("Going to run %s", run_command)
run_log_path = os.path.join(temp_path, "runlog.log")
with open(run_log_path, 'w', encoding='utf-8') as log:
with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process:
with open(run_log_path, "w", encoding="utf-8") as log:
with subprocess.Popen(
run_command, shell=True, stderr=log, stdout=log
) as process:
retcode = process.wait()
if retcode == 0:
logging.info("Run successfully")
@ -90,56 +105,70 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
check_name_lower = check_name.lower().replace('(', '').replace(')', '').replace(' ', '')
s3_prefix = f'{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/'
check_name_lower = (
check_name.lower().replace("(", "").replace(")", "").replace(" ", "")
)
s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/"
paths = {
'runlog.log': run_log_path,
'main.log': os.path.join(workspace_path, 'main.log'),
'server.log': os.path.join(workspace_path, 'server.log'),
'fuzzer.log': os.path.join(workspace_path, 'fuzzer.log'),
'report.html': os.path.join(workspace_path, 'report.html'),
'core.gz': os.path.join(workspace_path, 'core.gz'),
"runlog.log": run_log_path,
"main.log": os.path.join(workspace_path, "main.log"),
"server.log": os.path.join(workspace_path, "server.log"),
"fuzzer.log": os.path.join(workspace_path, "fuzzer.log"),
"report.html": os.path.join(workspace_path, "report.html"),
"core.gz": os.path.join(workspace_path, "core.gz"),
}
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
for f in paths:
try:
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + '/' + f)
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + "/" + f)
except Exception as ex:
logging.info("Exception uploading file %s text %s", f, ex)
paths[f] = ''
paths[f] = ""
report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
if paths['runlog.log']:
report_url = paths['runlog.log']
if paths['main.log']:
report_url = paths['main.log']
if paths['server.log']:
report_url = paths['server.log']
if paths['fuzzer.log']:
report_url = paths['fuzzer.log']
if paths['report.html']:
report_url = paths['report.html']
if paths["runlog.log"]:
report_url = paths["runlog.log"]
if paths["main.log"]:
report_url = paths["main.log"]
if paths["server.log"]:
report_url = paths["server.log"]
if paths["fuzzer.log"]:
report_url = paths["fuzzer.log"]
if paths["report.html"]:
report_url = paths["report.html"]
# Try to get status message saved by the fuzzer
try:
with open(os.path.join(workspace_path, 'status.txt'), 'r', encoding='utf-8') as status_f:
status = status_f.readline().rstrip('\n')
with open(
os.path.join(workspace_path, "status.txt"), "r", encoding="utf-8"
) as status_f:
status = status_f.readline().rstrip("\n")
with open(os.path.join(workspace_path, 'description.txt'), 'r', encoding='utf-8') as desc_f:
description = desc_f.readline().rstrip('\n')[:140]
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
except:
status = 'failure'
description = 'Task failed: $?=' + str(retcode)
status = "failure"
description = "Task failed: $?=" + str(retcode)
if 'fail' in status:
test_result = [(description, 'FAIL')]
if "fail" in status:
test_result = [(description, "FAIL")]
else:
test_result = [(description, 'OK')]
test_result = [(description, "OK")]
ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_result,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name,
)
logging.info("Result: '%s', '%s', '%s'", status, description, report_url)
print(f"::notice ::Report url: {report_url}")

View File

@ -0,0 +1,48 @@
#!/usr/bin/env python3
import argparse
import csv
import itertools
import os
import sys
NO_CHANGES_MSG = "Nothing to run"
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("report1")
parser.add_argument("report2")
return parser.parse_args()
def post_commit_status_from_file(file_path):
res = []
with open(file_path, "r", encoding="utf-8") as f:
fin = csv.reader(f, delimiter="\t")
res = list(itertools.islice(fin, 1))
if len(res) < 1:
raise Exception(f'Can\'t read from "{file_path}"')
if len(res[0]) != 3:
raise Exception(f'Can\'t read from "{file_path}"')
return res[0]
def process_results(file_path):
state, report_url, description = post_commit_status_from_file(file_path)
prefix = os.path.basename(os.path.dirname(file_path))
print(
f"::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}"
)
return state == "success"
def main(args):
is_ok = False
is_ok = process_results(args.report1) or is_ok
is_ok = process_results(args.report2) or is_ok
sys.exit(0 if is_ok else 1)
if __name__ == "__main__":
main(parse_args())

View File

@ -21,6 +21,8 @@ from ci_config import CI_CONFIG, BuildConfig
from docker_pull_helper import get_image_with_version
from tee_popen import TeePopen
IMAGE_NAME = "clickhouse/binary-builder"
def get_build_config(build_check_name: str, build_name: str) -> BuildConfig:
if build_check_name == "ClickHouse build check (actions)":
@ -52,7 +54,6 @@ def get_packager_cmd(
build_version: str,
image_version: str,
ccache_path: str,
pr_info: PRInfo,
) -> str:
package_type = build_config["package_type"]
comp = build_config["compiler"]
@ -73,9 +74,8 @@ def get_packager_cmd(
cmd += " --cache=ccache"
cmd += " --ccache_dir={}".format(ccache_path)
if "alien_pkgs" in build_config and build_config["alien_pkgs"]:
if pr_info.number == 0 or "release" in pr_info.labels:
cmd += " --alien-pkgs rpm tgz"
if "additional_pkgs" in build_config and build_config["additional_pkgs"]:
cmd += " --additional-pkgs"
cmd += " --docker-image-version={}".format(image_version)
cmd += " --version={}".format(build_version)
@ -86,13 +86,6 @@ def get_packager_cmd(
return cmd
def get_image_name(build_config: BuildConfig) -> str:
if build_config["package_type"] != "deb":
return "clickhouse/binary-builder"
else:
return "clickhouse/deb-builder"
def build_clickhouse(
packager_cmd: str, logs_path: str, build_output_path: str
) -> Tuple[str, bool]:
@ -256,8 +249,7 @@ def main():
else:
sys.exit(0)
image_name = get_image_name(build_config)
docker_image = get_image_with_version(IMAGES_PATH, image_name)
docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME)
image_version = docker_image.version
logging.info("Got version from repo %s", version.string)
@ -298,7 +290,6 @@ def main():
version.string,
image_version,
ccache_path,
pr_info,
)
logging.info("Going to run packager with %s", packager_cmd)

View File

@ -6,7 +6,13 @@ import os
import sys
from github import Github
from env_helper import REPORTS_PATH, TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID
from env_helper import (
REPORTS_PATH,
TEMP_PATH,
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
)
from report import create_build_html_report
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
@ -15,8 +21,19 @@ from commit_status_helper import get_commit
from ci_config import CI_CONFIG
from rerun_helper import RerunHelper
class BuildResult():
def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage):
class BuildResult:
def __init__(
self,
compiler,
build_type,
sanitizer,
bundled,
splitted,
status,
elapsed_seconds,
with_coverage,
):
self.compiler = compiler
self.build_type = build_type
self.sanitizer = sanitizer
@ -26,54 +43,72 @@ class BuildResult():
self.elapsed_seconds = elapsed_seconds
self.with_coverage = with_coverage
def group_by_artifacts(build_urls):
groups = {'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'performance': []}
groups = {
"apk": [],
"deb": [],
"binary": [],
"tgz": [],
"rpm": [],
"performance": [],
}
for url in build_urls:
if url.endswith('performance.tgz'):
groups['performance'].append(url)
elif url.endswith('.deb') or url.endswith('.buildinfo') or url.endswith('.changes') or url.endswith('.tar.gz'):
groups['deb'].append(url)
elif url.endswith('.rpm'):
groups['rpm'].append(url)
elif url.endswith('.tgz'):
groups['tgz'].append(url)
if url.endswith("performance.tgz"):
groups["performance"].append(url)
elif (
url.endswith(".deb")
or url.endswith(".buildinfo")
or url.endswith(".changes")
or url.endswith(".tar.gz")
):
groups["deb"].append(url)
elif url.endswith(".apk"):
groups["apk"].append(url)
elif url.endswith(".rpm"):
groups["rpm"].append(url)
elif url.endswith(".tgz"):
groups["tgz"].append(url)
else:
groups['binary'].append(url)
groups["binary"].append(url)
return groups
def process_report(build_report):
build_config = build_report['build_config']
build_config = build_report["build_config"]
build_result = BuildResult(
compiler=build_config['compiler'],
build_type=build_config['build_type'],
sanitizer=build_config['sanitizer'],
bundled=build_config['bundled'],
splitted=build_config['splitted'],
status="success" if build_report['status'] else "failure",
elapsed_seconds=build_report['elapsed_seconds'],
with_coverage=False
compiler=build_config["compiler"],
build_type=build_config["build_type"],
sanitizer=build_config["sanitizer"],
bundled=build_config["bundled"],
splitted=build_config["splitted"],
status="success" if build_report["status"] else "failure",
elapsed_seconds=build_report["elapsed_seconds"],
with_coverage=False,
)
build_results = []
build_urls = []
build_logs_urls = []
urls_groups = group_by_artifacts(build_report['build_urls'])
urls_groups = group_by_artifacts(build_report["build_urls"])
found_group = False
for _, group_urls in urls_groups.items():
if group_urls:
build_results.append(build_result)
build_urls.append(group_urls)
build_logs_urls.append(build_report['log_url'])
build_logs_urls.append(build_report["log_url"])
found_group = True
if not found_group:
build_results.append(build_result)
build_urls.append([""])
build_logs_urls.append(build_report['log_url'])
build_logs_urls.append(build_report["log_url"])
return build_results, build_urls, build_logs_urls
def get_build_name_from_file_name(file_name):
return file_name.replace('build_urls_', '').replace('.json', '')
return file_name.replace("build_urls_", "").replace(".json", "")
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@ -99,17 +134,25 @@ if __name__ == "__main__":
build_reports_map = {}
for root, dirs, files in os.walk(reports_path):
for f in files:
if f.startswith("build_urls_") and f.endswith('.json'):
if f.startswith("build_urls_") and f.endswith(".json"):
logging.info("Found build report json %s", f)
build_name = get_build_name_from_file_name(f)
if build_name in reports_order:
with open(os.path.join(root, f), 'r') as file_handler:
with open(os.path.join(root, f), "r") as file_handler:
build_report = json.load(file_handler)
build_reports_map[build_name] = build_report
else:
logging.info("Skipping report %s for build %s, it's not in our reports list", f, build_name)
logging.info(
"Skipping report %s for build %s, it's not in our reports list",
f,
build_name,
)
build_reports = [build_reports_map[build_name] for build_name in reports_order if build_name in build_reports_map]
build_reports = [
build_reports_map[build_name]
for build_name in reports_order
if build_name in build_reports_map
]
build_results = []
build_artifacts = []
@ -127,7 +170,7 @@ if __name__ == "__main__":
logging.info("No builds, failing check")
sys.exit(1)
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
pr_info = PRInfo()
@ -137,7 +180,9 @@ if __name__ == "__main__":
branch_name = "PR #{}".format(pr_info.number)
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}"
commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}"
task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
task_url = (
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
)
report = create_build_html_report(
build_check_name,
build_results,
@ -146,18 +191,22 @@ if __name__ == "__main__":
task_url,
branch_url,
branch_name,
commit_url
commit_url,
)
report_path = os.path.join(temp_path, 'report.html')
with open(report_path, 'w') as f:
report_path = os.path.join(temp_path, "report.html")
with open(report_path, "w") as f:
f.write(report)
logging.info("Going to upload prepared report")
context_name_for_path = build_check_name.lower().replace(' ', '_')
s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
context_name_for_path = build_check_name.lower().replace(" ", "_")
s3_path_prefix = (
str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
)
url = s3_helper.upload_build_file_to_s3(report_path, s3_path_prefix + "/report.html")
url = s3_helper.upload_build_file_to_s3(
report_path, s3_path_prefix + "/report.html"
)
logging.info("Report url %s", url)
total_builds = len(build_results)
@ -180,4 +229,9 @@ if __name__ == "__main__":
print("::notice ::Report url: {}".format(url))
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=build_check_name, description=description, state=summary_status, target_url=url)
commit.create_status(
context=build_check_name,
description=description,
state=summary_status,
target_url=url,
)

View File

@ -13,16 +13,19 @@ from compress_files import decompress_fast, compress_fast
DOWNLOAD_RETRIES_COUNT = 5
def dowload_file_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(DOWNLOAD_RETRIES_COUNT):
try:
with open(path, 'wb') as f:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
@ -34,8 +37,8 @@ def dowload_file_with_progress(url, path):
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
eq_str = '=' * done
space_str = ' ' * (50 - done)
eq_str = "=" * done
space_str = " " * (50 - done)
sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%")
sys.stdout.flush()
break
@ -52,7 +55,9 @@ def dowload_file_with_progress(url, path):
logging.info("Downloading finished")
def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
def get_ccache_if_not_exists(
path_to_ccache_dir, s3_helper, current_pr_number, temp_path
):
ccache_name = os.path.basename(path_to_ccache_dir)
cache_found = False
prs_to_check = [current_pr_number]
@ -93,13 +98,16 @@ def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, t
else:
logging.info("ccache downloaded")
def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number)
ccache_name = os.path.basename(path_to_ccache_dir)
compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.gz")
compress_fast(path_to_ccache_dir, compressed_cache_path)
s3_path = str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
s3_path = (
str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
)
logging.info("Will upload %s to path %s", compressed_cache_path, s3_path)
s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path)
logging.info("Upload finished")

View File

@ -20,21 +20,29 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
sys.path.append(os.path.join(repo_path, "utils/github"))
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
token = get_parameter_from_ssm("github_robot_token_1")
bp = Backport(token, os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"), os.environ.get("REPO_TEAM"))
bp = Backport(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
)
def cherrypick_run(token, pr, branch):
return CherryPick(token,
os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"), pr, branch
).execute(repo_path, False)
return CherryPick(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
pr,
branch,
).execute(repo_path, False)
try:
bp.execute(repo_path, 'origin', None, cherrypick_run)
bp.execute(repo_path, "origin", None, cherrypick_run)
except subprocess.CalledProcessError as e:
logging.error(e.output)

View File

@ -17,7 +17,9 @@ import sys
class Backport:
def __init__(self, token, owner, name, team):
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7)
self._gh = RemoteRepo(
token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
)
self._token = token
self.default_branch_name = self._gh.default_branch
self.ssh_url = self._gh.ssh_url
@ -28,7 +30,7 @@ class Backport:
def getBranchesWithRelease(self):
branches = set()
for pull_request in self._gh.find_pull_requests("release"):
branches.add(pull_request['headRefName'])
branches.add(pull_request["headRefName"])
return branches
def execute(self, repo, upstream, until_commit, run_cherrypick):
@ -44,11 +46,11 @@ class Backport:
branches.append(branch)
if not branches:
logging.info('No release branches found!')
logging.info("No release branches found!")
return
for branch in branches:
logging.info('Found release branch: %s', branch[0])
logging.info("Found release branch: %s", branch[0])
if not until_commit:
until_commit = branches[0][1]
@ -56,73 +58,128 @@ class Backport:
backport_map = {}
RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$')
RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')
RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$')
RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")
# pull-requests are sorted by ancestry from the most recent.
for pr in pull_requests:
while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']):
logging.info("PR #{} is already inside {}. Dropping this branch for further PRs".format(pr['number'], branches[-1][0]))
while repo.comparator(branches[-1][1]) >= repo.comparator(
pr["mergeCommit"]["oid"]
):
logging.info(
"PR #{} is already inside {}. Dropping this branch for further PRs".format(
pr["number"], branches[-1][0]
)
)
branches.pop()
logging.info("Processing PR #{}".format(pr['number']))
logging.info("Processing PR #{}".format(pr["number"]))
assert len(branches)
branch_set = set([branch[0] for branch in branches])
# First pass. Find all must-backports
for label in pr['labels']['nodes']:
if label['name'] == 'pr-must-backport':
backport_map[pr['number']] = branch_set.copy()
for label in pr["labels"]["nodes"]:
if label["name"] == "pr-must-backport":
backport_map[pr["number"]] = branch_set.copy()
continue
matched = RE_MUST_BACKPORT.match(label['name'])
matched = RE_MUST_BACKPORT.match(label["name"])
if matched:
if pr['number'] not in backport_map:
backport_map[pr['number']] = set()
backport_map[pr['number']].add(matched.group(1))
if pr["number"] not in backport_map:
backport_map[pr["number"]] = set()
backport_map[pr["number"]].add(matched.group(1))
# Second pass. Find all no-backports
for label in pr['labels']['nodes']:
if label['name'] == 'pr-no-backport' and pr['number'] in backport_map:
del backport_map[pr['number']]
for label in pr["labels"]["nodes"]:
if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
del backport_map[pr["number"]]
break
matched_no_backport = RE_NO_BACKPORT.match(label['name'])
matched_backported = RE_BACKPORTED.match(label['name'])
if matched_no_backport and pr['number'] in backport_map and matched_no_backport.group(1) in backport_map[pr['number']]:
backport_map[pr['number']].remove(matched_no_backport.group(1))
logging.info('\tskipping %s because of forced no-backport', matched_no_backport.group(1))
elif matched_backported and pr['number'] in backport_map and matched_backported.group(1) in backport_map[pr['number']]:
backport_map[pr['number']].remove(matched_backported.group(1))
logging.info('\tskipping %s because it\'s already backported manually', matched_backported.group(1))
matched_no_backport = RE_NO_BACKPORT.match(label["name"])
matched_backported = RE_BACKPORTED.match(label["name"])
if (
matched_no_backport
and pr["number"] in backport_map
and matched_no_backport.group(1) in backport_map[pr["number"]]
):
backport_map[pr["number"]].remove(matched_no_backport.group(1))
logging.info(
"\tskipping %s because of forced no-backport",
matched_no_backport.group(1),
)
elif (
matched_backported
and pr["number"] in backport_map
and matched_backported.group(1) in backport_map[pr["number"]]
):
backport_map[pr["number"]].remove(matched_backported.group(1))
logging.info(
"\tskipping %s because it's already backported manually",
matched_backported.group(1),
)
for pr, branches in list(backport_map.items()):
logging.info('PR #%s needs to be backported to:', pr)
logging.info("PR #%s needs to be backported to:", pr)
for branch in branches:
logging.info('\t%s, and the status is: %s', branch, run_cherrypick(self._token, pr, branch))
logging.info(
"\t%s, and the status is: %s",
branch,
run_cherrypick(self._token, pr, branch),
)
# print API costs
logging.info('\nGitHub API total costs per query:')
logging.info("\nGitHub API total costs per query:")
for name, value in list(self._gh.api_costs.items()):
logging.info('%s : %s', name, value)
logging.info("%s : %s", name, value)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--token', type=str, required=True, help='token for Github access')
parser.add_argument('--repo', type=str, required=True, help='path to full repository', metavar='PATH')
parser.add_argument('--til', type=str, help='check PRs from HEAD til this commit', metavar='COMMIT')
parser.add_argument('--dry-run', action='store_true', help='do not create or merge any PRs', default=False)
parser.add_argument('--verbose', '-v', action='store_true', help='more verbose output', default=False)
parser.add_argument('--upstream', '-u', type=str, help='remote name of upstream in repository', default='origin')
parser.add_argument(
"--token", type=str, required=True, help="token for Github access"
)
parser.add_argument(
"--repo",
type=str,
required=True,
help="path to full repository",
metavar="PATH",
)
parser.add_argument(
"--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="do not create or merge any PRs",
default=False,
)
parser.add_argument(
"--verbose",
"-v",
action="store_true",
help="more verbose output",
default=False,
)
parser.add_argument(
"--upstream",
"-u",
type=str,
help="remote name of upstream in repository",
default="origin",
)
args = parser.parse_args()
if args.verbose:
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)
logging.basicConfig(
format="%(message)s", stream=sys.stdout, level=logging.DEBUG
)
else:
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.INFO)
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
cherrypick_run = lambda token, pr, branch: CherryPick(token, 'ClickHouse', 'ClickHouse', 'core', pr, branch).execute(args.repo, args.dry_run)
bp = Backport(args.token, 'ClickHouse', 'ClickHouse', 'core')
cherrypick_run = lambda token, pr, branch: CherryPick(
token, "ClickHouse", "ClickHouse", "core", pr, branch
).execute(args.repo, args.dry_run)
bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
bp.execute(args.repo, args.upstream, args.til, cherrypick_run)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
'''
"""
Backports changes from PR to release branch.
Requires multiple separate runs as part of the implementation.
@ -12,7 +12,7 @@ First run should do the following:
Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.
Third run creates PR from backport branch (with merged previous PR) to release branch.
'''
"""
try:
from clickhouse.utils.github.query import Query as RemoteRepo
@ -29,13 +29,13 @@ import sys
class CherryPick:
class Status(Enum):
DISCARDED = 'discarded'
NOT_INITIATED = 'not started'
FIRST_MERGEABLE = 'waiting for 1st stage'
FIRST_CONFLICTS = 'conflicts on 1st stage'
SECOND_MERGEABLE = 'waiting for 2nd stage'
SECOND_CONFLICTS = 'conflicts on 2nd stage'
MERGED = 'backported'
DISCARDED = "discarded"
NOT_INITIATED = "not started"
FIRST_MERGEABLE = "waiting for 1st stage"
FIRST_CONFLICTS = "conflicts on 1st stage"
SECOND_MERGEABLE = "waiting for 2nd stage"
SECOND_CONFLICTS = "conflicts on 2nd stage"
MERGED = "backported"
def _run(self, args):
out = subprocess.check_output(args).rstrip()
@ -50,51 +50,90 @@ class CherryPick:
# TODO: check if pull-request is merged.
self.merge_commit_oid = self._pr['mergeCommit']['oid']
self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
self.target_branch = target_branch
self.backport_branch = 'backport/{branch}/{pr}'.format(branch=target_branch, pr=pr_number)
self.cherrypick_branch = 'cherrypick/{branch}/{oid}'.format(branch=target_branch, oid=self.merge_commit_oid)
self.backport_branch = "backport/{branch}/{pr}".format(
branch=target_branch, pr=pr_number
)
self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
branch=target_branch, oid=self.merge_commit_oid
)
def getCherryPickPullRequest(self):
return self._gh.find_pull_request(base=self.backport_branch, head=self.cherrypick_branch)
return self._gh.find_pull_request(
base=self.backport_branch, head=self.cherrypick_branch
)
def createCherryPickPullRequest(self, repo_path):
DESCRIPTION = (
'This pull-request is a first step of an automated backporting.\n'
'It contains changes like after calling a local command `git cherry-pick`.\n'
'If you intend to continue backporting this changes, then resolve all conflicts if any.\n'
'Otherwise, if you do not want to backport them, then just close this pull-request.\n'
'\n'
'The check results does not matter at this step - you can safely ignore them.\n'
'Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n'
"This pull-request is a first step of an automated backporting.\n"
"It contains changes like after calling a local command `git cherry-pick`.\n"
"If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
"Otherwise, if you do not want to backport them, then just close this pull-request.\n"
"\n"
"The check results does not matter at this step - you can safely ignore them.\n"
"Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
)
# FIXME: replace with something better than os.system()
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@yandex-team.ru', '-c', 'user.name=robot-clickhouse']
base_commit_oid = self._pr['mergeCommit']['parents']['nodes'][0]['oid']
git_prefix = [
"git",
"-C",
repo_path,
"-c",
"user.email=robot-clickhouse@yandex-team.ru",
"-c",
"user.name=robot-clickhouse",
]
base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]
# Create separate branch for backporting, and make it look like real cherry-pick.
self._run(git_prefix + ['checkout', '-f', self.target_branch])
self._run(git_prefix + ['checkout', '-B', self.backport_branch])
self._run(git_prefix + ['merge', '-s', 'ours', '--no-edit', base_commit_oid])
self._run(git_prefix + ["checkout", "-f", self.target_branch])
self._run(git_prefix + ["checkout", "-B", self.backport_branch])
self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])
# Create secondary branch to allow pull request with cherry-picked commit.
self._run(git_prefix + ['branch', '-f', self.cherrypick_branch, self.merge_commit_oid])
self._run(
git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
)
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.cherrypick_branch)])
self._run(
git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.backport_branch),
]
)
self._run(
git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.cherrypick_branch),
]
)
# Create pull-request like a local cherry-pick
pr = self._gh.create_pull_request(source=self.cherrypick_branch, target=self.backport_branch,
title='Cherry pick #{number} to {target}: {title}'.format(
number=self._pr['number'], target=self.target_branch,
title=self._pr['title'].replace('"', '\\"')),
description='Original pull-request #{}\n\n{}'.format(self._pr['number'], DESCRIPTION))
pr = self._gh.create_pull_request(
source=self.cherrypick_branch,
target=self.backport_branch,
title="Cherry pick #{number} to {target}: {title}".format(
number=self._pr["number"],
target=self.target_branch,
title=self._pr["title"].replace('"', '\\"'),
),
description="Original pull-request #{}\n\n{}".format(
self._pr["number"], DESCRIPTION
),
)
# FIXME: use `team` to leave a single eligible assignee.
self._gh.add_assignee(pr, self._pr['author'])
self._gh.add_assignee(pr, self._pr['mergedBy'])
self._gh.add_assignee(pr, self._pr["author"])
self._gh.add_assignee(pr, self._pr["mergedBy"])
self._gh.set_label(pr, "do not test")
self._gh.set_label(pr, "pr-cherrypick")
@ -102,36 +141,76 @@ class CherryPick:
return pr
def mergeCherryPickPullRequest(self, cherrypick_pr):
return self._gh.merge_pull_request(cherrypick_pr['id'])
return self._gh.merge_pull_request(cherrypick_pr["id"])
def getBackportPullRequest(self):
return self._gh.find_pull_request(base=self.target_branch, head=self.backport_branch)
return self._gh.find_pull_request(
base=self.target_branch, head=self.backport_branch
)
def createBackportPullRequest(self, cherrypick_pr, repo_path):
DESCRIPTION = (
'This pull-request is a last step of an automated backporting.\n'
'Treat it as a standard pull-request: look at the checks and resolve conflicts.\n'
'Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n'
"This pull-request is a last step of an automated backporting.\n"
"Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
"Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
)
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@clickhouse.com', '-c', 'user.name=robot-clickhouse']
git_prefix = [
"git",
"-C",
repo_path,
"-c",
"user.email=robot-clickhouse@clickhouse.com",
"-c",
"user.name=robot-clickhouse",
]
pr_title = 'Backport #{number} to {target}: {title}'.format(
number=self._pr['number'], target=self.target_branch,
title=self._pr['title'].replace('"', '\\"'))
pr_title = "Backport #{number} to {target}: {title}".format(
number=self._pr["number"],
target=self.target_branch,
title=self._pr["title"].replace('"', '\\"'),
)
self._run(git_prefix + ['checkout', '-f', self.backport_branch])
self._run(git_prefix + ['pull', '--ff-only', 'origin', self.backport_branch])
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', 'origin/' + self.target_branch, self.backport_branch])])
self._run(git_prefix + ['commit', '-a', '--allow-empty', '-m', pr_title])
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
self._run(git_prefix + ["checkout", "-f", self.backport_branch])
self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
self._run(
git_prefix
+ [
"reset",
"--soft",
self._run(
git_prefix
+ [
"merge-base",
"origin/" + self.target_branch,
self.backport_branch,
]
),
]
)
self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
self._run(
git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.backport_branch),
]
)
pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch, title=pr_title,
description='Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}'.format(self._pr['number'], cherrypick_pr['number'], DESCRIPTION))
pr = self._gh.create_pull_request(
source=self.backport_branch,
target=self.target_branch,
title=pr_title,
description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
self._pr["number"], cherrypick_pr["number"], DESCRIPTION
),
)
# FIXME: use `team` to leave a single eligible assignee.
self._gh.add_assignee(pr, self._pr['author'])
self._gh.add_assignee(pr, self._pr['mergedBy'])
self._gh.add_assignee(pr, self._pr["author"])
self._gh.add_assignee(pr, self._pr["mergedBy"])
self._gh.set_label(pr, "pr-backport")
@ -142,23 +221,43 @@ class CherryPick:
if not pr1:
if not dry_run:
pr1 = self.createCherryPickPullRequest(repo_path)
logging.debug('Created PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
logging.debug(
"Created PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
else:
return CherryPick.Status.NOT_INITIATED
else:
logging.debug('Found PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
logging.debug(
"Found PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if not pr1['merged'] and pr1['mergeable'] == 'MERGEABLE' and not pr1['closed']:
if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
if not dry_run:
pr1 = self.mergeCherryPickPullRequest(pr1)
logging.debug('Merged PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
logging.debug(
"Merged PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if not pr1['merged']:
logging.debug('Waiting for PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
if not pr1["merged"]:
logging.debug(
"Waiting for PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if pr1['closed']:
if pr1["closed"]:
return CherryPick.Status.DISCARDED
elif pr1['mergeable'] == 'CONFLICTING':
elif pr1["mergeable"] == "CONFLICTING":
return CherryPick.Status.FIRST_CONFLICTS
else:
return CherryPick.Status.FIRST_MERGEABLE
@ -167,31 +266,58 @@ class CherryPick:
if not pr2:
if not dry_run:
pr2 = self.createBackportPullRequest(pr1, repo_path)
logging.debug('Created PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url'])
logging.debug(
"Created PR with backport of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr2["url"],
)
else:
return CherryPick.Status.FIRST_MERGEABLE
else:
logging.debug('Found PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url'])
logging.debug(
"Found PR with backport of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr2["url"],
)
if pr2['merged']:
if pr2["merged"]:
return CherryPick.Status.MERGED
elif pr2['closed']:
elif pr2["closed"]:
return CherryPick.Status.DISCARDED
elif pr2['mergeable'] == 'CONFLICTING':
elif pr2["mergeable"] == "CONFLICTING":
return CherryPick.Status.SECOND_CONFLICTS
else:
return CherryPick.Status.SECOND_MERGEABLE
if __name__ == "__main__":
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)
parser = argparse.ArgumentParser()
parser.add_argument('--token', '-t', type=str, required=True, help='token for Github access')
parser.add_argument('--pr', type=str, required=True, help='PR# to cherry-pick')
parser.add_argument('--branch', '-b', type=str, required=True, help='target branch name for cherry-pick')
parser.add_argument('--repo', '-r', type=str, required=True, help='path to full repository', metavar='PATH')
parser.add_argument(
"--token", "-t", type=str, required=True, help="token for Github access"
)
parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
parser.add_argument(
"--branch",
"-b",
type=str,
required=True,
help="target branch name for cherry-pick",
)
parser.add_argument(
"--repo",
"-r",
type=str,
required=True,
help="path to full repository",
metavar="PATH",
)
args = parser.parse_args()
cp = CherryPick(args.token, 'ClickHouse', 'ClickHouse', 'core', args.pr, args.branch)
cp = CherryPick(
args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
)
cp.execute(args.repo)

View File

@ -20,13 +20,14 @@ class RepositoryBase:
return -1
else:
return 1
self.comparator = functools.cmp_to_key(cmp)
def get_head_commit(self):
return self._repo.commit(self._default)
def iterate(self, begin, end):
rev_range = '{}...{}'.format(begin, end)
rev_range = "{}...{}".format(begin, end)
for commit in self._repo.iter_commits(rev_range, first_parent=True):
yield commit
@ -39,27 +40,35 @@ class Repository(RepositoryBase):
self._default = self._remote.refs[default_branch_name]
def get_release_branches(self):
'''
"""
Returns sorted list of tuples:
* remote branch (git.refs.remote.RemoteReference),
* base commit (git.Commit),
* head (git.Commit)).
List is sorted by commits in ascending order.
'''
"""
release_branches = []
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
for branch in [r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)]:
for branch in [
r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
]:
base = self._repo.merge_base(self._default, self._repo.commit(branch))
if not base:
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
logging.info(
"Branch %s is not based on branch %s. Ignoring.",
branch.path,
self._default,
)
elif len(base) > 1:
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
logging.info(
"Branch %s has more than one base commit. Ignoring.", branch.path
)
else:
release_branches.append((os.path.basename(branch.name), base[0]))
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
class BareRepository(RepositoryBase):
@ -68,24 +77,32 @@ class BareRepository(RepositoryBase):
self._default = self._repo.branches[default_branch_name]
def get_release_branches(self):
'''
"""
Returns sorted list of tuples:
* branch (git.refs.head?),
* base commit (git.Commit),
* head (git.Commit)).
List is sorted by commits in ascending order.
'''
"""
release_branches = []
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/heads/\d+\.\d+$')
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")
for branch in [r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)]:
for branch in [
r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
]:
base = self._repo.merge_base(self._default, self._repo.commit(branch))
if not base:
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
logging.info(
"Branch %s is not based on branch %s. Ignoring.",
branch.path,
self._default,
)
elif len(base) > 1:
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
logging.info(
"Branch %s has more than one base commit. Ignoring.", branch.path
)
else:
release_branches.append((os.path.basename(branch.name), base[0]))
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
return sorted(release_branches, key=lambda x: self.comparator(x[1]))

View File

@ -1,19 +1,20 @@
# -*- coding: utf-8 -*-
class Description:
'''Parsed description representation
'''
"""Parsed description representation"""
MAP_CATEGORY_TO_LABEL = {
'New Feature': 'pr-feature',
'Bug Fix': 'pr-bugfix',
'Improvement': 'pr-improvement',
'Performance Improvement': 'pr-performance',
"New Feature": "pr-feature",
"Bug Fix": "pr-bugfix",
"Improvement": "pr-improvement",
"Performance Improvement": "pr-performance",
# 'Backward Incompatible Change': doesn't match anything
'Build/Testing/Packaging Improvement': 'pr-build',
'Non-significant (changelog entry is not needed)': 'pr-non-significant',
'Non-significant (changelog entry is not required)': 'pr-non-significant',
'Non-significant': 'pr-non-significant',
'Documentation (changelog entry is not required)': 'pr-documentation',
"Build/Testing/Packaging Improvement": "pr-build",
"Non-significant (changelog entry is not needed)": "pr-non-significant",
"Non-significant (changelog entry is not required)": "pr-non-significant",
"Non-significant": "pr-non-significant",
"Documentation (changelog entry is not required)": "pr-documentation",
# 'Other': doesn't match anything
}
@ -21,7 +22,7 @@ class Description:
self.label_name = str()
self.legal = False
self._parse(pull_request['bodyText'])
self._parse(pull_request["bodyText"])
def _parse(self, text):
lines = text.splitlines()
@ -38,14 +39,17 @@ class Description:
category = stripped
next_category = False
if stripped == 'I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en':
if (
stripped
== "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
):
self.legal = True
category_headers = (
'Category (leave one):',
'Changelog category (leave one):',
'Changelog category:',
'Category:'
"Category (leave one):",
"Changelog category (leave one):",
"Changelog category:",
"Category:",
)
if stripped in category_headers:
@ -55,6 +59,6 @@ class Description:
self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
else:
if not category:
print('Cannot find category in pr description')
print("Cannot find category in pr description")
else:
print(('Unknown category: ' + category))
print(("Unknown category: " + category))

View File

@ -5,11 +5,11 @@ import time
class Query:
'''
"""
Implements queries to the Github API using GraphQL
'''
"""
_PULL_REQUEST = '''
_PULL_REQUEST = """
author {{
... on User {{
id
@ -47,7 +47,7 @@ class Query:
number
title
url
'''
"""
def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
@ -63,14 +63,14 @@ class Query:
self.api_costs = {}
repo = self.get_repository()
self._id = repo['id']
self.ssh_url = repo['sshUrl']
self.default_branch = repo['defaultBranchRef']['name']
self._id = repo["id"]
self.ssh_url = repo["sshUrl"]
self.default_branch = repo["defaultBranchRef"]["name"]
self.members = set(self.get_members())
def get_repository(self):
_QUERY = '''
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
name
@ -78,19 +78,19 @@ class Query:
id
sshUrl
}}
'''
"""
query = _QUERY.format(owner=self._owner, name=self._name)
return self._run(query)['repository']
return self._run(query)["repository"]
def get_members(self):
'''Get all team members for organization
"""Get all team members for organization
Returns:
members: a map of members' logins to ids
'''
"""
_QUERY = '''
_QUERY = """
organization(login: "{organization}") {{
team(slug: "{team}") {{
members(first: {max_page_size} {next}) {{
@ -105,43 +105,54 @@ class Query:
}}
}}
}}
'''
"""
members = {}
not_end = True
query = _QUERY.format(organization=self._owner, team=self._team,
max_page_size=self._max_page_size,
next='')
query = _QUERY.format(
organization=self._owner,
team=self._team,
max_page_size=self._max_page_size,
next="",
)
while not_end:
result = self._run(query)['organization']['team']
result = self._run(query)["organization"]["team"]
if result is None:
break
result = result['members']
not_end = result['pageInfo']['hasNextPage']
query = _QUERY.format(organization=self._owner, team=self._team,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
result = result["members"]
not_end = result["pageInfo"]["hasNextPage"]
query = _QUERY.format(
organization=self._owner,
team=self._team,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
members += dict([(node['login'], node['id']) for node in result['nodes']])
members += dict([(node["login"], node["id"]) for node in result["nodes"]])
return members
def get_pull_request(self, number):
_QUERY = '''
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequest(number: {number}) {{
{pull_request_data}
}}
}}
'''
"""
query = _QUERY.format(owner=self._owner, name=self._name, number=number,
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
return self._run(query)['repository']['pullRequest']
query = _QUERY.format(
owner=self._owner,
name=self._name,
number=number,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
return self._run(query)["repository"]["pullRequest"]
def find_pull_request(self, base, head):
_QUERY = '''
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
nodes {{
@ -150,21 +161,27 @@ class Query:
totalCount
}}
}}
'''
"""
query = _QUERY.format(owner=self._owner, name=self._name, base=base, head=head,
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
result = self._run(query)['repository']['pullRequests']
if result['totalCount'] > 0:
return result['nodes'][0]
query = _QUERY.format(
owner=self._owner,
name=self._name,
base=base,
head=head,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
result = self._run(query)["repository"]["pullRequests"]
if result["totalCount"] > 0:
return result["nodes"][0]
else:
return {}
def find_pull_requests(self, label_name):
'''
"""
Get all pull-requests filtered by label name
'''
_QUERY = '''
"""
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
nodes {{
@ -172,18 +189,23 @@ class Query:
}}
}}
}}
'''
"""
query = _QUERY.format(owner=self._owner, name=self._name, label_name=label_name,
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
return self._run(query)['repository']['pullRequests']['nodes']
query = _QUERY.format(
owner=self._owner,
name=self._name,
label_name=label_name,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
return self._run(query)["repository"]["pullRequests"]["nodes"]
def get_pull_requests(self, before_commit):
'''
"""
Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
'''
"""
_QUERY = '''
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
target {{
@ -221,44 +243,60 @@ class Query:
}}
}}
}}
'''
"""
pull_requests = []
not_end = True
query = _QUERY.format(owner=self._owner, name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next='')
query = _QUERY.format(
owner=self._owner,
name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next="",
)
while not_end:
result = self._run(query)['repository']['defaultBranchRef']['target']['history']
not_end = result['pageInfo']['hasNextPage']
query = _QUERY.format(owner=self._owner, name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
result = self._run(query)["repository"]["defaultBranchRef"]["target"][
"history"
]
not_end = result["pageInfo"]["hasNextPage"]
query = _QUERY.format(
owner=self._owner,
name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
for commit in result['nodes']:
for commit in result["nodes"]:
# FIXME: maybe include `before_commit`?
if str(commit['oid']) == str(before_commit):
if str(commit["oid"]) == str(before_commit):
not_end = False
break
# TODO: fetch all pull-requests that were merged in a single commit.
assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size
assert (
commit["associatedPullRequests"]["totalCount"]
<= self._min_page_size
)
for pull_request in commit['associatedPullRequests']['nodes']:
if(pull_request['baseRepository']['nameWithOwner'] == '{}/{}'.format(self._owner, self._name) and
pull_request['baseRefName'] == self.default_branch and
pull_request['mergeCommit']['oid'] == commit['oid']):
for pull_request in commit["associatedPullRequests"]["nodes"]:
if (
pull_request["baseRepository"]["nameWithOwner"]
== "{}/{}".format(self._owner, self._name)
and pull_request["baseRefName"] == self.default_branch
and pull_request["mergeCommit"]["oid"] == commit["oid"]
):
pull_requests.append(pull_request)
return pull_requests
def create_pull_request(self, source, target, title, description="", draft=False, can_modify=True):
_QUERY = '''
def create_pull_request(
self, source, target, title, description="", draft=False, can_modify=True
):
_QUERY = """
createPullRequest(input: {{
baseRefName: "{target}",
headRefName: "{source}",
@ -272,15 +310,22 @@ class Query:
{pull_request_data}
}}
}}
'''
"""
query = _QUERY.format(target=target, source=source, id=self._id, title=title, body=description,
draft="true" if draft else "false", modify="true" if can_modify else "false",
pull_request_data=self._PULL_REQUEST)
return self._run(query, is_mutation=True)['createPullRequest']['pullRequest']
query = _QUERY.format(
target=target,
source=source,
id=self._id,
title=title,
body=description,
draft="true" if draft else "false",
modify="true" if can_modify else "false",
pull_request_data=self._PULL_REQUEST,
)
return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
def merge_pull_request(self, id):
_QUERY = '''
_QUERY = """
mergePullRequest(input: {{
pullRequestId: "{id}"
}}) {{
@ -288,35 +333,35 @@ class Query:
{pull_request_data}
}}
}}
'''
"""
query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
return self._run(query, is_mutation=True)['mergePullRequest']['pullRequest']
return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
# FIXME: figure out how to add more assignees at once
def add_assignee(self, pr, assignee):
_QUERY = '''
_QUERY = """
addAssigneesToAssignable(input: {{
assignableId: "{id1}",
assigneeIds: "{id2}"
}}) {{
clientMutationId
}}
'''
"""
query = _QUERY.format(id1=pr['id'], id2=assignee['id'])
query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
self._run(query, is_mutation=True)
def set_label(self, pull_request, label_name):
'''
"""
Set label by name to the pull request
Args:
pull_request: JSON object returned by `get_pull_requests()`
label_name (string): label name
'''
"""
_GET_LABEL = '''
_GET_LABEL = """
repository(owner: "{owner}" name: "{name}") {{
labels(first: {max_page_size} {next} query: "{label_name}") {{
pageInfo {{
@ -330,36 +375,44 @@ class Query:
}}
}}
}}
'''
"""
_SET_LABEL = '''
_SET_LABEL = """
addLabelsToLabelable(input: {{
labelableId: "{pr_id}",
labelIds: "{label_id}"
}}) {{
clientMutationId
}}
'''
"""
labels = []
not_end = True
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
max_page_size=self._max_page_size,
next='')
query = _GET_LABEL.format(
owner=self._owner,
name=self._name,
label_name=label_name,
max_page_size=self._max_page_size,
next="",
)
while not_end:
result = self._run(query)['repository']['labels']
not_end = result['pageInfo']['hasNextPage']
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
result = self._run(query)["repository"]["labels"]
not_end = result["pageInfo"]["hasNextPage"]
query = _GET_LABEL.format(
owner=self._owner,
name=self._name,
label_name=label_name,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
labels += [label for label in result['nodes']]
labels += [label for label in result["nodes"]]
if not labels:
return
query = _SET_LABEL.format(pr_id=pull_request['id'], label_id=labels[0]['id'])
query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
self._run(query, is_mutation=True)
def _run(self, query, is_mutation=False):
@ -385,19 +438,21 @@ class Query:
status_forcelist=status_forcelist,
)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
session.mount("http://", adapter)
session.mount("https://", adapter)
return session
headers = {'Authorization': 'bearer {}'.format(self._token)}
headers = {"Authorization": "bearer {}".format(self._token)}
if is_mutation:
query = '''
query = """
mutation {{
{query}
}}
'''.format(query=query)
""".format(
query=query
)
else:
query = '''
query = """
query {{
{query}
rateLimit {{
@ -405,23 +460,38 @@ class Query:
remaining
}}
}}
'''.format(query=query)
""".format(
query=query
)
while True:
request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers)
request = requests_retry_session().post(
"https://api.github.com/graphql", json={"query": query}, headers=headers
)
if request.status_code == 200:
result = request.json()
if 'errors' in result:
raise Exception('Errors occurred: {}\nOriginal query: {}'.format(result["errors"], query))
if "errors" in result:
raise Exception(
"Errors occurred: {}\nOriginal query: {}".format(
result["errors"], query
)
)
if not is_mutation:
import inspect
caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
if caller not in list(self.api_costs.keys()):
self.api_costs[caller] = 0
self.api_costs[caller] += result['data']['rateLimit']['cost']
self.api_costs[caller] += result["data"]["rateLimit"]["cost"]
return result['data']
return result["data"]
else:
import json
raise Exception('Query failed with code {code}:\n{json}'.format(code=request.status_code, json=json.dumps(request.json(), indent=4)))
raise Exception(
"Query failed with code {code}:\n{json}".format(
code=request.status_code,
json=json.dumps(request.json(), indent=4),
)
)

View File

@ -14,7 +14,7 @@ CI_CONFIG = {
"package_type": "deb",
"bundled": "bundled",
"splitted": "unsplitted",
"alien_pkgs": True,
"additional_pkgs": True,
"tidy": "disable",
"with_coverage": False,
},
@ -45,7 +45,7 @@ CI_CONFIG = {
"package_type": "deb",
"bundled": "bundled",
"splitted": "unsplitted",
"alien_pkgs": True,
"additional_pkgs": True,
"tidy": "disable",
"with_coverage": False,
},
@ -349,6 +349,9 @@ CI_CONFIG = {
"Stateless tests flaky check (address, actions)": {
"required_build": "package_asan",
},
"Stateless tests bugfix validate check (address, actions)": {
"required_build": "package_asan",
},
"ClickHouse Keeper Jepsen (actions)": {
"required_build": "binary_release",
},

View File

@ -6,6 +6,7 @@ import json
import requests # type: ignore
from get_robot_token import get_parameter_from_ssm
class ClickHouseHelper:
def __init__(self, url=None, user=None, password=None):
self.url2 = None
@ -15,27 +16,35 @@ class ClickHouseHelper:
url = get_parameter_from_ssm("clickhouse-test-stat-url")
self.url2 = get_parameter_from_ssm("clickhouse-test-stat-url2")
self.auth2 = {
'X-ClickHouse-User': get_parameter_from_ssm("clickhouse-test-stat-login2"),
'X-ClickHouse-Key': ''
"X-ClickHouse-User": get_parameter_from_ssm(
"clickhouse-test-stat-login2"
),
"X-ClickHouse-Key": "",
}
self.url = url
self.auth = {
'X-ClickHouse-User': user if user is not None else get_parameter_from_ssm("clickhouse-test-stat-login"),
'X-ClickHouse-Key': password if password is not None else get_parameter_from_ssm("clickhouse-test-stat-password")
"X-ClickHouse-User": user
if user is not None
else get_parameter_from_ssm("clickhouse-test-stat-login"),
"X-ClickHouse-Key": password
if password is not None
else get_parameter_from_ssm("clickhouse-test-stat-password"),
}
@staticmethod
def _insert_json_str_info_impl(url, auth, db, table, json_str):
params = {
'database': db,
'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table),
'date_time_input_format': 'best_effort',
'send_logs_level': 'warning',
"database": db,
"query": "INSERT INTO {table} FORMAT JSONEachRow".format(table=table),
"date_time_input_format": "best_effort",
"send_logs_level": "warning",
}
for i in range(5):
response = requests.post(url, params=params, data=json_str, headers=auth, verify=False)
response = requests.post(
url, params=params, data=json_str, headers=auth, verify=False
)
logging.info("Response content '%s'", response.content)
@ -43,16 +52,25 @@ class ClickHouseHelper:
break
error = (
"Cannot insert data into clickhouse at try " + str(i)
+ ": HTTP code " + str(response.status_code) + ": '"
+ str(response.text) + "'")
"Cannot insert data into clickhouse at try "
+ str(i)
+ ": HTTP code "
+ str(response.status_code)
+ ": '"
+ str(response.text)
+ "'"
)
if response.status_code >= 500:
# A retriable error
time.sleep(1)
continue
logging.info("Request headers '%s', body '%s'", response.request.headers, response.request.body)
logging.info(
"Request headers '%s', body '%s'",
response.request.headers,
response.request.body,
)
raise Exception(error)
else:
@ -72,18 +90,20 @@ class ClickHouseHelper:
for event in events:
jsons.append(json.dumps(event))
self._insert_json_str_info(db, table, ','.join(jsons))
self._insert_json_str_info(db, table, ",".join(jsons))
def _select_and_get_json_each_row(self, db, query):
params = {
'database': db,
'query': query,
'default_format': 'JSONEachRow',
"database": db,
"query": query,
"default_format": "JSONEachRow",
}
for i in range(5):
response = None
try:
response = requests.get(self.url, params=params, headers=self.auth, verify=False)
response = requests.get(
self.url, params=params, headers=self.auth, verify=False
)
response.raise_for_status()
return response.text
except Exception as ex:
@ -97,15 +117,21 @@ class ClickHouseHelper:
def select_json_each_row(self, db, query):
text = self._select_and_get_json_each_row(db, query)
result = []
for line in text.split('\n'):
for line in text.split("\n"):
if line:
result.append(json.loads(line))
return result
def prepare_tests_results_for_clickhouse(
pr_info, test_results,
check_status, check_duration, check_start_time,
report_url, check_name):
pr_info,
test_results,
check_status,
check_duration,
check_start_time,
report_url,
check_name,
):
pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
base_ref = "master"
@ -147,13 +173,14 @@ def prepare_tests_results_for_clickhouse(
test_time = 0
if len(test_result) > 2 and test_result[2]:
test_time = test_result[2]
current_row['test_duration_ms'] = int(float(test_time) * 1000)
current_row['test_name'] = test_name
current_row['test_status'] = test_status
current_row["test_duration_ms"] = int(float(test_time) * 1000)
current_row["test_name"] = test_name
current_row["test_status"] = test_status
result.append(current_row)
return result
def mark_flaky_tests(clickhouse_helper, check_name, test_results):
try:
query = """
@ -164,14 +191,16 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results):
AND check_name = '{check_name}'
AND (test_status = 'FAIL' OR test_status = 'FLAKY')
AND pull_request_number = 0
""".format(check_name=check_name)
""".format(
check_name=check_name
)
tests_data = clickhouse_helper.select_json_each_row('gh-data', query)
master_failed_tests = {row['test_name'] for row in tests_data}
logging.info("Found flaky tests: %s", ', '.join(master_failed_tests))
tests_data = clickhouse_helper.select_json_each_row("gh-data", query)
master_failed_tests = {row["test_name"] for row in tests_data}
logging.info("Found flaky tests: %s", ", ".join(master_failed_tests))
for test_result in test_results:
if test_result[1] == 'FAIL' and test_result[0] in master_failed_tests:
test_result[1] = 'FLAKY'
if test_result[1] == "FAIL" and test_result[0] in master_failed_tests:
test_result[1] = "FLAKY"
except Exception as ex:
logging.info("Exception happened during flaky tests fetch %s", ex)

View File

@ -18,13 +18,16 @@ from tee_popen import TeePopen
NAME = "Woboq Build (actions)"
def get_run_command(repo_path, output_path, image):
cmd = "docker run " + \
f"--volume={repo_path}:/repo_folder " \
f"--volume={output_path}:/test_output " \
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
cmd = (
"docker run " + f"--volume={repo_path}:/repo_folder "
f"--volume={output_path}:/test_output "
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
)
return cmd
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@ -37,8 +40,8 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
docker_image = get_image_with_version(IMAGES_PATH, 'clickhouse/codebrowser')
s3_helper = S3Helper('https://s3.amazonaws.com')
docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser")
s3_helper = S3Helper("https://s3.amazonaws.com")
result_path = os.path.join(temp_path, "result_path")
if not os.path.exists(result_path):
@ -62,14 +65,20 @@ if __name__ == "__main__":
report_path = os.path.join(result_path, "html_report")
logging.info("Report path %s", report_path)
s3_path_prefix = "codebrowser"
html_urls = s3_helper.fast_parallel_upload_dir(report_path, s3_path_prefix, 'clickhouse-test-reports')
html_urls = s3_helper.fast_parallel_upload_dir(
report_path, s3_path_prefix, "clickhouse-test-reports"
)
index_html = '<a href="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/index.html">HTML report</a>'
test_results = [(index_html, "Look at the report")]
report_url = upload_results(s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME)
report_url = upload_results(
s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url)
post_commit_status(
gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url
)

View File

@ -1,15 +1,23 @@
#!/usr/bin/env python3
import time
import os
import csv
from env_helper import GITHUB_REPOSITORY
from ci_config import CI_CONFIG
RETRY = 5
def override_status(status, check_name):
if CI_CONFIG["tests_config"][check_name].get("force_tests", False):
def override_status(status, check_name, invert=False):
if CI_CONFIG["tests_config"].get(check_name, {}).get("force_tests", False):
return "success"
if invert:
if status == "success":
return "error"
return "success"
return status
@ -43,3 +51,11 @@ def post_commit_status(gh, sha, check_name, description, state, report_url):
if i == RETRY - 1:
raise ex
time.sleep(i)
def post_commit_status_to_file(file_path, description, state, report_url):
if os.path.exists(file_path):
raise Exception(f'File "{file_path}" already exists!')
with open(file_path, "w", encoding="utf-8") as f:
out = csv.writer(f, delimiter="\t")
out.writerow([state, report_url, description])

View File

@ -16,34 +16,40 @@ from build_download_helper import download_builds_filter
from upload_result_helper import upload_results
from docker_pull_helper import get_images_with_versions
from commit_status_helper import post_commit_status
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
IMAGE_UBUNTU = "clickhouse/test-old-ubuntu"
IMAGE_CENTOS = "clickhouse/test-old-centos"
MAX_GLIBC_VERSION = '2.4'
MAX_GLIBC_VERSION = "2.4"
DOWNLOAD_RETRIES_COUNT = 5
CHECK_NAME = "Compatibility check (actions)"
def process_os_check(log_path):
name = os.path.basename(log_path)
with open(log_path, 'r') as log:
line = log.read().split('\n')[0].strip()
if line != 'OK':
with open(log_path, "r") as log:
line = log.read().split("\n")[0].strip()
if line != "OK":
return (name, "FAIL")
else:
return (name, "OK")
def process_glibc_check(log_path):
bad_lines = []
with open(log_path, 'r') as log:
with open(log_path, "r") as log:
for line in log:
if line.strip():
columns = line.strip().split(' ')
columns = line.strip().split(" ")
symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5
_, version = symbol_with_glibc.split('@GLIBC_')
if version == 'PRIVATE':
_, version = symbol_with_glibc.split("@GLIBC_")
if version == "PRIVATE":
bad_lines.append((symbol_with_glibc, "FAIL"))
elif StrictVersion(version) > MAX_GLIBC_VERSION:
bad_lines.append((symbol_with_glibc, "FAIL"))
@ -51,6 +57,7 @@ def process_glibc_check(log_path):
bad_lines.append(("glibc check", "OK"))
return bad_lines
def process_result(result_folder, server_log_folder):
summary = process_glibc_check(os.path.join(result_folder, "glibc.log"))
@ -86,16 +93,18 @@ def process_result(result_folder, server_log_folder):
return status, description, summary, result_logs
def get_run_commands(build_path, result_folder, server_log_folder, image_centos, image_ubuntu):
def get_run_commands(
build_path, result_folder, server_log_folder, image_centos, image_ubuntu
):
return [
f"readelf -s {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log",
f"readelf -s {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \
f"--volume={build_path}/etc/clickhouse-server:/config " \
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \
f"--volume={build_path}/etc/clickhouse-server:/config " \
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
f"--volume={build_path}/etc/clickhouse-server:/config "
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
f"--volume={build_path}/etc/clickhouse-server:/config "
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
]
@ -124,14 +133,18 @@ if __name__ == "__main__":
os.makedirs(packages_path)
def url_filter(url):
return url.endswith('.deb') and ('clickhouse-common-static_' in url or 'clickhouse-server_' in url)
return url.endswith(".deb") and (
"clickhouse-common-static_" in url or "clickhouse-server_" in url
)
download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter)
for f in os.listdir(packages_path):
if '.deb' in f:
if ".deb" in f:
full_path = os.path.join(packages_path, f)
subprocess.check_call(f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True)
subprocess.check_call(
f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True
)
server_log_path = os.path.join(temp_path, "server_log")
if not os.path.exists(server_log_path):
@ -141,7 +154,9 @@ if __name__ == "__main__":
if not os.path.exists(result_path):
os.makedirs(result_path)
run_commands = get_run_commands(packages_path, result_path, server_log_path, docker_images[0], docker_images[1])
run_commands = get_run_commands(
packages_path, result_path, server_log_path, docker_images[0], docker_images[1]
)
state = "success"
for run_command in run_commands:
@ -154,15 +169,32 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com')
state, description, test_results, additional_logs = process_result(result_path, server_log_path)
s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_result(
result_path, server_log_path
)
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, CHECK_NAME, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME)
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
additional_logs,
CHECK_NAME,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

View File

@ -3,20 +3,21 @@ import subprocess
import logging
import os
def compress_file_fast(path, archive_path):
if os.path.exists('/usr/bin/pigz'):
if os.path.exists("/usr/bin/pigz"):
subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True)
else:
subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True)
def compress_fast(path, archive_path, exclude=None):
pigz_part = ''
if os.path.exists('/usr/bin/pigz'):
pigz_part = ""
if os.path.exists("/usr/bin/pigz"):
logging.info("pigz found, will compress and decompress faster")
pigz_part = "--use-compress-program='pigz'"
else:
pigz_part = '-z'
pigz_part = "-z"
logging.info("no pigz, compressing with default tar")
if exclude is None:
@ -31,21 +32,36 @@ def compress_fast(path, archive_path, exclude=None):
path = os.path.dirname(path)
else:
path += "/.."
cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname)
cmd = "tar {} {} -cf {} -C {} {}".format(
pigz_part, exclude_part, archive_path, path, fname
)
logging.debug("compress_fast cmd: %s", cmd)
subprocess.check_call(cmd, shell=True)
def decompress_fast(archive_path, result_path=None):
pigz_part = ''
if os.path.exists('/usr/bin/pigz'):
logging.info("pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, result_path)
pigz_part = ""
if os.path.exists("/usr/bin/pigz"):
logging.info(
"pigz found, will compress and decompress faster ('%s' -> '%s')",
archive_path,
result_path,
)
pigz_part = "--use-compress-program='pigz'"
else:
pigz_part = '-z'
logging.info("no pigz, decompressing with default tar ('%s' -> '%s')", archive_path, result_path)
pigz_part = "-z"
logging.info(
"no pigz, decompressing with default tar ('%s' -> '%s')",
archive_path,
result_path,
)
if result_path is None:
subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True)
subprocess.check_call(
"tar {} -xf {}".format(pigz_part, archive_path), shell=True
)
else:
subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True)
subprocess.check_call(
"tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path),
shell=True,
)

View File

@ -6,23 +6,29 @@ import time
import subprocess
import logging
from typing import Optional
class DockerImage:
def __init__(self, name, version=None):
def __init__(self, name, version: Optional[str] = None):
self.name = name
if version is None:
self.version = 'latest'
self.version = "latest"
else:
self.version = version
def __str__(self):
return f"{self.name}:{self.version}"
def get_images_with_versions(reports_path, required_image, pull=True):
def get_images_with_versions(
reports_path, required_image, pull=True, version: Optional[str] = None
):
images_path = None
for root, _, files in os.walk(reports_path):
for f in files:
if f == 'changed_images.json':
images_path = os.path.join(root, 'changed_images.json')
if f == "changed_images.json":
images_path = os.path.join(root, "changed_images.json")
break
if not images_path:
@ -32,7 +38,7 @@ def get_images_with_versions(reports_path, required_image, pull=True):
if images_path is not None and os.path.exists(images_path):
logging.info("Images file exists")
with open(images_path, 'r', encoding='utf-8') as images_fd:
with open(images_path, "r", encoding="utf-8") as images_fd:
images = json.load(images_fd)
logging.info("Got images %s", images)
else:
@ -40,7 +46,7 @@ def get_images_with_versions(reports_path, required_image, pull=True):
docker_images = []
for image_name in required_image:
docker_image = DockerImage(image_name)
docker_image = DockerImage(image_name, version)
if image_name in images:
docker_image.version = images[image_name]
docker_images.append(docker_image)
@ -50,15 +56,22 @@ def get_images_with_versions(reports_path, required_image, pull=True):
for i in range(10):
try:
logging.info("Pulling image %s", docker_image)
latest_error = subprocess.check_output(f"docker pull {docker_image}", stderr=subprocess.STDOUT, shell=True)
latest_error = subprocess.check_output(
f"docker pull {docker_image}",
stderr=subprocess.STDOUT,
shell=True,
)
break
except Exception as ex:
time.sleep(i * 3)
logging.info("Got execption pulling docker %s", ex)
else:
raise Exception(f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}")
raise Exception(
f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}"
)
return docker_images
def get_image_with_version(reports_path, image, pull=True):
return get_images_with_versions(reports_path, [image], pull)[0]
def get_image_with_version(reports_path, image, pull=True, version=None):
return get_images_with_versions(reports_path, [image], pull, version=version)[0]

View File

@ -40,7 +40,9 @@ if __name__ == "__main__":
if not pr_info.has_changes_in_documentation():
logging.info("No changes in documentation")
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=NAME, description="No changes in docs", state="success")
commit.create_status(
context=NAME, description="No changes in docs", state="success"
)
sys.exit(0)
logging.info("Has changes in docs")
@ -48,15 +50,15 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-check')
docker_image = get_image_with_version(temp_path, "clickhouse/docs-check")
test_output = os.path.join(temp_path, 'docs_check_log')
test_output = os.path.join(temp_path, "docs_check_log")
if not os.path.exists(test_output):
os.makedirs(test_output)
cmd = f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
run_log_path = os.path.join(test_output, 'runlog.log')
run_log_path = os.path.join(test_output, "runlog.log")
logging.info("Running command: '%s'", cmd)
with TeePopen(cmd, run_log_path) as process:
@ -82,10 +84,10 @@ if __name__ == "__main__":
for f in files:
path = os.path.join(test_output, f)
additional_files.append(path)
with open(path, 'r', encoding='utf-8') as check_file:
with open(path, "r", encoding="utf-8") as check_file:
for line in check_file:
if "ERROR" in line:
lines.append((line.split(':')[-1], "FAIL"))
lines.append((line.split(":")[-1], "FAIL"))
if lines:
status = "failure"
description = "Found errors in docs"
@ -94,12 +96,22 @@ if __name__ == "__main__":
else:
lines.append(("Non zero exit code", "FAIL"))
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
ch_helper = ClickHouseHelper()
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
report_url = upload_results(
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
)
print("::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, NAME, description, status, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, lines, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
lines,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

View File

@ -34,19 +34,23 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-release')
docker_image = get_image_with_version(temp_path, "clickhouse/docs-release")
test_output = os.path.join(temp_path, 'docs_release_log')
test_output = os.path.join(temp_path, "docs_release_log")
if not os.path.exists(test_output):
os.makedirs(test_output)
token = CLOUDFLARE_TOKEN
cmd = "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " \
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
cmd = (
"docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent "
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
)
run_log_path = os.path.join(test_output, 'runlog.log')
run_log_path = os.path.join(test_output, "runlog.log")
with open(run_log_path, 'w', encoding='utf-8') as log, SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
with open(run_log_path, "w", encoding="utf-8") as log, SSHKey(
"ROBOT_CLICKHOUSE_SSH_KEY"
):
with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process:
retcode = process.wait()
if retcode == 0:
@ -70,10 +74,10 @@ if __name__ == "__main__":
for f in files:
path = os.path.join(test_output, f)
additional_files.append(path)
with open(path, 'r', encoding='utf-8') as check_file:
with open(path, "r", encoding="utf-8") as check_file:
for line in check_file:
if "ERROR" in line:
lines.append((line.split(':')[-1], "FAIL"))
lines.append((line.split(":")[-1], "FAIL"))
if lines:
status = "failure"
description = "Found errors in docs"
@ -82,9 +86,13 @@ if __name__ == "__main__":
else:
lines.append(("Non zero exit code", "FAIL"))
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
report_url = upload_results(
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
)
print("::notice ::Report url: {report_url}")
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=NAME, description=description, state=status, target_url=report_url)
commit.create_status(
context=NAME, description=description, state=status, target_url=report_url
)

View File

@ -0,0 +1,165 @@
#!/usr/bin/env python3
###########################################################################
# #
# TODO (@vdimir, @Avogar) #
# Merge with one from https://github.com/ClickHouse/ClickHouse/pull/27928 #
# #
###########################################################################
import re
import os
import logging
import requests
CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags"
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb"
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static-dbg_{version}_amd64.deb"
CLICKHOUSE_SERVER_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-server_{version}_all.deb"
CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-client_{version}_all.deb"
CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb"
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = (
"clickhouse-common-static-dbg_{version}_amd64.deb"
)
CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb"
CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb"
PACKETS_DIR = "previous_release_package_folder/"
VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)"
class Version:
def __init__(self, version):
self.version = version
def __lt__(self, other):
return list(map(int, self.version.split("."))) < list(
map(int, other.version.split("."))
)
def __str__(self):
return self.version
class ReleaseInfo:
def __init__(self, version, release_type):
self.version = version
self.type = release_type
def __repr__(self):
return f"ReleaseInfo: {self.version}-{self.type}"
def find_previous_release(server_version, releases):
releases.sort(key=lambda x: x.version, reverse=True)
if server_version is None:
return True, releases[0]
for release in releases:
if release.version < server_version:
return True, release
return False, None
def get_previous_release(server_version=None):
page = 1
found = False
while not found:
response = requests.get(CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100})
if not response.ok:
raise Exception(
"Cannot load the list of tags from github: " + response.reason
)
releases_str = set(re.findall(VERSION_PATTERN, response.text))
if len(releases_str) == 0:
raise Exception(
"Cannot find previous release for "
+ str(server_version)
+ " server version"
)
releases = list(
map(
lambda x: ReleaseInfo(Version(x.split("-")[0]), x.split("-")[1]),
releases_str,
)
)
found, previous_release = find_previous_release(server_version, releases)
page += 1
return previous_release
def download_packet(url, out_path):
"""
TODO: use dowload_build_with_progress from build_download_helper.py
"""
response = requests.get(url)
logging.info("Downloading %s", url)
if response.ok:
open(out_path, "wb").write(response.content)
def download_packets(release, dest_path=PACKETS_DIR):
if not os.path.exists(dest_path):
os.makedirs(dest_path)
logging.info("Will download %s", release)
download_packet(
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(
version=release.version, type=release.type
),
out_path=os.path.join(
dest_path,
CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version),
),
)
download_packet(
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(
version=release.version, type=release.type
),
out_path=os.path.join(
dest_path,
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version),
),
)
download_packet(
CLICKHOUSE_SERVER_DOWNLOAD_URL.format(
version=release.version, type=release.type
),
out_path=os.path.join(
dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)
),
)
download_packet(
CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(
version=release.version, type=release.type
),
out_path=os.path.join(
dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)
),
)
def download_previous_release(dest_path):
current_release = get_previous_release(None)
download_packets(current_release, dest_path=dest_path)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
server_version = Version(input())
previous_release = get_previous_release(server_version)
download_packets(previous_release)

View File

@ -7,7 +7,7 @@ from pr_info import PRInfo
from get_robot_token import get_best_robot_token
from commit_status_helper import get_commit
NAME = 'Run Check (actions)'
NAME = "Run Check (actions)"
def filter_statuses(statuses):
@ -36,4 +36,9 @@ if __name__ == "__main__":
url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
statuses = filter_statuses(list(commit.get_statuses()))
if NAME in statuses and statuses[NAME].state == "pending":
commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url)
commit.create_status(
context=NAME,
description="All checks finished",
state="success",
target_url=url,
)

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3
import argparse
import csv
import logging
import subprocess
import os
import subprocess
import sys
from github import Github
@ -13,25 +14,38 @@ from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from build_download_helper import download_all_deb_packages
from download_previous_release import download_previous_release
from upload_result_helper import upload_results
from docker_pull_helper import get_image_with_version
from commit_status_helper import post_commit_status, get_commit, override_status
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
from commit_status_helper import (
post_commit_status,
get_commit,
override_status,
post_commit_status_to_file,
)
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
from tee_popen import TeePopen
NO_CHANGES_MSG = "Nothing to run"
def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
result = []
if 'DatabaseReplicated' in check_name:
if "DatabaseReplicated" in check_name:
result.append("USE_DATABASE_REPLICATED=1")
if 'DatabaseOrdinary' in check_name:
if "DatabaseOrdinary" in check_name:
result.append("USE_DATABASE_ORDINARY=1")
if 'wide parts enabled' in check_name:
if "wide parts enabled" in check_name:
result.append("USE_POLYMORPHIC_PARTS=1")
#temporary
if 's3 storage' in check_name:
# temporary
if "s3 storage" in check_name:
result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1")
if run_by_hash_total != 0:
@ -40,36 +54,55 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
return result
def get_image_name(check_name):
if 'stateless' in check_name.lower():
return 'clickhouse/stateless-test'
if 'stateful' in check_name.lower():
return 'clickhouse/stateful-test'
if "stateless" in check_name.lower():
return "clickhouse/stateless-test"
if "stateful" in check_name.lower():
return "clickhouse/stateful-test"
else:
raise Exception(f"Cannot deduce image name based on check name {check_name}")
def get_run_command(builds_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, image, flaky_check, tests_to_run):
additional_options = ['--hung-check']
additional_options.append('--print-time')
def get_run_command(
builds_path,
repo_tests_path,
result_path,
server_log_path,
kill_timeout,
additional_envs,
image,
flaky_check,
tests_to_run,
):
additional_options = ["--hung-check"]
additional_options.append("--print-time")
if tests_to_run:
additional_options += tests_to_run
additional_options_str = '-e ADDITIONAL_OPTIONS="' + ' '.join(additional_options) + '"'
additional_options_str = (
'-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
)
envs = [f'-e MAX_RUN_TIME={int(0.9 * kill_timeout)}', '-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"']
envs = [
f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}",
'-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"',
]
if flaky_check:
envs += ['-e NUM_TRIES=100', '-e MAX_RUN_TIME=1800']
envs += ["-e NUM_TRIES=100", "-e MAX_RUN_TIME=1800"]
envs += [f'-e {e}' for e in additional_envs]
envs += [f"-e {e}" for e in additional_envs]
env_str = ' '.join(envs)
env_str = " ".join(envs)
return f"docker run --volume={builds_path}:/package_folder " \
f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " \
return (
f"docker run --volume={builds_path}:/package_folder "
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server "
f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
)
def get_tests_to_run(pr_info):
@ -79,32 +112,43 @@ def get_tests_to_run(pr_info):
return []
for fpath in pr_info.changed_files:
if 'tests/queries/0_stateless/0' in fpath:
logging.info('File %s changed and seems like stateless test', fpath)
fname = fpath.split('/')[3]
if "tests/queries/0_stateless/0" in fpath:
logging.info("File %s changed and seems like stateless test", fpath)
fname = fpath.split("/")[3]
fname_without_ext = os.path.splitext(fname)[0]
result.add(fname_without_ext + '.')
result.add(fname_without_ext + ".")
return list(result)
def process_results(result_folder, server_log_path):
test_results = []
additional_files = []
# Just upload all files from result_folder.
# If task provides processed results, then it's responsible for content of result_folder.
if os.path.exists(result_folder):
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
test_files = [
f
for f in os.listdir(result_folder)
if os.path.isfile(os.path.join(result_folder, f))
]
additional_files = [os.path.join(result_folder, f) for f in test_files]
if os.path.exists(server_log_path):
server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))]
additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files]
server_log_files = [
f
for f in os.listdir(server_log_path)
if os.path.isfile(os.path.join(server_log_path, f))
]
additional_files = additional_files + [
os.path.join(server_log_path, f) for f in server_log_files
]
status = []
status_path = os.path.join(result_folder, "check_status.tsv")
if os.path.exists(status_path):
logging.info("Found test_results.tsv")
with open(status_path, 'r', encoding='utf-8') as status_file:
status = list(csv.reader(status_file, delimiter='\t'))
with open(status_path, "r", encoding="utf-8") as status_file:
status = list(csv.reader(status_file, delimiter="\t"))
if len(status) != 1 or len(status[0]) != 2:
logging.info("Files in result folder %s", os.listdir(result_folder))
@ -119,14 +163,32 @@ def process_results(result_folder, server_log_path):
logging.info("Files in result folder %s", os.listdir(result_folder))
return "error", "Not found test_results.tsv", test_results, additional_files
with open(results_path, 'r', encoding='utf-8') as results_file:
test_results = list(csv.reader(results_file, delimiter='\t'))
with open(results_path, "r", encoding="utf-8") as results_file:
test_results = list(csv.reader(results_file, delimiter="\t"))
if len(test_results) == 0:
return "error", "Empty test_results.tsv", test_results, additional_files
return state, description, test_results, additional_files
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("check_name")
parser.add_argument("kill_timeout", type=int)
parser.add_argument(
"--validate-bugfix",
action="store_true",
help="Check that added tests failed on latest stable",
)
parser.add_argument(
"--post-commit-status",
default="commit_status",
choices=["commit_status", "file"],
help="Where to public post commit status",
)
return parser.parse_args()
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@ -136,18 +198,38 @@ if __name__ == "__main__":
repo_path = REPO_COPY
reports_path = REPORTS_PATH
check_name = sys.argv[1]
kill_timeout = int(sys.argv[2])
args = parse_args()
check_name = args.check_name
kill_timeout = args.kill_timeout
validate_bugix_check = args.validate_bugfix
flaky_check = 'flaky' in check_name.lower()
flaky_check = "flaky" in check_name.lower()
run_changed_tests = flaky_check or validate_bugix_check
gh = Github(get_best_robot_token())
pr_info = PRInfo(need_changed_files=flaky_check)
pr_info = PRInfo(need_changed_files=run_changed_tests)
if 'RUN_BY_HASH_NUM' in os.environ:
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM'))
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL'))
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]'
if not os.path.exists(temp_path):
os.makedirs(temp_path)
if validate_bugix_check and "pr-bugfix" not in pr_info.labels:
if args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
"Skipped (no pr-bugfix)",
"success",
"null",
)
logging.info("Skipping '%s' (no pr-bugfix)", check_name)
sys.exit(0)
if "RUN_BY_HASH_NUM" in os.environ:
run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
check_name_with_group = (
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
)
else:
run_by_hash_num = 0
run_by_hash_total = 0
@ -158,15 +240,23 @@ if __name__ == "__main__":
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
if not os.path.exists(temp_path):
os.makedirs(temp_path)
tests_to_run = []
if flaky_check:
if run_changed_tests:
tests_to_run = get_tests_to_run(pr_info)
if not tests_to_run:
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=check_name_with_group, description='Not found changed stateless tests', state='success')
state = override_status("success", check_name, validate_bugix_check)
if args.post_commit_status == "commit_status":
commit.create_status(
context=check_name_with_group,
description=NO_CHANGES_MSG,
state=state,
)
elif args.post_commit_status == "file":
fpath = os.path.join(temp_path, "post_commit_status.tsv")
post_commit_status_to_file(
fpath, description=NO_CHANGES_MSG, state=state, report_url="null"
)
sys.exit(0)
image_name = get_image_name(check_name)
@ -178,7 +268,10 @@ if __name__ == "__main__":
if not os.path.exists(packages_path):
os.makedirs(packages_path)
download_all_deb_packages(check_name, reports_path, packages_path)
if validate_bugix_check:
download_previous_release(packages_path)
else:
download_all_deb_packages(check_name, reports_path, packages_path)
server_log_path = os.path.join(temp_path, "server_log")
if not os.path.exists(server_log_path):
@ -190,8 +283,23 @@ if __name__ == "__main__":
run_log_path = os.path.join(result_path, "runlog.log")
additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total)
run_command = get_run_command(packages_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run)
additional_envs = get_additional_envs(
check_name, run_by_hash_num, run_by_hash_total
)
if validate_bugix_check:
additional_envs.append("GLOBAL_TAGS=no-random-settings")
run_command = get_run_command(
packages_path,
repo_tests_path,
result_path,
server_log_path,
kill_timeout,
additional_envs,
docker_image,
flaky_check,
tests_to_run,
)
logging.info("Going to run func tests: %s", run_command)
with TeePopen(run_command, run_log_path) as process:
@ -203,24 +311,55 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_results(result_path, server_log_path)
state = override_status(state, check_name)
state, description, test_results, additional_logs = process_results(
result_path, server_log_path
)
state = override_status(state, check_name, validate_bugix_check)
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group)
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[run_log_path] + additional_logs,
check_name_with_group,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url)
print(f"::notice:: {check_name} Report url: {report_url}")
if args.post_commit_status == "commit_status":
post_commit_status(
gh, pr_info.sha, check_name_with_group, description, state, report_url
)
elif args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
description,
state,
report_url,
)
else:
raise Exception(
f'Unknown post_commit_status option "{args.post_commit_status}"'
)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name_with_group,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
if state != 'success':
if 'force-tests' in pr_info.labels:
if state != "success":
if "force-tests" in pr_info.labels:
print("'force-tests' enabled, will report success")
else:
sys.exit(1)

View File

@ -2,13 +2,15 @@
import boto3 # type: ignore
from github import Github # type: ignore
def get_parameter_from_ssm(name, decrypt=True, client=None):
if not client:
client = boto3.client('ssm', region_name='us-east-1')
return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value']
client = boto3.client("ssm", region_name="us-east-1")
return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]
def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4):
client = boto3.client('ssm', region_name='us-east-1')
client = boto3.client("ssm", region_name="us-east-1")
tokens = {}
for i in range(1, total_tokens + 1):
token_name = token_prefix_env_name + str(i)

Some files were not shown because too many files have changed in this diff Show More