Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-29 11:02:08 +00:00)

Commit c2c7f365ce: Merge master
.github/ISSUE_TEMPLATE/10_question.md (vendored), 2 lines changed

@@ -7,6 +7,6 @@ assignees: ''
 ---

-> Make sure to check documentation https://clickhouse.yandex/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
+> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse

 > If you still prefer GitHub issues, remove all this text and ask your question here.
.github/ISSUE_TEMPLATE/50_build-issue.md (vendored), 2 lines changed

@@ -7,7 +7,7 @@ assignees: ''
 ---

-> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.yandex/docs/en/development/build/
+> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/

 **Operating system**
.github/workflows/nightly.yml (vendored), 1 line changed

@@ -7,6 +7,7 @@ env:
 "on":
   schedule:
     - cron: '13 3 * * *'
+  workflow_dispatch:

 jobs:
   DockerHubPushAarch64:
.github/workflows/pull_request.yml (vendored), 45 lines changed

@@ -1733,6 +1733,51 @@ jobs:
           docker kill "$(docker ps -q)" ||:
           docker rm -f "$(docker ps -a -q)" ||:
           sudo rm -fr "$TEMP_PATH"
+  TestsBugfixCheck:
+    runs-on: [self-hosted, stress-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/tests_bugfix_check
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Tests bugfix validate check (actions)
+          KILL_TIMEOUT=3600
+          REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v2
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Bugfix test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+
+          TEMP_PATH="${TEMP_PATH}/integration" \
+          REPORTS_PATH="${REPORTS_PATH}/integration" \
+            python3 integration_test_check.py "Integration tests bugfix validate check" \
+              --validate-bugfix --post-commit-status=file || echo 'ignore exit code'
+
+          TEMP_PATH="${TEMP_PATH}/stateless" \
+          REPORTS_PATH="${REPORTS_PATH}/stateless" \
+            python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \
+              --validate-bugfix --post-commit-status=file || echo 'ignore exit code'
+
+          python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
 ##############################################################################################
 ############################ FUNCTIONAl STATEFUL TESTS #######################################
 ##############################################################################################
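A small aside on the pattern used in the Bugfix test step above: a `VAR=value` prefix applies only to the single command it precedes and does not modify the shell's own variable. A minimal illustration (the paths below are placeholders, not taken from the workflow):

    TEMP_PATH=/tmp/base
    TEMP_PATH="${TEMP_PATH}/stateless" python3 -c 'import os; print(os.environ["TEMP_PATH"])'   # prints /tmp/base/stateless
    echo "$TEMP_PATH"                                                                           # still /tmp/base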
CHANGELOG.md, 13 lines changed

@@ -1,4 +1,11 @@
-### ClickHouse release v22.3-lts, 2022-03-17
+### Table of Contents
+**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br>
+**[ClickHouse release v22.2, 2022-02-17](#222)**<br>
+**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
+**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**<br>
+
+
+## <a id="223"></a> ClickHouse release v22.3-lts, 2022-03-17

 #### Backward Incompatible Change

@@ -125,7 +132,7 @@
 * Fix inconsistency of `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)).


-### ClickHouse release v22.2, 2022-02-17
+### <a id="222"></a> ClickHouse release v22.2, 2022-02-17

 #### Upgrade Notes

@@ -301,7 +308,7 @@
 * This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)).


-### ClickHouse release v22.1, 2022-01-18
+### <a id="221"></a> ClickHouse release v22.1, 2022-01-18

 #### Upgrade Notes
@@ -267,7 +267,10 @@ endif ()

 # Allows to build stripped binary in a separate directory
 if (OBJCOPY_PATH AND READELF_PATH)
-    set(BUILD_STRIPPED_BINARIES_PREFIX "" CACHE STRING "Build stripped binaries with debug info in separate directory")
+    option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
+    if (INSTALL_STRIPPED_BINARIES)
+        set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
+    endif()
 endif()

 cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
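For context, a minimal sketch of how the option introduced in the hunk above might be enabled. Only the INSTALL_STRIPPED_BINARIES flag and the "stripped" directory name come from the diff itself; the rest is generic out-of-source CMake usage and not part of this commit:

    # Hypothetical invocation, assuming a standard out-of-source build directory.
    mkdir -p build && cd build
    cmake -DINSTALL_STRIPPED_BINARIES=ON ..
    ninja clickhouse-bundle   # stripped binaries plus split debug info would be placed under the "stripped" output directory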
@@ -4,6 +4,7 @@
 import sys
 import json


 def parse_block(block=[], options=[]):

     # print('block is here', block)
@@ -37,11 +38,11 @@ def read_stats_file(options, fname):
     for line in f.readlines():

-        if 'SELECT' in line:
+        if "SELECT" in line:
             if len(block) > 1:
                 result.append(parse_block(block, options))
             block = [line]
-        elif 'Time:' in line:
+        elif "Time:" in line:
             block.append(line)

     return result
@@ -50,7 +51,7 @@ def read_stats_file(options, fname):
 def compare_stats_files(options, arguments):
     result = []
     file_output = []
-    pyplot_colors = ['y', 'b', 'g', 'r']
+    pyplot_colors = ["y", "b", "g", "r"]
     for fname in arguments[1:]:
         file_output.append((read_stats_file(options, fname)))
     if len(file_output[0]) > 0:
@@ -59,56 +60,82 @@ def compare_stats_files(options, arguments):
             int_result = []
             for timing in data_set:
                 int_result.append(float(timing[0])) # y values
-            result.append([[x for x in range(0, len(int_result)) ], int_result,
-                pyplot_colors[idx] + '^' ] )
+            result.append(
+                [
+                    [x for x in range(0, len(int_result))],
+                    int_result,
+                    pyplot_colors[idx] + "^",
+                ]
+            )
             # result.append([x for x in range(1, len(int_result)) ]) #x values
             # result.append( pyplot_colors[idx] + '^' )

     return result


 def parse_args():
     from optparse import OptionParser
-    parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
-    parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries")
-    parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings")
-    parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode")
+
+    parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
+    parser.add_option(
+        "-q",
+        "--show-queries",
+        help="Show statements along with timings",
+        action="store_true",
+        dest="show_queries",
+    )
+    parser.add_option(
+        "-f",
+        "--show-first-timings",
+        help="Show only first tries timings",
+        action="store_true",
+        dest="show_first_timings",
+    )
+    parser.add_option(
+        "-c",
+        "--compare-mode",
+        help="Prepare output for pyplot comparing result files.",
+        action="store",
+        dest="compare_mode",
+    )
     (options, arguments) = parser.parse_args(sys.argv)
     if len(arguments) < 2:
         parser.print_usage()
         sys.exit(1)
     return (options, arguments)


 def gen_pyplot_code(options, arguments):
-    result = ''
+    result = ""
     data_sets = compare_stats_files(options, arguments)
     for idx, data_set in enumerate(data_sets, start=0):
         x_values, y_values, line_style = data_set
-        result += '\nplt.plot('
-        result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style )
-        result += ', label=\'%s try\')' % idx
-    print('import matplotlib.pyplot as plt')
+        result += "\nplt.plot("
+        result += "%s, %s, '%s'" % (x_values, y_values, line_style)
+        result += ", label='%s try')" % idx
+    print("import matplotlib.pyplot as plt")
     print(result)
-    print( 'plt.xlabel(\'Try number\')' )
-    print( 'plt.ylabel(\'Timing\')' )
-    print( 'plt.title(\'Benchmark query timings\')' )
-    print('plt.legend()')
-    print('plt.show()')
+    print("plt.xlabel('Try number')")
+    print("plt.ylabel('Timing')")
+    print("plt.title('Benchmark query timings')")
+    print("plt.legend()")
+    print("plt.show()")


 def gen_html_json(options, arguments):
     tuples = read_stats_file(options, arguments[1])
-    print('{')
+    print("{")
     print('"system: GreenPlum(x2),')
-    print(('"version": "%s",' % '4.3.9.1'))
+    print(('"version": "%s",' % "4.3.9.1"))
     print('"data_size": 10000000,')
     print('"time": "",')
     print('"comments": "",')
     print('"result":')
-    print('[')
+    print("[")
     for s in tuples:
         print(s)
-    print(']')
-    print('}')
+    print("]")
+    print("}")


 def main():
@@ -118,5 +145,6 @@ def main():
     else:
         gen_html_json(options, arguments)

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
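A hedged usage sketch for the benchmark-comparison script above. Its file name is not visible in this extraction, so the name below is assumed, and the value passed to -c is only illustrative; the options themselves come from parse_args:

    # Hypothetical file name; -c selects compare mode, positional arguments are result files.
    python3 compare_benchmark_results.py -c pyplot first_run.txt second_run.txt > plot_timings.py
    python3 plot_timings.py   # runs the matplotlib code emitted by gen_pyplot_code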
@@ -1,15 +1,14 @@
 #!/usr/bin/env bash

 BINARY_PATH=$1
-BINARY_NAME=$(basename $BINARY_PATH)
+BINARY_NAME=$(basename "$BINARY_PATH")
 DESTINATION_STRIPPED_DIR=$2
 OBJCOPY_PATH=${3:objcopy}
 READELF_PATH=${4:readelf}

-BUILD_ID=$($READELF_PATH -n $1 | sed -n '/Build ID/ { s/.*: //p; q; }')
+BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
 BUILD_ID_PREFIX=${BUILD_ID:0:2}
 BUILD_ID_SUFFIX=${BUILD_ID:2}
-TEMP_BINARY_PATH="${BINARY_PATH}_temp"

 DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
 DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
@@ -17,9 +16,13 @@ DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
 mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
 mkdir -p "$DESTINATION_STRIP_BINARY_DIR"

-$OBJCOPY_PATH --only-keep-debug "$BINARY_PATH" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
-
-touch "$TEMP_BINARY_PATH"
-$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$BINARY_PATH" "$TEMP_BINARY_PATH"
-$OBJCOPY_PATH --strip-all "$TEMP_BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
-rm -f "$TEMP_BINARY_PATH"
+cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
+
+$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
+chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
+chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
+
+strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
+
+$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
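As a usage sketch for the script above: its file name is not shown in this diff, so the name below is hypothetical; the four positional arguments and the resulting layout are taken from the script itself.

    # Hypothetical invocation: <binary> <destination dir> <objcopy> <readelf>
    ./strip_binary.sh ./programs/clickhouse /output/stripped llvm-objcopy llvm-readelf
    # Per the script, this would produce /output/stripped/bin/clickhouse plus
    # /output/stripped/lib/debug/.build-id/<first-two-hex>/<rest-of-build-id>.debug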
@@ -95,6 +95,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
     && apt-get install gcc-11 g++-11 --yes \
     && apt-get clean

+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH
+ARG NFPM_VERSION=2.15.0
+
+RUN arch=${TARGETARCH:-amd64} \
+    && curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \
+    && dpkg -i /tmp/nfpm.deb \
+    && rm /tmp/nfpm.deb
+
 COPY build.sh /
-CMD ["bash", "-c", "/build.sh 2>&1 | ts"]
+CMD ["bash", "-c", "/build.sh 2>&1"]
@@ -1,7 +1,13 @@
 #!/usr/bin/env bash

+exec &> >(ts)
 set -x -e

+cache_status () {
+    ccache --show-config ||:
+    ccache --show-stats ||:
+}
+
 mkdir -p build/cmake/toolchain/darwin-x86_64
 tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
 ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64
@@ -19,15 +25,23 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
 env
 cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..

-ccache --show-config ||:
-ccache --show-stats ||:
+cache_status
 # clear cache stats
 ccache --zero-stats ||:

-# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
+# No quotes because I want it to expand to nothing if empty.
+# shellcheck disable=SC2086
 ninja $NINJA_FLAGS clickhouse-bundle

-ccache --show-config ||:
-ccache --show-stats ||:
+cache_status
+
+if [ -n "$MAKE_DEB" ]; then
+    rm -rf /build/packages/root
+    # No quotes because I want it to expand to nothing if empty.
+    # shellcheck disable=SC2086
+    DESTDIR=/build/packages/root ninja $NINJA_FLAGS install
+    bash -x /build/packages/build
+fi

 mv ./programs/clickhouse* /output
 mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
@@ -84,8 +98,7 @@ fi
 # ../docker/packager/other/fuzzer.sh
 # fi

-ccache --show-config ||:
-ccache --show-stats ||:
+cache_status

 if [ "${CCACHE_DEBUG:-}" == "1" ]
 then
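A side note on the `# shellcheck disable=SC2086` comments in the hunk above: the expansion is deliberately left unquoted so that an empty flag variable disappears instead of becoming an empty-string argument. A minimal illustration (the variable name is a placeholder):

    FLAGS=""
    printf '%s\n' unquoted $FLAGS end     # empty variable expands to zero words: prints "unquoted", "end"
    printf '%s\n' quoted "$FLAGS" end     # quoted empty variable becomes an empty argument: prints "quoted", "", "end"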
@ -8,36 +8,39 @@ import sys
|
||||
|
||||
SCRIPT_PATH = os.path.realpath(__file__)
|
||||
|
||||
IMAGE_MAP = {
|
||||
"deb": "clickhouse/deb-builder",
|
||||
"binary": "clickhouse/binary-builder",
|
||||
}
|
||||
|
||||
def check_image_exists_locally(image_name):
|
||||
try:
|
||||
output = subprocess.check_output("docker images -q {} 2> /dev/null".format(image_name), shell=True)
|
||||
output = subprocess.check_output(
|
||||
f"docker images -q {image_name} 2> /dev/null", shell=True
|
||||
)
|
||||
return output != ""
|
||||
except subprocess.CalledProcessError as ex:
|
||||
except subprocess.CalledProcessError:
|
||||
return False
|
||||
|
||||
|
||||
def pull_image(image_name):
|
||||
try:
|
||||
subprocess.check_call("docker pull {}".format(image_name), shell=True)
|
||||
subprocess.check_call(f"docker pull {image_name}", shell=True)
|
||||
return True
|
||||
except subprocess.CalledProcessError as ex:
|
||||
logging.info("Cannot pull image {}".format(image_name))
|
||||
except subprocess.CalledProcessError:
|
||||
logging.info(f"Cannot pull image {image_name}".format())
|
||||
return False
|
||||
|
||||
|
||||
def build_image(image_name, filepath):
|
||||
context = os.path.dirname(filepath)
|
||||
build_cmd = "docker build --network=host -t {} -f {} {}".format(image_name, filepath, context)
|
||||
logging.info("Will build image with cmd: '{}'".format(build_cmd))
|
||||
build_cmd = f"docker build --network=host -t {image_name} -f {filepath} {context}"
|
||||
logging.info("Will build image with cmd: '%s'", build_cmd)
|
||||
subprocess.check_call(
|
||||
build_cmd,
|
||||
shell=True,
|
||||
)
|
||||
|
||||
def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version):
|
||||
|
||||
def run_docker_image_with_env(
|
||||
image_name, output, env_variables, ch_root, ccache_dir, docker_image_version
|
||||
):
|
||||
env_part = " -e ".join(env_variables)
|
||||
if env_part:
|
||||
env_part = " -e " + env_part
|
||||
@ -47,28 +50,52 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
|
||||
else:
|
||||
interactive = ""
|
||||
|
||||
cmd = "docker run --network=host --rm --volume={output_path}:/output --volume={ch_root}:/build --volume={ccache_dir}:/ccache {env} {interactive} {img_name}".format(
|
||||
output_path=output,
|
||||
ch_root=ch_root,
|
||||
ccache_dir=ccache_dir,
|
||||
env=env_part,
|
||||
img_name=image_name + ":" + docker_image_version,
|
||||
interactive=interactive
|
||||
cmd = (
|
||||
f"docker run --network=host --rm --volume={output}:/output "
|
||||
f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} "
|
||||
f"{interactive} {image_name}:{docker_image_version}"
|
||||
)
|
||||
|
||||
logging.info("Will build ClickHouse pkg with cmd: '{}'".format(cmd))
|
||||
logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd)
|
||||
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
|
||||
|
||||
def is_release_build(build_type, package_type, sanitizer, split_binary):
|
||||
return (
|
||||
build_type == ""
|
||||
and package_type == "deb"
|
||||
and sanitizer == ""
|
||||
and not split_binary
|
||||
)
|
||||
|
||||
|
||||
def parse_env_variables(
|
||||
build_type,
|
||||
compiler,
|
||||
sanitizer,
|
||||
package_type,
|
||||
image_type,
|
||||
cache,
|
||||
distcc_hosts,
|
||||
split_binary,
|
||||
clang_tidy,
|
||||
version,
|
||||
author,
|
||||
official,
|
||||
additional_pkgs,
|
||||
with_coverage,
|
||||
with_binaries,
|
||||
):
|
||||
DARWIN_SUFFIX = "-darwin"
|
||||
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
|
||||
ARM_SUFFIX = "-aarch64"
|
||||
FREEBSD_SUFFIX = "-freebsd"
|
||||
PPC_SUFFIX = '-ppc64le'
|
||||
PPC_SUFFIX = "-ppc64le"
|
||||
|
||||
result = []
|
||||
cmake_flags = ['$CMAKE_FLAGS']
|
||||
result.append("OUTPUT_DIR=/output")
|
||||
cmake_flags = ["$CMAKE_FLAGS"]
|
||||
|
||||
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
|
||||
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
|
||||
@ -79,44 +106,70 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
|
||||
if is_cross_darwin:
|
||||
cc = compiler[: -len(DARWIN_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/x86_64-apple-darwin-ar")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/x86_64-apple-darwin-install_name_tool")
|
||||
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/"
|
||||
"x86_64-apple-darwin-install_name_tool"
|
||||
)
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib"
|
||||
)
|
||||
cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld")
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake"
|
||||
)
|
||||
elif is_cross_darwin_arm:
|
||||
cc = compiler[: -len(DARWIN_ARM_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool")
|
||||
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/"
|
||||
"aarch64-apple-darwin-install_name_tool"
|
||||
)
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib"
|
||||
)
|
||||
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld")
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake"
|
||||
)
|
||||
elif is_cross_arm:
|
||||
cc = compiler[: -len(ARM_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
|
||||
result.append("DEB_ARCH_FLAG=-aarm64")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake"
|
||||
)
|
||||
result.append("DEB_ARCH=arm64")
|
||||
elif is_cross_freebsd:
|
||||
cc = compiler[: -len(FREEBSD_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake"
|
||||
)
|
||||
elif is_cross_ppc:
|
||||
cc = compiler[: -len(PPC_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
|
||||
)
|
||||
else:
|
||||
cc = compiler
|
||||
result.append("DEB_ARCH_FLAG=-aamd64")
|
||||
result.append("DEB_ARCH=amd64")
|
||||
|
||||
cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++')
|
||||
cxx = cc.replace("gcc", "g++").replace("clang", "clang++")
|
||||
|
||||
if image_type == "deb":
|
||||
result.append("DEB_CC={}".format(cc))
|
||||
result.append("DEB_CXX={}".format(cxx))
|
||||
# For building fuzzers
|
||||
result.append("CC={}".format(cc))
|
||||
result.append("CXX={}".format(cxx))
|
||||
elif image_type == "binary":
|
||||
result.append("CC={}".format(cc))
|
||||
result.append("CXX={}".format(cxx))
|
||||
cmake_flags.append('-DCMAKE_C_COMPILER=`which {}`'.format(cc))
|
||||
cmake_flags.append('-DCMAKE_CXX_COMPILER=`which {}`'.format(cxx))
|
||||
result.append("MAKE_DEB=true")
|
||||
cmake_flags.append("-DENABLE_TESTS=0")
|
||||
cmake_flags.append("-DENABLE_UTILS=0")
|
||||
cmake_flags.append("-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON")
|
||||
cmake_flags.append("-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON")
|
||||
cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
|
||||
if is_release_build(build_type, package_type, sanitizer, split_binary):
|
||||
cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")
|
||||
|
||||
result.append(f"CC={cc}")
|
||||
result.append(f"CXX={cxx}")
|
||||
cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}")
|
||||
cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")
|
||||
|
||||
# Create combined output archive for split build and for performance tests.
|
||||
if package_type == "performance":
|
||||
@ -126,12 +179,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
|
||||
result.append("COMBINED_OUTPUT=shared_build")
|
||||
|
||||
if sanitizer:
|
||||
result.append("SANITIZER={}".format(sanitizer))
|
||||
result.append(f"SANITIZER={sanitizer}")
|
||||
if build_type:
|
||||
result.append("BUILD_TYPE={}".format(build_type))
|
||||
result.append(f"BUILD_TYPE={build_type.capitalize()}")
|
||||
else:
|
||||
result.append("BUILD_TYPE=None")
|
||||
|
||||
if cache == 'distcc':
|
||||
result.append("CCACHE_PREFIX={}".format(cache))
|
||||
if cache == "distcc":
|
||||
result.append(f"CCACHE_PREFIX={cache}")
|
||||
|
||||
if cache:
|
||||
result.append("CCACHE_DIR=/ccache")
|
||||
@ -142,109 +197,188 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
|
||||
# result.append("CCACHE_UMASK=777")
|
||||
|
||||
if distcc_hosts:
|
||||
hosts_with_params = ["{}/24,lzo".format(host) for host in distcc_hosts] + ["localhost/`nproc`"]
|
||||
result.append('DISTCC_HOSTS="{}"'.format(" ".join(hosts_with_params)))
|
||||
hosts_with_params = [f"{host}/24,lzo" for host in distcc_hosts] + [
|
||||
"localhost/`nproc`"
|
||||
]
|
||||
result.append('DISTCC_HOSTS="' + " ".join(hosts_with_params) + '"')
|
||||
elif cache == "distcc":
|
||||
result.append('DISTCC_HOSTS="{}"'.format("localhost/`nproc`"))
|
||||
result.append('DISTCC_HOSTS="localhost/`nproc`"')
|
||||
|
||||
if alien_pkgs:
|
||||
result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'")
|
||||
if additional_pkgs:
|
||||
result.append("MAKE_APK=true")
|
||||
result.append("MAKE_RPM=true")
|
||||
result.append("MAKE_TGZ=true")
|
||||
|
||||
if with_binaries == "programs":
|
||||
result.append('BINARY_OUTPUT=programs')
|
||||
result.append("BINARY_OUTPUT=programs")
|
||||
elif with_binaries == "tests":
|
||||
result.append('ENABLE_TESTS=1')
|
||||
result.append('BINARY_OUTPUT=tests')
|
||||
cmake_flags.append('-DENABLE_TESTS=1')
|
||||
result.append("ENABLE_TESTS=1")
|
||||
result.append("BINARY_OUTPUT=tests")
|
||||
cmake_flags.append("-DENABLE_TESTS=1")
|
||||
|
||||
if split_binary:
|
||||
cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1')
|
||||
cmake_flags.append(
|
||||
"-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 "
|
||||
"-DCLICKHOUSE_SPLIT_BINARY=1"
|
||||
)
|
||||
# We can't always build utils because it requires too much space, but
|
||||
# we have to build them at least in some way in CI. The split build is
|
||||
# probably the least heavy disk-wise.
|
||||
cmake_flags.append('-DENABLE_UTILS=1')
|
||||
cmake_flags.append("-DENABLE_UTILS=1")
|
||||
|
||||
if clang_tidy:
|
||||
cmake_flags.append('-DENABLE_CLANG_TIDY=1')
|
||||
cmake_flags.append('-DENABLE_UTILS=1')
|
||||
cmake_flags.append('-DENABLE_TESTS=1')
|
||||
cmake_flags.append('-DENABLE_EXAMPLES=1')
|
||||
cmake_flags.append("-DENABLE_CLANG_TIDY=1")
|
||||
cmake_flags.append("-DENABLE_UTILS=1")
|
||||
cmake_flags.append("-DENABLE_TESTS=1")
|
||||
cmake_flags.append("-DENABLE_EXAMPLES=1")
|
||||
# Don't stop on first error to find more clang-tidy errors in one run.
|
||||
result.append('NINJA_FLAGS=-k0')
|
||||
result.append("NINJA_FLAGS=-k0")
|
||||
|
||||
if with_coverage:
|
||||
cmake_flags.append('-DWITH_COVERAGE=1')
|
||||
cmake_flags.append("-DWITH_COVERAGE=1")
|
||||
|
||||
if version:
|
||||
result.append("VERSION_STRING='{}'".format(version))
|
||||
result.append(f"VERSION_STRING='{version}'")
|
||||
|
||||
if author:
|
||||
result.append("AUTHOR='{}'".format(author))
|
||||
result.append(f"AUTHOR='{author}'")
|
||||
|
||||
if official:
|
||||
cmake_flags.append('-DYANDEX_OFFICIAL_BUILD=1')
|
||||
cmake_flags.append("-DYANDEX_OFFICIAL_BUILD=1")
|
||||
|
||||
result.append('CMAKE_FLAGS="' + ' '.join(cmake_flags) + '"')
|
||||
result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')
|
||||
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse building script using prebuilt Docker image")
|
||||
# 'performance' creates a combined .tgz with server and configs to be used for performance test.
|
||||
parser.add_argument("--package-type", choices=['deb', 'binary', 'performance'], required=True)
|
||||
parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description="ClickHouse building script using prebuilt Docker image",
|
||||
)
|
||||
# 'performance' creates a combined .tgz with server
|
||||
# and configs to be used for performance test.
|
||||
parser.add_argument(
|
||||
"--package-type",
|
||||
choices=("deb", "binary", "performance"),
|
||||
required=True,
|
||||
help="a build type",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--clickhouse-repo-path",
|
||||
default=os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir
|
||||
),
|
||||
help="ClickHouse git repository",
|
||||
)
|
||||
parser.add_argument("--output-dir", required=True)
|
||||
parser.add_argument("--build-type", choices=("debug", ""), default="")
|
||||
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
|
||||
"clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64",
|
||||
"clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", "clang-13-ppc64le",
|
||||
"clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13")
|
||||
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
|
||||
parser.add_argument(
|
||||
"--compiler",
|
||||
choices=(
|
||||
"clang-11",
|
||||
"clang-11-darwin",
|
||||
"clang-11-darwin-aarch64",
|
||||
"clang-11-aarch64",
|
||||
"clang-12",
|
||||
"clang-12-darwin",
|
||||
"clang-12-darwin-aarch64",
|
||||
"clang-12-aarch64",
|
||||
"clang-13",
|
||||
"clang-13-darwin",
|
||||
"clang-13-darwin-aarch64",
|
||||
"clang-13-aarch64",
|
||||
"clang-13-ppc64le",
|
||||
"clang-11-freebsd",
|
||||
"clang-12-freebsd",
|
||||
"clang-13-freebsd",
|
||||
"gcc-11",
|
||||
),
|
||||
default="clang-13",
|
||||
help="a compiler to use",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sanitizer",
|
||||
choices=("address", "thread", "memory", "undefined", ""),
|
||||
default="",
|
||||
)
|
||||
parser.add_argument("--split-binary", action="store_true")
|
||||
parser.add_argument("--clang-tidy", action="store_true")
|
||||
parser.add_argument("--cache", choices=("", "ccache", "distcc"), default="")
|
||||
parser.add_argument("--ccache_dir", default= os.getenv("HOME", "") + '/.ccache')
|
||||
parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="")
|
||||
parser.add_argument(
|
||||
"--ccache_dir",
|
||||
default=os.getenv("HOME", "") + "/.ccache",
|
||||
help="a directory with ccache",
|
||||
)
|
||||
parser.add_argument("--distcc-hosts", nargs="+")
|
||||
parser.add_argument("--force-build-image", action="store_true")
|
||||
parser.add_argument("--version")
|
||||
parser.add_argument("--author", default="clickhouse")
|
||||
parser.add_argument("--author", default="clickhouse", help="a package author")
|
||||
parser.add_argument("--official", action="store_true")
|
||||
parser.add_argument("--alien-pkgs", nargs='+', default=[])
|
||||
parser.add_argument("--additional-pkgs", action="store_true")
|
||||
parser.add_argument("--with-coverage", action="store_true")
|
||||
parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="")
|
||||
parser.add_argument("--docker-image-version", default="latest")
|
||||
parser.add_argument(
|
||||
"--with-binaries", choices=("programs", "tests", ""), default=""
|
||||
)
|
||||
parser.add_argument(
|
||||
"--docker-image-version", default="latest", help="docker image tag to use"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
if not os.path.isabs(args.output_dir):
|
||||
args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir))
|
||||
|
||||
image_type = 'binary' if args.package_type == 'performance' else args.package_type
|
||||
image_name = IMAGE_MAP[image_type]
|
||||
image_type = "binary" if args.package_type == "performance" else args.package_type
|
||||
image_name = "clickhouse/binary-builder"
|
||||
|
||||
if not os.path.isabs(args.clickhouse_repo_path):
|
||||
ch_root = os.path.abspath(os.path.join(os.getcwd(), args.clickhouse_repo_path))
|
||||
else:
|
||||
ch_root = args.clickhouse_repo_path
|
||||
|
||||
if args.alien_pkgs and not image_type == "deb":
|
||||
raise Exception("Can add alien packages only in deb build")
|
||||
if args.additional_pkgs and image_type != "deb":
|
||||
raise Exception("Can build additional packages only in deb build")
|
||||
|
||||
if args.with_binaries != "" and not image_type == "deb":
|
||||
if args.with_binaries != "" and image_type != "deb":
|
||||
raise Exception("Can add additional binaries only in deb build")
|
||||
|
||||
if args.with_binaries != "" and image_type == "deb":
|
||||
logging.info("Should place {} to output".format(args.with_binaries))
|
||||
logging.info("Should place %s to output", args.with_binaries)
|
||||
|
||||
dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile")
|
||||
image_with_version = image_name + ":" + args.docker_image_version
|
||||
if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image:
|
||||
if (
|
||||
image_type != "freebsd"
|
||||
and not check_image_exists_locally(image_name)
|
||||
or args.force_build_image
|
||||
):
|
||||
if not pull_image(image_with_version) or args.force_build_image:
|
||||
build_image(image_with_version, dockerfile)
|
||||
env_prepared = parse_env_variables(
|
||||
args.build_type, args.compiler, args.sanitizer, args.package_type, image_type,
|
||||
args.cache, args.distcc_hosts, args.split_binary, args.clang_tidy,
|
||||
args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries)
|
||||
args.build_type,
|
||||
args.compiler,
|
||||
args.sanitizer,
|
||||
args.package_type,
|
||||
image_type,
|
||||
args.cache,
|
||||
args.distcc_hosts,
|
||||
args.split_binary,
|
||||
args.clang_tidy,
|
||||
args.version,
|
||||
args.author,
|
||||
args.official,
|
||||
args.additional_pkgs,
|
||||
args.with_coverage,
|
||||
args.with_binaries,
|
||||
)
|
||||
|
||||
run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir, args.docker_image_version)
|
||||
logging.info("Output placed into {}".format(args.output_dir))
|
||||
run_docker_image_with_env(
|
||||
image_name,
|
||||
args.output_dir,
|
||||
env_prepared,
|
||||
ch_root,
|
||||
args.ccache_dir,
|
||||
args.docker_image_version,
|
||||
)
|
||||
logging.info("Output placed into %s", args.output_dir)
|
||||
|
@ -3,55 +3,55 @@ import subprocess
|
||||
import datetime
|
||||
from flask import Flask, flash, request, redirect, url_for
|
||||
|
||||
|
||||
def run_command(command, wait=False):
|
||||
print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
|
||||
lines = []
|
||||
p = subprocess.Popen(command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True)
|
||||
p = subprocess.Popen(
|
||||
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
|
||||
)
|
||||
if wait:
|
||||
for l in iter(p.stdout.readline, b''):
|
||||
for l in iter(p.stdout.readline, b""):
|
||||
lines.append(l)
|
||||
p.poll()
|
||||
return (lines, p.returncode)
|
||||
else:
|
||||
return(iter(p.stdout.readline, b''), 0)
|
||||
return (iter(p.stdout.readline, b""), 0)
|
||||
|
||||
|
||||
UPLOAD_FOLDER = './'
|
||||
ALLOWED_EXTENSIONS = {'txt', 'sh'}
|
||||
UPLOAD_FOLDER = "./"
|
||||
ALLOWED_EXTENSIONS = {"txt", "sh"}
|
||||
app = Flask(__name__)
|
||||
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
|
||||
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
|
||||
|
||||
@app.route('/')
|
||||
|
||||
@app.route("/")
|
||||
def hello_world():
|
||||
return 'Hello World'
|
||||
return "Hello World"
|
||||
|
||||
|
||||
def allowed_file(filename):
|
||||
return '.' in filename and \
|
||||
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
|
||||
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
@app.route('/upload', methods=['GET', 'POST'])
|
||||
@app.route("/upload", methods=["GET", "POST"])
|
||||
def upload_file():
|
||||
if request.method == 'POST':
|
||||
if request.method == "POST":
|
||||
# check if the post request has the file part
|
||||
if 'file' not in request.files:
|
||||
flash('No file part')
|
||||
if "file" not in request.files:
|
||||
flash("No file part")
|
||||
return redirect(request.url)
|
||||
file = request.files['file']
|
||||
file = request.files["file"]
|
||||
# If the user does not select a file, the browser submits an
|
||||
# empty file without a filename.
|
||||
if file.filename == '':
|
||||
flash('No selected file')
|
||||
if file.filename == "":
|
||||
flash("No selected file")
|
||||
return redirect(request.url)
|
||||
if file and allowed_file(file.filename):
|
||||
filename = file.filename
|
||||
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
|
||||
return redirect(url_for('upload_file', name=filename))
|
||||
return '''
|
||||
file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
|
||||
return redirect(url_for("upload_file", name=filename))
|
||||
return """
|
||||
<!doctype html>
|
||||
<title>Upload new File</title>
|
||||
<h1>Upload new File</h1>
|
||||
@ -59,12 +59,15 @@ def upload_file():
|
||||
<input type=file name=file>
|
||||
<input type=submit value=Upload>
|
||||
</form>
|
||||
'''
|
||||
@app.route('/run', methods=['GET', 'POST'])
|
||||
"""
|
||||
|
||||
|
||||
@app.route("/run", methods=["GET", "POST"])
|
||||
def parse_request():
|
||||
data = request.data # data is empty
|
||||
run_command(data, wait=True)
|
||||
return 'Ok'
|
||||
return "Ok"
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(port=5011)
|
||||
|
@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
|
||||
from threading import Thread
|
||||
from scipy import stats
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
|
||||
)
|
||||
|
||||
total_start_seconds = time.perf_counter()
|
||||
stage_start_seconds = total_start_seconds
|
||||
|
||||
|
||||
def reportStageEnd(stage):
|
||||
global stage_start_seconds, total_start_seconds
|
||||
|
||||
current = time.perf_counter()
|
||||
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
|
||||
print(
|
||||
f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
|
||||
)
|
||||
stage_start_seconds = current
|
||||
|
||||
|
||||
def tsv_escape(s):
|
||||
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
|
||||
return (
|
||||
s.replace("\\", "\\\\")
|
||||
.replace("\t", "\\t")
|
||||
.replace("\n", "\\n")
|
||||
.replace("\r", "")
|
||||
)
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run performance test.')
|
||||
parser = argparse.ArgumentParser(description="Run performance test.")
|
||||
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
|
||||
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
|
||||
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
|
||||
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
|
||||
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
|
||||
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
|
||||
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
|
||||
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
|
||||
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
|
||||
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
|
||||
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
|
||||
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
|
||||
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
|
||||
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
|
||||
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
|
||||
parser.add_argument(
|
||||
"file",
|
||||
metavar="FILE",
|
||||
type=argparse.FileType("r", encoding="utf-8"),
|
||||
nargs=1,
|
||||
help="test description file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--host",
|
||||
nargs="*",
|
||||
default=["localhost"],
|
||||
help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
nargs="*",
|
||||
default=[9000],
|
||||
help="Space-separated list of server port(s). Corresponds to '--host' options.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--runs", type=int, default=1, help="Number of query runs per server."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-queries",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Test no more than this number of queries, chosen at random.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--queries-to-run",
|
||||
nargs="*",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Space-separated list of indexes of queries to test.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-query-seconds",
|
||||
type=int,
|
||||
default=15,
|
||||
help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prewarm-max-query-seconds",
|
||||
type=int,
|
||||
default=180,
|
||||
help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--profile-seconds",
|
||||
type=int,
|
||||
default=0,
|
||||
help="For how many seconds to profile a query for which the performance has changed.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--long", action="store_true", help="Do not skip the tests tagged as long."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--print-queries", action="store_true", help="Print test queries and exit."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--print-settings", action="store_true", help="Print test settings and exit."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--keep-created-tables",
|
||||
action="store_true",
|
||||
help="Don't drop the created tables after the test.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-existing-tables",
|
||||
action="store_true",
|
||||
help="Don't create or drop the tables, use the existing ones instead.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
reportStageEnd('start')
|
||||
reportStageEnd("start")
|
||||
|
||||
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
|
||||
|
||||
tree = et.parse(args.file[0])
|
||||
root = tree.getroot()
|
||||
|
||||
reportStageEnd('parse')
|
||||
reportStageEnd("parse")
|
||||
|
||||
# Process query parameters
|
||||
subst_elems = root.findall('substitutions/substitution')
|
||||
subst_elems = root.findall("substitutions/substitution")
|
||||
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
|
||||
for e in subst_elems:
|
||||
name = e.find('name').text
|
||||
values = [v.text for v in e.findall('values/value')]
|
||||
name = e.find("name").text
|
||||
values = [v.text for v in e.findall("values/value")]
|
||||
if not values:
|
||||
raise Exception(f'No values given for substitution {{{name}}}')
|
||||
raise Exception(f"No values given for substitution {{{name}}}")
|
||||
|
||||
available_parameters[name] = values
|
||||
|
||||
@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
|
||||
# and reporting the queries marked as short.
|
||||
test_queries = []
|
||||
is_short = []
|
||||
for e in root.findall('query'):
|
||||
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]])
|
||||
for e in root.findall("query"):
|
||||
new_queries, [new_is_short] = substitute_parameters(
|
||||
[e.text], [[e.attrib.get("short", "0")]]
|
||||
)
|
||||
test_queries += new_queries
|
||||
is_short += [eval(s) for s in new_is_short]
|
||||
|
||||
assert(len(test_queries) == len(is_short))
|
||||
assert len(test_queries) == len(is_short)
|
||||
|
||||
# If we're given a list of queries to run, check that it makes sense.
|
||||
for i in args.queries_to_run or []:
|
||||
if i < 0 or i >= len(test_queries):
|
||||
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
|
||||
print(
|
||||
f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
|
||||
)
|
||||
exit(1)
|
||||
|
||||
# If we're only asked to print the queries, do that and exit.
|
||||
@ -125,60 +197,65 @@ if args.print_queries:
|
||||
# Print short queries
|
||||
for i, s in enumerate(is_short):
|
||||
if s:
|
||||
print(f'short\t{i}')
|
||||
print(f"short\t{i}")
|
||||
|
||||
# If we're only asked to print the settings, do that and exit. These are settings
|
||||
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
|
||||
# '--max_memory_usage=10000000'.
|
||||
if args.print_settings:
|
||||
for s in root.findall('settings/*'):
|
||||
print(f'--{s.tag}={s.text}')
|
||||
for s in root.findall("settings/*"):
|
||||
print(f"--{s.tag}={s.text}")
|
||||
|
||||
exit(0)
|
||||
|
||||
# Skip long tests
|
||||
if not args.long:
|
||||
for tag in root.findall('.//tag'):
|
||||
if tag.text == 'long':
|
||||
print('skipped\tTest is tagged as long.')
|
||||
for tag in root.findall(".//tag"):
|
||||
if tag.text == "long":
|
||||
print("skipped\tTest is tagged as long.")
|
||||
sys.exit(0)
|
||||
|
||||
# Print report threshold for the test if it is set.
|
||||
ignored_relative_change = 0.05
|
||||
if 'max_ignored_relative_change' in root.attrib:
|
||||
if "max_ignored_relative_change" in root.attrib:
|
||||
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
|
||||
print(f'report-threshold\t{ignored_relative_change}')
|
||||
print(f"report-threshold\t{ignored_relative_change}")
|
||||
|
||||
reportStageEnd('before-connect')
|
||||
reportStageEnd("before-connect")
|
||||
|
||||
# Open connections
|
||||
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
|
||||
servers = [
|
||||
{"host": host or args.host[0], "port": port or args.port[0]}
|
||||
for (host, port) in itertools.zip_longest(args.host, args.port)
|
||||
]
|
||||
# Force settings_is_important to fail queries on unknown settings.
|
||||
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
|
||||
all_connections = [
|
||||
clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
|
||||
]
|
||||
|
||||
for i, s in enumerate(servers):
|
||||
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
|
||||
|
||||
reportStageEnd('connect')
|
||||
reportStageEnd("connect")
|
||||
|
||||
if not args.use_existing_tables:
|
||||
# Run drop queries, ignoring errors. Do this before all other activity,
|
||||
# because clickhouse_driver disconnects on error (this is not configurable),
|
||||
# and the new connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_query_templates = [q.text for q in root.findall("drop_query")]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
except:
|
||||
pass
|
||||
|
||||
reportStageEnd('drop-1')
|
||||
reportStageEnd("drop-1")
|
||||
|
||||
# Apply settings.
|
||||
settings = root.findall('settings/*')
|
||||
settings = root.findall("settings/*")
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for s in settings:
|
||||
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
|
||||
@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
|
||||
# the test, which is wrong.
|
||||
c.execute("select 1")
|
||||
|
||||
reportStageEnd('settings')
|
||||
reportStageEnd("settings")
|
||||
|
||||
# Check tables that should exist. If they don't exist, just skip this test.
|
||||
tables = [e.text for e in root.findall('preconditions/table_exists')]
|
||||
tables = [e.text for e in root.findall("preconditions/table_exists")]
|
||||
for t in tables:
|
||||
for c in all_connections:
|
||||
try:
|
||||
res = c.execute("select 1 from {} limit 1".format(t))
|
||||
except:
|
||||
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
|
||||
skipped_message = ' '.join(exception_message.split('\n')[:2])
|
||||
print(f'skipped\t{tsv_escape(skipped_message)}')
|
||||
skipped_message = " ".join(exception_message.split("\n")[:2])
|
||||
print(f"skipped\t{tsv_escape(skipped_message)}")
|
||||
sys.exit(0)
|
||||
|
||||
reportStageEnd('preconditions')
|
||||
reportStageEnd("preconditions")
|
||||
|
||||
if not args.use_existing_tables:
|
||||
# Run create and fill queries. We will run them simultaneously for both
|
||||
# servers, to save time. The weird XML search + filter is because we want to
|
||||
# keep the relative order of elements, and etree doesn't support the
|
||||
# appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*')
|
||||
if q.tag in ('create_query', 'fill_query')]
|
||||
create_query_templates = [
|
||||
q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
|
||||
]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on
|
||||
# errors, and temporary tables are destroyed. We want to be able to continue
|
||||
# after some errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
if re.search("create temporary table", q, flags=re.IGNORECASE):
|
||||
print(
|
||||
f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
|
||||
threads = [
|
||||
Thread(target=do_create, args=(connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
for index, connection in enumerate(all_connections)
|
||||
]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
@ -238,14 +319,16 @@ if not args.use_existing_tables:
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
reportStageEnd('create')
|
||||
reportStageEnd("create")
|
||||
|
||||
# By default, test all queries.
|
||||
queries_to_run = range(0, len(test_queries))
|
||||
|
||||
if args.max_queries:
|
||||
# If specified, test a limited number of queries chosen at random.
|
||||
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
|
||||
queries_to_run = random.sample(
|
||||
range(0, len(test_queries)), min(len(test_queries), args.max_queries)
|
||||
)
|
||||
|
||||
if args.queries_to_run:
|
||||
# Run the specified queries.
|
||||
@ -255,16 +338,16 @@ if args.queries_to_run:
|
||||
profile_total_seconds = 0
|
||||
for query_index in queries_to_run:
|
||||
q = test_queries[query_index]
|
||||
query_prefix = f'{test_name}.query{query_index}'
|
||||
query_prefix = f"{test_name}.query{query_index}"
|
||||
|
||||
# We have some crazy long queries (about 100kB), so trim them to a sane
|
||||
# length. This means we can't use query text as an identifier and have to
|
||||
# use the test name + the test-wide query index.
|
||||
query_display_name = q
|
||||
if len(query_display_name) > 1000:
|
||||
query_display_name = f'{query_display_name[:1000]}...({query_index})'
|
||||
query_display_name = f"{query_display_name[:1000]}...({query_index})"
|
||||
|
||||
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
|
||||
print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")
|
||||
|
||||
# Prewarm: run once on both servers. Helps to bring the data into memory,
|
||||
# precompile the queries, etc.
|
||||
@ -272,10 +355,10 @@ for query_index in queries_to_run:
|
||||
# new one. We want to run them on the new server only, so that the PR author
|
||||
# can ensure that the test works properly. Remember the errors we had on
|
||||
# each server.
|
||||
query_error_on_connection = [None] * len(all_connections);
|
||||
query_error_on_connection = [None] * len(all_connections)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
try:
|
||||
prewarm_id = f'{query_prefix}.prewarm0'
|
||||
prewarm_id = f"{query_prefix}.prewarm0"
|
||||
|
||||
try:
|
||||
# During the warmup runs, we will also:
|
||||
@ -283,25 +366,30 @@ for query_index in queries_to_run:
|
||||
# * collect profiler traces, which might be helpful for analyzing
|
||||
# test coverage. We disable profiler for normal runs because
|
||||
# it makes the results unstable.
|
||||
res = c.execute(q, query_id = prewarm_id,
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=prewarm_id,
|
||||
settings={
|
||||
'max_execution_time': args.prewarm_max_query_seconds,
|
||||
'query_profiler_real_time_period_ns': 10000000,
|
||||
'memory_profiler_step': '4Mi',
|
||||
})
|
||||
"max_execution_time": args.prewarm_max_query_seconds,
|
||||
"query_profiler_real_time_period_ns": 10000000,
|
||||
"memory_profiler_step": "4Mi",
|
||||
},
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (prewarm_id, *e.args)
|
||||
e.message = prewarm_id + ': ' + e.message
|
||||
e.message = prewarm_id + ": " + e.message
|
||||
raise
|
||||
|
||||
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
print(
|
||||
f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
# FIXME the driver reconnects on error and we lose settings, so this
|
||||
# might lead to further errors or unexpected behavior.
|
||||
query_error_on_connection[conn_index] = traceback.format_exc();
|
||||
query_error_on_connection[conn_index] = traceback.format_exc()
|
||||
continue
|
||||
|
||||
# Report all errors that occurred during prewarm and decide what to do next.
|
||||
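Editor's note: the prewarm run above passes per-query settings (an execution-time cap, the real-time profiler period, and the memory profiler step) straight to `clickhouse_driver`'s `Client.execute`, and tags any driver error with the query id. A hedged sketch of that call shape, assuming a ClickHouse server on localhost and the `clickhouse_driver` package; the host, query, and query id are placeholders.

import clickhouse_driver

client = clickhouse_driver.Client(host="localhost")  # placeholder host

prewarm_id = "example.query0.prewarm0"  # placeholder query id
try:
    client.execute(
        "SELECT count() FROM numbers(1000000)",  # placeholder query
        query_id=prewarm_id,
        settings={
            # Cap the run so a pathological query cannot stall the whole test.
            "max_execution_time": 10,
            # Sample the real-time profiler every 10 ms during warmup only.
            "query_profiler_real_time_period_ns": 10000000,
            "memory_profiler_step": "4Mi",
        },
    )
    print("elapsed:", client.last_query.elapsed)
except clickhouse_driver.errors.Error as e:
    # Tag the exception with the query id, as the script above does.
    e.args = (prewarm_id, *e.args)
    raise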
@ -318,7 +406,7 @@ for query_index in queries_to_run:
|
||||
if len(no_errors) == 0:
|
||||
continue
|
||||
elif len(no_errors) < len(all_connections):
|
||||
print(f'partial\t{query_index}\t{no_errors}')
|
||||
print(f"partial\t{query_index}\t{no_errors}")
|
||||
|
||||
this_query_connections = [all_connections[index] for index in no_errors]
|
||||
|
||||
@ -337,27 +425,34 @@ for query_index in queries_to_run:
|
||||
all_server_times.append([])
|
||||
|
||||
while True:
|
||||
run_id = f'{query_prefix}.run{run}'
|
||||
run_id = f"{query_prefix}.run{run}"
|
||||
|
||||
for conn_index, c in enumerate(this_query_connections):
|
||||
try:
|
||||
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=run_id,
|
||||
settings={"max_execution_time": args.max_query_seconds},
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (run_id, *e.args)
|
||||
e.message = run_id + ': ' + e.message
|
||||
e.message = run_id + ": " + e.message
|
||||
raise
|
||||
|
||||
elapsed = c.last_query.elapsed
|
||||
all_server_times[conn_index].append(elapsed)
|
||||
|
||||
server_seconds += elapsed
|
||||
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
|
||||
print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")
|
||||
|
||||
if elapsed > args.max_query_seconds:
|
||||
# Do not stop processing pathologically slow queries,
|
||||
# since this may hide errors in other queries.
|
||||
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
|
||||
print(
|
||||
f"The query no. {query_index} is taking too long to run ({elapsed} s)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Be careful with the counter, after this line it's the next iteration
|
||||
# already.
|
||||
@ -386,7 +481,7 @@ for query_index in queries_to_run:
|
||||
break
|
||||
|
||||
client_seconds = time.perf_counter() - start_seconds
|
||||
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
|
||||
print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
|
||||
|
||||
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
|
||||
# We have to do it after normal runs because otherwise it will affect test statistics too much
|
||||
@ -397,13 +492,15 @@ for query_index in queries_to_run:
|
||||
# Don't fail if for some reason there are not enough measurements.
continue

pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
pvalue = stats.ttest_ind(
all_server_times[0], all_server_times[1], equal_var=False
).pvalue
median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
continue

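Editor's note: the block above decides whether a query deserves extra profiling runs by combining a relative difference of per-server medians with Welch's t-test (`equal_var=False`) on the two timing samples. A small standalone sketch of that decision with made-up timings; the `ignored_relative_change` threshold is defined elsewhere in the script, so the value used here is only a placeholder.

import statistics
from scipy import stats

# Made-up per-server timings (seconds) for one query: [old server, new server].
all_server_times = [
    [0.101, 0.098, 0.103, 0.100, 0.099, 0.102, 0.101],
    [0.121, 0.118, 0.122, 0.119, 0.120, 0.123, 0.121],
]

ignored_relative_change = 0.05  # placeholder threshold

# Welch's t-test: do not assume equal variance between the two servers.
pvalue = stats.ttest_ind(
    all_server_times[0], all_server_times[1], equal_var=False
).pvalue

median = [statistics.median(t) for t in all_server_times]
relative_diff = (median[1] - median[0]) / median[0]

print(f"diff\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")

# Profile only when the change is both large enough and statistically significant.
if abs(relative_diff) >= ignored_relative_change and pvalue <= 0.05:
    print("would run extra profiling for this query")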
@ -412,25 +509,31 @@ for query_index in queries_to_run:
|
||||
profile_start_seconds = time.perf_counter()
|
||||
run = 0
|
||||
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
|
||||
run_id = f'{query_prefix}.profile{run}'
|
||||
run_id = f"{query_prefix}.profile{run}"
|
||||
|
||||
for conn_index, c in enumerate(this_query_connections):
|
||||
try:
|
||||
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
|
||||
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=run_id,
|
||||
settings={"query_profiler_real_time_period_ns": 10000000},
|
||||
)
|
||||
print(
|
||||
f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (run_id, *e.args)
|
||||
e.message = run_id + ': ' + e.message
|
||||
e.message = run_id + ": " + e.message
|
||||
raise
|
||||
|
||||
run += 1
|
||||
|
||||
profile_total_seconds += time.perf_counter() - profile_start_seconds
|
||||
|
||||
print(f'profile-total\t{profile_total_seconds}')
|
||||
print(f"profile-total\t{profile_total_seconds}")
|
||||
|
||||
reportStageEnd('run')
|
||||
reportStageEnd("run")
|
||||
|
||||
# Run drop queries
|
||||
if not args.keep_created_tables and not args.use_existing_tables:
|
||||
@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
|
||||
reportStageEnd('drop-2')
|
||||
reportStageEnd("drop-2")
|
||||
|
@ -12,9 +12,13 @@ import pprint
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
parser = argparse.ArgumentParser(description='Create performance test report')
|
||||
parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
|
||||
help='Which report to build')
|
||||
parser = argparse.ArgumentParser(description="Create performance test report")
|
||||
parser.add_argument(
|
||||
"--report",
|
||||
default="main",
|
||||
choices=["main", "all-queries"],
|
||||
help="Which report to build",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
tables = []
|
||||
@ -31,8 +35,8 @@ unstable_partial_queries = 0
|
||||
# max seconds to run one query by itself, not counting preparation
|
||||
allowed_single_run_time = 2
|
||||
|
||||
color_bad='#ffb0c0'
|
||||
color_good='#b0d050'
|
||||
color_bad = "#ffb0c0"
|
||||
color_good = "#b0d050"
|
||||
|
||||
header_template = """
|
||||
<!DOCTYPE html>
|
||||
@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
|
||||
table_anchor = 0
|
||||
row_anchor = 0
|
||||
|
||||
|
||||
def currentTableAnchor():
|
||||
global table_anchor
|
||||
return f'{table_anchor}'
|
||||
return f"{table_anchor}"
|
||||
|
||||
|
||||
def newTableAnchor():
|
||||
global table_anchor
|
||||
table_anchor += 1
|
||||
return currentTableAnchor()
|
||||
|
||||
|
||||
def currentRowAnchor():
|
||||
global row_anchor
|
||||
global table_anchor
|
||||
return f'{table_anchor}.{row_anchor}'
|
||||
return f"{table_anchor}.{row_anchor}"
|
||||
|
||||
|
||||
def nextRowAnchor():
|
||||
global row_anchor
|
||||
global table_anchor
|
||||
return f'{table_anchor}.{row_anchor + 1}'
|
||||
return f"{table_anchor}.{row_anchor + 1}"
|
||||
|
||||
|
||||
def advanceRowAnchor():
|
||||
global row_anchor
|
||||
@ -180,41 +189,56 @@ def advanceRowAnchor():
|
||||
def tr(x, anchor=None):
|
||||
# return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
|
||||
anchor = anchor if anchor else advanceRowAnchor()
|
||||
return f'<tr id={anchor}>{x}</tr>'
|
||||
return f"<tr id={anchor}>{x}</tr>"
|
||||
|
||||
def td(value, cell_attributes = ''):
|
||||
return '<td {cell_attributes}>{value}</td>'.format(
|
||||
cell_attributes = cell_attributes,
|
||||
value = value)
|
||||
|
||||
def th(value, cell_attributes = ''):
|
||||
return '<th {cell_attributes}>{value}</th>'.format(
|
||||
cell_attributes = cell_attributes,
|
||||
value = value)
|
||||
def td(value, cell_attributes=""):
|
||||
return "<td {cell_attributes}>{value}</td>".format(
|
||||
cell_attributes=cell_attributes, value=value
|
||||
)
|
||||
|
||||
|
||||
def th(value, cell_attributes=""):
|
||||
return "<th {cell_attributes}>{value}</th>".format(
|
||||
cell_attributes=cell_attributes, value=value
|
||||
)
|
||||
|
||||
|
||||
def tableRow(cell_values, cell_attributes=[], anchor=None):
|
||||
return tr(
|
||||
''.join([td(v, a)
|
||||
"".join(
|
||||
[
|
||||
td(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes,
|
||||
fillvalue = '')
|
||||
if a is not None and v is not None]),
|
||||
anchor)
|
||||
cell_values, cell_attributes, fillvalue=""
|
||||
)
|
||||
if a is not None and v is not None
|
||||
]
|
||||
),
|
||||
anchor,
|
||||
)
|
||||
|
||||
|
||||
def tableHeader(cell_values, cell_attributes=[]):
|
||||
return tr(
|
||||
''.join([th(v, a)
|
||||
"".join(
|
||||
[
|
||||
th(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes,
|
||||
fillvalue = '')
|
||||
if a is not None and v is not None]))
|
||||
cell_values, cell_attributes, fillvalue=""
|
||||
)
|
||||
if a is not None and v is not None
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
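Editor's note: `tableRow` and `tableHeader` above pair each cell value with a per-column attribute via `itertools.zip_longest`, padding missing attributes with `""` and dropping any column whose attribute was explicitly set to `None`. A small sketch of just that pairing logic on toy data (the values and style string are illustrative).

import itertools

cell_values = ["0.12", "0.15", "perf_test", "3", "SELECT 1"]
# One attribute per column; None means "hide this column entirely".
cell_attributes = ['style="background: #ffb0c0"', None]

cells = [
    f"<td {a}>{v}</td>"
    for v, a in itertools.zip_longest(cell_values, cell_attributes, fillvalue="")
    # A None attribute (or value) drops the cell, as in tableRow/tableHeader above.
    if a is not None and v is not None
]

print("".join(cells))
# Only four <td> cells come out: the second column is skipped because its attribute is None.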
def tableStart(title):
|
||||
cls = '-'.join(title.lower().split(' ')[:3]);
|
||||
cls = "-".join(title.lower().split(" ")[:3])
|
||||
global table_anchor
|
||||
table_anchor = cls
|
||||
anchor = currentTableAnchor()
|
||||
help_anchor = '-'.join(title.lower().split(' '));
|
||||
help_anchor = "-".join(title.lower().split(" "))
|
||||
return f"""
|
||||
<h2 id="{anchor}">
|
||||
<a class="cancela" href="#{anchor}">{title}</a>
|
||||
@ -223,12 +247,14 @@ def tableStart(title):
|
||||
<table class="{cls}">
|
||||
"""
|
||||
|
||||
|
||||
def tableEnd():
|
||||
return '</table>'
|
||||
return "</table>"
|
||||
|
||||
|
||||
def tsvRows(n):
|
||||
try:
|
||||
with open(n, encoding='utf-8') as fd:
|
||||
with open(n, encoding="utf-8") as fd:
|
||||
result = []
|
||||
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
|
||||
new_row = []
|
||||
@ -237,27 +263,32 @@ def tsvRows(n):
|
||||
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
|
||||
# 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'.
|
||||
|
||||
new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8'))
|
||||
new_row.append(
|
||||
e.encode("utf-8")
|
||||
.decode("unicode-escape")
|
||||
.encode("latin1")
|
||||
.decode("utf-8")
|
||||
)
|
||||
result.append(new_row)
|
||||
return result
|
||||
|
||||
except:
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
return []
|
||||
|
||||
|
||||
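Editor's note: the `tsvRows` change above keeps the decode chain that first expands TSV backslash escapes (`decode("unicode-escape")`) and then undoes the latin-1 reading that step implies, so UTF-8 text survives the round trip intact. A worked example on a sample field of my own, not taken from a real report file.

# A field as it might appear in the TSV: the tab inside the query text was
# escaped as a literal backslash-t, and the Cyrillic text is ordinary UTF-8.
field = "SELECT 'Привет\\tмир'"

fixed = (
    field.encode("utf-8")
    .decode("unicode-escape")   # expands \t, \n, \\ into real characters
    .encode("latin1")           # undoes the latin-1 reading that unicode-escape implies
    .decode("utf-8")            # re-reads the original bytes as UTF-8
)

print(fixed)              # SELECT 'Привет<TAB>мир', with a real tab character
assert "\t" in fixed
assert "Привет" in fixed  # the non-ASCII text survives the round trip unchanged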
def htmlRows(n):
|
||||
rawRows = tsvRows(n)
|
||||
result = ''
|
||||
result = ""
|
||||
for row in rawRows:
|
||||
result += tableRow(row)
|
||||
return result
|
||||
|
||||
|
||||
def addSimpleTable(caption, columns, rows, pos=None):
|
||||
global tables
|
||||
text = ''
|
||||
text = ""
|
||||
if not rows:
|
||||
return
|
||||
|
||||
@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None):
|
||||
text += tableEnd()
|
||||
tables.insert(pos if pos else len(tables), text)
|
||||
|
||||
|
||||
def add_tested_commits():
|
||||
global report_errors
|
||||
try:
|
||||
addSimpleTable('Tested Commits', ['Old', 'New'],
|
||||
[['<pre>{}</pre>'.format(x) for x in
|
||||
[open('left-commit.txt').read(),
|
||||
open('right-commit.txt').read()]]])
|
||||
addSimpleTable(
|
||||
"Tested Commits",
|
||||
["Old", "New"],
|
||||
[
|
||||
[
|
||||
"<pre>{}</pre>".format(x)
|
||||
for x in [
|
||||
open("left-commit.txt").read(),
|
||||
open("right-commit.txt").read(),
|
||||
]
|
||||
]
|
||||
],
|
||||
)
|
||||
except:
|
||||
# Don't fail if no commit info -- maybe it's a manual run.
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
|
||||
|
||||
def add_report_errors():
|
||||
global tables
|
||||
global report_errors
|
||||
# Add the errors reported by various steps of comparison script
|
||||
try:
|
||||
report_errors += [l.strip() for l in open('report/errors.log')]
|
||||
report_errors += [l.strip() for l in open("report/errors.log")]
|
||||
except:
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
|
||||
if not report_errors:
|
||||
return
|
||||
|
||||
text = tableStart('Errors while Building the Report')
|
||||
text += tableHeader(['Error'])
|
||||
text = tableStart("Errors while Building the Report")
|
||||
text += tableHeader(["Error"])
|
||||
for x in report_errors:
|
||||
text += tableRow([x])
|
||||
text += tableEnd()
|
||||
# Insert after Tested Commits
|
||||
tables.insert(1, text)
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def add_errors_explained():
|
||||
if not errors_explained:
|
||||
return
|
||||
|
||||
text = '<a name="fail1"/>'
|
||||
text += tableStart('Error Summary')
|
||||
text += tableHeader(['Description'])
|
||||
text += tableStart("Error Summary")
|
||||
text += tableHeader(["Description"])
|
||||
for row in errors_explained:
|
||||
text += tableRow(row)
|
||||
text += tableEnd()
|
||||
@ -321,59 +364,81 @@ def add_errors_explained():
|
||||
tables.insert(1, text)
|
||||
|
||||
|
||||
if args.report == 'main':
|
||||
if args.report == "main":
|
||||
print((header_template.format()))
|
||||
|
||||
add_tested_commits()
|
||||
|
||||
|
||||
run_error_rows = tsvRows('run-errors.tsv')
|
||||
run_error_rows = tsvRows("run-errors.tsv")
|
||||
error_tests += len(run_error_rows)
|
||||
addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows)
|
||||
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
|
||||
if run_error_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
|
||||
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
|
||||
error_tests += len(slow_on_client_rows)
|
||||
addSimpleTable('Slow on Client',
|
||||
['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'],
|
||||
slow_on_client_rows)
|
||||
addSimpleTable(
|
||||
"Slow on Client",
|
||||
["Client time, s", "Server time, s", "Ratio", "Test", "Query"],
|
||||
slow_on_client_rows,
|
||||
)
|
||||
if slow_on_client_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
|
||||
]
|
||||
)
|
||||
|
||||
unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv')
|
||||
unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
|
||||
error_tests += len(unmarked_short_rows)
|
||||
addSimpleTable('Unexpected Query Duration',
|
||||
['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'],
|
||||
unmarked_short_rows)
|
||||
addSimpleTable(
|
||||
"Unexpected Query Duration",
|
||||
["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
|
||||
unmarked_short_rows,
|
||||
)
|
||||
if unmarked_short_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
|
||||
]
|
||||
)
|
||||
|
||||
def add_partial():
|
||||
rows = tsvRows('report/partial-queries-report.tsv')
|
||||
rows = tsvRows("report/partial-queries-report.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
global unstable_partial_queries, slow_average_tests, tables
|
||||
text = tableStart('Partial Queries')
|
||||
columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
|
||||
text = tableStart("Partial Queries")
|
||||
columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
|
||||
text += tableHeader(columns)
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
for row in rows:
|
||||
anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}'
|
||||
anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
|
||||
if float(row[1]) > 0.10:
|
||||
attrs[1] = f'style="background: {color_bad}"'
|
||||
unstable_partial_queries += 1
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[1] = ''
|
||||
attrs[1] = ""
|
||||
if float(row[0]) > allowed_single_run_time:
|
||||
attrs[0] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
|
||||
]
|
||||
)
|
||||
slow_average_tests += 1
|
||||
else:
|
||||
attrs[0] = ''
|
||||
attrs[0] = ""
|
||||
text += tableRow(row, attrs, anchor)
|
||||
text += tableEnd()
|
||||
tables.append(text)
|
||||
@ -381,41 +446,45 @@ if args.report == 'main':
|
||||
add_partial()
|
||||
|
||||
def add_changes():
|
||||
rows = tsvRows('report/changed-perf.tsv')
|
||||
rows = tsvRows("report/changed-perf.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
global faster_queries, slower_queries, tables
|
||||
|
||||
text = tableStart('Changes in Performance')
|
||||
text = tableStart("Changes in Performance")
|
||||
columns = [
|
||||
'Old, s', # 0
|
||||
'New, s', # 1
|
||||
'Ratio of speedup (-) or slowdown (+)', # 2
|
||||
'Relative difference (new − old) / old', # 3
|
||||
'p < 0.01 threshold', # 4
|
||||
'', # Failed # 5
|
||||
'Test', # 6
|
||||
'#', # 7
|
||||
'Query', # 8
|
||||
"Old, s", # 0
|
||||
"New, s", # 1
|
||||
"Ratio of speedup (-) or slowdown (+)", # 2
|
||||
"Relative difference (new − old) / old", # 3
|
||||
"p < 0.01 threshold", # 4
|
||||
"", # Failed # 5
|
||||
"Test", # 6
|
||||
"#", # 7
|
||||
"Query", # 8
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[5] = None
|
||||
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for row in rows:
|
||||
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}'
|
||||
anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
|
||||
if int(row[5]):
|
||||
if float(row[3]) < 0.:
|
||||
if float(row[3]) < 0.0:
|
||||
faster_queries += 1
|
||||
attrs[2] = attrs[3] = f'style="background: {color_good}"'
|
||||
else:
|
||||
slower_queries += 1
|
||||
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[2] = attrs[3] = ''
|
||||
attrs[2] = attrs[3] = ""
|
||||
|
||||
text += tableRow(row, attrs, anchor)
|
||||
|
||||
@ -427,35 +496,35 @@ if args.report == 'main':
|
||||
def add_unstable_queries():
|
||||
global unstable_queries, very_unstable_queries, tables
|
||||
|
||||
unstable_rows = tsvRows('report/unstable-queries.tsv')
|
||||
unstable_rows = tsvRows("report/unstable-queries.tsv")
|
||||
if not unstable_rows:
|
||||
return
|
||||
|
||||
unstable_queries += len(unstable_rows)
|
||||
|
||||
columns = [
|
||||
'Old, s', #0
|
||||
'New, s', #1
|
||||
'Relative difference (new - old)/old', #2
|
||||
'p < 0.01 threshold', #3
|
||||
'', # Failed #4
|
||||
'Test', #5
|
||||
'#', #6
|
||||
'Query' #7
|
||||
"Old, s", # 0
|
||||
"New, s", # 1
|
||||
"Relative difference (new - old)/old", # 2
|
||||
"p < 0.01 threshold", # 3
|
||||
"", # Failed #4
|
||||
"Test", # 5
|
||||
"#", # 6
|
||||
"Query", # 7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[4] = None
|
||||
|
||||
text = tableStart('Unstable Queries')
|
||||
text = tableStart("Unstable Queries")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for r in unstable_rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
|
||||
if int(r[4]):
|
||||
very_unstable_queries += 1
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[3] = ''
|
||||
attrs[3] = ""
|
||||
# Just don't add the slightly unstable queries we don't consider
|
||||
# errors. It's not clear what the user should do with them.
|
||||
continue
|
||||
@ -470,53 +539,70 @@ if args.report == 'main':
|
||||
|
||||
add_unstable_queries()
|
||||
|
||||
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
|
||||
addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows)
|
||||
skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
|
||||
addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
|
||||
|
||||
addSimpleTable('Test Performance Changes',
|
||||
['Test', 'Ratio of speedup (-) or slowdown (+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'],
|
||||
tsvRows('report/test-perf-changes.tsv'))
|
||||
addSimpleTable(
|
||||
"Test Performance Changes",
|
||||
[
|
||||
"Test",
|
||||
"Ratio of speedup (-) or slowdown (+)",
|
||||
"Queries",
|
||||
"Total not OK",
|
||||
"Changed perf",
|
||||
"Unstable",
|
||||
],
|
||||
tsvRows("report/test-perf-changes.tsv"),
|
||||
)
|
||||
|
||||
def add_test_times():
|
||||
global slow_average_tests, tables
|
||||
rows = tsvRows('report/test-times.tsv')
|
||||
rows = tsvRows("report/test-times.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = [
|
||||
'Test', #0
|
||||
'Wall clock time, entire test, s', #1
|
||||
'Total client time for measured query runs, s', #2
|
||||
'Queries', #3
|
||||
'Longest query, total for measured runs, s', #4
|
||||
'Wall clock time per query, s', #5
|
||||
'Shortest query, total for measured runs, s', #6
|
||||
'', # Runs #7
|
||||
"Test", # 0
|
||||
"Wall clock time, entire test, s", # 1
|
||||
"Total client time for measured query runs, s", # 2
|
||||
"Queries", # 3
|
||||
"Longest query, total for measured runs, s", # 4
|
||||
"Wall clock time per query, s", # 5
|
||||
"Shortest query, total for measured runs, s", # 6
|
||||
"", # Runs #7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[7] = None
|
||||
|
||||
text = tableStart('Test Times')
|
||||
text = tableStart("Test Times")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[0]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[0]}"
|
||||
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
|
||||
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs:
|
||||
if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
|
||||
# FIXME should be 15s max -- investigate parallel_insert
|
||||
slow_average_tests += 1
|
||||
attrs[5] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[5] = ''
|
||||
attrs[5] = ""
|
||||
|
||||
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs:
|
||||
if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
|
||||
slow_average_tests += 1
|
||||
attrs[4] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[4] = ''
|
||||
attrs[4] = ""
|
||||
|
||||
text += tableRow(r, attrs, anchor)
|
||||
|
||||
@ -525,10 +611,17 @@ if args.report == 'main':
|
||||
|
||||
add_test_times()
|
||||
|
||||
addSimpleTable('Metric Changes',
|
||||
['Metric', 'Old median value', 'New median value',
|
||||
'Relative difference', 'Times difference'],
|
||||
tsvRows('metrics/changes.tsv'))
|
||||
addSimpleTable(
|
||||
"Metric Changes",
|
||||
[
|
||||
"Metric",
|
||||
"Old median value",
|
||||
"New median value",
|
||||
"Relative difference",
|
||||
"Times difference",
|
||||
],
|
||||
tsvRows("metrics/changes.tsv"),
|
||||
)
|
||||
|
||||
add_report_errors()
|
||||
add_errors_explained()
|
||||
@ -536,7 +629,8 @@ if args.report == 'main':
|
||||
for t in tables:
|
||||
print(t)
|
||||
|
||||
print(f"""
|
||||
print(
|
||||
f"""
|
||||
</div>
|
||||
<p class="links">
|
||||
<a href="all-queries.html">All queries</a>
|
||||
@ -546,104 +640,111 @@ if args.report == 'main':
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
status = 'success'
|
||||
message = 'See the report'
|
||||
status = "success"
|
||||
message = "See the report"
|
||||
message_array = []
|
||||
|
||||
if slow_average_tests:
|
||||
status = 'failure'
|
||||
message_array.append(str(slow_average_tests) + ' too long')
|
||||
status = "failure"
|
||||
message_array.append(str(slow_average_tests) + " too long")
|
||||
|
||||
if faster_queries:
|
||||
message_array.append(str(faster_queries) + ' faster')
|
||||
message_array.append(str(faster_queries) + " faster")
|
||||
|
||||
if slower_queries:
|
||||
if slower_queries > 3:
|
||||
status = 'failure'
|
||||
message_array.append(str(slower_queries) + ' slower')
|
||||
status = "failure"
|
||||
message_array.append(str(slower_queries) + " slower")
|
||||
|
||||
if unstable_partial_queries:
|
||||
very_unstable_queries += unstable_partial_queries
|
||||
status = 'failure'
|
||||
status = "failure"
|
||||
|
||||
# Don't show mildly unstable queries, only the very unstable ones we
|
||||
# treat as errors.
|
||||
if very_unstable_queries:
|
||||
if very_unstable_queries > 5:
|
||||
error_tests += very_unstable_queries
|
||||
status = 'failure'
|
||||
message_array.append(str(very_unstable_queries) + ' unstable')
|
||||
status = "failure"
|
||||
message_array.append(str(very_unstable_queries) + " unstable")
|
||||
|
||||
error_tests += slow_average_tests
|
||||
if error_tests:
|
||||
status = 'failure'
|
||||
message_array.insert(0, str(error_tests) + ' errors')
|
||||
status = "failure"
|
||||
message_array.insert(0, str(error_tests) + " errors")
|
||||
|
||||
if message_array:
|
||||
message = ', '.join(message_array)
|
||||
message = ", ".join(message_array)
|
||||
|
||||
if report_errors:
|
||||
status = 'failure'
|
||||
message = 'Errors while building the report.'
|
||||
status = "failure"
|
||||
message = "Errors while building the report."
|
||||
|
||||
print(("""
|
||||
print(
|
||||
(
|
||||
"""
|
||||
<!--status: {status}-->
|
||||
<!--message: {message}-->
|
||||
""".format(status=status, message=message)))
|
||||
""".format(
|
||||
status=status, message=message
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
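Editor's note: the main report ends by printing `<!--status: ...-->` and `<!--message: ...-->` into the generated HTML, presumably so a CI wrapper can pick up the check result without parsing the tables. A hedged sketch of how such a consumer could extract them; the regexes and sample values are mine, not code from this commit.

import re

# A trailer like the one printed above (values are illustrative).
html_tail = """
<!--status: failure-->
<!--message: 1 errors, 2 slower-->
"""

status = re.search(r"<!--status: (.*)-->", html_tail).group(1)
message = re.search(r"<!--message: (.*)-->", html_tail).group(1)
print(status, "-", message)  # failure - 1 errors, 2 slower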
elif args.report == 'all-queries':
|
||||
elif args.report == "all-queries":
|
||||
|
||||
print((header_template.format()))
|
||||
|
||||
add_tested_commits()
|
||||
|
||||
def add_all_queries():
|
||||
rows = tsvRows('report/all-queries.tsv')
|
||||
rows = tsvRows("report/all-queries.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = [
|
||||
'', # Changed #0
|
||||
'', # Unstable #1
|
||||
'Old, s', #2
|
||||
'New, s', #3
|
||||
'Ratio of speedup (-) or slowdown (+)', #4
|
||||
'Relative difference (new − old) / old', #5
|
||||
'p < 0.01 threshold', #6
|
||||
'Test', #7
|
||||
'#', #8
|
||||
'Query', #9
|
||||
"", # Changed #0
|
||||
"", # Unstable #1
|
||||
"Old, s", # 2
|
||||
"New, s", # 3
|
||||
"Ratio of speedup (-) or slowdown (+)", # 4
|
||||
"Relative difference (new − old) / old", # 5
|
||||
"p < 0.01 threshold", # 6
|
||||
"Test", # 7
|
||||
"#", # 8
|
||||
"Query", # 9
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[0] = None
|
||||
attrs[1] = None
|
||||
|
||||
text = tableStart('All Query Times')
|
||||
text = tableStart("All Query Times")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
|
||||
if int(r[1]):
|
||||
attrs[6] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[6] = ''
|
||||
attrs[6] = ""
|
||||
|
||||
if int(r[0]):
|
||||
if float(r[5]) > 0.:
|
||||
if float(r[5]) > 0.0:
|
||||
attrs[4] = attrs[5] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[4] = attrs[5] = f'style="background: {color_good}"'
|
||||
else:
|
||||
attrs[4] = attrs[5] = ''
|
||||
attrs[4] = attrs[5] = ""
|
||||
|
||||
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
|
||||
attrs[2] = f'style="background: {color_bad}"'
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[2] = ''
|
||||
attrs[3] = ''
|
||||
attrs[2] = ""
|
||||
attrs[3] = ""
|
||||
|
||||
text += tableRow(r, attrs, anchor)
|
||||
|
||||
@ -655,7 +756,8 @@ elif args.report == 'all-queries':
|
||||
for t in tables:
|
||||
print(t)
|
||||
|
||||
print(f"""
|
||||
print(
|
||||
f"""
|
||||
</div>
|
||||
<p class="links">
|
||||
<a href="report.html">Main report</a>
|
||||
@ -665,4 +767,5 @@ elif args.report == 'all-queries':
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
@ -7,18 +7,19 @@ import csv
|
||||
|
||||
RESULT_LOG_NAME = "run.log"
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
|
||||
status = "success"
|
||||
description = 'Server started and responded'
|
||||
description = "Server started and responded"
|
||||
summary = [("Smoke test", "OK")]
|
||||
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log:
|
||||
lines = run_log.read().split('\n')
|
||||
if not lines or lines[0].strip() != 'OK':
|
||||
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
|
||||
lines = run_log.read().split("\n")
|
||||
if not lines or lines[0].strip() != "OK":
|
||||
status = "failure"
|
||||
logging.info("Lines is not ok: %s", str('\n'.join(lines)))
|
||||
logging.info("Lines is not ok: %s", str("\n".join(lines)))
|
||||
summary = [("Smoke test", "FAIL")]
|
||||
description = 'Server failed to respond, see result in logs'
|
||||
description = "Server failed to respond, see result in logs"
|
||||
|
||||
result_logs = []
|
||||
server_log_path = os.path.join(result_folder, "clickhouse-server.log")
|
||||
@ -38,20 +39,22 @@ def process_result(result_folder):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of split build smoke test"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
|
@ -10,11 +10,18 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
summary = []
|
||||
paths = []
|
||||
tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"]
|
||||
tests = [
|
||||
"TLPWhere",
|
||||
"TLPGroupBy",
|
||||
"TLPHaving",
|
||||
"TLPWhereGroupBy",
|
||||
"TLPDistinct",
|
||||
"TLPAggregate",
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
err_path = '{}/{}.err'.format(result_folder, test)
|
||||
out_path = '{}/{}.out'.format(result_folder, test)
|
||||
err_path = "{}/{}.err".format(result_folder, test)
|
||||
out_path = "{}/{}.out".format(result_folder, test)
|
||||
if not os.path.exists(err_path):
|
||||
logging.info("No output err on path %s", err_path)
|
||||
summary.append((test, "SKIPPED"))
|
||||
@ -23,24 +30,24 @@ def process_result(result_folder):
|
||||
else:
|
||||
paths.append(err_path)
|
||||
paths.append(out_path)
|
||||
with open(err_path, 'r') as f:
|
||||
if 'AssertionError' in f.read():
|
||||
with open(err_path, "r") as f:
|
||||
if "AssertionError" in f.read():
|
||||
summary.append((test, "FAIL"))
|
||||
status = 'failure'
|
||||
status = "failure"
|
||||
else:
|
||||
summary.append((test, "OK"))
|
||||
|
||||
logs_path = '{}/logs.tar.gz'.format(result_folder)
|
||||
logs_path = "{}/logs.tar.gz".format(result_folder)
|
||||
if not os.path.exists(logs_path):
|
||||
logging.info("No logs tar on path %s", logs_path)
|
||||
else:
|
||||
paths.append(logs_path)
|
||||
stdout_path = '{}/stdout.log'.format(result_folder)
|
||||
stdout_path = "{}/stdout.log".format(result_folder)
|
||||
if not os.path.exists(stdout_path):
|
||||
logging.info("No stdout log on path %s", stdout_path)
|
||||
else:
|
||||
paths.append(stdout_path)
|
||||
stderr_path = '{}/stderr.log'.format(result_folder)
|
||||
stderr_path = "{}/stderr.log".format(result_folder)
|
||||
if not os.path.exists(stderr_path):
|
||||
logging.info("No stderr log on path %s", stderr_path)
|
||||
else:
|
||||
@ -52,20 +59,22 @@ def process_result(result_folder):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of sqlancer test"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
|
@ -25,6 +25,7 @@ RUN apt-get update -y \
|
||||
brotli
|
||||
|
||||
COPY ./stress /stress
|
||||
COPY ./download_previous_release /download_previous_release
|
||||
COPY run.sh /
|
||||
|
||||
ENV DATASETS="hits visits"
|
||||
|
110  docker/test/stress/download_previous_release  (new executable file)
@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.packages.urllib3.util.retry import Retry
|
||||
|
||||
CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags"
|
||||
|
||||
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb"
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static-dbg_{version}_amd64.deb"
|
||||
CLICKHOUSE_SERVER_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-server_{version}_all.deb"
|
||||
CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-client_{version}_amd64.deb"
|
||||
|
||||
|
||||
CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb"
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = "clickhouse-common-static-dbg_{version}_amd64.deb"
|
||||
CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb"
|
||||
CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb"
|
||||
|
||||
PACKETS_DIR = "previous_release_package_folder/"
|
||||
VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)"
|
||||
|
||||
|
||||
class Version:
|
||||
def __init__(self, version):
|
||||
self.version = version
|
||||
|
||||
def __lt__(self, other):
|
||||
return list(map(int, self.version.split('.'))) < list(map(int, other.version.split('.')))
|
||||
|
||||
def __str__(self):
|
||||
return self.version
|
||||
|
||||
|
||||
class ReleaseInfo:
|
||||
def __init__(self, version, release_type):
|
||||
self.version = version
|
||||
self.type = release_type
|
||||
|
||||
|
||||
def find_previous_release(server_version, releases):
|
||||
releases.sort(key=lambda x: x.version, reverse=True)
|
||||
for release in releases:
|
||||
if release.version < server_version:
|
||||
return True, release
|
||||
|
||||
return False, None
|
||||
|
||||
|
||||
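Editor's note: `Version.__lt__` above compares dotted version strings numerically, component by component, and `find_previous_release` sorts candidates newest-first and returns the first one strictly older than the running server. A quick illustration of why the numeric split matters; the version numbers are made up.

# Plain string comparison gets "21.10" vs "21.9" wrong.
old, new = "21.9.2.17", "21.10.1.8"

print(old < new)  # False: lexicographic comparison is misleading here

as_ints = lambda v: list(map(int, v.split(".")))
print(as_ints(old) < as_ints(new))  # True: the component-wise comparison Version.__lt__ uses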
def get_previous_release(server_version):
|
||||
page = 1
|
||||
found = False
|
||||
while not found:
|
||||
response = requests.get(CLICKHOUSE_TAGS_URL, {'page': page, 'per_page': 100})
|
||||
if not response.ok:
|
||||
raise Exception('Cannot load the list of tags from github: ' + response.reason)
|
||||
|
||||
releases_str = set(re.findall(VERSION_PATTERN, response.text))
|
||||
if len(releases_str) == 0:
|
||||
raise Exception('Cannot find previous release for ' + str(server_version) + ' server version')
|
||||
|
||||
releases = list(map(lambda x: ReleaseInfo(Version(x.split('-')[0]), x.split('-')[1]), releases_str))
|
||||
found, previous_release = find_previous_release(server_version, releases)
|
||||
page += 1
|
||||
|
||||
return previous_release
|
||||
|
||||
|
||||
def download_packet(url, local_file_name, retries=10, backoff_factor=0.3):
|
||||
session = requests.Session()
|
||||
retry = Retry(
|
||||
total=retries,
|
||||
read=retries,
|
||||
connect=retries,
|
||||
backoff_factor=backoff_factor,
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry)
|
||||
session.mount('http://', adapter)
|
||||
session.mount('https://', adapter)
|
||||
response = session.get(url)
|
||||
print(url)
|
||||
if response.ok:
|
||||
open(PACKETS_DIR + local_file_name, 'wb').write(response.content)
|
||||
|
||||
|
||||
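Editor's note: `download_packet` above mounts a `Retry` policy on a `requests` session, but it loads the whole .deb into memory via `response.content` and silently skips the file when the response is not OK. A streamed variant with an explicit error is sketched below; this is my own variation, not part of the commit.

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def download_file(url, path, retries=10, backoff_factor=0.3):
    session = requests.Session()
    retry = Retry(
        total=retries, read=retries, connect=retries, backoff_factor=backoff_factor
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount("http://", adapter)
    session.mount("https://", adapter)

    # stream=True avoids holding a multi-hundred-megabyte package in memory.
    with session.get(url, stream=True) as response:
        response.raise_for_status()  # fail loudly instead of skipping the file
        with open(path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)

# download_file("https://example.com/some.deb", "some.deb")  # placeholder URL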
def download_packets(release):
|
||||
if not os.path.exists(PACKETS_DIR):
|
||||
os.makedirs(PACKETS_DIR)
|
||||
|
||||
download_packet(CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version))
|
||||
|
||||
download_packet(CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version))
|
||||
|
||||
download_packet(CLICKHOUSE_SERVER_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version))
|
||||
|
||||
download_packet(CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
server_version = Version(input())
|
||||
previous_release = get_previous_release(server_version)
|
||||
download_packets(previous_release)
|
||||
|
@ -22,15 +22,19 @@ export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
|
||||
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
|
||||
|
||||
|
||||
dpkg -i package_folder/clickhouse-common-static_*.deb
|
||||
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
|
||||
dpkg -i package_folder/clickhouse-server_*.deb
|
||||
dpkg -i package_folder/clickhouse-client_*.deb
|
||||
function install_packages()
|
||||
{
|
||||
dpkg -i $1/clickhouse-common-static_*.deb
|
||||
dpkg -i $1/clickhouse-common-static-dbg_*.deb
|
||||
dpkg -i $1/clickhouse-server_*.deb
|
||||
dpkg -i $1/clickhouse-client_*.deb
|
||||
}
|
||||
|
||||
function configure()
|
||||
{
|
||||
@ -116,7 +120,7 @@ function start()
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 240 ]
|
||||
if [ "$counter" -gt ${1:-240} ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
@ -127,6 +131,9 @@ function start()
|
||||
# use root to match with current uid
|
||||
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
|
||||
sleep 0.5
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n200 /var/log/clickhouse-server/stderr.log
|
||||
tail -n200 /var/log/clickhouse-server/clickhouse-server.log
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
|
||||
@ -171,6 +178,8 @@ quit
|
||||
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
|
||||
}
|
||||
|
||||
install_packages package_folder
|
||||
|
||||
configure
|
||||
|
||||
./setup_minio.sh
|
||||
@ -246,6 +255,120 @@ zgrep -Fa "########################################" /test_output/* > /dev/null
|
||||
zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \
|
||||
&& echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv
|
||||
|
||||
echo -e "Backward compatibility check\n"
|
||||
|
||||
echo "Download previous release server"
|
||||
mkdir previous_release_package_folder
|
||||
clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
then
|
||||
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv
|
||||
stop
|
||||
|
||||
# Uninstall current packages
|
||||
dpkg --remove clickhouse-client
|
||||
dpkg --remove clickhouse-server
|
||||
dpkg --remove clickhouse-common-static-dbg
|
||||
dpkg --remove clickhouse-common-static
|
||||
|
||||
rm -rf /var/lib/clickhouse/*
|
||||
|
||||
# Install previous release packages
|
||||
install_packages previous_release_package_folder
|
||||
|
||||
# Start server from previous release
|
||||
configure
|
||||
start
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
# Install new package before running stress test because we should use new clickhouse-client and new clickhouse-test
|
||||
install_packages package_folder
|
||||
|
||||
mkdir tmp_stress_output
|
||||
|
||||
./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
rm -rf tmp_stress_output
|
||||
|
||||
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
|
||||
|
||||
stop
|
||||
|
||||
# Start new server
|
||||
configure
|
||||
start 500
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
# Let the server run for a while before checking log.
|
||||
sleep 60
|
||||
|
||||
stop
|
||||
|
||||
# Error messages (we should ignore some errors)
|
||||
zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
|
||||
-e "REPLICA_IS_ALREADY_ACTIVE" \
|
||||
-e "REPLICA_IS_ALREADY_EXIST" \
|
||||
-e "DDLWorker: Cannot parse DDL task query" \
|
||||
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
|
||||
-e "UNKNOWN_DATABASE" \
|
||||
-e "NETWORK_ERROR" \
|
||||
-e "UNKNOWN_TABLE" \
|
||||
-e "ZooKeeperClient" \
|
||||
-e "KEEPER_EXCEPTION" \
|
||||
-e "DirectoryMonitor" \
|
||||
-e "TABLE_IS_READ_ONLY" \
|
||||
-e "Code: 1000, e.code() = 111, Connection refused" \
|
||||
-e "UNFINISHED" \
|
||||
-e "Renaming unexpected part" \
|
||||
/var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /dev/null \
|
||||
&& echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
# Sanitizer asserts
|
||||
zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
# Logical errors
|
||||
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
# Crash
|
||||
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
else
|
||||
echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv
|
||||
fi
|
||||
|
||||
zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \
|
||||
&& echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
|
||||
# Put logs into /test_output/
|
||||
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
|
||||
do
|
||||
|
@ -47,7 +47,8 @@ def get_options(i):
|
||||
return ' '.join(options)
|
||||
|
||||
|
||||
def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit):
|
||||
def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit, backward_compatibility_check):
|
||||
backward_compatibility_check_option = '--backward-compatibility-check' if backward_compatibility_check else ''
|
||||
global_time_limit_option = ''
|
||||
if global_time_limit:
|
||||
global_time_limit_option = "--global_time_limit={}".format(global_time_limit)
|
||||
@ -56,7 +57,7 @@ def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_t
|
||||
pipes = []
|
||||
for i in range(0, len(output_paths)):
|
||||
f = open(output_paths[i], 'w')
|
||||
full_command = "{} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option)
|
||||
full_command = "{} {} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option, backward_compatibility_check_option)
|
||||
logging.info("Run func tests '%s'", full_command)
|
||||
p = Popen(full_command, shell=True, stdout=f, stderr=f)
|
||||
pipes.append(p)
|
||||
@ -168,6 +169,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--output-folder")
|
||||
parser.add_argument("--global-time-limit", type=int, default=1800)
|
||||
parser.add_argument("--num-parallel", type=int, default=cpu_count())
|
||||
parser.add_argument('--backward-compatibility-check', action='store_true')
|
||||
parser.add_argument('--hung-check', action='store_true', default=False)
|
||||
# make sense only for hung check
|
||||
parser.add_argument('--drop-databases', action='store_true', default=False)
|
||||
@ -176,7 +178,7 @@ if __name__ == "__main__":
|
||||
if args.drop_databases and not args.hung_check:
|
||||
raise Exception("--drop-databases only used in hung check (--hung-check)")
|
||||
func_pipes = []
|
||||
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit)
|
||||
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit, args.backward_compatibility_check)
|
||||
|
||||
logging.info("Will wait functests to finish")
|
||||
while True:
|
||||
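Editor's note: the change above threads a new `backward_compatibility_check` flag through `run_func_test`, which becomes an extra `--backward-compatibility-check` option on each spawned functional-test command. A small sketch of that string assembly; the test command and per-run options are placeholders.

def build_command(cmd, options, global_time_limit, skip_tests_option, backward_compatibility_check):
    # Mirrors the assembly of full_command in run_func_test above.
    backward_compatibility_check_option = (
        "--backward-compatibility-check" if backward_compatibility_check else ""
    )
    global_time_limit_option = (
        "--global_time_limit={}".format(global_time_limit) if global_time_limit else ""
    )
    return "{} {} {} {} {}".format(
        cmd, options, global_time_limit_option, skip_tests_option,
        backward_compatibility_check_option,
    )

print(
    build_command(
        "clickhouse-test",   # placeholder test command
        "--order=random",    # placeholder per-run options
        1200,
        "",                  # no skipped tests in this example
        backward_compatibility_check=True,
    )
)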
|
@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
python3-pip \
|
||||
shellcheck \
|
||||
yamllint \
|
||||
&& pip3 install codespell PyGithub boto3 unidiff dohq-artifactory
|
||||
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff
|
||||
|
||||
# Architecture of the image when BuildKit/buildx is used
|
||||
ARG TARGETARCH
|
||||
|
@ -14,6 +14,7 @@ def process_result(result_folder):
|
||||
("header duplicates", "duplicate_output.txt"),
|
||||
("shellcheck", "shellcheck_output.txt"),
|
||||
("style", "style_output.txt"),
|
||||
("black", "black_output.txt"),
|
||||
("typos", "typos_output.txt"),
|
||||
("whitespaces", "whitespaces_output.txt"),
|
||||
("workflows", "workflows_output.txt"),
|
||||
|
@ -7,11 +7,13 @@ echo "Check duplicates" | ts
|
||||
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
|
||||
echo "Check style" | ts
|
||||
./check-style -n |& tee /test_output/style_output.txt
|
||||
echo "Check python formatting with black" | ts
|
||||
./check-black -n |& tee /test_output/black_output.txt
|
||||
echo "Check typos" | ts
|
||||
./check-typos |& tee /test_output/typos_output.txt
|
||||
echo "Check whitespaces" | ts
|
||||
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
|
||||
echo "Check sorkflows" | ts
|
||||
echo "Check workflows" | ts
|
||||
./check-workflows |& tee /test_output/workflows_output.txt
|
||||
echo "Check shell scripts with shellcheck" | ts
|
||||
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
|
||||
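Editor's note: the style-check image now installs `black` and the script above gains a `./check-black -n` step, so Python files that are not black-formatted fail the style check, which is why much of this commit is quote and line-wrapping churn. A hedged sketch of the same check through black's Python API; `black.format_str` and `black.Mode` are the public API as far as I know, and the sample snippet is mine.

import black

snippet = "x = {'a': 1,  'b': 2}\n"  # single quotes, double spaces

formatted = black.format_str(snippet, mode=black.Mode())
print(formatted, end="")  # x = {"a": 1, "b": 2}

if formatted != snippet:
    print("would fail the style check: file is not black-formatted")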
|
@ -22,9 +22,9 @@ def process_result(result_folder):
|
||||
total_other = 0
|
||||
test_results = []
|
||||
for test in results["tests"]:
|
||||
test_name = test['test']['test_name']
|
||||
test_result = test['result']['result_type'].upper()
|
||||
test_time = str(test['result']['message_rtime'])
|
||||
test_name = test["test"]["test_name"]
|
||||
test_result = test["result"]["result_type"].upper()
|
||||
test_time = str(test["result"]["message_rtime"])
|
||||
total_tests += 1
|
||||
if test_result == "OK":
|
||||
total_ok += 1
|
||||
@ -39,24 +39,29 @@ def process_result(result_folder):
|
||||
else:
|
||||
status = "success"
|
||||
|
||||
description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other)
|
||||
description = "failed: {}, passed: {}, other: {}".format(
|
||||
total_fail, total_ok, total_other
|
||||
)
|
||||
return status, description, test_results, [json_path, test_binary_log]
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests")
|
||||
parser.add_argument("--in-results-dir", default='./')
|
||||
parser.add_argument("--out-results-file", default='./test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='./check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of Testflows tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="./")
|
||||
parser.add_argument("--out-results-file", default="./test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="./check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
@ -64,4 +69,3 @@ if __name__ == "__main__":
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
||||
|
@ -5,24 +5,26 @@ import logging
|
||||
import argparse
|
||||
import csv
|
||||
|
||||
OK_SIGN = 'OK ]'
|
||||
FAILED_SIGN = 'FAILED ]'
|
||||
SEGFAULT = 'Segmentation fault'
|
||||
SIGNAL = 'received signal SIG'
|
||||
PASSED = 'PASSED'
|
||||
OK_SIGN = "OK ]"
|
||||
FAILED_SIGN = "FAILED ]"
|
||||
SEGFAULT = "Segmentation fault"
|
||||
SIGNAL = "received signal SIG"
|
||||
PASSED = "PASSED"
|
||||
|
||||
|
||||
def get_test_name(line):
|
||||
elements = reversed(line.split(' '))
|
||||
elements = reversed(line.split(" "))
|
||||
for element in elements:
|
||||
if '(' not in element and ')' not in element:
|
||||
if "(" not in element and ")" not in element:
|
||||
return element
|
||||
raise Exception("No test name in line '{}'".format(line))
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
summary = []
|
||||
total_counter = 0
|
||||
failed_counter = 0
|
||||
result_log_path = '{}/test_result.txt'.format(result_folder)
|
||||
result_log_path = "{}/test_result.txt".format(result_folder)
|
||||
if not os.path.exists(result_log_path):
|
||||
logging.info("No output log on path %s", result_log_path)
|
||||
return "exception", "No output log", []
|
||||
@ -30,7 +32,7 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
description = ""
|
||||
passed = False
|
||||
with open(result_log_path, 'r') as test_result:
|
||||
with open(result_log_path, "r") as test_result:
|
||||
for line in test_result:
|
||||
if OK_SIGN in line:
|
||||
logging.info("Found ok line: '%s'", line)
|
||||
@ -38,7 +40,7 @@ def process_result(result_folder):
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
summary.append((test_name, "OK"))
|
||||
total_counter += 1
|
||||
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line:
|
||||
elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
|
||||
logging.info("Found fail line: '%s'", line)
|
||||
test_name = get_test_name(line.strip())
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
@ -67,25 +69,30 @@ def process_result(result_folder):
|
||||
status = "failure"
|
||||
|
||||
if not description:
|
||||
description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter)
|
||||
description += "fail: {}, passed: {}".format(
|
||||
failed_counter, total_counter - failed_counter
|
||||
)
|
||||
|
||||
return status, description, summary
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of unit tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
|
||||
@ -93,4 +100,3 @@ if __name__ == "__main__":
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
||||
|
@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
|
||||
|
||||
RETRIES_SIGN = "Some tests were restarted"
|
||||
|
||||
|
||||
def process_test_log(log_path):
|
||||
total = 0
|
||||
skipped = 0
|
||||
@ -26,7 +27,7 @@ def process_test_log(log_path):
|
||||
retries = False
|
||||
task_timeout = True
|
||||
test_results = []
|
||||
with open(log_path, 'r') as test_file:
|
||||
with open(log_path, "r") as test_file:
|
||||
for line in test_file:
|
||||
original_line = line
|
||||
line = line.strip()
|
||||
@ -36,12 +37,15 @@ def process_test_log(log_path):
|
||||
hung = True
|
||||
if RETRIES_SIGN in line:
|
||||
retries = True
|
||||
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
|
||||
test_name = line.split(' ')[2].split(':')[0]
|
||||
if any(
|
||||
sign in line
|
||||
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
|
||||
):
|
||||
test_name = line.split(" ")[2].split(":")[0]
|
||||
|
||||
test_time = ''
|
||||
test_time = ""
|
||||
try:
|
||||
time_token = line.split(']')[1].strip().split()[0]
|
||||
time_token = line.split("]")[1].strip().split()[0]
|
||||
float(time_token)
|
||||
test_time = time_token
|
||||
except:
|
||||
@ -66,9 +70,22 @@ def process_test_log(log_path):
|
||||
elif len(test_results) > 0 and test_results[-1][1] == "FAIL":
|
||||
test_results[-1][3].append(original_line)
|
||||
|
||||
test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results]
|
||||
test_results = [
|
||||
(test[0], test[1], test[2], "".join(test[3])) for test in test_results
|
||||
]
|
||||
|
||||
return (
|
||||
total,
|
||||
skipped,
|
||||
unknown,
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
task_timeout,
|
||||
retries,
|
||||
test_results,
|
||||
)
|
||||
|
||||
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
|
||||
|
||||
def process_result(result_path):
|
||||
test_results = []
|
||||
@ -76,16 +93,26 @@ def process_result(result_path):
|
||||
description = ""
|
||||
files = os.listdir(result_path)
|
||||
if files:
|
||||
logging.info("Find files in result folder %s", ','.join(files))
|
||||
result_path = os.path.join(result_path, 'test_result.txt')
|
||||
logging.info("Find files in result folder %s", ",".join(files))
|
||||
result_path = os.path.join(result_path, "test_result.txt")
|
||||
else:
|
||||
result_path = None
|
||||
description = "No output log"
|
||||
state = "error"
|
||||
|
||||
if result_path and os.path.exists(result_path):
|
||||
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
|
||||
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
|
||||
(
|
||||
total,
|
||||
skipped,
|
||||
unknown,
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
task_timeout,
|
||||
retries,
|
||||
test_results,
|
||||
) = process_test_log(result_path)
|
||||
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
|
||||
logging.info("Is flacky check: %s", is_flacky_check)
|
||||
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
|
||||
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
|
||||
@ -120,20 +147,22 @@ def process_result(result_path):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of functional tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
|
||||
|
@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it

Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).

Python code is checked with [black](https://github.com/psf/black/).

### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the errors found by the check (invalid tabulation, etc.); an empty page means no errors were found. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).
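The same checks can be run before pushing. A hedged sketch, assuming a ClickHouse checkout whose helper scripts live under `utils/check-style/` (the same scripts the style-check image invokes above; adjust paths if your checkout differs):

``` bash
#!/usr/bin/env bash
# Hedged sketch: run the style checks locally before opening a PR.
# The utils/check-style/ location is an assumption about the checkout layout.
set -e
cd ./utils/check-style

./check-style -n          # regex-based C++ style checks
./check-typos             # typo check (uses codespell)
pip3 install --user black # formatter used by the "black" check
./check-black -n          # Python formatting check
```

Each script prints the offending lines and exits with a non-zero status on failure, which is what the CI job captures into the `*_output.txt` files above.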
@ -229,6 +229,25 @@ As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate

Just in case, it is worth mentioning that CLion creates a `build` path on its own and selects `debug` as the build type. For configuration it uses the version of CMake bundled with CLion rather than the one you installed, and it runs build tasks with `make` instead of `ninja`. This is normal behaviour, just keep it in mind to avoid confusion.

## Debugging

Many graphical IDEs come with an integrated debugger, but you can also use a standalone debugger.

### GDB

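The GDB subsection is left as a stub in this hunk; a minimal sketch of a typical session follows. The binary path, the query and the breakpoint symbol are illustrative assumptions, not part of the original text.

``` bash
# Hedged GDB sketch: assumes a debug build of clickhouse-client in the
# current directory; everything after --args is passed to the program.
gdb --args ./clickhouse-client --query "SELECT 1"

# Inside gdb:
#   (gdb) break DB::executeQuery   # set a breakpoint (symbol name is illustrative)
#   (gdb) run                      # start the client with the arguments above
#   (gdb) backtrace                # inspect the call stack once it stops
```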
### LLDB

# tell LLDB where to find the source code
settings set target.source-map /path/to/build/dir /path/to/source/dir

# configure LLDB to display code before/after currently executing line
settings set stop-line-count-before 10
settings set stop-line-count-after 10

target create ./clickhouse-client
# <set breakpoints here>
process launch -- --query="SELECT * FROM TAB"
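The same session can also be started in one shot from the shell; a hedged alternative to `target create`/`process launch`, where the binary path and query are placeholders:

``` bash
# Hedged one-shot invocation: everything after "--" is passed to clickhouse-client.
lldb -- ./clickhouse-client --query "SELECT 1"
```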

## Writing Code {#writing-code}

The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/

@ -5,30 +5,19 @@ toc_title: Playground

# ClickHouse Playground {#clickhouse-playground}

!!! warning "Warning"
    This service is deprecated and will be replaced in foreseeable future.

[ClickHouse Playground](https://play.clickhouse.com) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with.
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in Playground.

You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).

## Credentials {#credentials}

| Parameter | Value |
|:--------------------|:----------------------------------------|
| HTTPS endpoint | `https://play-api.clickhouse.com:8443` |
| Native TCP endpoint | `play-api.clickhouse.com:9440` |
| User | `playground` |
| Password | `clickhouse` |

There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above):

- 20.3 LTS: `play-api-v20-3.clickhouse.com`
- 19.14 LTS: `play-api-v19-14.clickhouse.com`

!!! note "Note"
    All these endpoints require a secure TLS connection.
|:--------------------|:-----------------------------------|
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
| Native TCP endpoint | `play.clickhouse.com:9440` |
| User | `explorer` or `play` |
| Password | (empty) |

## Limitations {#limitations}

@ -37,23 +26,18 @@ The queries are executed as a read-only user. It implies some limitations:
- DDL queries are not allowed
- INSERT queries are not allowed

The following settings are also enforced:

- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes)
- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows)
- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode)
- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time)
The service also has quotas on its usage.

## Examples {#examples}

HTTPS endpoint example with `curl`:

``` bash
curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets"
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
```

TCP endpoint example with [CLI](../interfaces/cli.md):

``` bash
clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'"
clickhouse client --secure --host play.clickhouse.com --user explorer
```
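Building on the HTTPS example above, a hedged variant that also requests a named output format; the query and the `FORMAT` choice are illustrative, not part of the original example:

``` bash
# Hedged sketch: same public endpoint as above, with an explicit output format.
curl "https://play.clickhouse.com/?user=explorer" \
     --data-binary "SELECT version(), now() FORMAT JSONEachRow"
```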
@ -51,6 +51,7 @@ The supported formats are:
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
| [ProtobufList](#protobuflist) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
@ -401,7 +402,7 @@ Parsing allows the presence of the additional field `tskv` without the equal sig

Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)).

When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost).
When formatting, strings are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost).

``` bash
$ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv

@ -409,7 +410,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR

\*By default, the delimiter is `,`. See the [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) setting for more information.

When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported.
When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Strings can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing strings without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported.

If setting [input_format_csv_empty_as_default](../operations/settings/settings.md#settings-input_format_csv_empty_as_default) is enabled,
empty unquoted input values are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too.
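The array and tuple rules described above are easiest to see with a quick local round-trip; a hedged sketch using `clickhouse-local`, where the column names and values are illustrative:

``` bash
# Hedged sketch of the CSV serialization rules described above.
clickhouse-local --query "SELECT 1 AS id, [10, 20, 30] AS xs, ('a', 'b') AS t FORMAT CSV"
# The array is written as one double-quoted string and the tuple is
# flattened into separate columns, e.g.:
#   1,"[10,20,30]","a","b"
```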
@ -1230,7 +1231,38 @@ See also [how to read/write length-delimited protobuf messages in popular langua

## ProtobufSingle {#protobufsingle}

Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters.
Same as [Protobuf](#protobuf) but for storing/parsing a single Protobuf message without a length delimiter.
As a result, only a single table row can be written/read.

## ProtobufList {#protobuflist}

Similar to Protobuf but rows are represented as a sequence of sub-messages contained in a message with a fixed name "Envelope".

Usage example:

``` sql
SELECT * FROM test.table FORMAT ProtobufList SETTINGS format_schema = 'schemafile:MessageType'
```

``` bash
cat protobuflist_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT ProtobufList SETTINGS format_schema='schemafile:MessageType'"
```

where the file `schemafile.proto` looks like this:

``` protobuf
syntax = "proto3";

message Envelope {
    message MessageType {
        string name = 1;
        string surname = 2;
        uint32 birthDate = 3;
        repeated string phoneNumbers = 4;
    };
    MessageType row = 1;
};
```

## Avro {#data-format-avro}
@ -1364,7 +1396,8 @@ The table below shows supported data types and how they match ClickHouse [data t
|
||||
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
|
||||
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
|
||||
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
|
||||
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
|
||||
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
@ -1421,7 +1454,8 @@ The table below shows supported data types and how they match ClickHouse [data t
|
||||
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` |
|
||||
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` |
|
||||
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
|
||||
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
|
||||
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
@ -1483,7 +1517,8 @@ The table below shows supported data types and how they match ClickHouse [data t
|
||||
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
|
||||
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
|
||||
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
|
||||
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
|
||||
| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
|
||||
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
|
||||
|
@ -55,7 +55,7 @@ Internal coordination settings are located in `<keeper_server>.<coordination_set
- `auto_forwarding` — Allow to forward write requests from followers to the leader (default: true).
- `shutdown_timeout` — Wait to finish internal connections and shutdown (ms) (default: 5000).
- `startup_timeout` — If the server doesn't connect to other quorum participants in the specified timeout it will terminate (ms) (default: 30000).
- `four_letter_word_white_list` — White list of 4lw commands (default: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro").
- `four_letter_word_allow_list` — Allow list of 4lw commands (default: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro").

Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contains a description of the servers.

@ -121,7 +121,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml

ClickHouse Keeper also provides 4lw commands that are almost the same as in ZooKeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively.

The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro".
The 4lw commands have an allow-list configuration, `four_letter_word_allow_list`, whose default value is "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro".

You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.

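A quick way to try these commands is sketched below. This is a hedged example that assumes Keeper is listening on localhost with the client port from the sample configuration shown later on this page (`tcp_port=2181`); adjust the host and port as needed.

``` bash
# Hedged sketch: send 4lw commands to ClickHouse Keeper with nc.
echo ruok | nc localhost 2181   # answers "imok" if the server is healthy
echo mntr | nc localhost 2181   # prints monitoring counters, one per line
```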
@ -201,7 +201,7 @@ Server stats reset.
```
server_id=1
tcp_port=2181
four_letter_word_white_list=*
four_letter_word_allow_list=*
log_storage_path=./coordination/logs
snapshot_storage_path=./coordination/snapshots
max_requests_batch_size=100
@ -225,15 +225,15 @@ This storage method works the same way as hashed and allows using date/time (arb
|
||||
Example: The table contains discounts for each advertiser in the format:
|
||||
|
||||
``` text
|
||||
+---------|-------------|-------------|------+
|
||||
+---------------|---------------------|-------------------|--------+
|
||||
| advertiser id | discount start date | discount end date | amount |
|
||||
+===============+=====================+===================+========+
|
||||
| 123 | 2015-01-01 | 2015-01-15 | 0.15 |
|
||||
+---------|-------------|-------------|------+
|
||||
+---------------|---------------------|-------------------|--------+
|
||||
| 123 | 2015-01-16 | 2015-01-31 | 0.25 |
|
||||
+---------|-------------|-------------|------+
|
||||
+---------------|---------------------|-------------------|--------+
|
||||
| 456 | 2015-01-01 | 2015-01-15 | 0.05 |
|
||||
+---------|-------------|-------------|------+
|
||||
+---------------|---------------------|-------------------|--------+
|
||||
```
|
||||
|
||||
To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
|
||||
@ -272,10 +272,10 @@ LAYOUT(RANGE_HASHED())
RANGE(MIN first MAX last)
```

To work with these dictionaries, you need to pass an additional argument to the `dictGetT` function, for which a range is selected:
To work with these dictionaries, you need to pass an additional argument to the `dictGet*` function, for which a range is selected:

``` sql
dictGetT('dict_name', 'attr_name', id, date)
dictGet*('dict_name', 'attr_name', id, date)
```

This function returns the value for the specified `id`s and the date range that includes the passed date.
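For a concrete call, a hedged sketch follows. The dictionary name `discounts`, the attribute `amount` and the key value are placeholders echoing the advertiser-discount example earlier on this page, not definitions from the original text.

``` bash
# Hedged sketch: look up the discount that was active on a given date in a
# range_hashed dictionary (names and values are illustrative).
clickhouse-client --query "
    SELECT dictGet('discounts', 'amount', toUInt64(123), toDate('2015-01-20'))
"
```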
@ -479,17 +479,17 @@ This type of storage is for mapping network prefixes (IP addresses) to metadata
|
||||
Example: The table contains network prefixes and their corresponding AS number and country code:
|
||||
|
||||
``` text
|
||||
+-----------|-----|------+
|
||||
+-----------------|-------|--------+
|
||||
| prefix | asn | cca2 |
|
||||
+=================+=======+========+
|
||||
| 202.79.32.0/20 | 17501 | NP |
|
||||
+-----------|-----|------+
|
||||
+-----------------|-------|--------+
|
||||
| 2620:0:870::/48 | 3856 | US |
|
||||
+-----------|-----|------+
|
||||
+-----------------|-------|--------+
|
||||
| 2a02:6b8:1::/48 | 13238 | RU |
|
||||
+-----------|-----|------+
|
||||
+-----------------|-------|--------+
|
||||
| 2001:db8::/32 | 65536 | ZZ |
|
||||
+-----------|-----|------+
|
||||
+-----------------|-------|--------+
|
||||
```
|
||||
|
||||
When using this type of layout, the structure must have a composite key.
|
||||
@ -538,10 +538,10 @@ PRIMARY KEY prefix

The key must have only one String type attribute that contains an allowed IP prefix. Other types are not supported yet.

For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys:
For queries, you must use the same functions (`dictGet*` with a tuple) as for dictionaries with composite keys:

``` sql
dictGetT('dict_name', 'attr_name', tuple(ip))
dictGet*('dict_name', 'attr_name', tuple(ip))
```

The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6:
@ -1392,12 +1392,24 @@ Returns the first element in the `arr1` array for which `func` returns something

Note that the `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted.

## arrayFirstOrNull(func, arr1, …) {#array-first-or-null}

Returns the first element in the `arr1` array for which `func` returns something other than 0. If there is no such element, returns null.

Note that the `arrayFirstOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted.

## arrayLast(func, arr1, …) {#array-last}

Returns the last element in the `arr1` array for which `func` returns something other than 0.

Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted.

## arrayLastOrNull(func, arr1, …) {#array-last-or-null}

Returns the last element in the `arr1` array for which `func` returns something other than 0. If there is no such element, returns null.

Note that the `arrayLastOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted.

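A quick sanity check of the two `*OrNull` variants described above; a hedged sketch where the arrays and predicates are illustrative:

``` bash
# Hedged sketch: contrast the OrNull variants on a predicate that matches
# nothing versus one that matches several elements.
clickhouse-local --query "
    SELECT
        arrayFirstOrNull(x -> x > 10, [1, 2, 3]) AS first_or_null,  -- no match, returns NULL
        arrayLastOrNull(x -> x > 1, [1, 2, 3]) AS last_or_null      -- last match, returns 3
    FORMAT Vertical
"
```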
## arrayFirstIndex(func, arr1, …) {#array-first-index}

Returns the index of the first element in the `arr1` array for which `func` returns something other than 0.
@ -2,6 +2,49 @@
|
||||
toc_priority: 76
|
||||
toc_title: Security Changelog
|
||||
---
|
||||
## Fixed in ClickHouse 21.10.2.15, 2021-10-18 {#fixed-in-clickhouse-release-21-10-2-215-2021-10-18}
|
||||
|
||||
### CVE-2021-43304 {#cve-2021-43304}
|
||||
|
||||
Heap buffer overflow in Clickhouse's LZ4 compression codec when parsing a malicious query. There is no verification that the copy operations in the LZ4::decompressImpl loop and especially the arbitrary copy operation wildCopy<copy_amount>(op, ip, copy_end), don’t exceed the destination buffer’s limits.
|
||||
|
||||
Credits: JFrog Security Research Team
|
||||
|
||||
### CVE-2021-43305 {#cve-2021-43305}
|
||||
|
||||
Heap buffer overflow in Clickhouse's LZ4 compression codec when parsing a malicious query. There is no verification that the copy operations in the LZ4::decompressImpl loop and especially the arbitrary copy operation wildCopy<copy_amount>(op, ip, copy_end), don’t exceed the destination buffer’s limits. This issue is very similar to CVE-2021-43304, but the vulnerable copy operation is in a different wildCopy call.
|
||||
|
||||
Credits: JFrog Security Research Team
|
||||
|
||||
### CVE-2021-42387 {#cve-2021-42387}
|
||||
|
||||
Heap out-of-bounds read in Clickhouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the upper bounds of the source of the copy operation.
|
||||
|
||||
Credits: JFrog Security Research Team
|
||||
|
||||
### CVE-2021-42388 {#cve-2021-42388}
|
||||
|
||||
Heap out-of-bounds read in Clickhouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the lower bounds of the source of the copy operation.
|
||||
|
||||
Credits: JFrog Security Research Team
|
||||
|
||||
### CVE-2021-42389 {#cve-2021-42389}
|
||||
|
||||
Divide-by-zero in Clickhouse's Delta compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0.
|
||||
|
||||
Credits: JFrog Security Research Team
|
||||
|
||||
### CVE-2021-42390 {#cve-2021-42390}
|
||||
|
||||
Divide-by-zero in Clickhouse's DeltaDouble compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0.
|
||||
|
||||
Credits: JFrog Security Research Team
|
||||
|
||||
### CVE-2021-42391 {#cve-2021-42391}
|
||||
|
||||
Divide-by-zero in Clickhouse's Gorilla compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0.
|
||||
|
||||
Credits: JFrog Security Research Team
|
||||
|
||||
## Fixed in ClickHouse 21.4.3.21, 2021-04-12 {#fixed-in-clickhouse-release-21-4-3-21-2021-04-12}
|
||||
|
||||
|
@ -5,58 +5,39 @@ toc_title: Playground
|
||||
|
||||
# ClickHouse Playground {#clickhouse-playground}
|
||||
|
||||
!!! warning "Warning"
|
||||
This service is deprecated and will be replaced in foreseeable future.
|
||||
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
|
||||
Several example datasets are available in Playground.
|
||||
|
||||
[ClickHouse Playground](https://play.clickhouse.com) では、サーバーやクラスタを設定することなく、即座にクエリを実行して ClickHouse を試すことができます。
|
||||
いくつかの例のデータセットは、Playground だけでなく、ClickHouse の機能を示すサンプルクエリとして利用可能です. また、 ClickHouse の LTS リリースで試すこともできます。
|
||||
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
|
||||
|
||||
任意の HTTP クライアントを使用してプレイグラウンドへのクエリを作成することができます。例えば[curl](https://curl.haxx.se)、[wget](https://www.gnu.org/software/wget/)、[JDBC](../interfaces/jdbc.md)または[ODBC](../interfaces/odbc.md)ドライバを使用して接続を設定します。
|
||||
ClickHouse をサポートするソフトウェア製品の詳細情報は[こちら](../interfaces/index.md)をご覧ください。
|
||||
## Credentials {#credentials}
|
||||
|
||||
## 資格情報 {#credentials}
|
||||
| Parameter | Value |
|
||||
|:--------------------|:-----------------------------------|
|
||||
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
|
||||
| Native TCP endpoint | `play.clickhouse.com:9440` |
|
||||
| User | `explorer` or `play` |
|
||||
| Password | (empty) |
|
||||
|
||||
| パラメータ | 値 |
|
||||
| :---------------------------- | :-------------------------------------- |
|
||||
| HTTPS エンドポイント | `https://play-api.clickhouse.com:8443` |
|
||||
| ネイティブ TCP エンドポイント | `play-api.clickhouse.com:9440` |
|
||||
| ユーザ名 | `playgrounnd` |
|
||||
| パスワード | `clickhouse` |
|
||||
## Limitations {#limitations}
|
||||
|
||||
The queries are executed as a read-only user. It implies some limitations:
|
||||
|
||||
特定のClickHouseのリリースで試すために、追加のエンドポイントがあります。(ポートとユーザー/パスワードは上記と同じです)。
|
||||
- DDL queries are not allowed
|
||||
- INSERT queries are not allowed
|
||||
|
||||
- 20.3 LTS: `play-api-v20-3.clickhouse.com`
|
||||
- 19.14 LTS: `play-api-v19-14.clickhouse.com`
|
||||
The service also have quotas on its usage.
|
||||
|
||||
!!! note "備考"
|
||||
これらのエンドポイントはすべて、安全なTLS接続が必要です。
|
||||
## Examples {#examples}
|
||||
|
||||
|
||||
## 制限事項 {#limitations}
|
||||
|
||||
クエリは読み取り専用のユーザとして実行されます。これにはいくつかの制限があります。
|
||||
|
||||
- DDL クエリは許可されていません。
|
||||
- INSERT クエリは許可されていません。
|
||||
|
||||
また、以下の設定がなされています。
|
||||
|
||||
- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
|
||||
- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
|
||||
- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
|
||||
- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time)
|
||||
|
||||
## 例 {#examples}
|
||||
|
||||
`curl` を用いて HTTPSエンドポイントへ接続する例:
|
||||
HTTPS endpoint example with `curl`:
|
||||
|
||||
``` bash
|
||||
curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets"
|
||||
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
|
||||
```
|
||||
|
||||
[CLI](../interfaces/cli.md) で TCP エンドポイントへ接続する例:
|
||||
TCP endpoint example with [CLI](../interfaces/cli.md):
|
||||
|
||||
``` bash
|
||||
clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'"
|
||||
clickhouse client --secure --host play.clickhouse.com --user explorer
|
||||
```
|
||||
|
@ -5,53 +5,39 @@ toc_title: Playground
|
||||
|
||||
# ClickHouse Playground {#clickhouse-playground}
|
||||
|
||||
!!! warning "Warning"
|
||||
This service is deprecated and will be replaced in foreseeable future.
|
||||
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
|
||||
Several example datasets are available in Playground.
|
||||
|
||||
[ClickHouse Playground](https://play.clickhouse.com) позволяет пользователям экспериментировать с ClickHouse, мгновенно выполняя запросы без настройки своего сервера или кластера.
|
||||
В Playground доступны несколько тестовых массивов данных, а также примеры запросов, которые показывают возможности ClickHouse. Кроме того, вы можете выбрать LTS релиз ClickHouse, который хотите протестировать.
|
||||
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
|
||||
|
||||
Вы можете отправлять запросы к Playground с помощью любого HTTP-клиента, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), также можно установить соединение с помощью драйверов [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Более подробная информация о программных продуктах, поддерживающих ClickHouse, доступна [здесь](../interfaces/index.md).
|
||||
## Credentials {#credentials}
|
||||
|
||||
## Параметры доступа {#credentials}
|
||||
| Parameter | Value |
|
||||
|:--------------------|:-----------------------------------|
|
||||
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
|
||||
| Native TCP endpoint | `play.clickhouse.com:9440` |
|
||||
| User | `explorer` or `play` |
|
||||
| Password | (empty) |
|
||||
|
||||
| Параметр | Значение |
|
||||
|:--------------------|:----------------------------------------|
|
||||
| Конечная точка HTTPS| `https://play-api.clickhouse.com:8443` |
|
||||
| Конечная точка TCP | `play-api.clickhouse.com:9440` |
|
||||
| Пользователь | `playground` |
|
||||
| Пароль | `clickhouse` |
|
||||
## Limitations {#limitations}
|
||||
|
||||
Также можно подключаться к ClickHouse определённых релизов, чтобы протестировать их различия (порты и пользователь / пароль остаются неизменными):
|
||||
The queries are executed as a read-only user. It implies some limitations:
|
||||
|
||||
- 20.3 LTS: `play-api-v20-3.clickhouse.com`
|
||||
- 19.14 LTS: `play-api-v19-14.clickhouse.com`
|
||||
- DDL queries are not allowed
|
||||
- INSERT queries are not allowed
|
||||
|
||||
!!! note "Примечание"
|
||||
Для всех этих конечных точек требуется безопасное соединение TLS.
|
||||
The service also have quotas on its usage.
|
||||
|
||||
## Ограничения {#limitations}
|
||||
## Examples {#examples}
|
||||
|
||||
Запросы выполняются под пользователем с правами `readonly`, для которого есть следующие ограничения:
|
||||
- запрещены DDL запросы
|
||||
- запрещены INSERT запросы
|
||||
|
||||
Также установлены следующие опции:
|
||||
- [max_result_bytes=10485760](../operations/settings/query-complexity.md#max-result-bytes)
|
||||
- [max_result_rows=2000](../operations/settings/query-complexity.md#setting-max_result_rows)
|
||||
- [result_overflow_mode=break](../operations/settings/query-complexity.md#result-overflow-mode)
|
||||
- [max_execution_time=60000](../operations/settings/query-complexity.md#max-execution-time)
|
||||
|
||||
## Примеры {#examples}
|
||||
|
||||
Пример конечной точки HTTPS с `curl`:
|
||||
HTTPS endpoint example with `curl`:
|
||||
|
||||
``` bash
|
||||
curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets"
|
||||
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
|
||||
```
|
||||
|
||||
Пример конечной точки TCP с [CLI](../interfaces/cli.md):
|
||||
TCP endpoint example with [CLI](../interfaces/cli.md):
|
||||
|
||||
``` bash
|
||||
clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'"
|
||||
clickhouse client --secure --host play.clickhouse.com --user explorer
|
||||
```
|
||||
|
@ -54,7 +54,7 @@ ClickHouse Keeper может использоваться как равноце
|
||||
- `auto_forwarding` — разрешить пересылку запросов на запись от последователей лидеру (по умолчанию: true).
|
||||
- `shutdown_timeout` — время ожидания завершения внутренних подключений и выключения, в миллисекундах (по умолчанию: 5000).
|
||||
- `startup_timeout` — время отключения сервера, если он не подключается к другим участникам кворума, в миллисекундах (по умолчанию: 30000).
|
||||
- `four_letter_word_white_list` — список разрешенных 4-х буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro").
|
||||
- `four_letter_word_allow_list` — список разрешенных 4-х буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro").
|
||||
|
||||
Конфигурация кворума находится в `<keeper_server>.<raft_configuration>` и содержит описание серверов.
|
||||
|
||||
@ -114,7 +114,7 @@ clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
|
||||
|
||||
ClickHouse Keeper также поддерживает 4-х буквенные команды, почти такие же, как у Zookeeper. Каждая команда состоит из 4-х символов, например, `mntr`, `stat` и т. д. Несколько интересных команд: `stat` предоставляет общую информацию о сервере и подключенных клиентах, а `srvr` и `cons` предоставляют расширенные сведения о сервере и подключениях соответственно.
|
||||
|
||||
У 4-х буквенных команд есть параметр для настройки разрешенного списка `four_letter_word_white_list`, который имеет значение по умолчанию "conf,cons,crst,envi,ruok,srst,srvr,stat, wchc,wchs,dirs,mntr,isro".
|
||||
У 4-х буквенных команд есть параметр для настройки разрешенного списка `four_letter_word_allow_list`, который имеет значение по умолчанию "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro".
|
||||
|
||||
Вы можете отправлять команды в ClickHouse Keeper через telnet или nc на порт для клиента.
|
||||
|
||||
@ -194,7 +194,7 @@ Server stats reset.
|
||||
```
|
||||
server_id=1
|
||||
tcp_port=2181
|
||||
four_letter_word_white_list=*
|
||||
four_letter_word_allow_list=*
|
||||
log_storage_path=./coordination/logs
|
||||
snapshot_storage_path=./coordination/snapshots
|
||||
max_requests_batch_size=100
|
||||
|
@ -15,24 +15,24 @@ import website
|
||||
|
||||
def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
|
||||
src_path = root
|
||||
src_index = os.path.join(src_path, 'index.html')
|
||||
src_index = os.path.join(src_path, "index.html")
|
||||
rel_path = os.path.relpath(src_path, site_temp)
|
||||
dst_path = os.path.join(main_site_dir, rel_path, 'amp')
|
||||
dst_index = os.path.join(dst_path, 'index.html')
|
||||
dst_path = os.path.join(main_site_dir, rel_path, "amp")
|
||||
dst_index = os.path.join(dst_path, "index.html")
|
||||
|
||||
logging.debug(f'Generating AMP version for {rel_path} ({lang})')
|
||||
logging.debug(f"Generating AMP version for {rel_path} ({lang})")
|
||||
os.makedirs(dst_path)
|
||||
with open(src_index, 'r') as f:
|
||||
with open(src_index, "r") as f:
|
||||
content = f.read()
|
||||
css_in = ' '.join(website.get_css_in(args))
|
||||
css_in = " ".join(website.get_css_in(args))
|
||||
command = f"purifycss --min {css_in} '{src_index}'"
|
||||
logging.debug(command)
|
||||
inline_css = subprocess.check_output(command, shell=True).decode('utf-8')
|
||||
inline_css = inline_css.replace('!important', '').replace('/*!', '/*')
|
||||
inline_css = subprocess.check_output(command, shell=True).decode("utf-8")
|
||||
inline_css = inline_css.replace("!important", "").replace("/*!", "/*")
|
||||
inline_css = cssmin.cssmin(inline_css)
|
||||
content = content.replace('CUSTOM_CSS_PLACEHOLDER', inline_css)
|
||||
content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css)
|
||||
|
||||
with open(dst_index, 'w') as f:
|
||||
with open(dst_index, "w") as f:
|
||||
f.write(content)
|
||||
|
||||
return dst_index
|
||||
@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
|
||||
|
||||
def build_amp(lang, args, cfg):
|
||||
# AMP docs: https://amp.dev/documentation/
|
||||
logging.info(f'Building AMP version for {lang}')
|
||||
logging.info(f"Building AMP version for {lang}")
|
||||
with util.temp_dir() as site_temp:
|
||||
extra = cfg.data['extra']
|
||||
main_site_dir = cfg.data['site_dir']
|
||||
extra['is_amp'] = True
|
||||
cfg.load_dict({
|
||||
'site_dir': site_temp,
|
||||
'extra': extra
|
||||
})
|
||||
extra = cfg.data["extra"]
|
||||
main_site_dir = cfg.data["site_dir"]
|
||||
extra["is_amp"] = True
|
||||
cfg.load_dict({"site_dir": site_temp, "extra": extra})
|
||||
|
||||
try:
|
||||
mkdocs.commands.build.build(cfg)
|
||||
@ -60,50 +57,49 @@ def build_amp(lang, args, cfg):
|
||||
|
||||
paths = []
|
||||
for root, _, filenames in os.walk(site_temp):
|
||||
if 'index.html' in filenames:
|
||||
paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir))
|
||||
logging.info(f'Finished building AMP version for {lang}')
|
||||
if "index.html" in filenames:
|
||||
paths.append(
|
||||
prepare_amp_html(lang, args, root, site_temp, main_site_dir)
|
||||
)
|
||||
logging.info(f"Finished building AMP version for {lang}")
|
||||
|
||||
|
||||
def html_to_amp(content):
|
||||
soup = bs4.BeautifulSoup(
|
||||
content,
|
||||
features='html.parser'
|
||||
)
|
||||
soup = bs4.BeautifulSoup(content, features="html.parser")
|
||||
|
||||
for tag in soup.find_all():
|
||||
if tag.attrs.get('id') == 'tostring':
|
||||
tag.attrs['id'] = '_tostring'
|
||||
if tag.name == 'img':
|
||||
tag.name = 'amp-img'
|
||||
tag.attrs['layout'] = 'responsive'
|
||||
src = tag.attrs['src']
|
||||
if not (src.startswith('/') or src.startswith('http')):
|
||||
tag.attrs['src'] = f'../{src}'
|
||||
if not tag.attrs.get('width'):
|
||||
tag.attrs['width'] = '640'
|
||||
if not tag.attrs.get('height'):
|
||||
tag.attrs['height'] = '320'
|
||||
if tag.name == 'iframe':
|
||||
tag.name = 'amp-iframe'
|
||||
tag.attrs['layout'] = 'responsive'
|
||||
del tag.attrs['alt']
|
||||
del tag.attrs['allowfullscreen']
|
||||
if not tag.attrs.get('width'):
|
||||
tag.attrs['width'] = '640'
|
||||
if not tag.attrs.get('height'):
|
||||
tag.attrs['height'] = '320'
|
||||
elif tag.name == 'a':
|
||||
href = tag.attrs.get('href')
|
||||
if tag.attrs.get("id") == "tostring":
|
||||
tag.attrs["id"] = "_tostring"
|
||||
if tag.name == "img":
|
||||
tag.name = "amp-img"
|
||||
tag.attrs["layout"] = "responsive"
|
||||
src = tag.attrs["src"]
|
||||
if not (src.startswith("/") or src.startswith("http")):
|
||||
tag.attrs["src"] = f"../{src}"
|
||||
if not tag.attrs.get("width"):
|
||||
tag.attrs["width"] = "640"
|
||||
if not tag.attrs.get("height"):
|
||||
tag.attrs["height"] = "320"
|
||||
if tag.name == "iframe":
|
||||
tag.name = "amp-iframe"
|
||||
tag.attrs["layout"] = "responsive"
|
||||
del tag.attrs["alt"]
|
||||
del tag.attrs["allowfullscreen"]
|
||||
if not tag.attrs.get("width"):
|
||||
tag.attrs["width"] = "640"
|
||||
if not tag.attrs.get("height"):
|
||||
tag.attrs["height"] = "320"
|
||||
elif tag.name == "a":
|
||||
href = tag.attrs.get("href")
|
||||
if href:
|
||||
if not (href.startswith('/') or href.startswith('http')):
|
||||
if '#' in href:
|
||||
href, anchor = href.split('#')
|
||||
if not (href.startswith("/") or href.startswith("http")):
|
||||
if "#" in href:
|
||||
href, anchor = href.split("#")
|
||||
else:
|
||||
anchor = None
|
||||
href = f'../{href}amp/'
|
||||
href = f"../{href}amp/"
|
||||
if anchor:
|
||||
href = f'{href}#{anchor}'
|
||||
tag.attrs['href'] = href
|
||||
href = f"{href}#{anchor}"
|
||||
tag.attrs["href"] = href
|
||||
content = str(soup)
|
||||
return website.minify_html(content)
|
||||
|
@ -17,54 +17,52 @@ import util
|
||||
|
||||
|
||||
def build_for_lang(lang, args):
|
||||
logging.info(f'Building {lang} blog')
|
||||
logging.info(f"Building {lang} blog")
|
||||
|
||||
try:
|
||||
theme_cfg = {
|
||||
'name': None,
|
||||
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
|
||||
'language': lang,
|
||||
'direction': 'ltr',
|
||||
'static_templates': ['404.html'],
|
||||
'extra': {
|
||||
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
|
||||
}
|
||||
"name": None,
|
||||
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
|
||||
"language": lang,
|
||||
"direction": "ltr",
|
||||
"static_templates": ["404.html"],
|
||||
"extra": {
|
||||
"now": int(
|
||||
time.mktime(datetime.datetime.now().timetuple())
|
||||
) # TODO better way to avoid caching
|
||||
},
|
||||
}
|
||||
|
||||
# the following list of languages is sorted according to
|
||||
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
|
||||
languages = {
|
||||
'en': 'English'
|
||||
}
|
||||
languages = {"en": "English"}
|
||||
|
||||
site_names = {
|
||||
'en': 'ClickHouse Blog'
|
||||
}
|
||||
site_names = {"en": "ClickHouse Blog"}
|
||||
|
||||
assert len(site_names) == len(languages)
|
||||
|
||||
site_dir = os.path.join(args.blog_output_dir, lang)
|
||||
|
||||
plugins = ['macros']
|
||||
plugins = ["macros"]
|
||||
if args.htmlproofer:
|
||||
plugins.append('htmlproofer')
|
||||
plugins.append("htmlproofer")
|
||||
|
||||
website_url = 'https://clickhouse.com'
|
||||
site_name = site_names.get(lang, site_names['en'])
|
||||
website_url = "https://clickhouse.com"
|
||||
site_name = site_names.get(lang, site_names["en"])
|
||||
blog_nav, post_meta = nav.build_blog_nav(lang, args)
|
||||
raw_config = dict(
|
||||
site_name=site_name,
|
||||
site_url=f'{website_url}/blog/{lang}/',
|
||||
site_url=f"{website_url}/blog/{lang}/",
|
||||
docs_dir=os.path.join(args.blog_dir, lang),
|
||||
site_dir=site_dir,
|
||||
strict=True,
|
||||
theme=theme_cfg,
|
||||
nav=blog_nav,
|
||||
copyright='©2016–2022 ClickHouse, Inc.',
|
||||
copyright="©2016–2022 ClickHouse, Inc.",
|
||||
use_directory_urls=True,
|
||||
repo_name='ClickHouse/ClickHouse',
|
||||
repo_url='https://github.com/ClickHouse/ClickHouse/',
|
||||
edit_uri=f'edit/master/website/blog/{lang}',
|
||||
repo_name="ClickHouse/ClickHouse",
|
||||
repo_url="https://github.com/ClickHouse/ClickHouse/",
|
||||
edit_uri=f"edit/master/website/blog/{lang}",
|
||||
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
|
||||
plugins=plugins,
|
||||
extra=dict(
|
||||
@ -75,12 +73,12 @@ def build_for_lang(lang, args):
|
||||
website_url=website_url,
|
||||
events=args.events,
|
||||
languages=languages,
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
|
||||
is_amp=False,
|
||||
is_blog=True,
|
||||
post_meta=post_meta,
|
||||
today=datetime.date.today().isoformat()
|
||||
)
|
||||
today=datetime.date.today().isoformat(),
|
||||
),
|
||||
)
|
||||
|
||||
cfg = config.load_config(**raw_config)
|
||||
@ -89,21 +87,28 @@ def build_for_lang(lang, args):
|
||||
redirects.build_blog_redirects(args)
|
||||
|
||||
env = util.init_jinja2_env(args)
|
||||
with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f:
|
||||
rss_template_string = f.read().decode('utf-8').strip()
|
||||
with open(
|
||||
os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb"
|
||||
) as f:
|
||||
rss_template_string = f.read().decode("utf-8").strip()
|
||||
rss_template = env.from_string(rss_template_string)
|
||||
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f:
|
||||
f.write(rss_template.render({'config': raw_config}))
|
||||
with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f:
|
||||
f.write(rss_template.render({"config": raw_config}))
|
||||
|
||||
logging.info(f'Finished building {lang} blog')
|
||||
logging.info(f"Finished building {lang} blog")
|
||||
|
||||
except exceptions.ConfigurationError as e:
|
||||
raise SystemExit('\n' + str(e))
|
||||
raise SystemExit("\n" + str(e))
|
||||
|
||||
|
||||
def build_blog(args):
|
||||
tasks = []
|
||||
for lang in args.blog_lang.split(','):
|
||||
for lang in args.blog_lang.split(","):
|
||||
if lang:
|
||||
tasks.append((lang, args,))
|
||||
tasks.append(
|
||||
(
|
||||
lang,
|
||||
args,
|
||||
)
|
||||
)
|
||||
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
|
||||
|
@ -30,76 +30,76 @@ import website
|
||||
|
||||
from cmake_in_clickhouse_generator import generate_cmake_flags_files
|
||||
|
||||
|
||||
class ClickHouseMarkdown(markdown.extensions.Extension):
|
||||
class ClickHousePreprocessor(markdown.util.Processor):
|
||||
def run(self, lines):
|
||||
for line in lines:
|
||||
if '<!--hide-->' not in line:
|
||||
if "<!--hide-->" not in line:
|
||||
yield line
|
||||
|
||||
def extendMarkdown(self, md):
|
||||
md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31)
|
||||
md.preprocessors.register(
|
||||
self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31
|
||||
)
|
||||
|
||||
|
||||
markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown
|
||||
|
||||
|
||||
def build_for_lang(lang, args):
|
||||
logging.info(f'Building {lang} docs')
|
||||
os.environ['SINGLE_PAGE'] = '0'
|
||||
logging.info(f"Building {lang} docs")
|
||||
os.environ["SINGLE_PAGE"] = "0"
|
||||
|
||||
try:
|
||||
theme_cfg = {
|
||||
'name': None,
|
||||
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
|
||||
'language': lang,
|
||||
'direction': 'rtl' if lang == 'fa' else 'ltr',
|
||||
'static_templates': ['404.html'],
|
||||
'extra': {
|
||||
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
|
||||
}
|
||||
"name": None,
|
||||
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
|
||||
"language": lang,
|
||||
"direction": "rtl" if lang == "fa" else "ltr",
|
||||
"static_templates": ["404.html"],
|
||||
"extra": {
|
||||
"now": int(
|
||||
time.mktime(datetime.datetime.now().timetuple())
|
||||
) # TODO better way to avoid caching
|
||||
},
|
||||
}
|
||||
|
||||
# the following list of languages is sorted according to
|
||||
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
|
||||
languages = {
|
||||
'en': 'English',
|
||||
'zh': '中文',
|
||||
'ru': 'Русский',
|
||||
'ja': '日本語'
|
||||
}
|
||||
languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"}
|
||||
|
||||
site_names = {
|
||||
'en': 'ClickHouse %s Documentation',
|
||||
'zh': 'ClickHouse文档 %s',
|
||||
'ru': 'Документация ClickHouse %s',
|
||||
'ja': 'ClickHouseドキュメント %s'
|
||||
"en": "ClickHouse %s Documentation",
|
||||
"zh": "ClickHouse文档 %s",
|
||||
"ru": "Документация ClickHouse %s",
|
||||
"ja": "ClickHouseドキュメント %s",
|
||||
}
|
||||
|
||||
assert len(site_names) == len(languages)
|
||||
|
||||
site_dir = os.path.join(args.docs_output_dir, lang)
|
||||
|
||||
plugins = ['macros']
|
||||
plugins = ["macros"]
|
||||
if args.htmlproofer:
|
||||
plugins.append('htmlproofer')
|
||||
plugins.append("htmlproofer")
|
||||
|
||||
website_url = 'https://clickhouse.com'
|
||||
site_name = site_names.get(lang, site_names['en']) % ''
|
||||
site_name = site_name.replace(' ', ' ')
|
||||
website_url = "https://clickhouse.com"
|
||||
site_name = site_names.get(lang, site_names["en"]) % ""
|
||||
site_name = site_name.replace(" ", " ")
|
||||
|
||||
raw_config = dict(
|
||||
site_name=site_name,
|
||||
site_url=f'{website_url}/docs/{lang}/',
|
||||
site_url=f"{website_url}/docs/{lang}/",
|
||||
docs_dir=os.path.join(args.docs_dir, lang),
|
||||
site_dir=site_dir,
|
||||
strict=True,
|
||||
theme=theme_cfg,
|
||||
copyright='©2016–2022 ClickHouse, Inc.',
|
||||
copyright="©2016–2022 ClickHouse, Inc.",
|
||||
use_directory_urls=True,
|
||||
repo_name='ClickHouse/ClickHouse',
|
||||
repo_url='https://github.com/ClickHouse/ClickHouse/',
|
||||
edit_uri=f'edit/master/docs/{lang}',
|
||||
repo_name="ClickHouse/ClickHouse",
|
||||
repo_url="https://github.com/ClickHouse/ClickHouse/",
|
||||
edit_uri=f"edit/master/docs/{lang}",
|
||||
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
|
||||
plugins=plugins,
|
||||
extra=dict(
|
||||
@ -111,16 +111,16 @@ def build_for_lang(lang, args):
|
||||
website_url=website_url,
|
||||
events=args.events,
|
||||
languages=languages,
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
|
||||
is_amp=False,
|
||||
is_blog=False
|
||||
)
|
||||
is_blog=False,
|
||||
),
|
||||
)
|
||||
|
||||
# Clean to be safe if last build finished abnormally
|
||||
single_page.remove_temporary_files(lang, args)
|
||||
|
||||
raw_config['nav'] = nav.build_docs_nav(lang, args)
|
||||
raw_config["nav"] = nav.build_docs_nav(lang, args)
|
||||
|
||||
cfg = config.load_config(**raw_config)
|
||||
|
||||
@ -131,21 +131,28 @@ def build_for_lang(lang, args):
|
||||
amp.build_amp(lang, args, cfg)
|
||||
|
||||
if not args.skip_single_page:
|
||||
single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg)
|
||||
single_page.build_single_page_version(
|
||||
lang, args, raw_config.get("nav"), cfg
|
||||
)
|
||||
|
||||
mdx_clickhouse.PatchedMacrosPlugin.disabled = False
|
||||
|
||||
logging.info(f'Finished building {lang} docs')
|
||||
logging.info(f"Finished building {lang} docs")
|
||||
|
||||
except exceptions.ConfigurationError as e:
|
||||
raise SystemExit('\n' + str(e))
|
||||
raise SystemExit("\n" + str(e))
|
||||
|
||||
|
||||
def build_docs(args):
|
||||
tasks = []
|
||||
for lang in args.lang.split(','):
|
||||
for lang in args.lang.split(","):
|
||||
if lang:
|
||||
tasks.append((lang, args,))
|
||||
tasks.append(
|
||||
(
|
||||
lang,
|
||||
args,
|
||||
)
|
||||
)
|
||||
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
|
||||
redirects.build_docs_redirects(args)
|
||||
|
||||
@ -171,56 +178,64 @@ def build(args):
|
||||
redirects.build_static_redirects(args)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if __name__ == "__main__":
|
||||
os.chdir(os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
# A root path to ClickHouse source code.
|
||||
src_dir = '..'
|
||||
src_dir = ".."
|
||||
|
||||
website_dir = os.path.join(src_dir, 'website')
|
||||
website_dir = os.path.join(src_dir, "website")
|
||||
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
arg_parser.add_argument('--lang', default='en,ru,zh,ja')
|
||||
arg_parser.add_argument('--blog-lang', default='en')
|
||||
arg_parser.add_argument('--docs-dir', default='.')
|
||||
arg_parser.add_argument('--theme-dir', default=website_dir)
|
||||
arg_parser.add_argument('--website-dir', default=website_dir)
|
||||
arg_parser.add_argument('--src-dir', default=src_dir)
|
||||
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog'))
|
||||
arg_parser.add_argument('--output-dir', default='build')
|
||||
arg_parser.add_argument('--nav-limit', type=int, default='0')
|
||||
arg_parser.add_argument('--skip-multi-page', action='store_true')
|
||||
arg_parser.add_argument('--skip-single-page', action='store_true')
|
||||
arg_parser.add_argument('--skip-amp', action='store_true')
|
||||
arg_parser.add_argument('--skip-website', action='store_true')
|
||||
arg_parser.add_argument('--skip-blog', action='store_true')
|
||||
arg_parser.add_argument('--skip-git-log', action='store_true')
|
||||
arg_parser.add_argument('--skip-docs', action='store_true')
|
||||
arg_parser.add_argument('--test-only', action='store_true')
|
||||
arg_parser.add_argument('--minify', action='store_true')
|
||||
arg_parser.add_argument('--htmlproofer', action='store_true')
|
||||
arg_parser.add_argument('--no-docs-macros', action='store_true')
|
||||
arg_parser.add_argument('--save-raw-single-page', type=str)
|
||||
arg_parser.add_argument('--livereload', type=int, default='0')
|
||||
arg_parser.add_argument('--verbose', action='store_true')
|
||||
arg_parser.add_argument("--lang", default="en,ru,zh,ja")
|
||||
arg_parser.add_argument("--blog-lang", default="en")
|
||||
arg_parser.add_argument("--docs-dir", default=".")
|
||||
arg_parser.add_argument("--theme-dir", default=website_dir)
|
||||
arg_parser.add_argument("--website-dir", default=website_dir)
|
||||
arg_parser.add_argument("--src-dir", default=src_dir)
|
||||
arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog"))
|
||||
arg_parser.add_argument("--output-dir", default="build")
|
||||
arg_parser.add_argument("--nav-limit", type=int, default="0")
|
||||
arg_parser.add_argument("--skip-multi-page", action="store_true")
|
||||
arg_parser.add_argument("--skip-single-page", action="store_true")
|
||||
arg_parser.add_argument("--skip-amp", action="store_true")
|
||||
arg_parser.add_argument("--skip-website", action="store_true")
|
||||
arg_parser.add_argument("--skip-blog", action="store_true")
|
||||
arg_parser.add_argument("--skip-git-log", action="store_true")
|
||||
arg_parser.add_argument("--skip-docs", action="store_true")
|
||||
arg_parser.add_argument("--test-only", action="store_true")
|
||||
arg_parser.add_argument("--minify", action="store_true")
|
||||
arg_parser.add_argument("--htmlproofer", action="store_true")
|
||||
arg_parser.add_argument("--no-docs-macros", action="store_true")
|
||||
arg_parser.add_argument("--save-raw-single-page", type=str)
|
||||
arg_parser.add_argument("--livereload", type=int, default="0")
|
||||
arg_parser.add_argument("--verbose", action="store_true")
|
||||
|
||||
args = arg_parser.parse_args()
|
||||
args.minify = False # TODO remove
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG if args.verbose else logging.INFO,
|
||||
stream=sys.stderr
|
||||
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
|
||||
)
|
||||
|
||||
logging.getLogger('MARKDOWN').setLevel(logging.INFO)
|
||||
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
|
||||
|
||||
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs')
|
||||
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog')
|
||||
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs")
|
||||
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog")
|
||||
|
||||
from github import get_events
|
||||
args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
|
||||
args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip()
|
||||
args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}'
|
||||
|
||||
args.rev = (
|
||||
subprocess.check_output("git rev-parse HEAD", shell=True)
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
args.rev_short = (
|
||||
subprocess.check_output("git rev-parse --short HEAD", shell=True)
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
|
||||
args.events = get_events(args)
|
||||
|
||||
if args.test_only:
|
||||
@ -233,18 +248,20 @@ if __name__ == '__main__':
|
||||
mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True
|
||||
|
||||
from build import build
|
||||
|
||||
build(args)
|
||||
|
||||
if args.livereload:
|
||||
new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')]
|
||||
new_args = sys.executable + ' ' + ' '.join(new_args)
|
||||
new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
|
||||
new_args = sys.executable + " " + " ".join(new_args)
|
||||
|
||||
server = livereload.Server()
|
||||
server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
|
||||
server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
|
||||
server.serve(
|
||||
root=args.output_dir,
|
||||
host='0.0.0.0',
|
||||
port=args.livereload
|
||||
server.watch(
|
||||
args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True)
|
||||
)
|
||||
server.watch(
|
||||
args.website_dir + "**/*",
|
||||
livereload.shell(new_args, cwd="tools", shell=True),
|
||||
)
|
||||
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
|
||||
sys.exit(0)
|
||||
|
@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict
Entity = Tuple[str, str, str]

# https://regex101.com/r/R6iogw/12
cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
cmake_option_regex: str = (
r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
)

ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/"

name_str: str = "<a name=\"{anchor}\"></a>[`{name}`](" + ch_master_url + "{path}#L{line})"
name_str: str = '<a name="{anchor}"></a>[`{name}`](' + ch_master_url + "{path}#L{line})"
default_anchor_str: str = "[`{name}`](#{anchor})"

comment_var_regex: str = r"\${(.+)}"
@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {}


def make_anchor(t: str) -> str:
return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"])
return "".join(
["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]
)


def process_comment(comment: str) -> str:
return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE)


def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None:
(line, comment) = line_comment
(name, description, default) = entity
@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
formatted_default: str = "`" + default + "`"

formatted_name: str = name_str.format(
anchor=make_anchor(name),
name=name,
path=path,
line=line)
anchor=make_anchor(name), name=name, path=path, line=line
)

formatted_description: str = "".join(description.split("\n"))

formatted_comment: str = process_comment(comment)

formatted_entity: str = "| {} | {} | {} | {} |".format(
formatted_name, formatted_default, formatted_description, formatted_comment)
formatted_name, formatted_default, formatted_description, formatted_comment
)

entities[name] = path, formatted_entity


def process_file(root_path: str, file_path: str, file_name: str) -> None:
with open(os.path.join(file_path, file_name), 'r') as cmake_file:
with open(os.path.join(file_path, file_name), "r") as cmake_file:
contents: str = cmake_file.read()

def get_line_and_comment(target: str) -> Tuple[int, str]:
@ -70,7 +76,7 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
comment: str = ""

for n, line in enumerate(contents_list):
if 'option' not in line.lower() or target not in line:
if "option" not in line.lower() or target not in line:
continue

for maybe_comment_line in contents_list[n - 1 :: -1]:
@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
# line numbering starts with 1
return n + 1, comment

matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE)
matches: Optional[List[Entity]] = re.findall(
cmake_option_regex, contents, re.MULTILINE
)


file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name)
if file_rel_path_with_name.startswith('/'):
file_rel_path_with_name: str = os.path.join(
file_path[len(root_path) :], file_name
)
if file_rel_path_with_name.startswith("/"):
file_rel_path_with_name = file_rel_path_with_name[1:]

if matches:
for entity in matches:
build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0]))
build_entity(
file_rel_path_with_name, entity, get_line_and_comment(entity[0])
)


def process_folder(root_path: str, name: str) -> None:
for root, _, files in os.walk(os.path.join(root_path, name)):
@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None:
if f == "CMakeLists.txt" or ".cmake" in f:
process_file(root_path, root, f)

def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), '..', '..')

output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md")
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md")
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), "..", "..")

output_file_name: str = os.path.join(
root_path, "docs/en/development/cmake-in-clickhouse.md"
)
header_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_header.md"
)
footer_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_footer.md"
)

process_file(root_path, root_path, "CMakeLists.txt")
process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt")
@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None:
f.write(entities[k][1] + "\n")
ignored_keys.append(k)

f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" +
table_header)
f.write(
"\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n"
+ table_header
)

for k in sorted_keys:
if k.startswith("ENABLE_") and ".cmake" in entities[k][0]:
@ -143,9 +164,11 @@ def generate_cmake_flags_files() -> None:
with open(footer_file_name, "r") as footer:
f.write(footer.read())

other_languages = ["docs/ja/development/cmake-in-clickhouse.md",
other_languages = [
"docs/ja/development/cmake-in-clickhouse.md",
"docs/zh/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md"]
"docs/ru/development/cmake-in-clickhouse.md",
]

for lang in other_languages:
other_file_name = os.path.join(root_path, lang)
@ -153,5 +176,6 @@ def generate_cmake_flags_files() -> None:
os.unlink(other_file_name)
os.symlink(output_file_name, other_file_name)

if __name__ == '__main__':

if __name__ == "__main__":
generate_cmake_flags_files()

@ -8,7 +8,7 @@ import contextlib
|
||||
from git import cmd
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
SCRIPT_DESCRIPTION = '''
|
||||
SCRIPT_DESCRIPTION = """
|
||||
usage: ./easy_diff.py language/document path
|
||||
|
||||
Show the difference between a language document and an English document.
|
||||
@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = '''
|
||||
OPTIONS:
|
||||
-h, --help show this help message and exit
|
||||
--no-pager use stdout as difference result output
|
||||
'''
|
||||
"""
|
||||
|
||||
SCRIPT_PATH = os.path.abspath(__file__)
|
||||
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..')
|
||||
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..")
|
||||
SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME)
|
||||
|
||||
SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False)
|
||||
|
||||
|
||||
def execute(commands):
|
||||
@ -70,19 +70,41 @@ def execute(commands):
|
||||
|
||||
|
||||
def get_hash(file_name):
|
||||
return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name])
|
||||
return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name])
|
||||
|
||||
|
||||
def diff_file(reference_file, working_file, out):
|
||||
if not os.path.exists(reference_file):
|
||||
raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.')
|
||||
raise RuntimeError(
|
||||
"reference file [" + os.path.abspath(reference_file) + "] is not exists."
|
||||
)
|
||||
|
||||
if os.path.islink(working_file):
|
||||
out.writelines(["Need translate document:" + os.path.abspath(reference_file)])
|
||||
elif not os.path.exists(working_file):
|
||||
out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)])
|
||||
out.writelines(
|
||||
[
|
||||
"Need link document "
|
||||
+ os.path.abspath(reference_file)
|
||||
+ " to "
|
||||
+ os.path.abspath(working_file)
|
||||
]
|
||||
)
|
||||
elif get_hash(working_file) != get_hash(reference_file):
|
||||
out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))])
|
||||
out.writelines(
|
||||
[
|
||||
(
|
||||
execute(
|
||||
[
|
||||
"git",
|
||||
"diff",
|
||||
get_hash(working_file).strip('"'),
|
||||
reference_file,
|
||||
]
|
||||
).encode("utf-8")
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out):
|
||||
for list_item in os.listdir(reference_directory):
|
||||
working_item = os.path.join(working_directory, list_item)
|
||||
reference_item = os.path.join(reference_directory, list_item)
|
||||
if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0:
|
||||
if (
|
||||
diff_file(reference_item, working_item, out)
|
||||
if os.path.isfile(reference_item)
|
||||
else diff_directory(reference_item, working_item, out) != 0
|
||||
):
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def find_language_doc(custom_document, other_language='en', children=[]):
|
||||
def find_language_doc(custom_document, other_language="en", children=[]):
|
||||
if len(custom_document) == 0:
|
||||
raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.")
|
||||
raise RuntimeError(
|
||||
"The "
|
||||
+ os.path.join(custom_document, *children)
|
||||
+ " is not in docs directory."
|
||||
)
|
||||
|
||||
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document):
|
||||
return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:])
|
||||
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document):
|
||||
return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:])
|
||||
children.insert(0, os.path.split(custom_document)[1])
|
||||
return find_language_doc(os.path.split(custom_document)[0], other_language, children)
|
||||
return find_language_doc(
|
||||
os.path.split(custom_document)[0], other_language, children
|
||||
)
|
||||
|
||||
|
||||
class ToPager:
|
||||
@ -119,7 +151,7 @@ class ToPager:
|
||||
|
||||
def close(self):
|
||||
self.temp_named_file.flush()
|
||||
git_pager = execute(['git', 'var', 'GIT_PAGER'])
|
||||
git_pager = execute(["git", "var", "GIT_PAGER"])
|
||||
subprocess.check_call([git_pager, self.temp_named_file.name])
|
||||
self.temp_named_file.close()
|
||||
|
||||
@ -135,12 +167,20 @@ class ToStdOut:
|
||||
self.system_stdout_stream = system_stdout_stream
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
arguments = SCRIPT_COMMAND_PARSER.parse_args()
|
||||
if arguments.help or not arguments.path:
|
||||
sys.stdout.write(SCRIPT_DESCRIPTION)
|
||||
sys.exit(0)
|
||||
|
||||
working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path)
|
||||
with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer:
|
||||
exit(diff_directory(find_language_doc(working_language), working_language, writer))
|
||||
working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path)
|
||||
with contextlib.closing(
|
||||
ToStdOut(sys.stdout)
|
||||
if arguments.no_pager
|
||||
else ToPager(NamedTemporaryFile("r+"))
|
||||
) as writer:
|
||||
exit(
|
||||
diff_directory(
|
||||
find_language_doc(working_language), working_language, writer
|
||||
)
|
||||
)
|
||||
|
@ -16,27 +16,26 @@ import util
def get_events(args):
events = []
skip = True
with open(os.path.join(args.docs_dir, '..', 'README.md')) as f:
with open(os.path.join(args.docs_dir, "..", "README.md")) as f:
for line in f:
if skip:
if 'Upcoming Events' in line:
if "Upcoming Events" in line:
skip = False
else:
if not line:
continue
line = line.strip().split('](')
line = line.strip().split("](")
if len(line) == 2:
tail = line[1].split(') ')
events.append({
'signup_link': tail[0],
'event_name': line[0].replace('* [', ''),
'event_date': tail[1].replace('on ', '').replace('.', '')
})
tail = line[1].split(") ")
events.append(
{
"signup_link": tail[0],
"event_name": line[0].replace("* [", ""),
"event_date": tail[1].replace("on ", "").replace(".", ""),
}
)
return events


if __name__ == '__main__':
logging.basicConfig(
level=logging.DEBUG,
stream=sys.stderr
)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)

@ -16,74 +16,79 @@ import slugify as slugify_impl
|
||||
|
||||
|
||||
def slugify(value, separator):
|
||||
return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True)
|
||||
return slugify_impl.slugify(
|
||||
value, separator=separator, word_boundary=True, save_order=True
|
||||
)
|
||||
|
||||
|
||||
MARKDOWN_EXTENSIONS = [
|
||||
'mdx_clickhouse',
|
||||
'admonition',
|
||||
'attr_list',
|
||||
'def_list',
|
||||
'codehilite',
|
||||
'nl2br',
|
||||
'sane_lists',
|
||||
'pymdownx.details',
|
||||
'pymdownx.magiclink',
|
||||
'pymdownx.superfences',
|
||||
'extra',
|
||||
{
|
||||
'toc': {
|
||||
'permalink': True,
|
||||
'slugify': slugify
|
||||
}
|
||||
}
|
||||
"mdx_clickhouse",
|
||||
"admonition",
|
||||
"attr_list",
|
||||
"def_list",
|
||||
"codehilite",
|
||||
"nl2br",
|
||||
"sane_lists",
|
||||
"pymdownx.details",
|
||||
"pymdownx.magiclink",
|
||||
"pymdownx.superfences",
|
||||
"extra",
|
||||
{"toc": {"permalink": True, "slugify": slugify}},
|
||||
]
|
||||
|
||||
|
||||
class ClickHouseLinkMixin(object):
|
||||
|
||||
def handleMatch(self, m, data):
|
||||
single_page = (os.environ.get('SINGLE_PAGE') == '1')
|
||||
single_page = os.environ.get("SINGLE_PAGE") == "1"
|
||||
try:
|
||||
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
|
||||
except IndexError:
|
||||
return
|
||||
|
||||
if el is not None:
|
||||
href = el.get('href') or ''
|
||||
is_external = href.startswith('http:') or href.startswith('https:')
|
||||
href = el.get("href") or ""
|
||||
is_external = href.startswith("http:") or href.startswith("https:")
|
||||
if is_external:
|
||||
if not href.startswith('https://clickhouse.com'):
|
||||
el.set('rel', 'external nofollow noreferrer')
|
||||
if not href.startswith("https://clickhouse.com"):
|
||||
el.set("rel", "external nofollow noreferrer")
|
||||
elif single_page:
|
||||
if '#' in href:
|
||||
el.set('href', '#' + href.split('#', 1)[1])
|
||||
if "#" in href:
|
||||
el.set("href", "#" + href.split("#", 1)[1])
|
||||
else:
|
||||
el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/'))
|
||||
el.set(
|
||||
"href", "#" + href.replace("/index.md", "/").replace(".md", "/")
|
||||
)
|
||||
return el, start, end
|
||||
|
||||
|
||||
class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor):
|
||||
class ClickHouseAutolinkPattern(
|
||||
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor):
|
||||
class ClickHouseLinkPattern(
|
||||
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
class ClickHousePreprocessor(markdown.util.Processor):
|
||||
def run(self, lines):
|
||||
for line in lines:
|
||||
if '<!--hide-->' not in line:
|
||||
if "<!--hide-->" not in line:
|
||||
yield line
|
||||
|
||||
|
||||
class ClickHouseMarkdown(markdown.extensions.Extension):
|
||||
|
||||
def extendMarkdown(self, md, md_globals):
|
||||
md.preprocessors['clickhouse'] = ClickHousePreprocessor()
|
||||
md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md)
|
||||
md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md)
|
||||
md.preprocessors["clickhouse"] = ClickHousePreprocessor()
|
||||
md.inlinePatterns["link"] = ClickHouseLinkPattern(
|
||||
markdown.inlinepatterns.LINK_RE, md
|
||||
)
|
||||
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
|
||||
markdown.inlinepatterns.AUTOLINK_RE, md
|
||||
)
|
||||
|
||||
|
||||
def makeExtension(**kwargs):
|
||||
@ -92,10 +97,8 @@ def makeExtension(**kwargs):
|
||||
|
||||
def get_translations(dirname, lang):
|
||||
import babel.support
|
||||
return babel.support.Translations.load(
|
||||
dirname=dirname,
|
||||
locales=[lang, 'en']
|
||||
)
|
||||
|
||||
return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
|
||||
|
||||
|
||||
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
|
||||
def on_config(self, config):
|
||||
super(PatchedMacrosPlugin, self).on_config(config)
|
||||
self.env.comment_start_string = '{##'
|
||||
self.env.comment_end_string = '##}'
|
||||
self.env.loader = jinja2.FileSystemLoader([
|
||||
os.path.join(config.data['site_dir']),
|
||||
os.path.join(config.data['extra']['includes_dir'])
|
||||
])
|
||||
self.env.comment_start_string = "{##"
|
||||
self.env.comment_end_string = "##}"
|
||||
self.env.loader = jinja2.FileSystemLoader(
|
||||
[
|
||||
os.path.join(config.data["site_dir"]),
|
||||
os.path.join(config.data["extra"]["includes_dir"]),
|
||||
]
|
||||
)
|
||||
|
||||
def on_env(self, env, config, files):
|
||||
import util
|
||||
env.add_extension('jinja2.ext.i18n')
|
||||
dirname = os.path.join(config.data['theme'].dirs[0], 'locale')
|
||||
lang = config.data['theme']['language']
|
||||
env.install_gettext_translations(
|
||||
get_translations(dirname, lang),
|
||||
newstyle=True
|
||||
)
|
||||
|
||||
env.add_extension("jinja2.ext.i18n")
|
||||
dirname = os.path.join(config.data["theme"].dirs[0], "locale")
|
||||
lang = config.data["theme"]["language"]
|
||||
env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
|
||||
util.init_jinja2_filters(env)
|
||||
return env
|
||||
|
||||
@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
return markdown
|
||||
|
||||
def on_page_markdown(self, markdown, page, config, files):
|
||||
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files)
|
||||
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
|
||||
markdown, page, config, files
|
||||
)
|
||||
|
||||
if os.path.islink(page.file.abs_src_path):
|
||||
lang = config.data['theme']['language']
|
||||
page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1)
|
||||
lang = config.data["theme"]["language"]
|
||||
page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
|
||||
|
||||
if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'):
|
||||
if config.data["extra"].get("version_prefix") or config.data["extra"].get(
|
||||
"single_page"
|
||||
):
|
||||
return markdown
|
||||
if self.skip_git_log:
|
||||
return markdown
|
||||
|
@ -10,57 +10,59 @@ import util
|
||||
|
||||
|
||||
def find_first_header(content):
|
||||
for line in content.split('\n'):
|
||||
if line.startswith('#'):
|
||||
no_hash = line.lstrip('#')
|
||||
return no_hash.split('{', 1)[0].strip()
|
||||
for line in content.split("\n"):
|
||||
if line.startswith("#"):
|
||||
no_hash = line.lstrip("#")
|
||||
return no_hash.split("{", 1)[0].strip()
|
||||
|
||||
|
||||
def build_nav_entry(root, args):
|
||||
if root.endswith('images'):
|
||||
if root.endswith("images"):
|
||||
return None, None, None
|
||||
result_items = []
|
||||
index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md'))
|
||||
current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title'))
|
||||
current_title = current_title or index_meta.get('title', find_first_header(index_content))
|
||||
index_meta, index_content = util.read_md_file(os.path.join(root, "index.md"))
|
||||
current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title"))
|
||||
current_title = current_title or index_meta.get(
|
||||
"title", find_first_header(index_content)
|
||||
)
|
||||
for filename in os.listdir(root):
|
||||
path = os.path.join(root, filename)
|
||||
if os.path.isdir(path):
|
||||
prio, title, payload = build_nav_entry(path, args)
|
||||
if title and payload:
|
||||
result_items.append((prio, title, payload))
|
||||
elif filename.endswith('.md'):
|
||||
elif filename.endswith(".md"):
|
||||
path = os.path.join(root, filename)
|
||||
|
||||
meta = ''
|
||||
content = ''
|
||||
meta = ""
|
||||
content = ""
|
||||
|
||||
try:
|
||||
meta, content = util.read_md_file(path)
|
||||
except:
|
||||
print('Error in file: {}'.format(path))
|
||||
print("Error in file: {}".format(path))
|
||||
raise
|
||||
|
||||
path = path.split('/', 2)[-1]
|
||||
title = meta.get('toc_title', find_first_header(content))
|
||||
path = path.split("/", 2)[-1]
|
||||
title = meta.get("toc_title", find_first_header(content))
|
||||
if title:
|
||||
title = title.strip().rstrip('.')
|
||||
title = title.strip().rstrip(".")
|
||||
else:
|
||||
title = meta.get('toc_folder_title', 'hidden')
|
||||
prio = meta.get('toc_priority', 9999)
|
||||
logging.debug(f'Nav entry: {prio}, {title}, {path}')
|
||||
if meta.get('toc_hidden') or not content.strip():
|
||||
title = 'hidden'
|
||||
if title == 'hidden':
|
||||
title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest()
|
||||
title = meta.get("toc_folder_title", "hidden")
|
||||
prio = meta.get("toc_priority", 9999)
|
||||
logging.debug(f"Nav entry: {prio}, {title}, {path}")
|
||||
if meta.get("toc_hidden") or not content.strip():
|
||||
title = "hidden"
|
||||
if title == "hidden":
|
||||
title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest()
|
||||
if args.nav_limit and len(result_items) >= args.nav_limit:
|
||||
break
|
||||
result_items.append((prio, title, path))
|
||||
result_items = sorted(result_items, key=lambda x: (x[0], x[1]))
|
||||
result = collections.OrderedDict([(item[1], item[2]) for item in result_items])
|
||||
if index_meta.get('toc_hidden_folder'):
|
||||
current_title += '|hidden-folder'
|
||||
return index_meta.get('toc_priority', 10000), current_title, result
|
||||
if index_meta.get("toc_hidden_folder"):
|
||||
current_title += "|hidden-folder"
|
||||
return index_meta.get("toc_priority", 10000), current_title, result
|
||||
|
||||
|
||||
def build_docs_nav(lang, args):
|
||||
@ -70,7 +72,7 @@ def build_docs_nav(lang, args):
|
||||
index_key = None
|
||||
for key, value in list(nav.items()):
|
||||
if key and value:
|
||||
if value == 'index.md':
|
||||
if value == "index.md":
|
||||
index_key = key
|
||||
continue
|
||||
result.append({key: value})
|
||||
@ -78,7 +80,7 @@ def build_docs_nav(lang, args):
|
||||
break
|
||||
if index_key:
|
||||
key = list(result[0].keys())[0]
|
||||
result[0][key][index_key] = 'index.md'
|
||||
result[0][key][index_key] = "index.md"
|
||||
result[0][key].move_to_end(index_key, last=False)
|
||||
return result
|
||||
|
||||
@ -86,7 +88,7 @@ def build_docs_nav(lang, args):
|
||||
def build_blog_nav(lang, args):
|
||||
blog_dir = os.path.join(args.blog_dir, lang)
|
||||
years = sorted(os.listdir(blog_dir), reverse=True)
|
||||
result_nav = [{'hidden': 'index.md'}]
|
||||
result_nav = [{"hidden": "index.md"}]
|
||||
post_meta = collections.OrderedDict()
|
||||
for year in years:
|
||||
year_dir = os.path.join(blog_dir, year)
|
||||
@ -97,38 +99,53 @@ def build_blog_nav(lang, args):
|
||||
post_meta_items = []
|
||||
for post in os.listdir(year_dir):
|
||||
post_path = os.path.join(year_dir, post)
|
||||
if not post.endswith('.md'):
|
||||
raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}')
|
||||
if not post.endswith(".md"):
|
||||
raise RuntimeError(
|
||||
f"Unexpected non-md file in posts folder: {post_path}"
|
||||
)
|
||||
meta, _ = util.read_md_file(post_path)
|
||||
post_date = meta['date']
|
||||
post_title = meta['title']
|
||||
post_date = meta["date"]
|
||||
post_title = meta["title"]
|
||||
if datetime.date.fromisoformat(post_date) > datetime.date.today():
|
||||
continue
|
||||
posts.append(
|
||||
(post_date, post_title, os.path.join(year, post),)
|
||||
(
|
||||
post_date,
|
||||
post_title,
|
||||
os.path.join(year, post),
|
||||
)
|
||||
)
|
||||
if post_title in post_meta:
|
||||
raise RuntimeError(f'Duplicate post title: {post_title}')
|
||||
if not post_date.startswith(f'{year}-'):
|
||||
raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}')
|
||||
post_url_part = post.replace('.md', '')
|
||||
post_meta_items.append((post_date, {
|
||||
'date': post_date,
|
||||
'title': post_title,
|
||||
'image': meta.get('image'),
|
||||
'url': f'/blog/{lang}/{year}/{post_url_part}/'
|
||||
},))
|
||||
raise RuntimeError(f"Duplicate post title: {post_title}")
|
||||
if not post_date.startswith(f"{year}-"):
|
||||
raise RuntimeError(
|
||||
f"Post date {post_date} doesn't match the folder year {year}: {post_title}"
|
||||
)
|
||||
post_url_part = post.replace(".md", "")
|
||||
post_meta_items.append(
|
||||
(
|
||||
post_date,
|
||||
{
|
||||
"date": post_date,
|
||||
"title": post_title,
|
||||
"image": meta.get("image"),
|
||||
"url": f"/blog/{lang}/{year}/{post_url_part}/",
|
||||
},
|
||||
)
|
||||
)
|
||||
for _, title, path in sorted(posts, reverse=True):
|
||||
result_nav[-1][year][title] = path
|
||||
for _, post_meta_item in sorted(post_meta_items,
|
||||
reverse=True,
|
||||
key=lambda item: item[0]):
|
||||
post_meta[post_meta_item['title']] = post_meta_item
|
||||
for _, post_meta_item in sorted(
|
||||
post_meta_items, reverse=True, key=lambda item: item[0]
|
||||
):
|
||||
post_meta[post_meta_item["title"]] = post_meta_item
|
||||
return result_nav, post_meta
|
||||
|
||||
|
||||
def _custom_get_navigation(files, config):
|
||||
nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages())
|
||||
nav_config = config["nav"] or mkdocs.structure.nav.nest_paths(
|
||||
f.src_path for f in files.documentation_pages()
|
||||
)
|
||||
items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config)
|
||||
if not isinstance(items, list):
|
||||
items = [items]
|
||||
@ -138,19 +155,25 @@ def _custom_get_navigation(files, config):
|
||||
mkdocs.structure.nav._add_previous_and_next_links(pages)
|
||||
mkdocs.structure.nav._add_parent_links(items)
|
||||
|
||||
missing_from_config = [file for file in files.documentation_pages() if file.page is None]
|
||||
missing_from_config = [
|
||||
file for file in files.documentation_pages() if file.page is None
|
||||
]
|
||||
if missing_from_config:
|
||||
files._files = [file for file in files._files if file not in missing_from_config]
|
||||
files._files = [
|
||||
file for file in files._files if file not in missing_from_config
|
||||
]
|
||||
|
||||
links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link)
|
||||
for link in links:
|
||||
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url)
|
||||
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(
|
||||
link.url
|
||||
)
|
||||
if scheme or netloc:
|
||||
mkdocs.structure.nav.log.debug(
|
||||
"An external link to '{}' is included in "
|
||||
"the 'nav' configuration.".format(link.url)
|
||||
)
|
||||
elif link.url.startswith('/'):
|
||||
elif link.url.startswith("/"):
|
||||
mkdocs.structure.nav.log.debug(
|
||||
"An absolute path to '{}' is included in the 'nav' configuration, "
|
||||
"which presumably points to an external resource.".format(link.url)
|
||||
|
@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url):
os.makedirs(out_dir)
except OSError:
pass
with open(out_path, 'w') as f:
f.write(f'''<!--[if IE 6]> Redirect: {to_url} <![endif]-->
with open(out_path, "w") as f:
f.write(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML>
<html lang="en-US">
<head>
@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url):
<body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body>
</html>''')
</html>"""
)


def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path):
out_path = os.path.join(
output_dir, lang,
from_path.replace('/index.md', '/index.html').replace('.md', '/index.html')
output_dir,
lang,
from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"),
)
target_path = to_path.replace('/index.md', '/').replace('.md', '/')
target_path = to_path.replace("/index.md", "/").replace(".md", "/")

if target_path[0:7] != 'http://' and target_path[0:8] != 'https://':
to_url = f'/{base_prefix}/{lang}/{target_path}'
if target_path[0:7] != "http://" and target_path[0:8] != "https://":
to_url = f"/{base_prefix}/{lang}/{target_path}"
else:
to_url = target_path

@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)


def build_docs_redirects(args):
with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f:
with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f:
for line in f:
for lang in args.lang.split(','):
from_path, to_path = line.split(' ', 1)
build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path)
for lang in args.lang.split(","):
from_path, to_path = line.split(" ", 1)
build_redirect_html(
args, "docs", lang, args.docs_output_dir, from_path, to_path
)


def build_blog_redirects(args):
for lang in args.blog_lang.split(','):
redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt')
for lang in args.blog_lang.split(","):
redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt")
if os.path.exists(redirects_path):
with open(redirects_path, 'r') as f:
with open(redirects_path, "r") as f:
for line in f:
from_path, to_path = line.split(' ', 1)
build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path)
from_path, to_path = line.split(" ", 1)
build_redirect_html(
args, "blog", lang, args.blog_output_dir, from_path, to_path
)


def build_static_redirects(args):
for static_redirect in [
('benchmark.html', '/benchmark/dbms/'),
('benchmark_hardware.html', '/benchmark/hardware/'),
('tutorial.html', '/docs/en/getting_started/tutorial/',),
('reference_en.html', '/docs/en/single/', ),
('reference_ru.html', '/docs/ru/single/',),
('docs/index.html', '/docs/en/',),
("benchmark.html", "/benchmark/dbms/"),
("benchmark_hardware.html", "/benchmark/hardware/"),
(
"tutorial.html",
"/docs/en/getting_started/tutorial/",
),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]:
write_redirect_html(
os.path.join(args.output_dir, static_redirect[0]),
static_redirect[1]
os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
)

@ -12,7 +12,8 @@ import test
|
||||
import util
|
||||
import website
|
||||
|
||||
TEMPORARY_FILE_NAME = 'single.md'
|
||||
TEMPORARY_FILE_NAME = "single.md"
|
||||
|
||||
|
||||
def recursive_values(item):
|
||||
if isinstance(item, dict):
|
||||
@ -25,11 +26,14 @@ def recursive_values(item):
|
||||
yield item
|
||||
|
||||
|
||||
anchor_not_allowed_chars = re.compile(r'[^\w\-]')
|
||||
def generate_anchor_from_path(path):
|
||||
return re.sub(anchor_not_allowed_chars, '-', path)
|
||||
anchor_not_allowed_chars = re.compile(r"[^\w\-]")
|
||||
|
||||
absolute_link = re.compile(r'^https?://')
|
||||
|
||||
def generate_anchor_from_path(path):
|
||||
return re.sub(anchor_not_allowed_chars, "-", path)
|
||||
|
||||
|
||||
absolute_link = re.compile(r"^https?://")
|
||||
|
||||
|
||||
def replace_link(match, path):
|
||||
@ -40,46 +44,55 @@ def replace_link(match, path):
|
||||
if re.search(absolute_link, link):
|
||||
return match.group(0)
|
||||
|
||||
if link.endswith('/'):
|
||||
link = link[0:-1] + '.md'
|
||||
if link.endswith("/"):
|
||||
link = link[0:-1] + ".md"
|
||||
|
||||
return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link))))
|
||||
return "{}(#{})".format(
|
||||
title,
|
||||
generate_anchor_from_path(
|
||||
os.path.normpath(os.path.join(os.path.dirname(path), link))
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# Concatenates Markdown files to a single file.
|
||||
def concatenate(lang, docs_path, single_page_file, nav):
|
||||
lang_path = os.path.join(docs_path, lang)
|
||||
|
||||
proj_config = f'{docs_path}/toc_{lang}.yml'
|
||||
proj_config = f"{docs_path}/toc_{lang}.yml"
|
||||
if os.path.exists(proj_config):
|
||||
with open(proj_config) as cfg_file:
|
||||
nav = yaml.full_load(cfg_file.read())['nav']
|
||||
nav = yaml.full_load(cfg_file.read())["nav"]
|
||||
|
||||
files_to_concatenate = list(recursive_values(nav))
|
||||
files_count = len(files_to_concatenate)
|
||||
logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.')
|
||||
logging.debug('Concatenating: ' + ', '.join(files_to_concatenate))
|
||||
assert files_count > 0, f'Empty single-page for {lang}'
|
||||
logging.info(
|
||||
f"{files_count} files will be concatenated into single md-file for {lang}."
|
||||
)
|
||||
logging.debug("Concatenating: " + ", ".join(files_to_concatenate))
|
||||
assert files_count > 0, f"Empty single-page for {lang}"
|
||||
|
||||
link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)')
|
||||
link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")
|
||||
|
||||
for path in files_to_concatenate:
|
||||
try:
|
||||
with open(os.path.join(lang_path, path)) as f:
|
||||
# Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file.
|
||||
single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path))
|
||||
single_page_file.write(
|
||||
'\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)
|
||||
)
|
||||
|
||||
in_metadata = False
|
||||
for line in f:
|
||||
# Skip YAML metadata.
|
||||
if line == '---\n':
|
||||
if line == "---\n":
|
||||
in_metadata = not in_metadata
|
||||
continue
|
||||
|
||||
if not in_metadata:
|
||||
# Increase the level of headers.
|
||||
if line.startswith('#'):
|
||||
line = '#' + line
|
||||
if line.startswith("#"):
|
||||
line = "#" + line
|
||||
|
||||
# Replace links within the docs.
|
||||
|
||||
@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav):
|
||||
line = re.sub(
|
||||
link_regexp,
|
||||
lambda match: replace_link(match, path),
|
||||
line)
|
||||
line,
|
||||
)
|
||||
|
||||
# If failed to replace the relative link, print to log
|
||||
# But with some exceptions:
|
||||
# - "../src/" -- for cmake-in-clickhouse.md (link to sources)
|
||||
# - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
|
||||
if '../' in line and (not '../usr/share' in line) and (not '../src/' in line):
|
||||
logging.info('Failed to resolve relative link:')
|
||||
if (
|
||||
"../" in line
|
||||
and (not "../usr/share" in line)
|
||||
and (not "../src/" in line)
|
||||
):
|
||||
logging.info("Failed to resolve relative link:")
|
||||
logging.info(path)
|
||||
logging.info(line)
|
||||
|
||||
@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav):
|
||||
|
||||
single_page_file.flush()
|
||||
|
||||
|
||||
def get_temporary_file_name(lang, args):
|
||||
return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME)
|
||||
|
||||
|
||||
def remove_temporary_files(lang, args):
|
||||
single_md_path = get_temporary_file_name(lang, args)
|
||||
if os.path.exists(single_md_path):
|
||||
@ -115,14 +135,14 @@ def remove_temporary_files(lang, args):
|
||||
|
||||
|
||||
def build_single_page_version(lang, args, nav, cfg):
|
||||
logging.info(f'Building single page version for {lang}')
|
||||
os.environ['SINGLE_PAGE'] = '1'
|
||||
extra = cfg.data['extra']
|
||||
extra['single_page'] = True
|
||||
extra['is_amp'] = False
|
||||
logging.info(f"Building single page version for {lang}")
|
||||
os.environ["SINGLE_PAGE"] = "1"
|
||||
extra = cfg.data["extra"]
|
||||
extra["single_page"] = True
|
||||
extra["is_amp"] = False
|
||||
|
||||
single_md_path = get_temporary_file_name(lang, args)
|
||||
with open(single_md_path, 'w') as single_md:
|
||||
with open(single_md_path, "w") as single_md:
|
||||
concatenate(lang, args.docs_dir, single_md, nav)
|
||||
|
||||
with util.temp_dir() as site_temp:
|
||||
@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg):
|
||||
shutil.copytree(docs_src_lang, docs_temp_lang)
|
||||
for root, _, filenames in os.walk(docs_temp_lang):
|
||||
for filename in filenames:
|
||||
if filename != 'single.md' and filename.endswith('.md'):
|
||||
if filename != "single.md" and filename.endswith(".md"):
|
||||
os.unlink(os.path.join(root, filename))
|
||||
|
||||
cfg.load_dict({
|
||||
'docs_dir': docs_temp_lang,
|
||||
'site_dir': site_temp,
|
||||
'extra': extra,
|
||||
'nav': [
|
||||
{cfg.data.get('site_name'): 'single.md'}
|
||||
]
|
||||
})
|
||||
cfg.load_dict(
|
||||
{
|
||||
"docs_dir": docs_temp_lang,
|
||||
"site_dir": site_temp,
|
||||
"extra": extra,
|
||||
"nav": [{cfg.data.get("site_name"): "single.md"}],
|
||||
}
|
||||
)
|
||||
|
||||
if not args.test_only:
|
||||
mkdocs.commands.build.build(cfg)
|
||||
|
||||
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
|
||||
single_page_output_path = os.path.join(
|
||||
args.docs_dir, args.docs_output_dir, lang, "single"
|
||||
)
|
||||
|
||||
if os.path.exists(single_page_output_path):
|
||||
shutil.rmtree(single_page_output_path)
|
||||
|
||||
shutil.copytree(
|
||||
os.path.join(site_temp, 'single'),
|
||||
single_page_output_path
|
||||
os.path.join(site_temp, "single"), single_page_output_path
|
||||
)
|
||||
|
||||
single_page_index_html = os.path.join(single_page_output_path, 'index.html')
|
||||
single_page_content_js = os.path.join(single_page_output_path, 'content.js')
|
||||
single_page_index_html = os.path.join(
|
||||
single_page_output_path, "index.html"
|
||||
)
|
||||
single_page_content_js = os.path.join(
|
||||
single_page_output_path, "content.js"
|
||||
)
|
||||
|
||||
with open(single_page_index_html, 'r') as f:
|
||||
sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->')
|
||||
with open(single_page_index_html, "r") as f:
|
||||
sp_prefix, sp_js, sp_suffix = f.read().split("<!-- BREAK -->")
|
||||
|
||||
with open(single_page_index_html, 'w') as f:
|
||||
with open(single_page_index_html, "w") as f:
|
||||
f.write(sp_prefix)
|
||||
f.write(sp_suffix)
|
||||
|
||||
with open(single_page_content_js, 'w') as f:
|
||||
with open(single_page_content_js, "w") as f:
|
||||
if args.minify:
|
||||
import jsmin
|
||||
|
||||
sp_js = jsmin.jsmin(sp_js)
|
||||
f.write(sp_js)
|
||||
|
||||
logging.info(f'Re-building single page for {lang} pdf/test')
|
||||
logging.info(f"Re-building single page for {lang} pdf/test")
|
||||
with util.temp_dir() as test_dir:
|
||||
extra['single_page'] = False
|
||||
cfg.load_dict({
|
||||
'docs_dir': docs_temp_lang,
|
||||
'site_dir': test_dir,
|
||||
'extra': extra,
|
||||
'nav': [
|
||||
{cfg.data.get('site_name'): 'single.md'}
|
||||
]
|
||||
})
|
||||
extra["single_page"] = False
|
||||
cfg.load_dict(
|
||||
{
|
||||
"docs_dir": docs_temp_lang,
|
||||
"site_dir": test_dir,
|
||||
"extra": extra,
|
||||
"nav": [{cfg.data.get("site_name"): "single.md"}],
|
||||
}
|
||||
)
|
||||
mkdocs.commands.build.build(cfg)
|
||||
|
||||
css_in = ' '.join(website.get_css_in(args))
|
||||
js_in = ' '.join(website.get_js_in(args))
|
||||
subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
|
||||
subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)
|
||||
css_in = " ".join(website.get_css_in(args))
|
||||
js_in = " ".join(website.get_js_in(args))
|
||||
subprocess.check_call(
|
||||
f"cat {css_in} > {test_dir}/css/base.css", shell=True
|
||||
)
|
||||
subprocess.check_call(
|
||||
f"cat {js_in} > {test_dir}/js/base.js", shell=True
|
||||
)
|
||||
|
||||
if args.save_raw_single_page:
|
||||
shutil.copytree(test_dir, args.save_raw_single_page)
|
||||
|
||||
logging.info(f'Running tests for {lang}')
|
||||
logging.info(f"Running tests for {lang}")
|
||||
test.test_single_page(
|
||||
os.path.join(test_dir, 'single', 'index.html'), lang)
|
||||
os.path.join(test_dir, "single", "index.html"), lang
|
||||
)
|
||||
|
||||
logging.info(f'Finished building single page version for {lang}')
|
||||
logging.info(f"Finished building single page version for {lang}")
|
||||
|
||||
remove_temporary_files(lang, args)
|
||||
|
@ -8,14 +8,11 @@ import subprocess


def test_single_page(input_path, lang):
if not (lang == 'en'):
if not (lang == "en"):
return

with open(input_path) as f:
soup = bs4.BeautifulSoup(
f,
features='html.parser'
)
soup = bs4.BeautifulSoup(f, features="html.parser")

anchor_points = set()

@ -23,30 +20,27 @@ def test_single_page(input_path, lang):
links_to_nowhere = 0

for tag in soup.find_all():
for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]:
for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]:
if anchor_point:
anchor_points.add(anchor_point)

for tag in soup.find_all():
href = tag.attrs.get('href')
if href and href.startswith('#') and href != '#':
href = tag.attrs.get("href")
if href and href.startswith("#") and href != "#":
if href[1:] not in anchor_points:
links_to_nowhere += 1
logging.info("Tag %s", tag)
logging.info('Link to nowhere: %s' % href)
logging.info("Link to nowhere: %s" % href)

if links_to_nowhere:
logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}')
logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}")
sys.exit(1)

if len(anchor_points) <= 10:
logging.error('Html parsing is probably broken')
logging.error("Html parsing is probably broken")
sys.exit(1)


if __name__ == '__main__':
logging.basicConfig(
level=logging.DEBUG,
stream=sys.stderr
)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
test_single_page(sys.argv[1], sys.argv[2])

@ -15,7 +15,7 @@ import yaml
|
||||
|
||||
@contextlib.contextmanager
|
||||
def temp_dir():
|
||||
path = tempfile.mkdtemp(dir=os.environ.get('TEMP'))
|
||||
path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
|
||||
try:
|
||||
yield path
|
||||
finally:
|
||||
@ -34,7 +34,7 @@ def cd(new_cwd):
|
||||
|
||||
def get_free_port():
|
||||
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
|
||||
s.bind(('', 0))
|
||||
s.bind(("", 0))
|
||||
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
return s.getsockname()[1]
|
||||
|
||||
@ -61,12 +61,12 @@ def read_md_file(path):
|
||||
meta_text = []
|
||||
content = []
|
||||
if os.path.exists(path):
|
||||
with open(path, 'r') as f:
|
||||
with open(path, "r") as f:
|
||||
for line in f:
|
||||
if line.startswith('---'):
|
||||
if line.startswith("---"):
|
||||
if in_meta:
|
||||
in_meta = False
|
||||
meta = yaml.full_load(''.join(meta_text))
|
||||
meta = yaml.full_load("".join(meta_text))
|
||||
else:
|
||||
in_meta = True
|
||||
else:
|
||||
@ -74,7 +74,7 @@ def read_md_file(path):
|
||||
meta_text.append(line)
|
||||
else:
|
||||
content.append(line)
|
||||
return meta, ''.join(content)
|
||||
return meta, "".join(content)
|
||||
|
||||
|
||||
def write_md_file(path, meta, content):
|
||||
@ -82,13 +82,13 @@ def write_md_file(path, meta, content):
|
||||
if not os.path.exists(dirname):
|
||||
os.makedirs(dirname)
|
||||
|
||||
with open(path, 'w') as f:
|
||||
with open(path, "w") as f:
|
||||
if meta:
|
||||
print('---', file=f)
|
||||
print("---", file=f)
|
||||
yaml.dump(meta, f)
|
||||
print('---', file=f)
|
||||
if not content.startswith('\n'):
|
||||
print('', file=f)
|
||||
print("---", file=f)
|
||||
if not content.startswith("\n"):
|
||||
print("", file=f)
|
||||
f.write(content)
|
||||
|
||||
|
||||
@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data):
|
||||
|
||||
value.append((node_key, node_value))
|
||||
|
||||
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)
|
||||
return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
|
||||
|
||||
|
||||
yaml.add_representer(collections.OrderedDict, represent_ordereddict)
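A hedged round-trip sketch for the two front-matter helpers above; the path and the edited key are made up, and `meta` is assumed to be the dict parsed from the YAML front matter (or falsy when the file has none).

```python
# Read a Markdown file, tweak its YAML front matter, and write it back.
meta, content = read_md_file("docs/en/example.md")  # illustrative path
meta = dict(meta or {})
meta["toc_priority"] = 10
write_md_file("docs/en/example.md", meta, content)
```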
|
||||
@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict)
|
||||
def init_jinja2_filters(env):
|
||||
import amp
|
||||
import website
|
||||
|
||||
chunk_size = 10240
|
||||
env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)]
|
||||
env.filters['html_to_amp'] = amp.html_to_amp
|
||||
env.filters['adjust_markdown_html'] = website.adjust_markdown_html
|
||||
env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT')
|
||||
env.filters["chunks"] = lambda line: [
|
||||
line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
|
||||
]
|
||||
env.filters["html_to_amp"] = amp.html_to_amp
|
||||
env.filters["adjust_markdown_html"] = website.adjust_markdown_html
|
||||
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
|
||||
d, "%Y-%m-%d"
|
||||
).strftime("%a, %d %b %Y %H:%M:%S GMT")
|
||||
|
||||
|
||||
def init_jinja2_env(args):
|
||||
import mdx_clickhouse
|
||||
|
||||
env = jinja2.Environment(
|
||||
loader=jinja2.FileSystemLoader([
|
||||
args.website_dir,
|
||||
os.path.join(args.docs_dir, '_includes')
|
||||
]),
|
||||
extensions=[
|
||||
'jinja2.ext.i18n',
|
||||
'jinja2_highlight.HighlightExtension'
|
||||
]
|
||||
loader=jinja2.FileSystemLoader(
|
||||
[args.website_dir, os.path.join(args.docs_dir, "_includes")]
|
||||
),
|
||||
extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
|
||||
)
|
||||
env.extend(jinja2_highlight_cssclass='syntax p-3 my-3')
|
||||
translations_dir = os.path.join(args.website_dir, 'locale')
|
||||
env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
|
||||
translations_dir = os.path.join(args.website_dir, "locale")
|
||||
env.install_gettext_translations(
|
||||
mdx_clickhouse.get_translations(translations_dir, 'en'),
|
||||
newstyle=True
|
||||
mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
|
||||
)
|
||||
init_jinja2_filters(env)
|
||||
return env
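To make the custom filters above concrete, a small sketch that registers them on a bare environment; it assumes it runs from docs/tools so that the local amp and website modules are importable, and the sample values are made up.

```python
import jinja2

env = jinja2.Environment()
init_jinja2_filters(env)  # registers chunks / html_to_amp / adjust_markdown_html / to_rfc882

tmpl = env.from_string("{{ d | to_rfc882 }} | {{ s | chunks | length }}")
print(tmpl.render(d="2022-03-28", s="x" * 25000))
# -> "Mon, 28 Mar 2022 00:00:00 GMT | 3"   (25000 chars split into 10240-char chunks)
```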
|
||||
|
@ -17,108 +17,112 @@ import util
|
||||
|
||||
|
||||
def handle_iframe(iframe, soup):
|
||||
allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/']
|
||||
allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"]
|
||||
illegal_domain = True
|
||||
iframe_src = iframe.attrs['src']
|
||||
iframe_src = iframe.attrs["src"]
|
||||
for domain in allowed_domains:
|
||||
if iframe_src.startswith(domain):
|
||||
illegal_domain = False
|
||||
break
|
||||
if illegal_domain:
|
||||
raise RuntimeError(f'iframe from illegal domain: {iframe_src}')
|
||||
wrapper = soup.new_tag('div')
|
||||
wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9']
|
||||
raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
|
||||
wrapper = soup.new_tag("div")
|
||||
wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
|
||||
iframe.insert_before(wrapper)
|
||||
iframe.extract()
|
||||
wrapper.insert(0, iframe)
|
||||
if 'width' in iframe.attrs:
|
||||
del iframe.attrs['width']
|
||||
if 'height' in iframe.attrs:
|
||||
del iframe.attrs['height']
|
||||
iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture'
|
||||
iframe.attrs['class'] = 'embed-responsive-item'
|
||||
iframe.attrs['frameborder'] = '0'
|
||||
iframe.attrs['allowfullscreen'] = '1'
|
||||
if "width" in iframe.attrs:
|
||||
del iframe.attrs["width"]
|
||||
if "height" in iframe.attrs:
|
||||
del iframe.attrs["height"]
|
||||
iframe.attrs[
|
||||
"allow"
|
||||
] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
|
||||
iframe.attrs["class"] = "embed-responsive-item"
|
||||
iframe.attrs["frameborder"] = "0"
|
||||
iframe.attrs["allowfullscreen"] = "1"
|
||||
|
||||
|
||||
def adjust_markdown_html(content):
|
||||
soup = bs4.BeautifulSoup(
|
||||
content,
|
||||
features='html.parser'
|
||||
)
|
||||
soup = bs4.BeautifulSoup(content, features="html.parser")
|
||||
|
||||
for a in soup.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
a_href = a.attrs.get('href')
|
||||
if a_class and 'headerlink' in a_class:
|
||||
a.string = '\xa0'
|
||||
if a_href and a_href.startswith('http'):
|
||||
a.attrs['target'] = '_blank'
|
||||
for a in soup.find_all("a"):
|
||||
a_class = a.attrs.get("class")
|
||||
a_href = a.attrs.get("href")
|
||||
if a_class and "headerlink" in a_class:
|
||||
a.string = "\xa0"
|
||||
if a_href and a_href.startswith("http"):
|
||||
a.attrs["target"] = "_blank"
|
||||
|
||||
for code in soup.find_all('code'):
|
||||
code_class = code.attrs.get('class')
|
||||
for code in soup.find_all("code"):
|
||||
code_class = code.attrs.get("class")
|
||||
if code_class:
|
||||
code.attrs['class'] = code_class + ['syntax']
|
||||
code.attrs["class"] = code_class + ["syntax"]
|
||||
else:
|
||||
code.attrs['class'] = 'syntax'
|
||||
code.attrs["class"] = "syntax"
|
||||
|
||||
for iframe in soup.find_all('iframe'):
|
||||
for iframe in soup.find_all("iframe"):
|
||||
handle_iframe(iframe, soup)
|
||||
|
||||
for img in soup.find_all('img'):
|
||||
if img.attrs.get('alt') == 'iframe':
|
||||
img.name = 'iframe'
|
||||
img.string = ''
|
||||
for img in soup.find_all("img"):
|
||||
if img.attrs.get("alt") == "iframe":
|
||||
img.name = "iframe"
|
||||
img.string = ""
|
||||
handle_iframe(img, soup)
|
||||
continue
|
||||
img_class = img.attrs.get('class')
|
||||
img_class = img.attrs.get("class")
|
||||
if img_class:
|
||||
img.attrs['class'] = img_class + ['img-fluid']
|
||||
img.attrs["class"] = img_class + ["img-fluid"]
|
||||
else:
|
||||
img.attrs['class'] = 'img-fluid'
|
||||
img.attrs["class"] = "img-fluid"
|
||||
|
||||
for details in soup.find_all('details'):
|
||||
for summary in details.find_all('summary'):
|
||||
for details in soup.find_all("details"):
|
||||
for summary in details.find_all("summary"):
|
||||
if summary.parent != details:
|
||||
summary.extract()
|
||||
details.insert(0, summary)
|
||||
|
||||
for dd in soup.find_all('dd'):
|
||||
dd_class = dd.attrs.get('class')
|
||||
for dd in soup.find_all("dd"):
|
||||
dd_class = dd.attrs.get("class")
|
||||
if dd_class:
|
||||
dd.attrs['class'] = dd_class + ['pl-3']
|
||||
dd.attrs["class"] = dd_class + ["pl-3"]
|
||||
else:
|
||||
dd.attrs['class'] = 'pl-3'
|
||||
dd.attrs["class"] = "pl-3"
|
||||
|
||||
for div in soup.find_all('div'):
|
||||
div_class = div.attrs.get('class')
|
||||
is_admonition = div_class and 'admonition' in div.attrs.get('class')
|
||||
for div in soup.find_all("div"):
|
||||
div_class = div.attrs.get("class")
|
||||
is_admonition = div_class and "admonition" in div.attrs.get("class")
|
||||
if is_admonition:
|
||||
for a in div.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
for a in div.find_all("a"):
|
||||
a_class = a.attrs.get("class")
|
||||
if a_class:
|
||||
a.attrs['class'] = a_class + ['alert-link']
|
||||
a.attrs["class"] = a_class + ["alert-link"]
|
||||
else:
|
||||
a.attrs['class'] = 'alert-link'
|
||||
a.attrs["class"] = "alert-link"
|
||||
|
||||
for p in div.find_all('p'):
|
||||
p_class = p.attrs.get('class')
|
||||
if is_admonition and p_class and ('admonition-title' in p_class):
|
||||
p.attrs['class'] = p_class + ['alert-heading', 'display-4', 'text-reset', 'mb-2']
|
||||
for p in div.find_all("p"):
|
||||
p_class = p.attrs.get("class")
|
||||
if is_admonition and p_class and ("admonition-title" in p_class):
|
||||
p.attrs["class"] = p_class + [
|
||||
"alert-heading",
|
||||
"display-4",
|
||||
"text-reset",
|
||||
"mb-2",
|
||||
]
|
||||
|
||||
if is_admonition:
|
||||
div.attrs['role'] = 'alert'
|
||||
if ('info' in div_class) or ('note' in div_class):
|
||||
mode = 'alert-primary'
|
||||
elif ('attention' in div_class) or ('warning' in div_class):
|
||||
mode = 'alert-warning'
|
||||
elif 'important' in div_class:
|
||||
mode = 'alert-danger'
|
||||
elif 'tip' in div_class:
|
||||
mode = 'alert-info'
|
||||
div.attrs["role"] = "alert"
|
||||
if ("info" in div_class) or ("note" in div_class):
|
||||
mode = "alert-primary"
|
||||
elif ("attention" in div_class) or ("warning" in div_class):
|
||||
mode = "alert-warning"
|
||||
elif "important" in div_class:
|
||||
mode = "alert-danger"
|
||||
elif "tip" in div_class:
|
||||
mode = "alert-info"
|
||||
else:
|
||||
mode = 'alert-secondary'
|
||||
div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode]
|
||||
mode = "alert-secondary"
|
||||
div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
|
||||
|
||||
return str(soup)
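For reference, what the admonition branch above produces for a minimal made-up snippet:

```python
raw = '<div class="admonition note"><p class="admonition-title">Note</p><p>Body text.</p></div>'
print(adjust_markdown_html(raw))
# The div becomes a Bootstrap alert:
#   class="admonition note alert pb-0 mb-4 alert-primary", role="alert"
# and the title paragraph gains "alert-heading display-4 text-reset mb-2".
```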
|
||||
|
||||
@ -128,61 +132,63 @@ def minify_html(content):
|
||||
|
||||
|
||||
def build_website(args):
|
||||
logging.info('Building website')
|
||||
logging.info("Building website")
|
||||
env = util.init_jinja2_env(args)
|
||||
|
||||
shutil.copytree(
|
||||
args.website_dir,
|
||||
args.output_dir,
|
||||
ignore=shutil.ignore_patterns(
|
||||
'*.md',
|
||||
'*.sh',
|
||||
'*.css',
|
||||
'*.json',
|
||||
'js/*.js',
|
||||
'build',
|
||||
'docs',
|
||||
'public',
|
||||
'node_modules',
|
||||
'src',
|
||||
'templates',
|
||||
'locale',
|
||||
'.gitkeep'
|
||||
)
|
||||
"*.md",
|
||||
"*.sh",
|
||||
"*.css",
|
||||
"*.json",
|
||||
"js/*.js",
|
||||
"build",
|
||||
"docs",
|
||||
"public",
|
||||
"node_modules",
|
||||
"src",
|
||||
"templates",
|
||||
"locale",
|
||||
".gitkeep",
|
||||
),
|
||||
)
|
||||
|
||||
shutil.copytree(
|
||||
os.path.join(args.website_dir, 'images'),
|
||||
os.path.join(args.output_dir, 'docs', 'images')
|
||||
os.path.join(args.website_dir, "images"),
|
||||
os.path.join(args.output_dir, "docs", "images"),
|
||||
)
|
||||
|
||||
# This file can be requested to check for available ClickHouse releases.
|
||||
shutil.copy2(
|
||||
os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'),
|
||||
os.path.join(args.output_dir, 'data', 'version_date.tsv'))
|
||||
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
|
||||
os.path.join(args.output_dir, "data", "version_date.tsv"),
|
||||
)
|
||||
|
||||
# This file can be requested to install ClickHouse.
|
||||
shutil.copy2(
|
||||
os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh'),
|
||||
os.path.join(args.output_dir, 'data', 'install.sh'))
|
||||
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
|
||||
os.path.join(args.output_dir, "data", "install.sh"),
|
||||
)
|
||||
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
if filename == 'main.html':
|
||||
if filename == "main.html":
|
||||
continue
|
||||
|
||||
path = os.path.join(root, filename)
|
||||
if not filename.endswith('.html'):
|
||||
if not filename.endswith(".html"):
|
||||
continue
|
||||
logging.info('Processing %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read().decode('utf-8')
|
||||
logging.info("Processing %s", path)
|
||||
with open(path, "rb") as f:
|
||||
content = f.read().decode("utf-8")
|
||||
|
||||
template = env.from_string(content)
|
||||
content = template.render(args.__dict__)
|
||||
|
||||
with open(path, 'wb') as f:
|
||||
f.write(content.encode('utf-8'))
|
||||
with open(path, "wb") as f:
|
||||
f.write(content.encode("utf-8"))
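The loop above treats every generated .html file (except main.html) as a Jinja2 template and re-renders it in place with the build arguments. A tiny illustration of that step; the variable name is made up.

```python
page = env.from_string("<title>{{ website_title }}</title>")   # env from init_jinja2_env(args)
print(page.render({"website_title": "ClickHouse"}))            # -> <title>ClickHouse</title>
```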
|
||||
|
||||
|
||||
def get_css_in(args):
|
||||
@ -193,7 +199,7 @@ def get_css_in(args):
|
||||
f"'{args.website_dir}/css/blog.css'",
|
||||
f"'{args.website_dir}/css/docs.css'",
|
||||
f"'{args.website_dir}/css/highlight.css'",
|
||||
f"'{args.website_dir}/css/main.css'"
|
||||
f"'{args.website_dir}/css/main.css'",
|
||||
]
|
||||
|
||||
|
||||
@ -207,42 +213,41 @@ def get_js_in(args):
|
||||
f"'{args.website_dir}/js/index.js'",
|
||||
f"'{args.website_dir}/js/docsearch.js'",
|
||||
f"'{args.website_dir}/js/docs.js'",
|
||||
f"'{args.website_dir}/js/main.js'"
|
||||
f"'{args.website_dir}/js/main.js'",
|
||||
]
|
||||
|
||||
|
||||
def minify_file(path, css_digest, js_digest):
|
||||
if not (
|
||||
path.endswith('.html') or
|
||||
path.endswith('.css')
|
||||
):
|
||||
if not (path.endswith(".html") or path.endswith(".css")):
|
||||
return
|
||||
|
||||
logging.info('Minifying %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read().decode('utf-8')
|
||||
if path.endswith('.html'):
|
||||
logging.info("Minifying %s", path)
|
||||
with open(path, "rb") as f:
|
||||
content = f.read().decode("utf-8")
|
||||
if path.endswith(".html"):
|
||||
content = minify_html(content)
|
||||
content = content.replace('base.css?css_digest', f'base.css?{css_digest}')
|
||||
content = content.replace('base.js?js_digest', f'base.js?{js_digest}')
|
||||
content = content.replace("base.css?css_digest", f"base.css?{css_digest}")
|
||||
content = content.replace("base.js?js_digest", f"base.js?{js_digest}")
|
||||
# TODO: restore cssmin
|
||||
# elif path.endswith('.css'):
|
||||
# content = cssmin.cssmin(content)
|
||||
# TODO: restore jsmin
|
||||
# elif path.endswith('.js'):
|
||||
# content = jsmin.jsmin(content)
|
||||
with open(path, 'wb') as f:
|
||||
f.write(content.encode('utf-8'))
|
||||
with open(path, "wb") as f:
|
||||
f.write(content.encode("utf-8"))
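The digest substitution above is a simple cache-busting scheme: templates reference base.css?css_digest / base.js?js_digest, and the placeholders are replaced with a short SHA3-224 digest of the built asset, so browsers refetch it only when it actually changes. A minimal sketch (the path and markup are illustrative):

```python
import hashlib

with open("docs/css/base.css", "rb") as f:  # illustrative path
    css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]

html = '<link rel="stylesheet" href="base.css?css_digest">'
print(html.replace("base.css?css_digest", f"base.css?{css_digest}"))
```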
|
||||
|
||||
|
||||
def minify_website(args):
|
||||
css_in = ' '.join(get_css_in(args))
|
||||
css_out = f'{args.output_dir}/docs/css/base.css'
|
||||
os.makedirs(f'{args.output_dir}/docs/css')
|
||||
css_in = " ".join(get_css_in(args))
|
||||
css_out = f"{args.output_dir}/docs/css/base.css"
|
||||
os.makedirs(f"{args.output_dir}/docs/css")
|
||||
|
||||
if args.minify and False: # TODO: return closure
|
||||
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \
|
||||
command = (
|
||||
f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' "
|
||||
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
|
||||
)
|
||||
logging.info(css_in)
|
||||
logging.info(command)
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
@ -251,51 +256,60 @@ def minify_website(args):
|
||||
else:
|
||||
command = f"cat {css_in}"
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
with open(css_out, 'wb+') as f:
|
||||
with open(css_out, "wb+") as f:
|
||||
f.write(output)
|
||||
|
||||
with open(css_out, 'rb') as f:
|
||||
with open(css_out, "rb") as f:
|
||||
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
||||
|
||||
js_in = ' '.join(get_js_in(args))
|
||||
js_out = f'{args.output_dir}/docs/js/base.js'
|
||||
os.makedirs(f'{args.output_dir}/docs/js')
|
||||
js_in = " ".join(get_js_in(args))
|
||||
js_out = f"{args.output_dir}/docs/js/base.js"
|
||||
os.makedirs(f"{args.output_dir}/docs/js")
|
||||
|
||||
if args.minify and False: # TODO: return closure
|
||||
js_in = [js[1:-1] for js in js_in]
|
||||
closure_args = [
|
||||
'--js', *js_in, '--js_output_file', js_out,
|
||||
'--compilation_level', 'SIMPLE',
|
||||
'--dependency_mode', 'NONE',
|
||||
'--third_party', '--use_types_for_optimization',
|
||||
'--isolation_mode', 'IIFE'
|
||||
"--js",
|
||||
*js_in,
|
||||
"--js_output_file",
|
||||
js_out,
|
||||
"--compilation_level",
|
||||
"SIMPLE",
|
||||
"--dependency_mode",
|
||||
"NONE",
|
||||
"--third_party",
|
||||
"--use_types_for_optimization",
|
||||
"--isolation_mode",
|
||||
"IIFE",
|
||||
]
|
||||
logging.info(closure_args)
|
||||
if closure.run(*closure_args):
|
||||
raise RuntimeError('failed to run closure compiler')
|
||||
with open(js_out, 'r') as f:
|
||||
raise RuntimeError("failed to run closure compiler")
|
||||
with open(js_out, "r") as f:
|
||||
js_content = jsmin.jsmin(f.read())
|
||||
with open(js_out, 'w') as f:
|
||||
with open(js_out, "w") as f:
|
||||
f.write(js_content)
|
||||
|
||||
else:
|
||||
command = f"cat {js_in}"
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
with open(js_out, 'wb+') as f:
|
||||
with open(js_out, "wb+") as f:
|
||||
f.write(output)
|
||||
|
||||
with open(js_out, 'rb') as f:
|
||||
with open(js_out, "rb") as f:
|
||||
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
||||
logging.info(js_digest)
|
||||
|
||||
if args.minify:
|
||||
logging.info('Minifying website')
|
||||
logging.info("Minifying website")
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
path = os.path.join(root, filename)
|
||||
futures.append(executor.submit(minify_file, path, css_digest, js_digest))
|
||||
futures.append(
|
||||
executor.submit(minify_file, path, css_digest, js_digest)
|
||||
)
|
||||
for future in futures:
|
||||
exc = future.exception()
|
||||
if exc:
|
||||
@ -304,24 +318,28 @@ def minify_website(args):
|
||||
|
||||
|
||||
def process_benchmark_results(args):
|
||||
benchmark_root = os.path.join(args.website_dir, 'benchmark')
|
||||
benchmark_root = os.path.join(args.website_dir, "benchmark")
|
||||
required_keys = {
|
||||
'dbms': ['result'],
|
||||
'hardware': ['result', 'system', 'system_full', 'kind']
|
||||
"dbms": ["result"],
|
||||
"hardware": ["result", "system", "system_full", "kind"],
|
||||
}
|
||||
for benchmark_kind in ['dbms', 'hardware']:
|
||||
for benchmark_kind in ["dbms", "hardware"]:
|
||||
results = []
|
||||
results_root = os.path.join(benchmark_root, benchmark_kind, 'results')
|
||||
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
|
||||
for result in sorted(os.listdir(results_root)):
|
||||
result_file = os.path.join(results_root, result)
|
||||
logging.debug(f'Reading benchmark result from {result_file}')
|
||||
with open(result_file, 'r') as f:
|
||||
logging.debug(f"Reading benchmark result from {result_file}")
|
||||
with open(result_file, "r") as f:
|
||||
result = json.loads(f.read())
|
||||
for item in result:
|
||||
for required_key in required_keys[benchmark_kind]:
|
||||
assert required_key in item, f'No "{required_key}" in {result_file}'
|
||||
assert (
|
||||
required_key in item
|
||||
), f'No "{required_key}" in {result_file}'
|
||||
results += result
|
||||
results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js')
|
||||
with open(results_js, 'w') as f:
|
||||
results_js = os.path.join(
|
||||
args.output_dir, "benchmark", benchmark_kind, "results.js"
|
||||
)
|
||||
with open(results_js, "w") as f:
|
||||
data = json.dumps(results)
|
||||
f.write(f'var results = {data};')
|
||||
f.write(f"var results = {data};")
|
||||
|
@ -42,6 +42,8 @@ git push
|
||||
Use the `utils/check-style/check-style` binary to run some simple regex-based code style checks (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](./style.md).

Python code is checked with [black](https://github.com/psf/black/).

### Report Details {#report-details}
- [Example of a status page](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `docs_output.txt` records the check errors (invalid tables, etc.); an empty page means there are no errors. [Example of a successful run](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)
|
||||
|
@ -3,62 +3,41 @@ toc_priority: 14
|
||||
toc_title: Playground
|
||||
---
|
||||
|
||||
# ClickHouse体验平台 {#clickhouse-playground}
|
||||
# ClickHouse Playground {#clickhouse-playground}
|
||||
|
||||
!!! warning "Warning"
|
||||
This service is deprecated and will be replaced in the foreseeable future.
|
||||
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
|
||||
Several example datasets are available in Playground.
|
||||
|
||||
[ClickHouse Playground](https://play.clickhouse.com?file=welcome) allows people to experiment with ClickHouse by running queries instantly, without setting up their own server or cluster.

Several example datasets are available in the Playground, along with sample queries that show off ClickHouse features. A few ClickHouse LTS releases are also available to try.

You can query the Playground with any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using the [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
|
||||
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
|
||||
|
||||
## Credentials {#credentials}
|
||||
|
||||
| Parameter | Value |
|:--------------------|:----------------------------------------|
| HTTPS endpoint | `https://play-api.clickhouse.com:8443` |
| TCP endpoint | `play-api.clickhouse.com:9440` |
| User | `playground` |
| Password | `clickhouse` |
|
||||
| Parameter | Value |
|
||||
|:--------------------|:-----------------------------------|
|
||||
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
|
||||
| Native TCP endpoint | `play.clickhouse.com:9440` |
|
||||
| User | `explorer` or `play` |
|
||||
| Password | (empty) |
|
||||
|
||||
There are also additional endpoints with specific ClickHouse versions, to experiment with the differences between them (ports and user/password are the same as above):
|
||||
## Limitations {#limitations}
|
||||
|
||||
- 20.3 LTS: `play-api-v20-3.clickhouse.com`
|
||||
- 19.14 LTS: `play-api-v19-14.clickhouse.com`
|
||||
The queries are executed as a read-only user. It implies some limitations:
|
||||
|
||||
!!! note "Note"
    All these endpoints require a secure TLS connection.
|
||||
- DDL queries are not allowed
|
||||
- INSERT queries are not allowed
|
||||
|
||||
## Query Limitations {#limitations}
|
||||
The service also has quotas on its usage.
|
||||
|
||||
Queries are executed as a read-only user. This implies some limitations:
|
||||
## Examples {#examples}
|
||||
|
||||
- DDL queries are not allowed
- INSERT queries are not allowed
|
||||
|
||||
The following settings are also enforced:
|
||||
- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes)
|
||||
- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows)
|
||||
- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode)
|
||||
- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time)
|
||||
|
||||
ClickHouse Playground also provides the following:
a [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse)
instance hosted in [Yandex.Cloud](https://cloud.yandex.com/).
More information about [cloud providers](../commercial/cloud.md).
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
Connecting to the HTTPS service with `curl`:
|
||||
HTTPS endpoint example with `curl`:
|
||||
|
||||
``` bash
|
||||
curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets"
|
||||
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
|
||||
```
|
||||
|
||||
TCP connection example with the [CLI](../interfaces/cli.md):
|
||||
TCP endpoint example with [CLI](../interfaces/cli.md):
|
||||
|
||||
``` bash
|
||||
clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'"
|
||||
clickhouse client --secure --host play.clickhouse.com --user explorer
|
||||
```
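The same endpoint can be queried from any HTTP library; for example, a minimal Python sketch using requests that mirrors the curl command above:

```python
import requests

resp = requests.post(
    "https://play.clickhouse.com/",
    params={"user": "explorer"},
    data="SELECT 'Play ClickHouse'",
)
print(resp.text)
```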
|
||||
|
@ -1240,7 +1240,8 @@ SELECT * FROM topic1_stream;
|
||||
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
|
||||
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
|
||||
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` |
|
||||
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` |
|
||||
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
@ -1295,7 +1296,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
|
||||
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
|
||||
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
|
||||
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
|
||||
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
|
||||
| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
|
||||
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
|
||||
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||
| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |
|
||||
|
@ -31,7 +31,7 @@
|
||||
|
||||
- For the 'dict_name' hierarchical dictionary, checks whether the 'child_id' key is located inside 'ancestor_id' (or matches 'ancestor_id'). Returns UInt8.
|
||||
|
||||
## 独裁主义 {#dictgethierarchy}
|
||||
## dictGetHierarchy {#dictgethierarchy}
|
||||
|
||||
`dictGetHierarchy('dict_name', id)`
|
||||
|
||||
|
packages/.gitignore (vendored, new file, 1 line)
@ -0,0 +1 @@
|
||||
*/
|
packages/build (new executable file, 156 lines)
@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
# Avoid dependency on locale
|
||||
LC_ALL=C
|
||||
|
||||
# Normalize output directory
|
||||
if [ -n "$OUTPUT_DIR" ]; then
|
||||
OUTPUT_DIR=$(realpath -m "$OUTPUT_DIR")
|
||||
fi
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
cd "$CUR_DIR"
|
||||
|
||||
ROOT_DIR=$(readlink -f "$(git rev-parse --show-cdup)")
|
||||
|
||||
PKG_ROOT='root'
|
||||
|
||||
DEB_ARCH=${DEB_ARCH:-amd64}
|
||||
OUTPUT_DIR=${OUTPUT_DIR:-$ROOT_DIR}
|
||||
[ -d "${OUTPUT_DIR}" ] || mkdir -p "${OUTPUT_DIR}"
|
||||
SANITIZER=${SANITIZER:-""}
|
||||
SOURCE=${SOURCE:-$PKG_ROOT}
|
||||
|
||||
HELP="${0} [--test] [--rpm] [-h|--help]
|
||||
--test - adds '+test' suffix to version
|
||||
--apk - build APK packages
|
||||
--rpm - build RPM packages
|
||||
--tgz - build tarball package
|
||||
--help - show this help and exit
|
||||
|
||||
Used envs:
|
||||
DEB_ARCH='${DEB_ARCH}'
|
||||
OUTPUT_DIR='${OUTPUT_DIR}' - where the artifact will be placed
|
||||
SANITIZER='${SANITIZER}' - if any sanitizer is used, affects version string
|
||||
SOURCE='${SOURCE}' - directory with sources tree
|
||||
VERSION_STRING='${VERSION_STRING}' - the package version to overwrite
|
||||
"
|
||||
|
||||
if [ -z "${VERSION_STRING}" ]; then
|
||||
# Get CLICKHOUSE_VERSION_STRING from the current git repo
|
||||
eval "$("$ROOT_DIR/tests/ci/version_helper.py" -e)"
|
||||
else
|
||||
CLICKHOUSE_VERSION_STRING=${VERSION_STRING}
|
||||
fi
|
||||
export CLICKHOUSE_VERSION_STRING
|
||||
|
||||
|
||||
|
||||
while [[ $1 == --* ]]
|
||||
do
|
||||
case "$1" in
|
||||
--test )
|
||||
VERSION_POSTFIX+='+test'
|
||||
shift ;;
|
||||
--apk )
|
||||
MAKE_APK=1
|
||||
shift ;;
|
||||
--rpm )
|
||||
MAKE_RPM=1
|
||||
shift ;;
|
||||
--tgz )
|
||||
MAKE_TGZ=1
|
||||
shift ;;
|
||||
--help )
|
||||
echo "$HELP"
|
||||
exit ;;
|
||||
* )
|
||||
echo "Unknown option $1"
|
||||
exit 2 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
function deb2tgz {
|
||||
local FILE PKG_NAME PKG_DIR PKG_PATH TARBALL
|
||||
FILE=$1
|
||||
PKG_NAME=${FILE##*/}; PKG_NAME=${PKG_NAME%%_*}
|
||||
PKG_DIR="$PKG_NAME-$CLICKHOUSE_VERSION_STRING"
|
||||
PKG_PATH="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING"
|
||||
TARBALL="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING-$DEB_ARCH.tgz"
|
||||
rm -rf "$PKG_PATH"
|
||||
dpkg-deb -R "$FILE" "$PKG_PATH"
|
||||
mkdir -p "$PKG_PATH/install"
|
||||
cat > "$PKG_PATH/install/doinst.sh" << 'EOF'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
for filepath in `find $SCRIPTPATH/.. -type f -or -type l | grep -v "\.\./install/"`; do
|
||||
destpath=${filepath##$SCRIPTPATH/..}
|
||||
mkdir -p $(dirname "$destpath")
|
||||
cp -r "$filepath" "$destpath"
|
||||
done
|
||||
EOF
|
||||
chmod +x "$PKG_PATH/install/doinst.sh"
|
||||
if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then
|
||||
tail +2 "$PKG_PATH/DEBIAN/postinst" > "$PKG_PATH/install/doinst.sh"
|
||||
fi
|
||||
rm -rf "$PKG_PATH/DEBIAN"
|
||||
if [ -f "/usr/bin/pigz" ]; then
|
||||
tar --use-compress-program=pigz -cf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
|
||||
else
|
||||
tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
|
||||
fi
|
||||
|
||||
rm -r "$PKG_PATH"
|
||||
}
|
||||
|
||||
# Build options
|
||||
if [ -n "$SANITIZER" ]; then
|
||||
if [[ "$SANITIZER" == "address" ]]; then VERSION_POSTFIX+="+asan"
|
||||
elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan"
|
||||
elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan"
|
||||
elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan"
|
||||
else
|
||||
echo "Unknown value of SANITIZER variable: $SANITIZER"
|
||||
exit 3
|
||||
fi
|
||||
elif [[ $BUILD_TYPE == 'debug' ]]; then
|
||||
VERSION_POSTFIX+="+debug"
|
||||
fi
|
||||
|
||||
if [[ "$PKG_ROOT" != "$SOURCE" ]]; then
|
||||
# packages are built only from PKG_SOURCE
|
||||
rm -rf "./$PKG_ROOT"
|
||||
ln -sf "$SOURCE" "$PKG_ROOT"
|
||||
fi
|
||||
|
||||
CLICKHOUSE_VERSION_STRING+=$VERSION_POSTFIX
|
||||
echo -e "\nCurrent version is $CLICKHOUSE_VERSION_STRING"
|
||||
|
||||
for config in clickhouse*.yaml; do
|
||||
echo "Building deb package for $config"
|
||||
|
||||
# Preserve package path
|
||||
exec 9>&1
|
||||
PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9)
|
||||
PKG_PATH=${PKG_PATH##*created package: }
|
||||
exec 9>&-
|
||||
|
||||
if [ -n "$MAKE_APK" ]; then
|
||||
echo "Building apk package for $config"
|
||||
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk
|
||||
fi
|
||||
if [ -n "$MAKE_RPM" ]; then
|
||||
echo "Building rpm package for $config"
|
||||
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm
|
||||
fi
|
||||
if [ -n "$MAKE_TGZ" ]; then
|
||||
echo "Building tarball for $config"
|
||||
deb2tgz "$PKG_PATH"
|
||||
fi
|
||||
done
|
||||
|
||||
# vim: ts=4: sw=4: sts=4: expandtab
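For readers less familiar with the shell above, a rough Python sketch of the same deb-to-tgz repack flow; paths and names are illustrative, dpkg-deb must be installed, and the DEBIAN/doinst.sh handling from the script is deliberately omitted.

```python
import os
import subprocess
import tarfile

def deb_to_tgz(deb_path: str, out_dir: str, version: str, arch: str = "amd64") -> str:
    pkg_name = os.path.basename(deb_path).split("_")[0]                 # mirrors ${PKG_NAME%%_*}
    pkg_dir = f"{pkg_name}-{version}"
    pkg_path = os.path.join(out_dir, pkg_dir)
    subprocess.run(["dpkg-deb", "-R", deb_path, pkg_path], check=True)  # unpack the .deb
    tarball = os.path.join(out_dir, f"{pkg_dir}-{arch}.tgz")
    with tarfile.open(tarball, "w:gz") as tar:
        tar.add(pkg_path, arcname=pkg_dir)
    return tarball
```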
|
packages/clickhouse-client.yaml (new file, 57 lines)
@ -0,0 +1,57 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-client"
|
||||
arch: "all"
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
replaces:
|
||||
- clickhouse-compressor
|
||||
conflicts:
|
||||
- clickhouse-compressor
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
Client binary for ClickHouse
|
||||
ClickHouse is a column-oriented database management system
|
||||
that allows generating analytical data reports in real time.
|
||||
This package provides clickhouse-client, clickhouse-local and clickhouse-benchmark
|
||||
|
||||
overrides:
|
||||
deb:
|
||||
depends:
|
||||
- clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING})
|
||||
rpm:
|
||||
depends:
|
||||
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
|
||||
|
||||
contents:
|
||||
- src: root/etc/clickhouse-client/config.xml
|
||||
dst: /etc/clickhouse-client/config.xml
|
||||
type: config
|
||||
- src: root/usr/bin/clickhouse-benchmark
|
||||
dst: /usr/bin/clickhouse-benchmark
|
||||
- src: root/usr/bin/clickhouse-compressor
|
||||
dst: /usr/bin/clickhouse-compressor
|
||||
- src: root/usr/bin/clickhouse-format
|
||||
dst: /usr/bin/clickhouse-format
|
||||
- src: root/usr/bin/clickhouse-client
|
||||
dst: /usr/bin/clickhouse-client
|
||||
- src: root/usr/bin/clickhouse-local
|
||||
dst: /usr/bin/clickhouse-local
|
||||
- src: root/usr/bin/clickhouse-obfuscator
|
||||
dst: /usr/bin/clickhouse-obfuscator
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-client/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-client/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-client/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-client/README.md
|
packages/clickhouse-common-static-dbg.yaml (new file, 34 lines)
@ -0,0 +1,34 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-common-static-dbg"
|
||||
arch: "${DEB_ARCH}" # amd64, arm64
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
replaces:
|
||||
- clickhouse-common-dbg
|
||||
conflicts:
|
||||
- clickhouse-common-dbg
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
debugging symbols for clickhouse-common-static
|
||||
This package contains the debugging symbols for clickhouse-common.
|
||||
|
||||
contents:
|
||||
- src: root/usr/lib/debug
|
||||
dst: /usr/lib/debug
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/README.md
|
packages/clickhouse-common-static.yaml (new file, 48 lines)
@ -0,0 +1,48 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-common-static"
|
||||
arch: "${DEB_ARCH}" # amd64, arm64
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
replaces:
|
||||
- clickhouse-common
|
||||
- clickhouse-server-base
|
||||
provides:
|
||||
- clickhouse-common
|
||||
- clickhouse-server-base
|
||||
suggests:
|
||||
- clickhouse-common-static-dbg
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
Common files for ClickHouse
|
||||
ClickHouse is a column-oriented database management system
|
||||
that allows generating analytical data reports in real time.
|
||||
This package provides common files for both clickhouse server and client
|
||||
|
||||
contents:
|
||||
- src: root/usr/bin/clickhouse
|
||||
dst: /usr/bin/clickhouse
|
||||
- src: root/usr/bin/clickhouse-odbc-bridge
|
||||
dst: /usr/bin/clickhouse-odbc-bridge
|
||||
- src: root/usr/bin/clickhouse-library-bridge
|
||||
dst: /usr/bin/clickhouse-library-bridge
|
||||
- src: root/usr/bin/clickhouse-extract-from-config
|
||||
dst: /usr/bin/clickhouse-extract-from-config
|
||||
- src: root/usr/share/bash-completion/completions
|
||||
dst: /usr/share/bash-completion/completions
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-common-static/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-common-static/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-common-static/README.md
|
packages/clickhouse-server.init (new executable file, 227 lines)
@ -0,0 +1,227 @@
|
||||
#!/bin/sh
|
||||
### BEGIN INIT INFO
|
||||
# Provides: clickhouse-server
|
||||
# Default-Start: 2 3 4 5
|
||||
# Default-Stop: 0 1 6
|
||||
# Should-Start: $time $network
|
||||
# Should-Stop: $network
|
||||
# Short-Description: clickhouse-server daemon
|
||||
### END INIT INFO
|
||||
#
|
||||
# NOTES:
|
||||
# - Should-* -- script can start if the listed facilities are missing, unlike Required-*
|
||||
#
|
||||
# For the documentation [1]:
|
||||
#
|
||||
# [1]: https://wiki.debian.org/LSBInitScripts
|
||||
|
||||
CLICKHOUSE_USER=clickhouse
|
||||
CLICKHOUSE_GROUP=${CLICKHOUSE_USER}
|
||||
SHELL=/bin/bash
|
||||
PROGRAM=clickhouse-server
|
||||
CLICKHOUSE_GENERIC_PROGRAM=clickhouse
|
||||
CLICKHOUSE_PROGRAM_ENV=""
|
||||
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
|
||||
CLICKHOUSE_CONFDIR=/etc/$PROGRAM
|
||||
CLICKHOUSE_LOGDIR=/var/log/clickhouse-server
|
||||
CLICKHOUSE_LOGDIR_USER=root
|
||||
CLICKHOUSE_DATADIR=/var/lib/clickhouse
|
||||
if [ -d "/var/lock" ]; then
|
||||
LOCALSTATEDIR=/var/lock
|
||||
else
|
||||
LOCALSTATEDIR=/run/lock
|
||||
fi
|
||||
|
||||
if [ ! -d "$LOCALSTATEDIR" ]; then
|
||||
mkdir -p "$LOCALSTATEDIR"
|
||||
fi
|
||||
|
||||
CLICKHOUSE_BINDIR=/usr/bin
|
||||
CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server
|
||||
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
|
||||
LOCKFILE=$LOCALSTATEDIR/$PROGRAM
|
||||
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
|
||||
CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid"
|
||||
# CLICKHOUSE_STOP_TIMEOUT=60 # Disabled by default. Place to /etc/default/clickhouse if you need.
|
||||
|
||||
# Some systems lack "flock"
|
||||
command -v flock >/dev/null && FLOCK=flock
|
||||
|
||||
# Override defaults from optional config file
|
||||
test -f /etc/default/clickhouse && . /etc/default/clickhouse
|
||||
|
||||
|
||||
die()
|
||||
{
|
||||
echo $1 >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
|
||||
# Check that configuration file is Ok.
|
||||
check_config()
|
||||
{
|
||||
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
|
||||
su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure.";
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
initdb()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
start()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
stop()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
restart()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
forcestop()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
service_or_func()
|
||||
{
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
systemctl $1 $PROGRAM
|
||||
else
|
||||
$1
|
||||
fi
|
||||
}
|
||||
|
||||
forcerestart()
|
||||
{
|
||||
forcestop
|
||||
# Should not use 'start' function if systemd active
|
||||
service_or_func start
|
||||
}
|
||||
|
||||
use_cron()
|
||||
{
|
||||
# 1. running systemd
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
return 1
|
||||
fi
|
||||
# 2. disabled by config
|
||||
if [ -z "$CLICKHOUSE_CRONFILE" ]; then
|
||||
return 2
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
# returns false if cron disabled (with systemd)
|
||||
enable_cron()
|
||||
{
|
||||
use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE"
|
||||
}
|
||||
# returns false if cron disabled (with systemd)
|
||||
disable_cron()
|
||||
{
|
||||
use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE"
|
||||
}
|
||||
|
||||
|
||||
is_cron_disabled()
|
||||
{
|
||||
use_cron || return 0
|
||||
|
||||
# Assumes that either no lines are commented or all lines are commented.
|
||||
# Also please note, that currently cron file for ClickHouse has only one line (but some time ago there was more).
|
||||
grep -q -E '^#' "$CLICKHOUSE_CRONFILE";
|
||||
}
|
||||
|
||||
|
||||
main()
|
||||
{
|
||||
# See how we were called.
|
||||
EXIT_STATUS=0
|
||||
case "$1" in
|
||||
start)
|
||||
service_or_func start && enable_cron
|
||||
;;
|
||||
stop)
|
||||
disable_cron
|
||||
service_or_func stop
|
||||
;;
|
||||
restart)
|
||||
service_or_func restart && enable_cron
|
||||
;;
|
||||
forcestop)
|
||||
disable_cron
|
||||
forcestop
|
||||
;;
|
||||
forcerestart)
|
||||
forcerestart && enable_cron
|
||||
;;
|
||||
reload)
|
||||
service_or_func restart
|
||||
;;
|
||||
condstart)
|
||||
service_or_func start
|
||||
;;
|
||||
condstop)
|
||||
service_or_func stop
|
||||
;;
|
||||
condrestart)
|
||||
service_or_func restart
|
||||
;;
|
||||
condreload)
|
||||
service_or_func restart
|
||||
;;
|
||||
initdb)
|
||||
initdb
|
||||
;;
|
||||
enable_cron)
|
||||
enable_cron
|
||||
;;
|
||||
disable_cron)
|
||||
disable_cron
|
||||
;;
|
||||
*)
|
||||
echo "Usage: $0 {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
exit $EXIT_STATUS
|
||||
}
|
||||
|
||||
|
||||
status()
|
||||
{
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
# Running commands without need of locking
|
||||
case "$1" in
|
||||
status)
|
||||
status
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
(
|
||||
if $FLOCK -n 9; then
|
||||
main "$@"
|
||||
else
|
||||
echo "Init script is already running" && exit 1
|
||||
fi
|
||||
) 9> $LOCKFILE
|
packages/clickhouse-server.postinstall (new file, 47 lines)
@ -0,0 +1,47 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
# set -x
|
||||
|
||||
PROGRAM=clickhouse-server
|
||||
CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
|
||||
CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
|
||||
# Please note that we don't support paths with whitespace. This is a known limitation.
|
||||
CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR:=/etc/clickhouse-server}
|
||||
CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
|
||||
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
|
||||
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
|
||||
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
|
||||
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
|
||||
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
|
||||
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
|
||||
|
||||
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
|
||||
[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
|
||||
|
||||
if [ ! -f "/etc/debian_version" ]; then
|
||||
not_deb_os=1
|
||||
fi
|
||||
|
||||
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
|
||||
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
|
||||
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
# if old rc.d service present - remove it
|
||||
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
|
||||
/usr/sbin/update-rc.d clickhouse-server remove
|
||||
fi
|
||||
|
||||
/bin/systemctl daemon-reload
|
||||
/bin/systemctl enable clickhouse-server
|
||||
else
|
||||
# If you are downgrading to a version older than 1.1.54336 run: systemctl disable clickhouse-server
|
||||
if [ -x "/etc/init.d/clickhouse-server" ]; then
|
||||
if [ -x "/usr/sbin/update-rc.d" ]; then
|
||||
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
|
||||
else
|
||||
echo # Other OS
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
packages/clickhouse-server.service (new file, 27 lines)
@ -0,0 +1,27 @@
|
||||
[Unit]
|
||||
Description=ClickHouse Server (analytic DBMS for big data)
|
||||
Requires=network-online.target
|
||||
# NOTE that After/Wants=time-sync.target is not enough, you need to ensure
|
||||
# that the time was adjusted already, if you use systemd-timesyncd you are
|
||||
# safe, but if you use ntp or some other daemon, you should configure it
|
||||
# additionally.
|
||||
After=time-sync.target network-online.target
|
||||
Wants=time-sync.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=clickhouse
|
||||
Group=clickhouse
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
RuntimeDirectory=clickhouse-server
|
||||
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
|
||||
# Minus means that this file is optional.
|
||||
EnvironmentFile=-/etc/default/clickhouse
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
|
||||
|
||||
[Install]
|
||||
# ClickHouse should not start from the rescue shell (rescue.target).
|
||||
WantedBy=multi-user.target
|
packages/clickhouse-server.yaml (new file, 68 lines)
@ -0,0 +1,68 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-server"
|
||||
arch: "all"
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
conflicts:
|
||||
- clickhouse-keeper
|
||||
depends:
|
||||
- adduser
|
||||
replaces:
|
||||
- clickhouse-server-common
|
||||
- clickhouse-server-base
|
||||
provides:
|
||||
- clickhouse-server-common
|
||||
recommends:
|
||||
- libcap2-bin
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
Server binary for ClickHouse
|
||||
ClickHouse is a column-oriented database management system
|
||||
that allows generating analytical data reports in real time.
|
||||
This package provides clickhouse common configuration files
|
||||
|
||||
overrides:
|
||||
deb:
|
||||
depends:
|
||||
- clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING})
|
||||
rpm:
|
||||
depends:
|
||||
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
|
||||
|
||||
contents:
|
||||
- src: root/etc/clickhouse-server
|
||||
dst: /etc/clickhouse-server
|
||||
type: config
|
||||
- src: clickhouse-server.init
|
||||
dst: /etc/init.d/clickhouse-server
|
||||
- src: clickhouse-server.service
|
||||
dst: /lib/systemd/system/clickhouse-server.service
|
||||
- src: root/usr/bin/clickhouse-copier
|
||||
dst: /usr/bin/clickhouse-copier
|
||||
- src: clickhouse
|
||||
dst: /usr/bin/clickhouse-keeper
|
||||
type: symlink
|
||||
- src: root/usr/bin/clickhouse-report
|
||||
dst: /usr/bin/clickhouse-report
|
||||
- src: root/usr/bin/clickhouse-server
|
||||
dst: /usr/bin/clickhouse-server
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-server/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-server/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-server/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-server/README.md
|
||||
|
||||
scripts:
|
||||
postinstall: ./clickhouse-server.postinstall
|
@ -460,10 +460,6 @@ else ()
|
||||
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
|
||||
endif ()
|
||||
|
||||
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
|
||||
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE})
|
||||
|
||||
if (USE_GDB_ADD_INDEX)
|
||||
@ -474,11 +470,19 @@ else ()
|
||||
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM)
|
||||
endif()
|
||||
|
||||
if (BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH clickhouse)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
|
||||
else()
|
||||
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT INSTALL_STRIPPED_BINARIES)
|
||||
# Install dummy debug directory
|
||||
# TODO: move logic to every place where clickhouse_strip_binary is used
|
||||
add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
|
||||
endif()
|
||||
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
|
@ -820,6 +820,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
|
||||
|
||||
("no-warnings", "disable warnings when client connects to server")
|
||||
("fake-drop", "Ignore all DROP queries, should be used only for testing")
|
||||
;
|
||||
|
||||
/// Commandline options related to external tables.
|
||||
@ -952,6 +953,8 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
config().setBool("compression", options["compression"].as<bool>());
|
||||
if (options.count("no-warnings"))
|
||||
config().setBool("no-warnings", true);
|
||||
if (options.count("fake-drop"))
|
||||
fake_drop = true;
|
||||
|
||||
if ((query_fuzzer_runs = options["query-fuzzer-runs"].as<int>()))
|
||||
{
|
||||
|
@ -792,9 +792,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
|
||||
std::string command = fmt::format("command -v setcap >/dev/null"
|
||||
" && command -v capsh >/dev/null"
|
||||
" && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice+ep >/dev/null 2>&1"
|
||||
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0}"
|
||||
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
|
||||
" && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep >/dev/null 2>&1"
|
||||
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep' {0}"
|
||||
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' or 'net_bind_service' capability for clickhouse binary."
|
||||
" This is optional. Taskstats accounting will be disabled."
|
||||
" To enable taskstats accounting you may add the required capability later manually.\"",
|
||||
fs::canonical(main_bin_path).string());
|
||||
|
@ -24,10 +24,8 @@ target_link_libraries(clickhouse-library-bridge PRIVATE
|
||||
|
||||
set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
|
||||
|
||||
if (BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-library-bridge)
|
||||
endif()
|
||||
|
||||
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
|
||||
else()
|
||||
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
|
@ -39,11 +39,9 @@ if (USE_GDB_ADD_INDEX)
|
||||
add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)
|
||||
endif()
|
||||
|
||||
if (BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-odbc-bridge)
|
||||
endif()
|
||||
|
||||
if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
|
||||
else()
|
||||
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include <Core/ServerUUID.h>
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/IOThreadPool.h>
|
||||
#include <IO/UseSSL.h>
|
||||
#include <Interpreters/AsynchronousMetrics.h>
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
@ -554,6 +555,10 @@ if (ThreadFuzzer::instance().isEffective())
|
||||
config().getUInt("thread_pool_queue_size", 10000)
|
||||
);
|
||||
|
||||
IOThreadPool::initialize(
|
||||
config().getUInt("max_io_thread_pool_size", 100),
|
||||
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||
config().getUInt("io_thread_pool_queue_size", 10000));
|
||||
|
||||
/// Initialize global local cache for remote filesystem.
|
||||
if (config().has("local_cache_for_remote_fs"))
|
||||
@ -1022,8 +1027,8 @@ if (ThreadFuzzer::instance().isEffective())
|
||||
std::make_unique<TCPServer>(
|
||||
new KeeperTCPHandlerFactory(
|
||||
config_getter, global_context->getKeeperDispatcher(),
|
||||
global_context->getSettingsRef().receive_timeout,
|
||||
global_context->getSettingsRef().send_timeout,
|
||||
global_context->getSettingsRef().receive_timeout.totalSeconds(),
|
||||
global_context->getSettingsRef().send_timeout.totalSeconds(),
|
||||
false), server_pool, socket));
|
||||
});
|
||||
|
||||
@ -1045,8 +1050,8 @@ if (ThreadFuzzer::instance().isEffective())
|
||||
std::make_unique<TCPServer>(
|
||||
new KeeperTCPHandlerFactory(
|
||||
config_getter, global_context->getKeeperDispatcher(),
|
||||
global_context->getSettingsRef().receive_timeout,
|
||||
global_context->getSettingsRef().send_timeout, true), server_pool, socket));
|
||||
global_context->getSettingsRef().receive_timeout.totalSeconds(),
|
||||
global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket));
|
||||
#else
|
||||
UNUSED(port);
|
||||
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
|
||||
|
src/Backups/ArchiveBackup.cpp (new file, 112 lines)
@ -0,0 +1,112 @@
|
||||
#include <Backups/ArchiveBackup.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <IO/ReadBufferFromFileBase.h>
|
||||
#include <IO/WriteBufferFromFileBase.h>
|
||||
#include <IO/Archives/IArchiveReader.h>
|
||||
#include <IO/Archives/IArchiveWriter.h>
|
||||
#include <IO/Archives/createArchiveReader.h>
|
||||
#include <IO/Archives/createArchiveWriter.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
ArchiveBackup::ArchiveBackup(
|
||||
const String & backup_name_,
|
||||
const DiskPtr & disk_,
|
||||
const String & path_,
|
||||
const ContextPtr & context_,
|
||||
const std::optional<BackupInfo> & base_backup_info_)
|
||||
: BackupImpl(backup_name_, context_, base_backup_info_), disk(disk_), path(path_)
|
||||
{
|
||||
}
|
||||
|
||||
ArchiveBackup::~ArchiveBackup()
|
||||
{
|
||||
close();
|
||||
}
|
||||
|
||||
bool ArchiveBackup::backupExists() const
|
||||
{
|
||||
return disk ? disk->exists(path) : fs::exists(path);
|
||||
}
|
||||
|
||||
void ArchiveBackup::openImpl(OpenMode open_mode_)
|
||||
{
|
||||
/// mutex is already locked
|
||||
if (open_mode_ == OpenMode::WRITE)
|
||||
{
|
||||
if (disk)
|
||||
writer = createArchiveWriter(path, disk->writeFile(path));
|
||||
else
|
||||
writer = createArchiveWriter(path);
|
||||
|
||||
writer->setCompression(compression_method, compression_level);
|
||||
writer->setPassword(password);
|
||||
}
|
||||
else if (open_mode_ == OpenMode::READ)
|
||||
{
|
||||
if (disk)
|
||||
{
|
||||
auto archive_read_function = [d = disk, p = path]() -> std::unique_ptr<SeekableReadBuffer> { return d->readFile(p); };
|
||||
size_t archive_size = disk->getFileSize(path);
|
||||
reader = createArchiveReader(path, archive_read_function, archive_size);
|
||||
}
|
||||
else
|
||||
reader = createArchiveReader(path);
|
||||
|
||||
reader->setPassword(password);
|
||||
}
|
||||
}
|
||||
|
||||
void ArchiveBackup::closeImpl(bool writing_finalized_)
|
||||
{
|
||||
/// mutex is already locked
|
||||
if (writer && writer->isWritingFile())
|
||||
throw Exception("There is some writing unfinished on close", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
writer.reset();
|
||||
reader.reset();
|
||||
|
||||
if ((getOpenModeNoLock() == OpenMode::WRITE) && !writing_finalized_)
|
||||
fs::remove(path);
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> ArchiveBackup::readFileImpl(const String & file_name) const
|
||||
{
|
||||
/// mutex is already locked
|
||||
return reader->readFile(file_name);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBuffer> ArchiveBackup::writeFileImpl(const String & file_name)
|
||||
{
|
||||
/// mutex is already locked
|
||||
return writer->writeFile(file_name);
|
||||
}
|
||||
|
||||
void ArchiveBackup::setCompression(const String & compression_method_, int compression_level_)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
compression_method = compression_method_;
|
||||
compression_level = compression_level_;
|
||||
if (writer)
|
||||
writer->setCompression(compression_method, compression_level);
|
||||
}
|
||||
|
||||
void ArchiveBackup::setPassword(const String & password_)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
password = password_;
|
||||
if (writer)
|
||||
writer->setPassword(password);
|
||||
if (reader)
|
||||
reader->setPassword(password);
|
||||
}
|
||||
|
||||
}
|
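
The deferred-read pattern in openImpl above is worth spelling out: for the READ case on a disk, the archive reader receives a callable that can (re)open the underlying file on demand (capturing disk and path by value) together with the archive size, instead of a single live stream. A minimal, self-contained C++ sketch of that shape follows; Stream, StreamFactory and Reader are hypothetical stand-ins, not ClickHouse types.

// Hypothetical stand-ins, only to show the shape of openImpl's READ branch.
#include <cstddef>
#include <functional>
#include <memory>
#include <sstream>
#include <string>

using Stream = std::istringstream;
using StreamFactory = std::function<std::unique_ptr<Stream>()>;

struct Reader
{
    // Keeping the factory lets the reader reopen the archive as many times as it needs.
    Reader(StreamFactory factory_, std::size_t size_) : factory(std::move(factory_)), size(size_) {}
    StreamFactory factory;
    std::size_t size;
};

int main()
{
    std::string archive_contents = "zip bytes ...";   // plays the role of the file at `path` on `disk`

    // Capture by value, like `[d = disk, p = path]` above, so the callable stays valid
    // after the enclosing function returns.
    StreamFactory read_function = [contents = archive_contents]() -> std::unique_ptr<Stream>
    {
        return std::make_unique<Stream>(contents);    // reopened lazily, only when needed
    };

    Reader reader(read_function, archive_contents.size());
    auto stream = reader.factory();
    return (stream && reader.size > 0) ? 0 : 1;
}
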
52
src/Backups/ArchiveBackup.h
Normal file
@ -0,0 +1,52 @@
#pragma once

#include <Backups/BackupImpl.h>


namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
class IArchiveReader;
class IArchiveWriter;

/// Stores a backup as a single .zip file.
class ArchiveBackup : public BackupImpl
{
public:
    /// `disk_` is allowed to be nullptr, which means `path_` is a path in the local filesystem.
    ArchiveBackup(
        const String & backup_name_,
        const DiskPtr & disk_,
        const String & path_,
        const ContextPtr & context_,
        const std::optional<BackupInfo> & base_backup_info_ = {});

    ~ArchiveBackup() override;

    static constexpr const int kDefaultCompressionLevel = -1;

    /// Sets compression method and level.
    void setCompression(const String & compression_method_, int compression_level_ = kDefaultCompressionLevel);

    /// Sets password.
    void setPassword(const String & password_);

private:
    bool backupExists() const override;
    void openImpl(OpenMode open_mode_) override;
    void closeImpl(bool writing_finalized_) override;
    bool supportsWritingInMultipleThreads() const override { return false; }
    std::unique_ptr<ReadBuffer> readFileImpl(const String & file_name) const override;
    std::unique_ptr<WriteBuffer> writeFileImpl(const String & file_name) override;

    const DiskPtr disk;
    const String path;
    std::shared_ptr<IArchiveReader> reader;
    std::shared_ptr<IArchiveWriter> writer;
    String compression_method;
    int compression_level = kDefaultCompressionLevel;
    String password;
};

}
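
Based only on the interface above and on the BackupImpl methods changed later in this diff (open(OpenMode), writeFile, finalizeWriting, close), a hedged usage sketch might look as follows; the context, the backup entry, the archive path, the compression method name and the password are placeholders, not values taken from this commit.

// Sketch only: `context` and `entry` are assumed to be provided by the caller.
#include <Backups/ArchiveBackup.h>
#include <Backups/IBackupEntry.h>
#include <memory>
#include <utility>

void writeArchiveBackupSketch(const DB::ContextPtr & context, DB::BackupEntryPtr entry)
{
    using namespace DB;

    /// A nullptr disk means the path is in the local filesystem (see the comment in the header above).
    auto backup = std::make_shared<ArchiveBackup>("backup1", /* disk_ = */ nullptr, "/tmp/backup1.zip", context);

    backup->setCompression("deflate", /* compression_level_ = */ 3);  // method name is only an example
    backup->setPassword("secret");

    backup->open(IBackup::OpenMode::WRITE);
    backup->writeFile("data.bin", std::move(entry));  /// checksums and base-backup reuse happen inside BackupImpl
    backup->finalizeWriting();                        /// writes the ".backup" metadata
    backup->close();
}
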
@ -1,28 +0,0 @@
|
||||
#include <Backups/BackupEntryConcat.h>
|
||||
#include <IO/ConcatReadBuffer.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
BackupEntryConcat::BackupEntryConcat(
|
||||
BackupEntryPtr first_source_,
|
||||
BackupEntryPtr second_source_,
|
||||
const std::optional<UInt128> & checksum_)
|
||||
: first_source(std::move(first_source_))
|
||||
, second_source(std::move(second_source_))
|
||||
, checksum(checksum_)
|
||||
{
|
||||
}
|
||||
|
||||
UInt64 BackupEntryConcat::getSize() const
|
||||
{
|
||||
if (!size)
|
||||
size = first_source->getSize() + second_source->getSize();
|
||||
return *size;
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> BackupEntryConcat::getReadBuffer() const
|
||||
{
|
||||
return std::make_unique<ConcatReadBuffer>(*first_source->getReadBuffer(), *second_source->getReadBuffer());
|
||||
}
|
||||
}
|
@ -1,30 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Concatenates data of two backup entries.
|
||||
class BackupEntryConcat : public IBackupEntry
|
||||
{
|
||||
public:
|
||||
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
|
||||
BackupEntryConcat(
|
||||
BackupEntryPtr first_source_,
|
||||
BackupEntryPtr second_source_,
|
||||
const std::optional<UInt128> & checksum_ = {});
|
||||
|
||||
UInt64 getSize() const override;
|
||||
std::optional<UInt128> getChecksum() const override { return checksum; }
|
||||
std::unique_ptr<ReadBuffer> getReadBuffer() const override;
|
||||
|
||||
private:
|
||||
BackupEntryPtr first_source;
|
||||
BackupEntryPtr second_source;
|
||||
mutable std::optional<UInt64> size;
|
||||
std::optional<UInt128> checksum;
|
||||
};
|
||||
|
||||
}
|
@ -29,7 +29,7 @@ BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
std::unique_ptr<ReadBuffer> BackupEntryFromAppendOnlyFile::getReadBuffer() const
{
    auto buf = BackupEntryFromImmutableFile::getReadBuffer();
    return std::make_unique<LimitReadBuffer>(std::move(buf), limit, true);
    return std::make_unique<LimitReadBuffer>(std::move(buf), limit, false);
}

}

@ -1,31 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Represents small preloaded data to be included in a backup.
|
||||
class BackupEntryFromCallback : public IBackupEntry
|
||||
{
|
||||
public:
|
||||
using ReadBufferCreator = std::function<std::unique_ptr<ReadBuffer>()>;
|
||||
|
||||
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
|
||||
BackupEntryFromCallback(const ReadBufferCreator & callback_, size_t size_, const std::optional<UInt128> & checksum_ = {})
|
||||
: callback(callback_), size(size_), checksum(checksum_)
|
||||
{
|
||||
}
|
||||
|
||||
UInt64 getSize() const override { return size; }
|
||||
std::optional<UInt128> getChecksum() const override { return checksum; }
|
||||
std::unique_ptr<ReadBuffer> getReadBuffer() const override { return callback(); }
|
||||
|
||||
private:
|
||||
const ReadBufferCreator callback;
|
||||
const size_t size;
|
||||
const std::optional<UInt128> checksum;
|
||||
};
|
||||
|
||||
}
|
@ -21,7 +21,9 @@ BackupMutablePtr BackupFactory::createBackup(const CreateParams & params) const
    auto it = creators.find(engine_name);
    if (it == creators.end())
        throw Exception(ErrorCodes::BACKUP_ENGINE_NOT_FOUND, "Not found backup engine {}", engine_name);
    return (it->second)(params);
    BackupMutablePtr backup = (it->second)(params);
    backup->open(params.open_mode);
    return backup;
}

void BackupFactory::registerBackupEngine(const String & engine_name, const CreatorFn & creator_fn)
@ -31,7 +33,12 @@ void BackupFactory::registerBackupEngine(const String & engine_name, const Creat
    creators[engine_name] = creator_fn;
}

void registerBackupEngines(BackupFactory & factory);
void registerBackupEnginesFileAndDisk(BackupFactory &);

void registerBackupEngines(BackupFactory & factory)
{
    registerBackupEnginesFileAndDisk(factory);
}

BackupFactory::BackupFactory()
{

@ -26,6 +26,9 @@ public:
    OpenMode open_mode = OpenMode::WRITE;
    BackupInfo backup_info;
    std::optional<BackupInfo> base_backup_info;
    String compression_method;
    int compression_level = -1;
    String password;
    ContextPtr context;
};

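
The hunk above changes createBackup so that it opens the backup right away with params.open_mode. A hedged sketch of how an engine plugs into this factory, using only names visible in this diff; MyBackup is a hypothetical IBackup implementation, not the real File or Disk engine.

// Illustrative only: `MyBackup` is a hypothetical IBackup implementation used to show the wiring.
#include <Backups/BackupFactory.h>
#include <memory>

void registerMyBackupEngine(DB::BackupFactory & factory)
{
    factory.registerBackupEngine("My", [](const DB::BackupFactory::CreateParams & params) -> DB::BackupMutablePtr
    {
        /// The creator only constructs the object; after this hunk, createBackup()
        /// calls backup->open(params.open_mode) on whatever the creator returns.
        return std::make_shared<MyBackup>(params.backup_info.toString(), params.context, params.base_backup_info);
    });
}

DB::BackupMutablePtr createBackupSketch(const DB::BackupFactory::CreateParams & params)
{
    /// The returned backup is already opened in params.open_mode.
    return DB::BackupFactory::instance().createBackup(params);
}
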
@ -1,13 +1,11 @@
|
||||
#include <Backups/BackupImpl.h>
|
||||
#include <Backups/BackupFactory.h>
|
||||
#include <Backups/BackupEntryConcat.h>
|
||||
#include <Backups/BackupEntryFromCallback.h>
|
||||
#include <Backups/BackupEntryFromMemory.h>
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/hex.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/ConcatReadBuffer.h>
|
||||
#include <IO/HashingReadBuffer.h>
|
||||
#include <IO/ReadBufferFromFileBase.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -47,34 +45,82 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
BackupImpl::BackupImpl(const String & backup_name_, OpenMode open_mode_, const ContextPtr & context_, const std::optional<BackupInfo> & base_backup_info_)
|
||||
: backup_name(backup_name_), open_mode(open_mode_), context(context_), base_backup_info(base_backup_info_)
|
||||
|
||||
class BackupImpl::BackupEntryFromBackupImpl : public IBackupEntry
|
||||
{
|
||||
public:
|
||||
BackupEntryFromBackupImpl(
|
||||
const std::shared_ptr<const BackupImpl> & backup_,
|
||||
const String & file_name_,
|
||||
UInt64 size_,
|
||||
const std::optional<UInt128> checksum_,
|
||||
BackupEntryPtr base_backup_entry_ = {})
|
||||
: backup(backup_), file_name(file_name_), size(size_), checksum(checksum_),
|
||||
base_backup_entry(std::move(base_backup_entry_))
|
||||
{
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> getReadBuffer() const override
|
||||
{
|
||||
auto read_buffer = backup->readFileImpl(file_name);
|
||||
if (base_backup_entry)
|
||||
{
|
||||
auto base_backup_read_buffer = base_backup_entry->getReadBuffer();
|
||||
read_buffer = std::make_unique<ConcatReadBuffer>(std::move(base_backup_read_buffer), std::move(read_buffer));
|
||||
}
|
||||
return read_buffer;
|
||||
}
|
||||
|
||||
UInt64 getSize() const override { return size; }
|
||||
std::optional<UInt128> getChecksum() const override { return checksum; }
|
||||
|
||||
private:
|
||||
const std::shared_ptr<const BackupImpl> backup;
|
||||
const String file_name;
|
||||
const UInt64 size;
|
||||
const std::optional<UInt128> checksum;
|
||||
BackupEntryPtr base_backup_entry;
|
||||
};
|
||||
|
||||
|
||||
BackupImpl::BackupImpl(const String & backup_name_, const ContextPtr & context_, const std::optional<BackupInfo> & base_backup_info_)
|
||||
: backup_name(backup_name_), context(context_), base_backup_info_param(base_backup_info_)
|
||||
{
|
||||
}
|
||||
|
||||
BackupImpl::~BackupImpl() = default;
|
||||
|
||||
void BackupImpl::open()
|
||||
void BackupImpl::open(OpenMode open_mode_)
|
||||
{
|
||||
if (open_mode == OpenMode::WRITE)
|
||||
std::lock_guard lock{mutex};
|
||||
if (open_mode == open_mode_)
|
||||
return;
|
||||
|
||||
if (open_mode != OpenMode::NONE)
|
||||
throw Exception("Backup is already opened", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (open_mode_ == OpenMode::WRITE)
|
||||
{
|
||||
if (backupExists())
|
||||
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", getName());
|
||||
|
||||
timestamp = std::time(nullptr);
|
||||
uuid = UUIDHelpers::generateV4();
|
||||
|
||||
startWriting();
|
||||
writing_started = true;
|
||||
writing_finalized = false;
|
||||
}
|
||||
|
||||
if (open_mode == OpenMode::READ)
|
||||
if (open_mode_ == OpenMode::READ)
|
||||
{
|
||||
if (!backupExists())
|
||||
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", getName());
|
||||
readBackupMetadata();
|
||||
}
|
||||
|
||||
openImpl(open_mode_);
|
||||
|
||||
base_backup_info = base_backup_info_param;
|
||||
if (open_mode_ == OpenMode::READ)
|
||||
readBackupMetadata();
|
||||
|
||||
if (base_backup_info)
|
||||
{
|
||||
BackupFactory::CreateParams params;
|
||||
@ -83,25 +129,43 @@ void BackupImpl::open()
|
||||
params.context = context;
|
||||
base_backup = BackupFactory::instance().createBackup(params);
|
||||
|
||||
if (open_mode == OpenMode::WRITE)
|
||||
if (open_mode_ == OpenMode::WRITE)
|
||||
base_backup_uuid = base_backup->getUUID();
|
||||
else if (base_backup_uuid != base_backup->getUUID())
|
||||
throw Exception(ErrorCodes::WRONG_BASE_BACKUP, "Backup {}: The base backup {} has different UUID ({} != {})",
|
||||
getName(), base_backup->getName(), toString(base_backup->getUUID()), (base_backup_uuid ? toString(*base_backup_uuid) : ""));
|
||||
}
|
||||
|
||||
open_mode = open_mode_;
|
||||
}
|
||||
|
||||
void BackupImpl::close()
|
||||
{
|
||||
if (open_mode == OpenMode::WRITE)
|
||||
{
|
||||
if (writing_started && !writing_finalized)
|
||||
{
|
||||
/// Creation of the backup didn't finish correctly,
/// so the backup cannot be used and it's better to remove its files.
|
||||
removeAllFilesAfterFailure();
|
||||
std::lock_guard lock{mutex};
|
||||
if (open_mode == OpenMode::NONE)
|
||||
return;
|
||||
|
||||
closeImpl(writing_finalized);
|
||||
|
||||
uuid = UUIDHelpers::Nil;
|
||||
timestamp = 0;
|
||||
base_backup_info.reset();
|
||||
base_backup.reset();
|
||||
base_backup_uuid.reset();
|
||||
file_infos.clear();
|
||||
open_mode = OpenMode::NONE;
|
||||
}
|
||||
|
||||
IBackup::OpenMode BackupImpl::getOpenMode() const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
return open_mode;
|
||||
}
|
||||
|
||||
time_t BackupImpl::getTimestamp() const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
void BackupImpl::writeBackupMetadata()
|
||||
@ -112,9 +176,20 @@ void BackupImpl::writeBackupMetadata()
|
||||
config->setString("uuid", toString(uuid));
|
||||
|
||||
if (base_backup_info)
|
||||
{
|
||||
bool base_backup_in_use = false;
|
||||
for (const auto & [name, info] : file_infos)
|
||||
{
|
||||
if (info.base_size)
|
||||
base_backup_in_use = true;
|
||||
}
|
||||
|
||||
if (base_backup_in_use)
|
||||
{
|
||||
config->setString("base_backup", base_backup_info->toString());
|
||||
if (base_backup_uuid)
|
||||
config->setString("base_backup_uuid", toString(*base_backup_uuid));
|
||||
}
|
||||
}
|
||||
|
||||
size_t index = 0;
|
||||
for (const auto & [name, info] : file_infos)
|
||||
@ -128,7 +203,7 @@ void BackupImpl::writeBackupMetadata()
|
||||
if (info.base_size)
|
||||
{
|
||||
config->setUInt(prefix + "base_size", info.base_size);
|
||||
if (info.base_size != info.size)
|
||||
if (info.base_checksum != info.checksum)
|
||||
config->setString(prefix + "base_checksum", getHexUIntLowercase(info.base_checksum));
|
||||
}
|
||||
}
|
||||
@ -138,7 +213,7 @@ void BackupImpl::writeBackupMetadata()
|
||||
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
config->save(stream);
|
||||
String str = stream.str();
|
||||
auto out = addFileImpl(".backup");
|
||||
auto out = writeFileImpl(".backup");
|
||||
out->write(str.data(), str.size());
|
||||
}
|
||||
|
||||
@ -161,7 +236,7 @@ void BackupImpl::readBackupMetadata()
|
||||
if (config->has("base_backup") && !base_backup_info)
|
||||
base_backup_info = BackupInfo::fromString(config->getString("base_backup"));
|
||||
|
||||
if (config->has("base_backup_uuid") && !base_backup_uuid)
|
||||
if (config->has("base_backup_uuid"))
|
||||
base_backup_uuid = parse<UUID>(config->getString("base_backup_uuid"));
|
||||
|
||||
file_infos.clear();
|
||||
@ -173,20 +248,22 @@ void BackupImpl::readBackupMetadata()
|
||||
{
|
||||
String prefix = "contents." + key + ".";
|
||||
String name = config->getString(prefix + "name");
|
||||
FileInfo & info = file_infos.emplace(name, FileInfo{}).first->second;
|
||||
FileInfo info;
|
||||
info.size = config->getUInt(prefix + "size");
|
||||
if (info.size)
|
||||
{
|
||||
info.checksum = unhexChecksum(config->getString(prefix + "checksum"));
|
||||
if (config->has(prefix + "base_size"))
|
||||
info.base_size = config->getUInt(prefix + "base_size", 0);
|
||||
if (info.base_size)
|
||||
{
|
||||
info.base_size = config->getUInt(prefix + "base_size");
|
||||
if (info.base_size == info.size)
|
||||
info.base_checksum = info.checksum;
|
||||
else
|
||||
if (config->has(prefix + "base_checksum"))
|
||||
info.base_checksum = unhexChecksum(config->getString(prefix + "base_checksum"));
|
||||
else
|
||||
info.base_checksum = info.checksum;
|
||||
}
|
||||
}
|
||||
file_infos.emplace(name, info);
|
||||
file_checksums.emplace(info.checksum, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -240,10 +317,22 @@ UInt128 BackupImpl::getFileChecksum(const String & file_name) const
|
||||
return it->second.checksum;
|
||||
}
|
||||
|
||||
std::optional<String> BackupImpl::findFileByChecksum(const UInt128 & checksum) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = file_checksums.find(checksum);
|
||||
if (it == file_checksums.end())
|
||||
return std::nullopt;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
|
||||
BackupEntryPtr BackupImpl::readFile(const String & file_name) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (open_mode != OpenMode::READ)
|
||||
throw Exception("Backup is not opened for reading", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto it = file_infos.find(file_name);
|
||||
if (it == file_infos.end())
|
||||
throw Exception(
|
||||
@ -264,7 +353,8 @@ BackupEntryPtr BackupImpl::readFile(const String & file_name) const
|
||||
if (!info.base_size)
|
||||
{
|
||||
/// Data goes completely from this backup, the base backup isn't used.
|
||||
return std::make_unique<BackupEntryFromCallback>(read_callback, info.size, info.checksum);
|
||||
return std::make_unique<BackupEntryFromBackupImpl>(
|
||||
std::static_pointer_cast<const BackupImpl>(shared_from_this()), file_name, info.size, info.checksum);
|
||||
}
|
||||
|
||||
if (info.size < info.base_size)
|
||||
@ -283,7 +373,8 @@ BackupEntryPtr BackupImpl::readFile(const String & file_name) const
|
||||
getName(), quoteString(file_name));
|
||||
}
|
||||
|
||||
if (!base_backup->fileExists(file_name))
|
||||
auto base_file_name = base_backup->findFileByChecksum(info.base_checksum);
|
||||
if (!base_file_name)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::WRONG_BASE_BACKUP,
|
||||
@ -291,7 +382,7 @@ BackupEntryPtr BackupImpl::readFile(const String & file_name) const
|
||||
getName(), quoteString(file_name));
|
||||
}
|
||||
|
||||
auto base_entry = base_backup->readFile(file_name);
|
||||
auto base_entry = base_backup->readFile(*base_file_name);
|
||||
auto base_size = base_entry->getSize();
|
||||
if (base_size != info.base_size)
|
||||
{
|
||||
@ -301,15 +392,6 @@ BackupEntryPtr BackupImpl::readFile(const String & file_name) const
|
||||
getName(), quoteString(file_name), base_backup->getName(), base_size, info.base_size);
|
||||
}
|
||||
|
||||
auto base_checksum = base_entry->getChecksum();
|
||||
if (base_checksum && (*base_checksum != info.base_checksum))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::WRONG_BASE_BACKUP,
|
||||
"Backup {}: Entry {} has unexpected checksum in the base backup {}",
|
||||
getName(), quoteString(file_name), base_backup->getName());
|
||||
}
|
||||
|
||||
if (info.size == info.base_size)
|
||||
{
|
||||
/// Data goes completely from the base backup (nothing goes from this backup).
|
||||
@ -318,18 +400,16 @@ BackupEntryPtr BackupImpl::readFile(const String & file_name) const
|
||||
|
||||
/// The beginning of the data goes from the base backup,
|
||||
/// and the ending goes from this backup.
|
||||
return std::make_unique<BackupEntryConcat>(
|
||||
std::move(base_entry),
|
||||
std::make_unique<BackupEntryFromCallback>(read_callback, info.size - info.base_size),
|
||||
info.checksum);
|
||||
return std::make_unique<BackupEntryFromBackupImpl>(
|
||||
static_pointer_cast<const BackupImpl>(shared_from_this()), file_name, info.size, info.checksum, std::move(base_entry));
|
||||
}
|
||||
|
||||
|
||||
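
The rewritten readFile above returns a BackupEntryFromBackupImpl which, for an entry that grew since the base backup, concatenates the base part with the appended part at read time (via ConcatReadBuffer). A tiny self-contained illustration of that idea with plain strings; nothing below is ClickHouse API.

#include <cstddef>
#include <string>

int main()
{
    // The base backup holds the first base_size bytes; this backup stores only the appended tail.
    std::string base_part = "ABCDE";     // what base_backup->readFile(*base_file_name) would return
    std::string appended_part = "FGH";   // what readFileImpl(file_name) would return from this backup

    std::size_t base_size = base_part.size();
    std::string full = base_part + appended_part;   // ConcatReadBuffer does this lazily over two read buffers

    if (full.size() < base_size)
        return 1;                                   // readFile treats size < base_size as a broken backup
    if (full.substr(0, base_size) != base_part)
        return 1;                                   // the base part must still match what the base backup recorded
    return 0;
}
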
void BackupImpl::addFile(const String & file_name, BackupEntryPtr entry)
|
||||
void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (open_mode != OpenMode::WRITE)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal operation: Cannot write to a backup opened for reading");
|
||||
throw Exception("Backup is not opened for writing", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (file_infos.contains(file_name))
|
||||
throw Exception(
|
||||
@ -356,44 +436,60 @@ void BackupImpl::addFile(const String & file_name, BackupEntryPtr entry)
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> read_buffer; /// We'll set that later.
|
||||
UInt64 read_pos = 0; /// Current position in read_buffer.
|
||||
std::optional<HashingReadBuffer> hashing_read_buffer;
|
||||
UInt64 hashing_pos = 0; /// Current position in `hashing_read_buffer`.
|
||||
|
||||
/// Determine whether it's possible to receive this entry's data from the base backup completely or partly.
|
||||
bool use_base = false;
|
||||
if (base_exists && base_size)
|
||||
if (base_exists && base_size && (size >= base_size))
|
||||
{
|
||||
if (size == base_size)
|
||||
if (checksum && (size == base_size))
|
||||
{
|
||||
/// The size is the same, we need to compare checksums to find out
|
||||
/// if the entry's data has not been changed since the base backup.
|
||||
if (!checksum)
|
||||
/// if the entry's data has not changed since the base backup.
|
||||
use_base = (*checksum == base_checksum);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// The size has increased, so we need to calculate a partial checksum to find out
/// if the entry's data has only been appended since the base backup.
|
||||
read_buffer = entry->getReadBuffer();
|
||||
HashingReadBuffer hashing_read_buffer{*read_buffer};
|
||||
hashing_read_buffer.ignore(size);
|
||||
read_pos = size;
|
||||
checksum = hashing_read_buffer.getHash();
|
||||
}
|
||||
if (checksum == base_checksum)
|
||||
use_base = true; /// The data has not been changed.
|
||||
}
|
||||
else if (size > base_size)
|
||||
{
|
||||
/// The size has been increased, we need to calculate a partial checksum to find out
|
||||
/// if the entry's data has been only appended since the base backup.
|
||||
read_buffer = entry->getReadBuffer();
|
||||
HashingReadBuffer hashing_read_buffer{*read_buffer};
|
||||
hashing_read_buffer.ignore(base_size);
|
||||
UInt128 partial_checksum = hashing_read_buffer.getHash();
|
||||
read_pos = base_size;
|
||||
if (!checksum)
|
||||
{
|
||||
hashing_read_buffer.ignore(size - base_size);
|
||||
checksum = hashing_read_buffer.getHash();
|
||||
read_pos = size;
|
||||
}
|
||||
hashing_read_buffer.emplace(*read_buffer);
|
||||
hashing_read_buffer->ignore(base_size);
|
||||
hashing_pos = base_size;
|
||||
UInt128 partial_checksum = hashing_read_buffer->getHash();
|
||||
if (size == base_size)
|
||||
checksum = partial_checksum;
|
||||
if (partial_checksum == base_checksum)
|
||||
use_base = true; /// The data has been appended.
|
||||
use_base = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish calculating the checksum.
|
||||
if (!checksum)
|
||||
{
|
||||
if (!read_buffer)
|
||||
read_buffer = entry->getReadBuffer();
|
||||
if (!hashing_read_buffer)
|
||||
hashing_read_buffer.emplace(*read_buffer);
|
||||
hashing_read_buffer->ignore(size - hashing_pos);
|
||||
checksum = hashing_read_buffer->getHash();
|
||||
}
|
||||
hashing_read_buffer.reset();
|
||||
|
||||
/// Check if an entry with the same checksum exists in the base backup.
|
||||
if (base_backup && !use_base)
|
||||
{
|
||||
if (auto base_file_name = base_backup->findFileByChecksum(*checksum))
|
||||
{
|
||||
if (size == base_backup->getFileSize(*base_file_name))
|
||||
{
|
||||
/// The entry's data has not changed since the base backup,
|
||||
/// but the entry itself has been moved or renamed.
|
||||
base_size = size;
|
||||
base_checksum = *checksum;
|
||||
use_base = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -401,15 +497,15 @@ void BackupImpl::addFile(const String & file_name, BackupEntryPtr entry)
|
||||
{
|
||||
/// The entry's data has not been changed since the base backup.
|
||||
FileInfo info;
|
||||
info.size = base_size;
|
||||
info.checksum = base_checksum;
|
||||
info.size = size;
|
||||
info.checksum = *checksum;
|
||||
info.base_size = base_size;
|
||||
info.base_checksum = base_checksum;
|
||||
file_infos.emplace(file_name, info);
|
||||
file_checksums.emplace(*checksum, file_name);
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
/// Either the entry didn't exist in the base backup,
/// or the entry has data appended to the end of the data from the base backup.
/// In both cases we have to copy data to this backup.
|
||||
@ -421,36 +517,17 @@ void BackupImpl::addFile(const String & file_name, BackupEntryPtr entry)
|
||||
/// If `read_buffer` is seekable it's easier, otherwise we can use ignore().
|
||||
if (auto * seekable_buffer = dynamic_cast<SeekableReadBuffer *>(read_buffer.get()))
|
||||
{
|
||||
if (read_pos != copy_pos)
|
||||
seekable_buffer->seek(copy_pos, SEEK_SET);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (read_pos > copy_pos)
|
||||
{
|
||||
read_buffer.reset();
|
||||
read_pos = 0;
|
||||
}
|
||||
|
||||
if (!read_buffer)
|
||||
read_buffer = entry->getReadBuffer();
|
||||
|
||||
if (read_pos < copy_pos)
|
||||
read_buffer->ignore(copy_pos - read_pos);
|
||||
read_buffer->ignore(copy_pos);
|
||||
}
|
||||
|
||||
/// If we haven't received or calculated a checksum yet, calculate it now.
|
||||
ReadBuffer * maybe_hashing_read_buffer = read_buffer.get();
|
||||
std::optional<HashingReadBuffer> hashing_read_buffer;
|
||||
if (!checksum)
|
||||
maybe_hashing_read_buffer = &hashing_read_buffer.emplace(*read_buffer);
|
||||
|
||||
/// Copy the entry's data after `copy_pos`.
|
||||
auto out = addFileImpl(file_name);
|
||||
copyData(*maybe_hashing_read_buffer, *out);
|
||||
|
||||
if (hashing_read_buffer)
|
||||
checksum = hashing_read_buffer->getHash();
|
||||
auto out = writeFileImpl(file_name);
|
||||
copyData(*read_buffer, *out);
|
||||
|
||||
/// Done!
|
||||
FileInfo info;
|
||||
@ -462,13 +539,19 @@ void BackupImpl::addFile(const String & file_name, BackupEntryPtr entry)
|
||||
info.base_checksum = base_checksum;
|
||||
}
|
||||
file_infos.emplace(file_name, info);
|
||||
file_checksums.emplace(*checksum, file_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
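
On the write side, the hunks above decide whether an entry can reuse data from the base backup: if the sizes match, the full checksums are compared; if the entry grew, a checksum over only the first base_size bytes (the partial checksum) is compared against the base checksum, and only the appended tail is copied. A small self-contained sketch of that decision; the toy hash below merely stands in for the 128-bit hash computed by HashingReadBuffer.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>

// Toy 64-bit hash (FNV-1a) standing in for the real 128-bit checksum.
static std::uint64_t toyHash(const std::string & data, std::size_t prefix_len)
{
    std::uint64_t h = 14695981039346656037ull;
    for (std::size_t i = 0; i < prefix_len && i < data.size(); ++i)
        h = (h ^ static_cast<unsigned char>(data[i])) * 1099511628211ull;
    return h;
}

int main()
{
    std::string base_data = "ABCDE";     // contents when the base backup was taken
    std::string new_data = "ABCDEFGH";   // the same file, appended to since then

    std::size_t base_size = base_data.size();
    std::uint64_t base_checksum = toyHash(base_data, base_data.size());

    bool use_base = false;
    if (new_data.size() == base_size)
    {
        // Same size: reuse the base data only if the full checksums match.
        use_base = (toyHash(new_data, new_data.size()) == base_checksum);
    }
    else if (new_data.size() > base_size)
    {
        // The file grew: compare a checksum of only the first base_size bytes (the partial checksum).
        use_base = (toyHash(new_data, base_size) == base_checksum);
    }

    // With use_base == true, only the appended tail (3 bytes here) is copied into the new backup.
    std::cout << (use_base ? "store only the appended tail" : "store the whole file") << '\n';
    return 0;
}
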
void BackupImpl::finalizeWriting()
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (writing_finalized)
|
||||
return;
|
||||
|
||||
if (open_mode != OpenMode::WRITE)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal operation: Cannot write to a backup opened for reading");
|
||||
throw Exception("Backup is not opened for writing", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
writeBackupMetadata();
|
||||
writing_finalized = true;
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Backups/BackupInfo.h>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -20,29 +21,33 @@ class BackupImpl : public IBackup
|
||||
public:
|
||||
BackupImpl(
|
||||
const String & backup_name_,
|
||||
OpenMode open_mode_,
|
||||
const ContextPtr & context_,
|
||||
const std::optional<BackupInfo> & base_backup_info_ = {});
|
||||
~BackupImpl() override;
|
||||
|
||||
const String & getName() const override { return backup_name; }
|
||||
OpenMode getOpenMode() const override { return open_mode; }
|
||||
time_t getTimestamp() const override { return timestamp; }
|
||||
void open(OpenMode open_mode_) override;
|
||||
OpenMode getOpenMode() const override;
|
||||
void close() override;
|
||||
time_t getTimestamp() const override;
|
||||
UUID getUUID() const override { return uuid; }
|
||||
Strings listFiles(const String & prefix, const String & terminator) const override;
|
||||
bool fileExists(const String & file_name) const override;
|
||||
size_t getFileSize(const String & file_name) const override;
|
||||
UInt128 getFileChecksum(const String & file_name) const override;
|
||||
std::optional<String> findFileByChecksum(const UInt128 & checksum) const override;
|
||||
BackupEntryPtr readFile(const String & file_name) const override;
|
||||
void addFile(const String & file_name, BackupEntryPtr entry) override;
|
||||
void writeFile(const String & file_name, BackupEntryPtr entry) override;
|
||||
void finalizeWriting() override;
|
||||
|
||||
protected:
|
||||
/// Should be called in the constructor of a derived class.
|
||||
void open();
|
||||
/// Checks if this backup exists.
|
||||
virtual bool backupExists() const = 0;
|
||||
|
||||
/// Should be called in the destructor of a derived class.
|
||||
void close();
|
||||
virtual void openImpl(OpenMode open_mode_) = 0;
|
||||
OpenMode getOpenModeNoLock() const { return open_mode; }
|
||||
|
||||
virtual void closeImpl(bool writing_finalized_) = 0;
|
||||
|
||||
/// Read a file from the backup.
|
||||
/// Low level: the function doesn't check base backup or checksums.
|
||||
@ -50,18 +55,9 @@ protected:
|
||||
|
||||
/// Add a file to the backup.
|
||||
/// Low level: the function doesn't check base backup or checksums.
|
||||
virtual std::unique_ptr<WriteBuffer> addFileImpl(const String & file_name) = 0;
|
||||
virtual std::unique_ptr<WriteBuffer> writeFileImpl(const String & file_name) = 0;
|
||||
|
||||
/// Checks if this backup exists.
|
||||
virtual bool backupExists() const = 0;
|
||||
|
||||
/// Starts writing of this backup, only used if `open_mode == OpenMode::WRITE`.
|
||||
/// After calling this function `backupExists()` should return true.
|
||||
virtual void startWriting() = 0;
|
||||
|
||||
/// Removes all the backup files, called if something goes wrong while we're writing the backup.
|
||||
/// This function is called by `close()` if `startWriting()` was called and `finalizeWriting()` wasn't.
|
||||
virtual void removeAllFilesAfterFailure() = 0;
|
||||
mutable std::mutex mutex;
|
||||
|
||||
private:
|
||||
void writeBackupMetadata();
|
||||
@ -77,18 +73,20 @@ private:
|
||||
UInt128 base_checksum{0, 0};
|
||||
};
|
||||
|
||||
class BackupEntryFromBackupImpl;
|
||||
|
||||
const String backup_name;
|
||||
const OpenMode open_mode;
|
||||
UUID uuid;
|
||||
time_t timestamp = 0;
|
||||
ContextPtr context;
|
||||
const std::optional<BackupInfo> base_backup_info_param;
|
||||
OpenMode open_mode = OpenMode::NONE;
|
||||
UUID uuid = {};
|
||||
time_t timestamp = 0;
|
||||
std::optional<BackupInfo> base_backup_info;
|
||||
std::shared_ptr<const IBackup> base_backup;
|
||||
std::optional<UUID> base_backup_uuid;
|
||||
std::map<String, FileInfo> file_infos;
|
||||
bool writing_started = false;
|
||||
std::map<String, FileInfo> file_infos; /// Should be ordered alphabetically, see listFiles().
|
||||
std::unordered_map<UInt128, String> file_checksums;
|
||||
bool writing_finalized = false;
|
||||
mutable std::mutex mutex;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,160 +0,0 @@
|
||||
#include <Backups/BackupInDirectory.h>
|
||||
#include <Backups/BackupFactory.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Disks/DiskSelector.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Disks/DiskLocal.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Checks multiple keys "key", "key[1]", "key[2]", and so on in the configuration
/// and finds out if any of them has a matching value.
|
||||
bool findConfigKeyWithMatchingValue(const Poco::Util::AbstractConfiguration & config, const String & key, const std::function<bool(const String & value)> & match_function)
|
||||
{
|
||||
String current_key = key;
|
||||
size_t counter = 0;
|
||||
while (config.has(current_key))
|
||||
{
|
||||
if (match_function(config.getString(current_key)))
|
||||
return true;
|
||||
current_key = key + "[" + std::to_string(++counter) + "]";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isDiskAllowed(const String & disk_name, const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
return findConfigKeyWithMatchingValue(config, "backups.allowed_disk", [&](const String & value) { return value == disk_name; });
|
||||
}
|
||||
|
||||
bool isPathAllowed(const String & path, const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
return findConfigKeyWithMatchingValue(config, "backups.allowed_path", [&](const String & value) { return path.starts_with(value); });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BackupInDirectory::BackupInDirectory(
|
||||
const String & backup_name_,
|
||||
OpenMode open_mode_,
|
||||
const DiskPtr & disk_,
|
||||
const String & path_,
|
||||
const ContextPtr & context_,
|
||||
const std::optional<BackupInfo> & base_backup_info_)
|
||||
: BackupImpl(backup_name_, open_mode_, context_, base_backup_info_)
|
||||
, disk(disk_), path(path_)
|
||||
{
|
||||
/// Path to backup must end with '/'
|
||||
if (path.back() != '/')
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to backup must end with '/', but {} doesn't.", getName(), quoteString(path));
|
||||
dir_path = fs::path(path).parent_path(); /// get path without terminating slash
|
||||
|
||||
/// If `disk` is not specified, we create an internal instance of `DiskLocal` here.
|
||||
if (!disk)
|
||||
{
|
||||
auto fspath = fs::path{dir_path};
|
||||
if (!fspath.has_filename())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to a backup must be a directory path.", getName(), quoteString(path));
|
||||
path = fspath.filename() / "";
|
||||
dir_path = fs::path(path).parent_path(); /// get path without terminating slash
|
||||
String disk_path = fspath.remove_filename();
|
||||
disk = std::make_shared<DiskLocal>(disk_path, disk_path, 0);
|
||||
}
|
||||
|
||||
open();
|
||||
}
|
||||
|
||||
|
||||
BackupInDirectory::~BackupInDirectory()
|
||||
{
|
||||
close();
|
||||
}
|
||||
|
||||
bool BackupInDirectory::backupExists() const
|
||||
{
|
||||
return disk->isDirectory(dir_path);
|
||||
}
|
||||
|
||||
void BackupInDirectory::startWriting()
|
||||
{
|
||||
disk->createDirectories(dir_path);
|
||||
}
|
||||
|
||||
void BackupInDirectory::removeAllFilesAfterFailure()
|
||||
{
|
||||
if (disk->isDirectory(dir_path))
|
||||
disk->removeRecursive(dir_path);
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> BackupInDirectory::readFileImpl(const String & file_name) const
|
||||
{
|
||||
String file_path = path + file_name;
|
||||
return disk->readFile(file_path);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBuffer> BackupInDirectory::addFileImpl(const String & file_name)
|
||||
{
|
||||
String file_path = path + file_name;
|
||||
disk->createDirectories(fs::path(file_path).parent_path());
|
||||
return disk->writeFile(file_path);
|
||||
}
|
||||
|
||||
|
||||
void registerBackupEngineFile(BackupFactory & factory)
|
||||
{
|
||||
auto creator_fn = [](const BackupFactory::CreateParams & params)
|
||||
{
|
||||
String backup_name = params.backup_info.toString();
|
||||
const String & engine_name = params.backup_info.backup_engine_name;
|
||||
const auto & args = params.backup_info.args;
|
||||
|
||||
DiskPtr disk;
|
||||
String path;
|
||||
if (engine_name == "File")
|
||||
{
|
||||
if (args.size() != 1)
|
||||
{
|
||||
throw Exception(
|
||||
"Backup engine 'File' requires 1 argument (path)",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
|
||||
path = args[0].safeGet<String>();
|
||||
|
||||
if (!isPathAllowed(path, params.context->getConfigRef()))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} is not allowed for backups", path);
|
||||
}
|
||||
else if (engine_name == "Disk")
|
||||
{
|
||||
if (args.size() != 2)
|
||||
{
|
||||
throw Exception(
|
||||
"Backup engine 'Disk' requires 2 arguments (disk_name, path)",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
|
||||
String disk_name = args[0].safeGet<String>();
|
||||
disk = params.context->getDisk(disk_name);
|
||||
path = args[1].safeGet<String>();
|
||||
|
||||
if (!isDiskAllowed(disk_name, params.context->getConfigRef()))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk {} is not allowed for backups", disk_name);
|
||||
}
|
||||
|
||||
return std::make_shared<BackupInDirectory>(backup_name, params.open_mode, disk, path, params.context, params.base_backup_info);
|
||||
};
|
||||
|
||||
factory.registerBackupEngine("File", creator_fn);
|
||||
factory.registerBackupEngine("Disk", creator_fn);
|
||||
}
|
||||
|
||||
}
|
@ -1,89 +0,0 @@
|
||||
#include <Backups/BackupRenamingConfig.h>
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using Kind = ASTBackupQuery::Kind;
|
||||
using ElementType = ASTBackupQuery::ElementType;
|
||||
|
||||
void BackupRenamingConfig::setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name)
|
||||
{
|
||||
old_to_new_table_names[old_table_name] = new_table_name;
|
||||
}
|
||||
|
||||
void BackupRenamingConfig::setNewDatabaseName(const String & old_database_name, const String & new_database_name)
|
||||
{
|
||||
old_to_new_database_names[old_database_name] = new_database_name;
|
||||
}
|
||||
|
||||
void BackupRenamingConfig::setNewTemporaryTableName(const String & old_temporary_table_name, const String & new_temporary_table_name)
|
||||
{
|
||||
old_to_new_temporary_table_names[old_temporary_table_name] = new_temporary_table_name;
|
||||
}
|
||||
|
||||
void BackupRenamingConfig::setFromBackupQuery(const ASTBackupQuery & backup_query)
|
||||
{
|
||||
setFromBackupQueryElements(backup_query.elements);
|
||||
}
|
||||
|
||||
void BackupRenamingConfig::setFromBackupQueryElements(const ASTBackupQuery::Elements & backup_query_elements)
|
||||
{
|
||||
for (const auto & element : backup_query_elements)
|
||||
{
|
||||
switch (element.type)
|
||||
{
|
||||
case ElementType::TABLE: [[fallthrough]];
|
||||
case ElementType::DICTIONARY:
|
||||
{
|
||||
const auto & new_name = element.new_name.second.empty() ? element.name : element.new_name;
|
||||
setNewTableName(element.name, new_name);
|
||||
break;
|
||||
}
|
||||
|
||||
case ASTBackupQuery::DATABASE:
|
||||
{
|
||||
const auto & new_name = element.new_name.first.empty() ? element.name.first : element.new_name.first;
|
||||
setNewDatabaseName(element.name.first, new_name);
|
||||
break;
|
||||
}
|
||||
|
||||
case ASTBackupQuery::TEMPORARY_TABLE:
|
||||
{
|
||||
const auto & new_name = element.new_name.second.empty() ? element.name.second : element.new_name.second;
|
||||
setNewTemporaryTableName(element.name.second, new_name);
|
||||
break;
|
||||
}
|
||||
|
||||
case ASTBackupQuery::ALL_DATABASES: break;
|
||||
case ASTBackupQuery::ALL_TEMPORARY_TABLES: break;
|
||||
case ASTBackupQuery::EVERYTHING: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DatabaseAndTableName BackupRenamingConfig::getNewTableName(const DatabaseAndTableName & old_table_name) const
|
||||
{
|
||||
auto it = old_to_new_table_names.find(old_table_name);
|
||||
if (it != old_to_new_table_names.end())
|
||||
return it->second;
|
||||
return {getNewDatabaseName(old_table_name.first), old_table_name.second};
|
||||
}
|
||||
|
||||
const String & BackupRenamingConfig::getNewDatabaseName(const String & old_database_name) const
|
||||
{
|
||||
auto it = old_to_new_database_names.find(old_database_name);
|
||||
if (it != old_to_new_database_names.end())
|
||||
return it->second;
|
||||
return old_database_name;
|
||||
}
|
||||
|
||||
const String & BackupRenamingConfig::getNewTemporaryTableName(const String & old_temporary_table_name) const
|
||||
{
|
||||
auto it = old_to_new_temporary_table_names.find(old_temporary_table_name);
|
||||
if (it != old_to_new_temporary_table_names.end())
|
||||
return it->second;
|
||||
return old_temporary_table_name;
|
||||
}
|
||||
|
||||
}
|
@ -1,39 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
#include <Core/Types.h>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using DatabaseAndTableName = std::pair<String, String>;
|
||||
|
||||
/// Keeps information about renamings of databases or tables being processed
|
||||
/// while we're making a backup or while we're restoring from a backup.
|
||||
class BackupRenamingConfig
|
||||
{
|
||||
public:
|
||||
BackupRenamingConfig() = default;
|
||||
|
||||
void setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name);
|
||||
void setNewDatabaseName(const String & old_database_name, const String & new_database_name);
|
||||
void setNewTemporaryTableName(const String & old_temporary_table_name, const String & new_temporary_table_name);
|
||||
void setFromBackupQuery(const ASTBackupQuery & backup_query);
|
||||
void setFromBackupQueryElements(const ASTBackupQuery::Elements & backup_query_elements);
|
||||
|
||||
/// Changes names according to the renaming.
|
||||
DatabaseAndTableName getNewTableName(const DatabaseAndTableName & old_table_name) const;
|
||||
const String & getNewDatabaseName(const String & old_database_name) const;
|
||||
const String & getNewTemporaryTableName(const String & old_temporary_table_name) const;
|
||||
|
||||
private:
|
||||
std::map<DatabaseAndTableName, DatabaseAndTableName> old_to_new_table_names;
|
||||
std::unordered_map<String, String> old_to_new_database_names;
|
||||
std::unordered_map<String, String> old_to_new_temporary_table_names;
|
||||
};
|
||||
|
||||
using BackupRenamingConfigPtr = std::shared_ptr<const BackupRenamingConfig>;
|
||||
|
||||
}
|
@ -1,6 +1,43 @@
#include <Backups/BackupSettings.h>
#include <Backups/BackupInfo.h>
#include <Core/SettingsFields.h>
#include <Parsers/ASTBackupQuery.h>
#include <Parsers/ASTSetQuery.h>


namespace DB
{
IMPLEMENT_SETTINGS_TRAITS(BackupSettingsTraits, LIST_OF_BACKUP_SETTINGS)
namespace ErrorCodes
{
    extern const int UNKNOWN_SETTING;
}

BackupSettings BackupSettings::fromBackupQuery(const ASTBackupQuery & query)
{
    BackupSettings res;

    if (query.base_backup_name)
        res.base_backup_info = BackupInfo::fromAST(*query.base_backup_name);

    if (query.settings)
    {
        const auto & settings = query.settings->as<const ASTSetQuery &>().changes;
        for (const auto & setting : settings)
        {
            if (setting.name == "compression_method")
                res.compression_method = SettingFieldString{setting.value};
            else if (setting.name == "compression_level")
                res.compression_level = SettingFieldInt64{setting.value};
            else if (setting.name == "password")
                res.password = SettingFieldString{setting.value};
            else if (setting.name == "structure_only")
                res.structure_only = SettingFieldBool{setting.value};
            else
                throw Exception(ErrorCodes::UNKNOWN_SETTING, "Unknown setting {}", setting.name);
        }
    }

    return res;
}

}

@ -1,16 +1,31 @@
#pragma once

#include <Core/BaseSettings.h>
#include <Backups/BackupInfo.h>
#include <optional>


namespace DB
{
class ASTBackupQuery;

#define LIST_OF_BACKUP_SETTINGS(M) \
    M(Bool, dummy, false, "", 0) \
/// Settings specified in the "SETTINGS" clause of a BACKUP query.
struct BackupSettings
{
    /// The base backup; if it is set, an incremental backup will be built.
    std::optional<BackupInfo> base_backup_info;

    DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(BackupSettingsTraits, LIST_OF_BACKUP_SETTINGS)
    /// Compression method and level for writing the backup (when applicable).
    String compression_method; /// "" means default method
    int compression_level = -1; /// -1 means default level

    struct BackupSettings : public BaseSettings<BackupSettingsTraits> {};
    /// Password used to encrypt the backup.
    String password;

    /// If this is set to true then only create queries will be written to the backup,
    /// without the data of tables.
    bool structure_only = false;

    static BackupSettings fromBackupQuery(const ASTBackupQuery & query);
};

}

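
Tying the pieces of this diff together: the settings parsed by BackupSettings::fromBackupQuery map onto the fields added to BackupFactory::CreateParams earlier in this diff (compression_method, compression_level, password, base_backup_info). A hedged sketch of that hand-off; how the query, the target backup name and the context are obtained is outside this commit.

// Sketch only: `query` and `context` come from the caller; filling params.backup_info from the
// query's target backup name is not shown in this commit and is left out here.
#include <Backups/BackupFactory.h>
#include <Backups/BackupSettings.h>

DB::BackupMutablePtr createBackupFromQuerySketch(const DB::ASTBackupQuery & query, const DB::ContextPtr & context)
{
    using namespace DB;

    BackupSettings settings = BackupSettings::fromBackupQuery(query);

    BackupFactory::CreateParams params;   /// open_mode defaults to OpenMode::WRITE (see the CreateParams hunk earlier)
    /// params.backup_info = ...;         /// would name the target backup; omitted, not part of this diff
    params.base_backup_info = settings.base_backup_info;
    params.compression_method = settings.compression_method;
    params.compression_level = settings.compression_level;
    params.password = settings.password;
    params.context = context;

    /// After this commit, createBackup() both constructs and opens the backup.
    return BackupFactory::instance().createBackup(params);
}
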
File diff suppressed because it is too large
@ -5,35 +5,28 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IBackup;
|
||||
using BackupPtr = std::shared_ptr<const IBackup>;
|
||||
using BackupMutablePtr = std::shared_ptr<IBackup>;
|
||||
class IBackupEntry;
|
||||
using BackupEntryPtr = std::unique_ptr<IBackupEntry>;
|
||||
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
|
||||
using RestoreDataTask = std::function<void()>;
|
||||
using RestoreDataTasks = std::vector<RestoreDataTask>;
|
||||
using RestoreObjectTask = std::function<RestoreDataTasks()>;
|
||||
using RestoreObjectsTasks = std::vector<RestoreObjectTask>;
|
||||
struct BackupSettings;
|
||||
class Context;
|
||||
using ContextPtr = std::shared_ptr<const Context>;
|
||||
using ContextMutablePtr = std::shared_ptr<Context>;
|
||||
|
||||
|
||||
/// Prepares backup entries.
|
||||
BackupEntries makeBackupEntries(const ASTBackupQuery::Elements & elements, const ContextPtr & context);
|
||||
|
||||
/// Estimate total size of the backup which would be written from the specified entries.
|
||||
UInt64 estimateBackupSize(const BackupEntries & backup_entries, const BackupPtr & base_backup);
|
||||
BackupEntries makeBackupEntries(const ContextPtr & context, const ASTBackupQuery::Elements & elements, const BackupSettings & backup_settings);
|
||||
|
||||
/// Write backup entries to an opened backup.
|
||||
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, size_t num_threads);
|
||||
|
||||
/// Prepare restore tasks.
|
||||
RestoreObjectsTasks makeRestoreTasks(const ASTBackupQuery::Elements & elements, ContextMutablePtr context, const BackupPtr & backup);
|
||||
/// Returns the path to metadata in backup.
|
||||
String getMetadataPathInBackup(const DatabaseAndTableName & table_name);
|
||||
String getMetadataPathInBackup(const String & database_name);
|
||||
String getMetadataPathInBackup(const IAST & create_query);
|
||||
|
||||
/// Execute restore tasks.
|
||||
void executeRestoreTasks(RestoreObjectsTasks && restore_tasks, size_t num_threads);
|
||||
/// Returns the path to table's data in backup.
|
||||
String getDataPathInBackup(const DatabaseAndTableName & table_name);
|
||||
String getDataPathInBackup(const IAST & create_query);
|
||||
|
||||
}
|
||||
|
87
src/Backups/DDLCompareUtils.cpp
Normal file
@ -0,0 +1,87 @@
|
||||
#include <Backups/DDLCompareUtils.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
std::shared_ptr<const ASTCreateQuery> prepareDDLToCompare(const ASTCreateQuery & ast)
|
||||
{
|
||||
auto res = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(ast.shared_from_this());
|
||||
|
||||
std::shared_ptr<ASTCreateQuery> clone;
|
||||
auto get_clone = [&]
|
||||
{
|
||||
if (!clone)
|
||||
{
|
||||
clone = typeid_cast<std::shared_ptr<ASTCreateQuery>>(res->clone());
|
||||
res = clone;
|
||||
}
|
||||
return clone;
|
||||
};
|
||||
|
||||
/// Remove UUIDs.
|
||||
if (res->uuid != UUIDHelpers::Nil)
|
||||
get_clone()->uuid = UUIDHelpers::Nil;
|
||||
|
||||
if (res->to_inner_uuid != UUIDHelpers::Nil)
|
||||
get_clone()->to_inner_uuid = UUIDHelpers::Nil;
|
||||
|
||||
/// Clear IF NOT EXISTS flag.
|
||||
if (res->if_not_exists)
|
||||
get_clone()->if_not_exists = false;
|
||||
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool areTableDefinitionsSame(const IAST & table1, const IAST & table2)
|
||||
{
|
||||
auto ast1 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(table1.shared_from_this());
|
||||
if (!ast1 || !ast1->table)
|
||||
return false;
|
||||
|
||||
auto ast2 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(table2.shared_from_this());
|
||||
if (!ast2 || !ast2->table)
|
||||
return false;
|
||||
|
||||
if ((ast1->uuid != ast2->uuid) || (ast1->to_inner_uuid != ast2->to_inner_uuid) ||
|
||||
(ast1->if_not_exists != ast2->if_not_exists))
|
||||
{
|
||||
ast1 = prepareDDLToCompare(*ast1);
|
||||
ast2 = prepareDDLToCompare(*ast2);
|
||||
}
|
||||
|
||||
return serializeAST(*ast1) == serializeAST(*ast2);
|
||||
}
|
||||
|
||||
|
||||
bool areDatabaseDefinitionsSame(const IAST & database1, const IAST & database2)
|
||||
{
|
||||
auto ast1 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(database1.shared_from_this());
|
||||
if (!ast1 || ast1->table || !ast1->database)
|
||||
return false;
|
||||
|
||||
auto ast2 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(database2.shared_from_this());
|
||||
if (!ast2 || ast2->table || !ast2->database)
|
||||
return false;
|
||||
|
||||
if ((ast1->uuid != ast2->uuid) || (ast1->if_not_exists != ast2->if_not_exists))
|
||||
{
|
||||
ast1 = prepareDDLToCompare(*ast1);
|
||||
ast2 = prepareDDLToCompare(*ast2);
|
||||
}
|
||||
|
||||
return serializeAST(*ast1) == serializeAST(*ast2);
|
||||
}
|
||||
|
||||
|
||||
bool areTableDataCompatible(const IAST & src_table, const IAST & dest_table)
|
||||
{
|
||||
return areTableDefinitionsSame(src_table, dest_table);
|
||||
}
|
||||
|
||||
}
|
17
src/Backups/DDLCompareUtils.h
Normal file
@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class IAST;
|
||||
|
||||
/// Checks that two table definitions are actually the same.
|
||||
bool areTableDefinitionsSame(const IAST & table1, const IAST & table2);
|
||||
|
||||
/// Checks that two database definitions are actually the same.
|
||||
bool areDatabaseDefinitionsSame(const IAST & database1, const IAST & database2);
|
||||
|
||||
/// Whether the data from the first table can be attached to the second table.
|
||||
bool areTableDataCompatible(const IAST & src_table, const IAST & dest_table);
|
||||
|
||||
}
|
379
src/Backups/DDLRenamingVisitor.cpp
Normal file
@ -0,0 +1,379 @@
|
||||
#include <Backups/DDLRenamingVisitor.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int WRONG_DDL_RENAMING_SETTINGS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Replaces names of tables and databases used in a CREATE query, which can be either CREATE TABLE or
|
||||
/// CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
|
||||
void visitCreateQuery(ASTCreateQuery & create, const DDLRenamingVisitor::Data & data)
|
||||
{
|
||||
if (create.table)
|
||||
{
|
||||
DatabaseAndTableName table_name;
|
||||
table_name.second = create.getTable();
|
||||
if (create.temporary)
|
||||
table_name.first = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
else if (create.database)
|
||||
table_name.first = create.getDatabase();
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE TABLE query must not be empty");
|
||||
|
||||
table_name = data.renaming_settings.getNewTableName(table_name);
|
||||
|
||||
if (table_name.first == DatabaseCatalog::TEMPORARY_DATABASE)
|
||||
{
|
||||
create.temporary = true;
|
||||
create.setDatabase("");
|
||||
}
|
||||
else
|
||||
{
|
||||
create.temporary = false;
|
||||
create.setDatabase(table_name.first);
|
||||
}
|
||||
create.setTable(table_name.second);
|
||||
}
|
||||
else if (create.database)
|
||||
{
|
||||
String database_name = create.getDatabase();
|
||||
database_name = data.renaming_settings.getNewDatabaseName(database_name);
|
||||
create.setDatabase(database_name);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE DATABASE query must not be empty");
|
||||
|
||||
if (!create.as_table.empty() && !create.as_database.empty())
|
||||
std::tie(create.as_database, create.as_table) = data.renaming_settings.getNewTableName({create.as_database, create.as_table});
|
||||
|
||||
if (!create.to_table_id.table_name.empty() && !create.to_table_id.database_name.empty())
|
||||
{
|
||||
auto to_table = data.renaming_settings.getNewTableName({create.to_table_id.database_name, create.to_table_id.table_name});
|
||||
create.to_table_id = StorageID{to_table.first, to_table.second};
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces names of a database and a table in an expression like `db`.`table`
|
||||
void visitTableExpression(ASTTableExpression & expr, const DDLRenamingVisitor::Data & data)
|
||||
{
|
||||
if (!expr.database_and_table_name)
|
||||
return;
|
||||
|
||||
ASTIdentifier * id = expr.database_and_table_name->as<ASTIdentifier>();
|
||||
if (!id)
|
||||
return;
|
||||
|
||||
auto table_id = id->createTable();
|
||||
if (!table_id)
|
||||
return;
|
||||
|
||||
const String & db_name = table_id->getDatabaseName();
|
||||
const String & table_name = table_id->shortName();
|
||||
if (db_name.empty() || table_name.empty())
|
||||
return;
|
||||
|
||||
String new_db_name, new_table_name;
|
||||
std::tie(new_db_name, new_table_name) = data.renaming_settings.getNewTableName({db_name, table_name});
|
||||
if ((new_db_name == db_name) && (new_table_name == table_name))
|
||||
return;
|
||||
|
||||
expr.database_and_table_name = std::make_shared<ASTIdentifier>(Strings{new_db_name, new_table_name});
|
||||
expr.children.push_back(expr.database_and_table_name);
|
||||
}
|
||||
|
||||
/// Replaces a database's name passed via an argument of the function merge() or the table engine Merge.
|
||||
void visitFunctionMerge(ASTFunction & function, const DDLRenamingVisitor::Data & data)
|
||||
{
|
||||
if (!function.arguments)
|
||||
return;
|
||||
|
||||
/// The first argument is a database's name and we can rename it.
|
||||
/// The second argument is a regular expression and we can do nothing about it.
|
||||
auto & args = function.arguments->as<ASTExpressionList &>().children;
|
||||
size_t db_name_arg_index = 0;
|
||||
if (args.size() <= db_name_arg_index)
|
||||
return;
|
||||
|
||||
String db_name = evaluateConstantExpressionForDatabaseName(args[db_name_arg_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
|
||||
if (db_name.empty())
|
||||
return;
|
||||
|
||||
String new_db_name = data.renaming_settings.getNewDatabaseName(db_name);
|
||||
if (new_db_name == db_name)
|
||||
return;
|
||||
args[db_name_arg_index] = std::make_shared<ASTLiteral>(new_db_name);
|
||||
}
|
||||
|
||||
/// Replaces names of a table and a database passed via arguments of the function remote() or cluster() or the table engine Distributed.
|
||||
void visitFunctionRemote(ASTFunction & function, const DDLRenamingVisitor::Data & data)
|
||||
{
|
||||
if (!function.arguments)
|
||||
return;
|
||||
|
||||
/// The first argument is an address or cluster's name, so we skip it.
|
||||
/// The second argument can be either 'db.name' or just 'db' followed by the third argument 'table'.
|
||||
auto & args = function.arguments->as<ASTExpressionList &>().children;
|
||||
|
||||
const auto * second_arg_as_function = args[1]->as<ASTFunction>();
|
||||
if (second_arg_as_function && TableFunctionFactory::instance().isTableFunctionName(second_arg_as_function->name))
|
||||
return;
|
||||
|
||||
size_t db_name_index = 1;
|
||||
if (args.size() <= db_name_index)
|
||||
return;
|
||||
|
||||
String name = evaluateConstantExpressionForDatabaseName(args[db_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
|
||||
|
||||
size_t table_name_index = static_cast<size_t>(-1);
|
||||
|
||||
QualifiedTableName qualified_name;
|
||||
|
||||
if (function.name == "Distributed")
|
||||
qualified_name.table = name;
|
||||
else
|
||||
qualified_name = QualifiedTableName::parseFromString(name);
|
||||
|
||||
if (qualified_name.database.empty())
|
||||
{
|
||||
std::swap(qualified_name.database, qualified_name.table);
|
||||
table_name_index = 2;
|
||||
if (args.size() <= table_name_index)
|
||||
return;
|
||||
qualified_name.table = evaluateConstantExpressionForDatabaseName(args[table_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
|
||||
}
|
||||
|
||||
const String & db_name = qualified_name.database;
|
||||
const String & table_name = qualified_name.table;
|
||||
|
||||
if (db_name.empty() || table_name.empty())
|
||||
return;
|
||||
|
||||
String new_db_name, new_table_name;
|
||||
std::tie(new_db_name, new_table_name) = data.renaming_settings.getNewTableName({db_name, table_name});
|
||||
if ((new_db_name == db_name) && (new_table_name == table_name))
|
||||
return;
|
||||
|
||||
if (table_name_index != static_cast<size_t>(-1))
|
||||
{
|
||||
if (new_db_name != db_name)
|
||||
args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
|
||||
if (new_table_name != table_name)
|
||||
args[table_name_index] = std::make_shared<ASTLiteral>(new_table_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
|
||||
args.insert(args.begin() + db_name_index + 1, std::make_shared<ASTLiteral>(new_table_name));
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces names of tables and databases used in arguments of a table function or a table engine.
|
||||
void visitFunction(ASTFunction & function, const DDLRenamingVisitor::Data & data)
|
||||
{
|
||||
if ((function.name == "merge") || (function.name == "Merge"))
|
||||
{
|
||||
visitFunctionMerge(function, data);
|
||||
}
|
||||
else if ((function.name == "remote") || (function.name == "remoteSecure") || (function.name == "cluster") ||
|
||||
(function.name == "clusterAllReplicas") || (function.name == "Distributed"))
|
||||
{
|
||||
visitFunctionRemote(function, data);
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces names of a table and a database used in source parameters of a dictionary.
|
||||
void visitDictionary(ASTDictionary & dictionary, const DDLRenamingVisitor::Data & data)
|
||||
{
|
||||
if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements)
|
||||
return;
|
||||
|
||||
auto & elements = dictionary.source->elements->as<ASTExpressionList &>().children;
|
||||
String db_name, table_name;
|
||||
size_t db_name_index = static_cast<size_t>(-1);
|
||||
size_t table_name_index = static_cast<size_t>(-1);
|
||||
|
||||
for (size_t i = 0; i != elements.size(); ++i)
|
||||
{
|
||||
auto & pair = elements[i]->as<ASTPair &>();
|
||||
if (pair.first == "db")
|
||||
{
|
||||
if (db_name_index != static_cast<size_t>(-1))
|
||||
return;
|
||||
db_name = pair.second->as<ASTLiteral &>().value.safeGet<String>();
|
||||
db_name_index = i;
|
||||
}
|
||||
else if (pair.first == "table")
|
||||
{
|
||||
if (table_name_index != static_cast<size_t>(-1))
|
||||
return;
|
||||
table_name = pair.second->as<ASTLiteral &>().value.safeGet<String>();
|
||||
table_name_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (db_name.empty() || table_name.empty())
|
||||
return;
|
||||
|
||||
String new_db_name, new_table_name;
|
||||
std::tie(new_db_name, new_table_name) = data.renaming_settings.getNewTableName({db_name, table_name});
|
||||
if ((new_db_name == db_name) && (new_table_name == table_name))
|
||||
return;
|
||||
|
||||
if (new_db_name != db_name)
|
||||
{
|
||||
auto & pair = elements[db_name_index]->as<ASTPair &>();
|
||||
pair.replace(pair.second, std::make_shared<ASTLiteral>(new_db_name));
|
||||
}
|
||||
if (new_table_name != table_name)
|
||||
{
|
||||
auto & pair = elements[table_name_index]->as<ASTPair &>();
|
||||
pair.replace(pair.second, std::make_shared<ASTLiteral>(new_table_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DDLRenamingSettings::setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name)
|
||||
{
|
||||
auto it = old_to_new_table_names.find(old_table_name);
|
||||
if (it != old_to_new_table_names.end())
|
||||
{
|
||||
if (it->second == new_table_name)
|
||||
return;
|
||||
throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Wrong renaming: it's specified that table {}.{} should be renamed to {}.{} and to {}.{} at the same time",
|
||||
backQuoteIfNeed(old_table_name.first), backQuoteIfNeed(old_table_name.second),
|
||||
backQuoteIfNeed(it->second.first), backQuoteIfNeed(it->second.second),
|
||||
backQuoteIfNeed(new_table_name.first), backQuoteIfNeed(new_table_name.second));
|
||||
}
|
||||
old_to_new_table_names[old_table_name] = new_table_name;
|
||||
}
|
||||
|
||||
void DDLRenamingSettings::setNewDatabaseName(const String & old_database_name, const String & new_database_name)
|
||||
{
|
||||
auto it = old_to_new_database_names.find(old_database_name);
|
||||
if (it != old_to_new_database_names.end())
|
||||
{
|
||||
if (it->second == new_database_name)
|
||||
return;
|
||||
throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Wrong renaming: it's specified that database {} should be renamed to {} and to {} at the same time",
|
||||
backQuoteIfNeed(old_database_name), backQuoteIfNeed(it->second), backQuoteIfNeed(new_database_name));
|
||||
}
|
||||
old_to_new_database_names[old_database_name] = new_database_name;
|
||||
}
|
||||
|
||||
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery & backup_query, const String & current_database)
|
||||
{
|
||||
setFromBackupQuery(backup_query.elements, current_database);
|
||||
}
|
||||
|
||||
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements, const String & current_database)
|
||||
{
|
||||
old_to_new_table_names.clear();
|
||||
old_to_new_database_names.clear();
|
||||
|
||||
using ElementType = ASTBackupQuery::ElementType;
|
||||
|
||||
for (const auto & element : backup_query_elements)
|
||||
{
|
||||
switch (element.type)
|
||||
{
|
||||
case ElementType::TABLE:
|
||||
{
|
||||
const String & table_name = element.name.second;
|
||||
String database_name = element.name.first;
|
||||
if (element.name_is_in_temp_db)
|
||||
database_name = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
else if (database_name.empty())
|
||||
database_name = current_database;
|
||||
|
||||
const String & new_table_name = element.new_name.second;
|
||||
String new_database_name = element.new_name.first;
|
||||
if (element.new_name_is_in_temp_db)
|
||||
new_database_name = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
else if (new_database_name.empty())
|
||||
new_database_name = current_database;
|
||||
|
||||
setNewTableName({database_name, table_name}, {new_database_name, new_table_name});
|
||||
break;
|
||||
}
|
||||
|
||||
case ASTBackupQuery::DATABASE:
|
||||
{
|
||||
String database_name = element.name.first;
|
||||
if (element.name_is_in_temp_db)
|
||||
database_name = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
|
||||
String new_database_name = element.new_name.first;
|
||||
if (element.new_name_is_in_temp_db)
|
||||
new_database_name = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
|
||||
setNewDatabaseName(database_name, new_database_name);
|
||||
break;
|
||||
}
|
||||
|
||||
case ASTBackupQuery::ALL_DATABASES: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DatabaseAndTableName DDLRenamingSettings::getNewTableName(const DatabaseAndTableName & old_table_name) const
|
||||
{
|
||||
auto it = old_to_new_table_names.find(old_table_name);
|
||||
if (it != old_to_new_table_names.end())
|
||||
return it->second;
|
||||
return {getNewDatabaseName(old_table_name.first), old_table_name.second};
|
||||
}
|
||||
|
||||
const String & DDLRenamingSettings::getNewDatabaseName(const String & old_database_name) const
|
||||
{
|
||||
auto it = old_to_new_database_names.find(old_database_name);
|
||||
if (it != old_to_new_database_names.end())
|
||||
return it->second;
|
||||
return old_database_name;
|
||||
}
|
||||
|
||||
|
||||
bool DDLRenamingVisitor::needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
|
||||
|
||||
void DDLRenamingVisitor::visit(ASTPtr & ast, const Data & data)
|
||||
{
|
||||
if (auto * create = ast->as<ASTCreateQuery>())
|
||||
visitCreateQuery(*create, data);
|
||||
else if (auto * expr = ast->as<ASTTableExpression>())
|
||||
visitTableExpression(*expr, data);
|
||||
else if (auto * function = ast->as<ASTFunction>())
|
||||
visitFunction(*function, data);
|
||||
else if (auto * dictionary = ast->as<ASTDictionary>())
|
||||
visitDictionary(*dictionary, data);
|
||||
}
|
||||
|
||||
void renameInCreateQuery(ASTPtr & ast, const ContextPtr & global_context, const DDLRenamingSettings & renaming_settings)
|
||||
{
|
||||
try
|
||||
{
|
||||
DDLRenamingVisitor::Data data{renaming_settings, global_context};
|
||||
DDLRenamingVisitor::Visitor{data}.visit(ast);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("Backup", "Error while renaming in AST");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
src/Backups/DDLRenamingVisitor.h (new file, 61 lines)
@@ -0,0 +1,61 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using DatabaseAndTableName = std::pair<String, String>;
|
||||
class IAST;
|
||||
using ASTPtr = std::shared_ptr<IAST>;
|
||||
class Context;
|
||||
using ContextPtr = std::shared_ptr<const Context>;
|
||||
|
||||
/// Keeps information about renamings of databases or tables being processed
|
||||
/// while we're making a backup or while we're restoring from a backup.
|
||||
class DDLRenamingSettings
|
||||
{
|
||||
public:
|
||||
DDLRenamingSettings() = default;
|
||||
|
||||
void setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name);
|
||||
void setNewDatabaseName(const String & old_database_name, const String & new_database_name);
|
||||
|
||||
void setFromBackupQuery(const ASTBackupQuery & backup_query, const String & current_database);
|
||||
void setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements, const String & current_database);
|
||||
|
||||
/// Changes names according to the renaming.
|
||||
DatabaseAndTableName getNewTableName(const DatabaseAndTableName & old_table_name) const;
|
||||
const String & getNewDatabaseName(const String & old_database_name) const;
|
||||
|
||||
private:
|
||||
std::map<DatabaseAndTableName, DatabaseAndTableName> old_to_new_table_names;
|
||||
std::unordered_map<String, String> old_to_new_database_names;
|
||||
};
|
||||
|
||||
|
||||
/// Changes names in AST according to the renaming settings.
|
||||
void renameInCreateQuery(ASTPtr & ast, const ContextPtr & global_context, const DDLRenamingSettings & renaming_settings);
|
||||
|
||||
/// Visits ASTCreateQuery and changes names of tables and databases according to the passed DDLRenamingSettings.
|
||||
class DDLRenamingVisitor
|
||||
{
|
||||
public:
|
||||
struct Data
|
||||
{
|
||||
const DDLRenamingSettings & renaming_settings;
|
||||
ContextPtr context;
|
||||
};
|
||||
|
||||
using Visitor = InDepthNodeVisitor<DDLRenamingVisitor, false>;
|
||||
|
||||
static bool needChildVisit(ASTPtr &, const ASTPtr &);
|
||||
static void visit(ASTPtr & ast, const Data & data);
|
||||
};
|
||||
|
||||
}
|
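To make the intended use of the header above concrete, here is a minimal sketch (not part of the commit) that parses a CREATE query and applies a renaming before restoring. The names `db_old`, `db_new`, `events`, `events_restored` and the helper `renameForRestore` are invented for illustration; everything else comes from the declarations shown in this diff.

#include <Backups/DDLRenamingVisitor.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>

namespace DB
{
/// Rewrites references to `db_old` so the query can be restored into `db_new`.
ASTPtr renameForRestore(const String & create_query_str, const ContextPtr & context)
{
    ParserCreateQuery parser;
    ASTPtr ast = parseQuery(parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);

    DDLRenamingSettings renaming;
    renaming.setNewDatabaseName("db_old", "db_new");                               /// the whole database is renamed
    renaming.setNewTableName({"db_old", "events"}, {"db_new", "events_restored"}); /// one table also gets a new name

    renameInCreateQuery(ast, context, renaming);   /// walks the AST and applies both mappings
    return ast;
}
}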
src/Backups/DirectoryBackup.cpp (new file, 82 lines)
@@ -0,0 +1,82 @@
|
||||
#include <Backups/DirectoryBackup.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Disks/DiskLocal.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
DirectoryBackup::DirectoryBackup(
|
||||
const String & backup_name_,
|
||||
const DiskPtr & disk_,
|
||||
const String & path_,
|
||||
const ContextPtr & context_,
|
||||
const std::optional<BackupInfo> & base_backup_info_)
|
||||
: BackupImpl(backup_name_, context_, base_backup_info_)
|
||||
, disk(disk_), path(path_)
|
||||
{
|
||||
/// Path to backup must end with '/'
|
||||
if (!path.ends_with("/"))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to backup must end with '/', but {} doesn't.", getName(), quoteString(path));
|
||||
dir_path = fs::path(path).parent_path(); /// get path without terminating slash
|
||||
|
||||
/// If `disk` is not specified, we create an internal instance of `DiskLocal` here.
|
||||
if (!disk)
|
||||
{
|
||||
auto fspath = fs::path{dir_path};
|
||||
if (!fspath.has_filename())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to a backup must be a directory path.", getName(), quoteString(path));
|
||||
path = fspath.filename() / "";
|
||||
dir_path = fs::path(path).parent_path(); /// get path without terminating slash
|
||||
String disk_path = fspath.remove_filename();
|
||||
disk = std::make_shared<DiskLocal>(disk_path, disk_path, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
DirectoryBackup::~DirectoryBackup()
|
||||
{
|
||||
close();
|
||||
}
|
||||
|
||||
bool DirectoryBackup::backupExists() const
|
||||
{
|
||||
return disk->isDirectory(dir_path);
|
||||
}
|
||||
|
||||
void DirectoryBackup::openImpl(OpenMode open_mode_)
|
||||
{
|
||||
if (open_mode_ == OpenMode::WRITE)
|
||||
disk->createDirectories(dir_path);
|
||||
}
|
||||
|
||||
void DirectoryBackup::closeImpl(bool writing_finalized_)
|
||||
{
|
||||
if ((getOpenModeNoLock() == OpenMode::WRITE) && !writing_finalized_ && disk->isDirectory(dir_path))
|
||||
{
|
||||
/// Creation of the backup wasn't finished correctly,
|
||||
/// so the backup cannot be used and it's better to remove its files.
|
||||
disk->removeRecursive(dir_path);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> DirectoryBackup::readFileImpl(const String & file_name) const
|
||||
{
|
||||
String file_path = path + file_name;
|
||||
return disk->readFile(file_path);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBuffer> DirectoryBackup::writeFileImpl(const String & file_name)
|
||||
{
|
||||
String file_path = path + file_name;
|
||||
disk->createDirectories(fs::path(file_path).parent_path());
|
||||
return disk->writeFile(file_path);
|
||||
}
|
||||
|
||||
}
|
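The path handling in the DirectoryBackup constructor above is easier to follow with a concrete value. Below is a standalone sketch (plain std::filesystem; the example path "/backups/daily/" is assumed) of the same decomposition into a disk root and a relative backup directory.

#include <filesystem>
#include <iostream>
#include <string>

int main()
{
    namespace fs = std::filesystem;

    std::string path = "/backups/daily/";               /// must end with '/'
    fs::path dir_path = fs::path(path).parent_path();   /// "/backups/daily", no terminating slash

    fs::path fspath{dir_path};
    std::string rel_path  = fspath.filename() / "";     /// "daily/"    -> path relative to the internal DiskLocal
    std::string disk_path = fspath.remove_filename();   /// "/backups/" -> root of the internal DiskLocal

    std::cout << rel_path << ' ' << disk_path << '\n';  /// prints: daily/ /backups/
}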
@@ -10,25 +10,24 @@ using DiskPtr = std::shared_ptr<IDisk>;
|
||||
|
||||
/// Represents a backup stored on a disk.
|
||||
/// A backup is stored as a directory, each entry is stored as a file in that directory.
|
||||
class BackupInDirectory : public BackupImpl
|
||||
class DirectoryBackup : public BackupImpl
|
||||
{
|
||||
public:
|
||||
/// `disk_` is allowed to be nullptr, which means `path_` is a path in the local filesystem.
|
||||
BackupInDirectory(
|
||||
DirectoryBackup(
|
||||
const String & backup_name_,
|
||||
OpenMode open_mode_,
|
||||
const DiskPtr & disk_,
|
||||
const String & path_,
|
||||
const ContextPtr & context_,
|
||||
const std::optional<BackupInfo> & base_backup_info_ = {});
|
||||
~BackupInDirectory() override;
|
||||
~DirectoryBackup() override;
|
||||
|
||||
private:
|
||||
bool backupExists() const override;
|
||||
void startWriting() override;
|
||||
void removeAllFilesAfterFailure() override;
|
||||
void openImpl(OpenMode open_mode_) override;
|
||||
void closeImpl(bool writing_finalized_) override;
|
||||
std::unique_ptr<ReadBuffer> readFileImpl(const String & file_name) const override;
|
||||
std::unique_ptr<WriteBuffer> addFileImpl(const String & file_name) override;
|
||||
std::unique_ptr<WriteBuffer> writeFileImpl(const String & file_name) override;
|
||||
|
||||
DiskPtr disk;
|
||||
String path;
|
@@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <Common/TypePromotion.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
|
||||
namespace DB
|
||||
@@ -13,7 +13,7 @@ using BackupEntryPtr = std::unique_ptr<IBackupEntry>;
|
||||
/// Represents a backup, i.e. a storage of BackupEntries which can be accessed by their names.
|
||||
/// A backup can be either incremental or non-incremental. An incremental backup doesn't store
|
||||
/// the data of the entries which are not changed compared to its base backup.
|
||||
class IBackup : public std::enable_shared_from_this<IBackup>, public TypePromotion<IBackup>
|
||||
class IBackup : public std::enable_shared_from_this<IBackup>
|
||||
{
|
||||
public:
|
||||
IBackup() = default;
|
||||
@@ -24,13 +24,18 @@ public:
|
||||
|
||||
enum class OpenMode
|
||||
{
|
||||
NONE,
|
||||
READ,
|
||||
WRITE,
|
||||
};
|
||||
|
||||
/// A backup can be opened in either READ or WRITE mode.
|
||||
/// Opens the backup and starts its reading or writing depending on `open_mode`.
|
||||
virtual void open(OpenMode open_mode) = 0;
|
||||
virtual OpenMode getOpenMode() const = 0;
|
||||
|
||||
/// Closes the backup and ends its reading or writing.
|
||||
virtual void close() = 0;
|
||||
|
||||
/// Returns the time point when this backup was created.
|
||||
virtual time_t getTimestamp() const = 0;
|
||||
|
||||
@@ -57,17 +62,20 @@
|
||||
/// This function does the same as `read(file_name)->getChecksum()` but faster.
|
||||
virtual UInt128 getFileChecksum(const String & file_name) const = 0;
|
||||
|
||||
/// Finds a file by its checksum, returns nullopt if not found.
|
||||
virtual std::optional<String> findFileByChecksum(const UInt128 & checksum) const = 0;
|
||||
|
||||
/// Reads an entry from the backup.
|
||||
virtual BackupEntryPtr readFile(const String & file_name) const = 0;
|
||||
|
||||
/// Puts a new entry to the backup.
|
||||
virtual void addFile(const String & file_name, BackupEntryPtr entry) = 0;
|
||||
|
||||
/// Whether it's possible to add new entries to the backup in multiple threads.
|
||||
virtual bool supportsWritingInMultipleThreads() const { return true; }
|
||||
virtual void writeFile(const String & file_name, BackupEntryPtr entry) = 0;
|
||||
|
||||
/// Finalizes writing the backup, should be called after all entries have been successfully written.
|
||||
virtual void finalizeWriting() = 0;
|
||||
|
||||
/// Whether it's possible to add new entries to the backup in multiple threads.
|
||||
virtual bool supportsWritingInMultipleThreads() const { return true; }
|
||||
};
|
||||
|
||||
using BackupPtr = std::shared_ptr<const IBackup>;
|
||||
|
src/Backups/IBackupEntriesBatch.cpp (new file, 37 lines)
@@ -0,0 +1,37 @@
|
||||
#include <Backups/IBackupEntriesBatch.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IBackupEntriesBatch::BackupEntryFromBatch : public IBackupEntry
|
||||
{
|
||||
public:
|
||||
BackupEntryFromBatch(const std::shared_ptr<IBackupEntriesBatch> & generator_, size_t index_) : batch(generator_), index(index_)
|
||||
{
|
||||
assert(batch);
|
||||
}
|
||||
|
||||
UInt64 getSize() const override { return batch->getSize(index); }
|
||||
std::optional<UInt128> getChecksum() const override { return batch->getChecksum(index); }
|
||||
std::unique_ptr<ReadBuffer> getReadBuffer() const override { return batch->getReadBuffer(index); }
|
||||
|
||||
private:
|
||||
const std::shared_ptr<IBackupEntriesBatch> batch;
|
||||
const size_t index;
|
||||
};
|
||||
|
||||
|
||||
BackupEntries IBackupEntriesBatch::getBackupEntries()
|
||||
{
|
||||
BackupEntries res;
|
||||
res.reserve(entry_names.size());
|
||||
for (size_t i = 0; i != entry_names.size(); ++i)
|
||||
{
|
||||
res.emplace_back(entry_names[i], std::make_unique<BackupEntryFromBatch>(shared_from_this(), i));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
src/Backups/IBackupEntriesBatch.h (new file, 29 lines)
@@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <mutex>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Helper class designed to generate multiple backup entries from one source.
|
||||
class IBackupEntriesBatch : public std::enable_shared_from_this<IBackupEntriesBatch>
|
||||
{
|
||||
public:
|
||||
BackupEntries getBackupEntries();
|
||||
|
||||
virtual ~IBackupEntriesBatch() = default;
|
||||
|
||||
protected:
|
||||
IBackupEntriesBatch(const Strings & entry_names_) : entry_names(entry_names_) {}
|
||||
|
||||
virtual std::unique_ptr<ReadBuffer> getReadBuffer(size_t index) = 0;
|
||||
virtual UInt64 getSize(size_t index) = 0;
|
||||
virtual std::optional<UInt128> getChecksum(size_t) { return {}; }
|
||||
|
||||
private:
|
||||
class BackupEntryFromBatch;
|
||||
const Strings entry_names;
|
||||
};
|
||||
|
||||
}
|
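As a rough illustration of how the batch interface above is meant to be subclassed, here is a sketch that serves entries from strings kept in memory. The class name and contents are made up; shared ownership is required because getBackupEntries() calls shared_from_this().

#include <Backups/IBackupEntriesBatch.h>
#include <IO/ReadBufferFromString.h>

namespace DB
{
/// Serves every entry from a string kept in memory.
class InMemoryEntriesBatch : public IBackupEntriesBatch
{
public:
    InMemoryEntriesBatch(const Strings & names_, Strings contents_)
        : IBackupEntriesBatch(names_), contents(std::move(contents_)) {}

protected:
    std::unique_ptr<ReadBuffer> getReadBuffer(size_t index) override
    {
        /// The buffer only references the stored string, which outlives it here.
        return std::make_unique<ReadBufferFromString>(contents[index]);
    }

    UInt64 getSize(size_t index) override { return contents[index].size(); }

private:
    Strings contents;
};
}

/// Usage (the batch must be owned by a shared_ptr):
///   auto batch = std::make_shared<DB::InMemoryEntriesBatch>(names, contents);
///   auto entries = batch->getBackupEntries();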
src/Backups/IRestoreTask.h (new file, 31 lines)
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Represents a task of restoring something (database / table / table's part) from backup.
|
||||
class IRestoreTask
|
||||
{
|
||||
public:
|
||||
IRestoreTask() = default;
|
||||
virtual ~IRestoreTask() = default;
|
||||
|
||||
/// Performs the restore; the function can also return a list of nested tasks that should be run later.
|
||||
virtual std::vector<std::unique_ptr<IRestoreTask>> run() = 0;
|
||||
|
||||
/// Is it necessary to run this task sequentially?
|
||||
/// Sequential tasks are executed first and strictly in one thread.
|
||||
virtual bool isSequential() const { return false; }
|
||||
|
||||
/// Reverts the effect of run(). If that's not possible, the function does nothing.
|
||||
virtual void rollback() {}
|
||||
};
|
||||
|
||||
using RestoreTaskPtr = std::unique_ptr<IRestoreTask>;
|
||||
using RestoreTasks = std::vector<RestoreTaskPtr>;
|
||||
|
||||
}
|
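A minimal, made-up task showing the contract above: run() does the work and may return nested tasks, and isSequential() controls whether the task runs in the single-threaded phase. Only the declarations from IRestoreTask.h are assumed.

#include <Backups/IRestoreTask.h>
#include <iostream>
#include <string>

namespace DB
{
/// Prints a message when run; returns no nested tasks.
class PrintMessageTask : public IRestoreTask
{
public:
    explicit PrintMessageTask(std::string message_) : message(std::move(message_)) {}

    std::vector<std::unique_ptr<IRestoreTask>> run() override
    {
        std::cout << message << '\n';
        return {};                                       /// nothing else to schedule
    }

    bool isSequential() const override { return true; }  /// executed before the parallel phase

private:
    std::string message;
};
}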
src/Backups/RestoreSettings.cpp (new file, 47 lines)
@@ -0,0 +1,47 @@
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <Backups/BackupInfo.h>
|
||||
#include <Core/SettingsFields.h>
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_SETTING;
|
||||
}
|
||||
|
||||
RestoreSettings RestoreSettings::fromRestoreQuery(const ASTBackupQuery & query)
|
||||
{
|
||||
RestoreSettings res;
|
||||
|
||||
if (query.base_backup_name)
|
||||
res.base_backup_info = BackupInfo::fromAST(*query.base_backup_name);
|
||||
|
||||
if (query.settings)
|
||||
{
|
||||
const auto & settings = query.settings->as<const ASTSetQuery &>().changes;
|
||||
for (const auto & setting : settings)
|
||||
{
|
||||
if (setting.name == "password")
|
||||
res.password = SettingFieldString{setting.value};
|
||||
else if (setting.name == "structure_only")
|
||||
res.structure_only = SettingFieldBool{setting.value};
|
||||
else if (setting.name == "throw_if_database_exists")
|
||||
res.throw_if_database_exists = SettingFieldBool{setting.value};
|
||||
else if (setting.name == "throw_if_table_exists")
|
||||
res.throw_if_table_exists = SettingFieldBool{setting.value};
|
||||
else if (setting.name == "throw_if_database_def_differs")
|
||||
res.throw_if_database_def_differs = SettingFieldBool{setting.value};
|
||||
else if (setting.name == "throw_if_table_def_differs")
|
||||
res.throw_if_table_def_differs = SettingFieldBool{setting.value};
|
||||
else
|
||||
throw Exception(ErrorCodes::UNKNOWN_SETTING, "Unknown setting {}", setting.name);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
src/Backups/RestoreSettings.h (new file, 47 lines)
@@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/BackupInfo.h>
|
||||
#include <optional>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class ASTBackupQuery;
|
||||
|
||||
struct StorageRestoreSettings
|
||||
{
|
||||
};
|
||||
|
||||
/// Settings specified in the "SETTINGS" clause of a RESTORE query.
|
||||
struct RestoreSettings : public StorageRestoreSettings
|
||||
{
|
||||
/// Base backup: this setting lets us override the location of the base backup while restoring.
|
||||
/// Any incremental backup keeps inside the information about its base backup,
|
||||
/// so using this setting is optional.
|
||||
std::optional<BackupInfo> base_backup_info;
|
||||
|
||||
/// Password used to decrypt the backup.
|
||||
String password;
|
||||
|
||||
/// If this is set to true then only create queries will be read from backup,
|
||||
/// without the data of tables.
|
||||
bool structure_only = false;
|
||||
|
||||
/// Whether RESTORE DATABASE must throw an exception if a destination database already exists.
|
||||
bool throw_if_database_exists = true;
|
||||
|
||||
/// Whether RESTORE TABLE must throw an exception if a destination table already exists.
|
||||
bool throw_if_table_exists = true;
|
||||
|
||||
/// Whether RESTORE DATABASE must throw an exception if a destination database has
|
||||
/// a different definition compared with the definition read from the backup.
|
||||
bool throw_if_database_def_differs = true;
|
||||
|
||||
/// Whether RESTORE TABLE must throw an exception if a destination table has
|
||||
/// a different definition compared with the definition read from the backup.
|
||||
bool throw_if_table_def_differs = true;
|
||||
|
||||
static RestoreSettings fromRestoreQuery(const ASTBackupQuery & query);
|
||||
};
|
||||
|
||||
}
|
src/Backups/RestoreUtils.cpp (new file, 685 lines)
@@ -0,0 +1,685 @@
|
||||
#include <Backups/RestoreUtils.h>
|
||||
#include <Backups/BackupUtils.h>
|
||||
#include <Backups/DDLCompareUtils.h>
|
||||
#include <Backups/DDLRenamingVisitor.h>
|
||||
#include <Backups/IBackup.h>
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <Backups/IRestoreTask.h>
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <Backups/formatTableNameOrTemporaryTableName.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_RESTORE_TABLE;
|
||||
extern const int CANNOT_RESTORE_DATABASE;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
using Kind = ASTBackupQuery::Kind;
|
||||
using Element = ASTBackupQuery::Element;
|
||||
using Elements = ASTBackupQuery::Elements;
|
||||
using ElementType = ASTBackupQuery::ElementType;
|
||||
using RestoreSettingsPtr = std::shared_ptr<const RestoreSettings>;
|
||||
|
||||
|
||||
/// Restores a database (without tables inside), should be executed before executing
|
||||
/// RestoreTableTask.
|
||||
class RestoreDatabaseTask : public IRestoreTask
|
||||
{
|
||||
public:
|
||||
RestoreDatabaseTask(
|
||||
ContextMutablePtr context_,
|
||||
const ASTPtr & create_query_,
|
||||
const RestoreSettingsPtr & restore_settings_,
|
||||
bool ignore_if_database_def_differs_)
|
||||
: context(context_)
|
||||
, create_query(typeid_cast<std::shared_ptr<ASTCreateQuery>>(create_query_))
|
||||
, restore_settings(restore_settings_)
|
||||
, ignore_if_database_def_differs(ignore_if_database_def_differs_)
|
||||
{
|
||||
}
|
||||
|
||||
RestoreTasks run() override
|
||||
{
|
||||
createDatabase();
|
||||
getDatabase();
|
||||
checkDatabaseCreateQuery();
|
||||
return {};
|
||||
}
|
||||
|
||||
bool isSequential() const override { return true; }
|
||||
|
||||
private:
|
||||
void createDatabase()
|
||||
{
|
||||
/// We need to call clone() for `create_query` because the interpreter can decide
|
||||
/// to change a passed AST a little bit.
|
||||
InterpreterCreateQuery create_interpreter{create_query->clone(), context};
|
||||
create_interpreter.execute();
|
||||
}
|
||||
|
||||
DatabasePtr getDatabase()
|
||||
{
|
||||
if (!database)
|
||||
database = DatabaseCatalog::instance().getDatabase(create_query->getDatabase());
|
||||
return database;
|
||||
}
|
||||
|
||||
ASTPtr getDatabaseCreateQuery()
|
||||
{
|
||||
if (!database_create_query)
|
||||
database_create_query = getDatabase()->getCreateDatabaseQuery();
|
||||
return database_create_query;
|
||||
}
|
||||
|
||||
void checkDatabaseCreateQuery()
|
||||
{
|
||||
if (ignore_if_database_def_differs || !restore_settings->throw_if_database_def_differs)
|
||||
return;
|
||||
|
||||
getDatabaseCreateQuery();
|
||||
if (areDatabaseDefinitionsSame(*create_query, *database_create_query))
|
||||
return;
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_DATABASE,
|
||||
"The database {} already exists but has a different definition: {}, "
|
||||
"compare to its definition in the backup: {}",
|
||||
backQuoteIfNeed(create_query->getDatabase()),
|
||||
serializeAST(*database_create_query),
|
||||
serializeAST(*create_query));
|
||||
}
|
||||
|
||||
ContextMutablePtr context;
|
||||
std::shared_ptr<ASTCreateQuery> create_query;
|
||||
RestoreSettingsPtr restore_settings;
|
||||
bool ignore_if_database_def_differs = false;
|
||||
DatabasePtr database;
|
||||
ASTPtr database_create_query;
|
||||
};
|
||||
|
||||
|
||||
/// Restores a table and fills it with data.
|
||||
class RestoreTableTask : public IRestoreTask
|
||||
{
|
||||
public:
|
||||
RestoreTableTask(
|
||||
ContextMutablePtr context_,
|
||||
const ASTPtr & create_query_,
|
||||
const ASTs & partitions_,
|
||||
const BackupPtr & backup_,
|
||||
const DatabaseAndTableName & table_name_in_backup_,
|
||||
const RestoreSettingsPtr & restore_settings_)
|
||||
: context(context_), create_query(typeid_cast<std::shared_ptr<ASTCreateQuery>>(create_query_)),
|
||||
partitions(partitions_), backup(backup_), table_name_in_backup(table_name_in_backup_),
|
||||
restore_settings(restore_settings_)
|
||||
{
|
||||
table_name = DatabaseAndTableName{create_query->getDatabase(), create_query->getTable()};
|
||||
if (create_query->temporary)
|
||||
table_name.first = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
}
|
||||
|
||||
RestoreTasks run() override
|
||||
{
|
||||
createStorage();
|
||||
getStorage();
|
||||
checkStorageCreateQuery();
|
||||
RestoreTasks tasks;
|
||||
if (auto task = insertData())
|
||||
tasks.push_back(std::move(task));
|
||||
return tasks;
|
||||
}
|
||||
|
||||
bool isSequential() const override { return true; }
|
||||
|
||||
private:
|
||||
void createStorage()
|
||||
{
|
||||
/// We need to call clone() for `create_query` because the interpreter can decide
|
||||
/// to change a passed AST a little bit.
|
||||
InterpreterCreateQuery create_interpreter{create_query->clone(), context};
|
||||
create_interpreter.execute();
|
||||
}
|
||||
|
||||
StoragePtr getStorage()
|
||||
{
|
||||
if (!storage)
|
||||
std::tie(database, storage) = DatabaseCatalog::instance().getDatabaseAndTable({table_name.first, table_name.second}, context);
|
||||
return storage;
|
||||
}
|
||||
|
||||
ASTPtr getStorageCreateQuery()
|
||||
{
|
||||
if (!storage_create_query)
|
||||
{
|
||||
getStorage();
|
||||
storage_create_query = database->getCreateTableQuery(table_name.second, context);
|
||||
}
|
||||
return storage_create_query;
|
||||
}
|
||||
|
||||
void checkStorageCreateQuery()
|
||||
{
|
||||
if (!restore_settings->throw_if_table_def_differs)
|
||||
return;
|
||||
|
||||
getStorageCreateQuery();
|
||||
if (areTableDefinitionsSame(*create_query, *storage_create_query))
|
||||
return;
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"The {} already exists but has a different definition: {}, "
|
||||
"compare to its definition in the backup: {}",
|
||||
formatTableNameOrTemporaryTableName(table_name),
|
||||
serializeAST(*storage_create_query),
|
||||
serializeAST(*create_query));
|
||||
}
|
||||
|
||||
bool hasData()
|
||||
{
|
||||
if (has_data)
|
||||
return *has_data;
|
||||
|
||||
has_data = false;
|
||||
if (restore_settings->structure_only)
|
||||
return false;
|
||||
|
||||
data_path_in_backup = getDataPathInBackup(table_name_in_backup);
|
||||
if (backup->listFiles(data_path_in_backup).empty())
|
||||
return false;
|
||||
|
||||
getStorageCreateQuery();
|
||||
if (!areTableDataCompatible(*create_query, *storage_create_query))
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"Cannot attach data of the {} in the backup to the existing {} because of they are not compatible. "
|
||||
"Here is the definition of the {} in the backup: {}, and here is the definition of the existing {}: {}",
|
||||
formatTableNameOrTemporaryTableName(table_name_in_backup),
|
||||
formatTableNameOrTemporaryTableName(table_name),
|
||||
formatTableNameOrTemporaryTableName(table_name_in_backup),
|
||||
serializeAST(*create_query),
|
||||
formatTableNameOrTemporaryTableName(table_name),
|
||||
serializeAST(*storage_create_query));
|
||||
|
||||
/// We check for INSERT privilege only if we're going to write into table.
|
||||
context->checkAccess(AccessType::INSERT, table_name.first, table_name.second);
|
||||
|
||||
has_data = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
RestoreTaskPtr insertData()
|
||||
{
|
||||
if (!hasData())
|
||||
return {};
|
||||
return storage->restoreData(context, partitions, backup, data_path_in_backup, *restore_settings);
|
||||
}
|
||||
|
||||
ContextMutablePtr context;
|
||||
std::shared_ptr<ASTCreateQuery> create_query;
|
||||
DatabaseAndTableName table_name;
|
||||
ASTs partitions;
|
||||
BackupPtr backup;
|
||||
DatabaseAndTableName table_name_in_backup;
|
||||
RestoreSettingsPtr restore_settings;
|
||||
DatabasePtr database;
|
||||
StoragePtr storage;
|
||||
ASTPtr storage_create_query;
|
||||
std::optional<bool> has_data;
|
||||
String data_path_in_backup;
|
||||
};
|
||||
|
||||
|
||||
/// Makes tasks for restoring databases and tables according to the elements of ASTBackupQuery.
|
||||
/// Keep this class consistent with BackupEntriesBuilder.
|
||||
class RestoreTasksBuilder
|
||||
{
|
||||
public:
|
||||
RestoreTasksBuilder(ContextMutablePtr context_, const BackupPtr & backup_, const RestoreSettings & restore_settings_)
|
||||
: context(context_), backup(backup_), restore_settings(restore_settings_) {}
|
||||
|
||||
/// Prepares internal structures for making tasks for restoring.
|
||||
void prepare(const ASTBackupQuery::Elements & elements)
|
||||
{
|
||||
String current_database = context->getCurrentDatabase();
|
||||
renaming_settings.setFromBackupQuery(elements, current_database);
|
||||
|
||||
for (const auto & element : elements)
|
||||
{
|
||||
switch (element.type)
|
||||
{
|
||||
case ElementType::TABLE:
|
||||
{
|
||||
const String & table_name = element.name.second;
|
||||
String database_name = element.name.first;
|
||||
if (database_name.empty())
|
||||
database_name = current_database;
|
||||
prepareToRestoreTable(DatabaseAndTableName{database_name, table_name}, element.partitions);
|
||||
break;
|
||||
}
|
||||
|
||||
case ElementType::DATABASE:
|
||||
{
|
||||
const String & database_name = element.name.first;
|
||||
prepareToRestoreDatabase(database_name, element.except_list);
|
||||
break;
|
||||
}
|
||||
|
||||
case ElementType::ALL_DATABASES:
|
||||
{
|
||||
prepareToRestoreAllDatabases(element.except_list);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Makes tasks for restoring, should be called after prepare().
|
||||
RestoreTasks makeTasks() const
|
||||
{
|
||||
/// Check that no `different_create_query` is set. (If it is set, that indicates an error.)
|
||||
for (const auto & info : databases | boost::adaptors::map_values)
|
||||
{
|
||||
if (info.different_create_query)
|
||||
throw Exception(ErrorCodes::CANNOT_RESTORE_DATABASE,
|
||||
"Cannot restore a database because two different create queries were generated for it: {} and {}",
|
||||
serializeAST(*info.create_query), serializeAST(*info.different_create_query));
|
||||
}
|
||||
|
||||
auto restore_settings_ptr = std::make_shared<const RestoreSettings>(restore_settings);
|
||||
|
||||
RestoreTasks res;
|
||||
for (const auto & info : databases | boost::adaptors::map_values)
|
||||
res.push_back(std::make_unique<RestoreDatabaseTask>(context, info.create_query, restore_settings_ptr,
|
||||
/* ignore_if_database_def_differs = */ !info.is_explicit));
|
||||
|
||||
/// TODO: We need to restore tables according to their dependencies.
|
||||
for (const auto & info : tables | boost::adaptors::map_values)
|
||||
res.push_back(std::make_unique<RestoreTableTask>(context, info.create_query, info.partitions, backup, info.name_in_backup, restore_settings_ptr));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Prepares to restore a single table and possibly its database's definition.
|
||||
void prepareToRestoreTable(const DatabaseAndTableName & table_name_, const ASTs & partitions_)
|
||||
{
|
||||
/// Check that we are not trying to restore the same table again.
|
||||
DatabaseAndTableName new_table_name = renaming_settings.getNewTableName(table_name_);
|
||||
if (tables.contains(new_table_name))
|
||||
throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore the {} twice", formatTableNameOrTemporaryTableName(new_table_name));
|
||||
|
||||
/// Make a create query for this table.
|
||||
auto create_query = renameInCreateQuery(readCreateQueryFromBackup(table_name_));
|
||||
create_query->if_not_exists = !restore_settings.throw_if_table_exists;
|
||||
|
||||
CreateTableInfo info;
|
||||
info.create_query = create_query;
|
||||
info.name_in_backup = table_name_;
|
||||
info.partitions = partitions_;
|
||||
tables[new_table_name] = std::move(info);
|
||||
|
||||
/// If it's not a system or temporary database, we may need to restore the database's definition too.
|
||||
if (!isSystemOrTemporaryDatabase(new_table_name.first))
|
||||
{
|
||||
if (!databases.contains(new_table_name.first))
|
||||
{
|
||||
/// Add a create query for restoring the database if we haven't done it yet.
|
||||
std::shared_ptr<ASTCreateQuery> create_db_query;
|
||||
String db_name_in_backup = table_name_.first;
|
||||
if (hasCreateQueryInBackup(db_name_in_backup))
|
||||
{
|
||||
create_db_query = renameInCreateQuery(readCreateQueryFromBackup(db_name_in_backup));
|
||||
}
|
||||
else
|
||||
{
|
||||
create_db_query = std::make_shared<ASTCreateQuery>();
|
||||
db_name_in_backup.clear();
|
||||
}
|
||||
create_db_query->setDatabase(new_table_name.first);
|
||||
create_db_query->if_not_exists = true;
|
||||
|
||||
CreateDatabaseInfo info_db;
|
||||
info_db.create_query = create_db_query;
|
||||
info_db.name_in_backup = std::move(db_name_in_backup);
|
||||
info_db.is_explicit = false;
|
||||
databases[new_table_name.first] = std::move(info_db);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// We have already added a create query for restoring the database,
|
||||
/// set `different_create_query` if it's not the same.
|
||||
auto & info_db = databases[new_table_name.first];
|
||||
if (!info_db.is_explicit && (info_db.name_in_backup != table_name_.first) && !info_db.different_create_query)
|
||||
{
|
||||
std::shared_ptr<ASTCreateQuery> create_db_query;
|
||||
if (hasCreateQueryInBackup(table_name_.first))
|
||||
create_db_query = renameInCreateQuery(readCreateQueryFromBackup(table_name_.first));
|
||||
else
|
||||
create_db_query = std::make_shared<ASTCreateQuery>();
|
||||
create_db_query->setDatabase(new_table_name.first);
|
||||
create_db_query->if_not_exists = true;
|
||||
if (!areDatabaseDefinitionsSame(*info_db.create_query, *create_db_query))
|
||||
info_db.different_create_query = create_db_query;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Prepares to restore a database and all tables in it.
|
||||
void prepareToRestoreDatabase(const String & database_name_, const std::set<String> & except_list_)
|
||||
{
|
||||
/// Check that we are not trying to restore the same database again.
|
||||
String new_database_name = renaming_settings.getNewDatabaseName(database_name_);
|
||||
if (databases.contains(new_database_name) && databases[new_database_name].is_explicit)
|
||||
throw Exception(ErrorCodes::CANNOT_RESTORE_DATABASE, "Cannot restore the database {} twice", backQuoteIfNeed(new_database_name));
|
||||
|
||||
Strings table_metadata_filenames = backup->listFiles("metadata/" + escapeForFileName(database_name_) + "/", "/");
|
||||
|
||||
bool throw_if_no_create_database_query = table_metadata_filenames.empty();
|
||||
if (throw_if_no_create_database_query && !hasCreateQueryInBackup(database_name_))
|
||||
throw Exception(ErrorCodes::CANNOT_RESTORE_DATABASE, "Cannot restore the database {} because there is no such database in the backup", backQuoteIfNeed(database_name_));
|
||||
|
||||
/// Of course we're not going to restore the definition of the system or the temporary database.
|
||||
if (!isSystemOrTemporaryDatabase(new_database_name))
|
||||
{
|
||||
/// Make a create query for this database.
|
||||
std::shared_ptr<ASTCreateQuery> create_db_query;
|
||||
String db_name_in_backup = database_name_;
|
||||
if (hasCreateQueryInBackup(db_name_in_backup))
|
||||
{
|
||||
create_db_query = renameInCreateQuery(readCreateQueryFromBackup(db_name_in_backup));
|
||||
}
|
||||
else
|
||||
{
|
||||
create_db_query = std::make_shared<ASTCreateQuery>();
|
||||
create_db_query->setDatabase(database_name_);
|
||||
db_name_in_backup.clear();
|
||||
}
|
||||
|
||||
create_db_query->if_not_exists = !restore_settings.throw_if_database_exists;
|
||||
|
||||
CreateDatabaseInfo info_db;
|
||||
info_db.create_query = create_db_query;
|
||||
info_db.name_in_backup = std::move(db_name_in_backup);
|
||||
info_db.is_explicit = true;
|
||||
databases[new_database_name] = std::move(info_db);
|
||||
}
|
||||
|
||||
/// Restore tables in this database.
|
||||
for (const String & table_metadata_filename : table_metadata_filenames)
|
||||
{
|
||||
String table_name = unescapeForFileName(fs::path{table_metadata_filename}.stem());
|
||||
if (except_list_.contains(table_name))
|
||||
continue;
|
||||
prepareToRestoreTable(DatabaseAndTableName{database_name_, table_name}, ASTs{});
|
||||
}
|
||||
}
|
||||
|
||||
/// Prepares to restore all the databases contained in the backup.
|
||||
void prepareToRestoreAllDatabases(const std::set<String> & except_list_)
|
||||
{
|
||||
Strings database_metadata_filenames = backup->listFiles("metadata/", "/");
|
||||
for (const String & database_metadata_filename : database_metadata_filenames)
|
||||
{
|
||||
String database_name = unescapeForFileName(fs::path{database_metadata_filename}.stem());
|
||||
if (except_list_.contains(database_name))
|
||||
continue;
|
||||
prepareToRestoreDatabase(database_name, std::set<String>{});
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads a create query for creating a specified table from the backup.
|
||||
std::shared_ptr<ASTCreateQuery> readCreateQueryFromBackup(const DatabaseAndTableName & table_name) const
|
||||
{
|
||||
String create_query_path = getMetadataPathInBackup(table_name);
|
||||
if (!backup->fileExists(create_query_path))
|
||||
throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore the {} because there is no such table in the backup",
|
||||
formatTableNameOrTemporaryTableName(table_name));
|
||||
auto read_buffer = backup->readFile(create_query_path)->getReadBuffer();
|
||||
String create_query_str;
|
||||
readStringUntilEOF(create_query_str, *read_buffer);
|
||||
read_buffer.reset();
|
||||
ParserCreateQuery create_parser;
|
||||
return typeid_cast<std::shared_ptr<ASTCreateQuery>>(parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH));
|
||||
}
|
||||
|
||||
/// Reads a create query for creating a specified database from the backup.
|
||||
std::shared_ptr<ASTCreateQuery> readCreateQueryFromBackup(const String & database_name) const
|
||||
{
|
||||
String create_query_path = getMetadataPathInBackup(database_name);
|
||||
if (!backup->fileExists(create_query_path))
|
||||
throw Exception(ErrorCodes::CANNOT_RESTORE_DATABASE, "Cannot restore the database {} because there is no such database in the backup", backQuoteIfNeed(database_name));
|
||||
auto read_buffer = backup->readFile(create_query_path)->getReadBuffer();
|
||||
String create_query_str;
|
||||
readStringUntilEOF(create_query_str, *read_buffer);
|
||||
read_buffer.reset();
|
||||
ParserCreateQuery create_parser;
|
||||
return typeid_cast<std::shared_ptr<ASTCreateQuery>>(parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH));
|
||||
}
|
||||
|
||||
/// Whether there is a create query for creating a specified database in the backup.
|
||||
bool hasCreateQueryInBackup(const String & database_name) const
|
||||
{
|
||||
String create_query_path = getMetadataPathInBackup(database_name);
|
||||
return backup->fileExists(create_query_path);
|
||||
}
|
||||
|
||||
/// Do renaming in the create query according to the renaming config.
|
||||
std::shared_ptr<ASTCreateQuery> renameInCreateQuery(const ASTPtr & ast) const
|
||||
{
|
||||
ASTPtr query = ast;
|
||||
::DB::renameInCreateQuery(query, context, renaming_settings);
|
||||
auto create_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(query);
|
||||
return create_query;
|
||||
}
|
||||
|
||||
static bool isSystemOrTemporaryDatabase(const String & database_name)
|
||||
{
|
||||
return (database_name == DatabaseCatalog::SYSTEM_DATABASE) || (database_name == DatabaseCatalog::TEMPORARY_DATABASE);
|
||||
}
|
||||
|
||||
/// Information which is used to make an instance of RestoreTableTask.
|
||||
struct CreateTableInfo
|
||||
{
|
||||
ASTPtr create_query;
|
||||
DatabaseAndTableName name_in_backup;
|
||||
ASTs partitions;
|
||||
};
|
||||
|
||||
/// Information which is used to make an instance of RestoreDatabaseTask.
|
||||
struct CreateDatabaseInfo
|
||||
{
|
||||
ASTPtr create_query;
|
||||
String name_in_backup;
|
||||
|
||||
/// Whether the creation of this database is specified explicitly, via RESTORE DATABASE or
|
||||
/// RESTORE ALL DATABASES.
|
||||
/// It's false if the creation of this database is caused by creating a table contained in it.
|
||||
bool is_explicit = false;
|
||||
|
||||
/// If this is set, it indicates an error:
|
||||
/// two different create queries were generated for an implicitly created database,
|
||||
/// so we cannot restore the database.
|
||||
ASTPtr different_create_query;
|
||||
};
|
||||
|
||||
ContextMutablePtr context;
|
||||
BackupPtr backup;
|
||||
RestoreSettings restore_settings;
|
||||
DDLRenamingSettings renaming_settings;
|
||||
std::map<String, CreateDatabaseInfo> databases;
|
||||
std::map<DatabaseAndTableName, CreateTableInfo> tables;
|
||||
};
|
||||
|
||||
|
||||
/// Reverts completed restore tasks (in reversed order).
|
||||
void rollbackRestoreTasks(RestoreTasks && restore_tasks)
|
||||
{
|
||||
for (auto & restore_task : restore_tasks | boost::adaptors::reversed)
|
||||
{
|
||||
try
|
||||
{
|
||||
std::move(restore_task)->rollback();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("Restore", "Couldn't rollback changes after failed RESTORE");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
RestoreTasks makeRestoreTasks(ContextMutablePtr context, const BackupPtr & backup, const Elements & elements, const RestoreSettings & restore_settings)
|
||||
{
|
||||
RestoreTasksBuilder builder{context, backup, restore_settings};
|
||||
builder.prepare(elements);
|
||||
return builder.makeTasks();
|
||||
}
|
||||
|
||||
|
||||
void executeRestoreTasks(RestoreTasks && restore_tasks, size_t num_threads)
|
||||
{
|
||||
if (!num_threads)
|
||||
num_threads = 1;
|
||||
|
||||
RestoreTasks completed_tasks;
|
||||
bool need_rollback_completed_tasks = true;
|
||||
|
||||
SCOPE_EXIT({
|
||||
if (need_rollback_completed_tasks)
|
||||
rollbackRestoreTasks(std::move(completed_tasks));
|
||||
});
|
||||
|
||||
std::deque<std::unique_ptr<IRestoreTask>> sequential_tasks;
|
||||
std::deque<std::unique_ptr<IRestoreTask>> enqueued_tasks;
|
||||
|
||||
/// There are two kinds of restore tasks: sequential and non-sequential ones.
|
||||
/// Sequential tasks are executed first and always in one thread.
|
||||
for (auto & task : restore_tasks)
|
||||
{
|
||||
if (task->isSequential())
|
||||
sequential_tasks.push_back(std::move(task));
|
||||
else
|
||||
enqueued_tasks.push_back(std::move(task));
|
||||
}
|
||||
|
||||
/// Sequential tasks.
|
||||
while (!sequential_tasks.empty())
|
||||
{
|
||||
auto current_task = std::move(sequential_tasks.front());
|
||||
sequential_tasks.pop_front();
|
||||
|
||||
RestoreTasks new_tasks = current_task->run();
|
||||
|
||||
completed_tasks.push_back(std::move(current_task));
|
||||
for (auto & task : new_tasks)
|
||||
{
|
||||
if (task->isSequential())
|
||||
sequential_tasks.push_back(std::move(task));
|
||||
else
|
||||
enqueued_tasks.push_back(std::move(task));
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-sequential tasks.
|
||||
std::unordered_map<IRestoreTask *, std::unique_ptr<IRestoreTask>> running_tasks;
|
||||
std::vector<ThreadFromGlobalPool> threads;
|
||||
std::mutex mutex;
|
||||
std::condition_variable cond;
|
||||
std::exception_ptr exception;
|
||||
|
||||
while (true)
|
||||
{
|
||||
IRestoreTask * current_task = nullptr;
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
cond.wait(lock, [&]
|
||||
{
|
||||
if (exception)
|
||||
return true;
|
||||
if (enqueued_tasks.empty())
|
||||
return running_tasks.empty();
|
||||
return (running_tasks.size() < num_threads);
|
||||
});
|
||||
|
||||
if (exception || enqueued_tasks.empty())
|
||||
break;
|
||||
|
||||
auto current_task_ptr = std::move(enqueued_tasks.front());
|
||||
current_task = current_task_ptr.get();
|
||||
enqueued_tasks.pop_front();
|
||||
running_tasks[current_task] = std::move(current_task_ptr);
|
||||
}
|
||||
|
||||
assert(current_task);
|
||||
threads.emplace_back([current_task, &mutex, &cond, &enqueued_tasks, &running_tasks, &completed_tasks, &exception]() mutable
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (exception)
|
||||
return;
|
||||
}
|
||||
|
||||
RestoreTasks new_tasks;
|
||||
std::exception_ptr new_exception;
|
||||
try
|
||||
{
|
||||
new_tasks = current_task->run();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
new_exception = std::current_exception();
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto current_task_it = running_tasks.find(current_task);
|
||||
auto current_task_ptr = std::move(current_task_it->second);
|
||||
running_tasks.erase(current_task_it);
|
||||
|
||||
if (!new_exception)
|
||||
{
|
||||
completed_tasks.push_back(std::move(current_task_ptr));
|
||||
enqueued_tasks.insert(
|
||||
enqueued_tasks.end(), std::make_move_iterator(new_tasks.begin()), std::make_move_iterator(new_tasks.end()));
|
||||
}
|
||||
|
||||
if (!exception)
|
||||
exception = new_exception;
|
||||
|
||||
cond.notify_all();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto & thread : threads)
|
||||
thread.join();
|
||||
|
||||
if (exception)
|
||||
std::rethrow_exception(exception);
|
||||
else
|
||||
need_rollback_completed_tasks = false;
|
||||
}
|
||||
|
||||
}
|
src/Backups/RestoreUtils.h (new file, 24 lines)
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IBackup;
|
||||
using BackupPtr = std::shared_ptr<const IBackup>;
|
||||
class IRestoreTask;
|
||||
using RestoreTaskPtr = std::unique_ptr<IRestoreTask>;
|
||||
using RestoreTasks = std::vector<RestoreTaskPtr>;
|
||||
struct RestoreSettings;
|
||||
class Context;
|
||||
using ContextMutablePtr = std::shared_ptr<Context>;
|
||||
|
||||
/// Prepares restore tasks.
|
||||
RestoreTasks makeRestoreTasks(ContextMutablePtr context, const BackupPtr & backup, const ASTBackupQuery::Elements & elements, const RestoreSettings & restore_settings);
|
||||
|
||||
/// Executes restore tasks.
|
||||
void executeRestoreTasks(RestoreTasks && tasks, size_t num_threads);
|
||||
|
||||
}
|
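For completeness, a hypothetical call site that ties the two functions above together. The function name and thread count are arbitrary; the context, backup, query elements and settings are assumed to be constructed elsewhere, and only the declarations from RestoreUtils.h and RestoreSettings.h are used.

#include <Backups/RestoreSettings.h>
#include <Backups/RestoreUtils.h>

namespace DB
{
void restoreFromBackup(
    ContextMutablePtr context,
    const BackupPtr & backup,
    const ASTBackupQuery::Elements & elements,
    const RestoreSettings & settings)
{
    RestoreTasks tasks = makeRestoreTasks(context, backup, elements, settings);
    executeRestoreTasks(std::move(tasks), /* num_threads = */ 4);
}
}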
Some files were not shown because too many files have changed in this diff.