Merge branch 'master' into clickhouse-keeper

This commit is contained in:
alesapin 2022-03-25 11:22:57 +01:00
commit c95a9971bc
959 changed files with 99268 additions and 46814 deletions

View File

@ -7,6 +7,7 @@ env:
"on": "on":
schedule: schedule:
- cron: '13 3 * * *' - cron: '13 3 * * *'
workflow_dispatch:
jobs: jobs:
DockerHubPushAarch64: DockerHubPushAarch64:

View File

@ -1733,6 +1733,51 @@ jobs:
docker kill "$(docker ps -q)" ||: docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||: docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" sudo rm -fr "$TEMP_PATH"
# TestsBugfixCheck: runs on a self-hosted "stress-tester" runner.
# It downloads the artifacts into REPORTS_PATH, checks out a fresh copy of the
# repository, and runs the integration and stateless test checks with
# --validate-bugfix. Finally it passes both post_commit_status.tsv files to
# bugfix_validate_check.py. The Cleanup step always runs: it kills and removes
# docker containers and deletes TEMP_PATH.
TestsBugfixCheck:
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/tests_bugfix_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Tests bugfix validate check (actions)
KILL_TIMEOUT=3600
REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
# Both test runs are followed by `|| echo 'ignore exit code'`, so a failing run
# does not fail the step; each is invoked with --post-commit-status=file, and
# the last command receives the two post_commit_status.tsv paths.
# NOTE(review): REPO_COPY assumes the workspace directory is named "ClickHouse",
# since `cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"` keeps the directory name - confirm.
- name: Bugfix test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
TEMP_PATH="${TEMP_PATH}/integration" \
REPORTS_PATH="${REPORTS_PATH}/integration" \
python3 integration_test_check.py "Integration tests bugfix validate check" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
TEMP_PATH="${TEMP_PATH}/stateless" \
REPORTS_PATH="${REPORTS_PATH}/stateless" \
python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################## ##############################################################################################
############################ FUNCTIONAL STATEFUL TESTS ####################################### ############################ FUNCTIONAL STATEFUL TESTS #######################################
############################################################################################## ##############################################################################################

View File

@ -11,6 +11,7 @@
* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). * Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). * Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave the same as the `toIPv4`, `toIPv6` functions. Changed the behavior of the functions `toIPv4`, `toIPv6` for incorrect IP addresses: now, if an invalid IP address is passed into these functions, an exception is raised; previously they returned a default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringToNumOrNull`, `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault`, `toIPv4OrDefault`, `toIPv6OrDefault` should be used if the previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning a default value for an invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`; if this setting is enabled, the IP address conversion functions behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
#### New Feature #### New Feature

View File

@ -266,7 +266,7 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
endif () endif ()
# Allows to build stripped binary in a separate directory # Allows to build stripped binary in a separate directory
if (OBJCOPY_PATH AND READELF_PATH) if (OBJCOPY_PATH AND STRIP_PATH)
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF) option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
if (INSTALL_STRIPPED_BINARIES) if (INSTALL_STRIPPED_BINARIES)
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information") set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")

View File

@ -4,11 +4,12 @@
import sys import sys
import json import json
def parse_block(block=[], options=[]): def parse_block(block=[], options=[]):
#print('block is here', block) # print('block is here', block)
#show_query = False # show_query = False
#show_query = options.show_query # show_query = options.show_query
result = [] result = []
query = block[0].strip() query = block[0].strip()
if len(block) > 4: if len(block) > 4:
@ -20,9 +21,9 @@ def parse_block(block=[], options=[]):
timing2 = block[2].strip().split()[1] timing2 = block[2].strip().split()[1]
timing3 = block[3].strip().split()[1] timing3 = block[3].strip().split()[1]
if options.show_queries: if options.show_queries:
result.append( query ) result.append(query)
if not options.show_first_timings: if not options.show_first_timings:
result += [ timing1 , timing2, timing3 ] result += [timing1, timing2, timing3]
else: else:
result.append(timing1) result.append(timing1)
return result return result
@ -37,12 +38,12 @@ def read_stats_file(options, fname):
for line in f.readlines(): for line in f.readlines():
if 'SELECT' in line: if "SELECT" in line:
if len(block) > 1: if len(block) > 1:
result.append( parse_block(block, options) ) result.append(parse_block(block, options))
block = [ line ] block = [line]
elif 'Time:' in line: elif "Time:" in line:
block.append( line ) block.append(line)
return result return result
@ -50,7 +51,7 @@ def read_stats_file(options, fname):
def compare_stats_files(options, arguments): def compare_stats_files(options, arguments):
result = [] result = []
file_output = [] file_output = []
pyplot_colors = ['y', 'b', 'g', 'r'] pyplot_colors = ["y", "b", "g", "r"]
for fname in arguments[1:]: for fname in arguments[1:]:
file_output.append((read_stats_file(options, fname))) file_output.append((read_stats_file(options, fname)))
if len(file_output[0]) > 0: if len(file_output[0]) > 0:
@ -58,65 +59,92 @@ def compare_stats_files(options, arguments):
for idx, data_set in enumerate(file_output): for idx, data_set in enumerate(file_output):
int_result = [] int_result = []
for timing in data_set: for timing in data_set:
int_result.append(float(timing[0])) #y values int_result.append(float(timing[0])) # y values
result.append([[x for x in range(0, len(int_result)) ], int_result, result.append(
pyplot_colors[idx] + '^' ] ) [
# result.append([x for x in range(1, len(int_result)) ]) #x values [x for x in range(0, len(int_result))],
# result.append( pyplot_colors[idx] + '^' ) int_result,
pyplot_colors[idx] + "^",
]
)
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )
return result return result
def parse_args(): def parse_args():
from optparse import OptionParser from optparse import OptionParser
parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries") parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings") parser.add_option(
parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode") "-q",
"--show-queries",
help="Show statements along with timings",
action="store_true",
dest="show_queries",
)
parser.add_option(
"-f",
"--show-first-timings",
help="Show only first tries timings",
action="store_true",
dest="show_first_timings",
)
parser.add_option(
"-c",
"--compare-mode",
help="Prepare output for pyplot comparing result files.",
action="store",
dest="compare_mode",
)
(options, arguments) = parser.parse_args(sys.argv) (options, arguments) = parser.parse_args(sys.argv)
if len(arguments) < 2: if len(arguments) < 2:
parser.print_usage() parser.print_usage()
sys.exit(1) sys.exit(1)
return ( options, arguments ) return (options, arguments)
def gen_pyplot_code(options, arguments): def gen_pyplot_code(options, arguments):
result = '' result = ""
data_sets = compare_stats_files(options, arguments) data_sets = compare_stats_files(options, arguments)
for idx, data_set in enumerate(data_sets, start=0): for idx, data_set in enumerate(data_sets, start=0):
x_values, y_values, line_style = data_set x_values, y_values, line_style = data_set
result += '\nplt.plot(' result += "\nplt.plot("
result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style ) result += "%s, %s, '%s'" % (x_values, y_values, line_style)
result += ', label=\'%s try\')' % idx result += ", label='%s try')" % idx
print('import matplotlib.pyplot as plt') print("import matplotlib.pyplot as plt")
print(result) print(result)
print( 'plt.xlabel(\'Try number\')' ) print("plt.xlabel('Try number')")
print( 'plt.ylabel(\'Timing\')' ) print("plt.ylabel('Timing')")
print( 'plt.title(\'Benchmark query timings\')' ) print("plt.title('Benchmark query timings')")
print('plt.legend()') print("plt.legend()")
print('plt.show()') print("plt.show()")
def gen_html_json(options, arguments): def gen_html_json(options, arguments):
tuples = read_stats_file(options, arguments[1]) tuples = read_stats_file(options, arguments[1])
print('{') print("{")
print('"system: GreenPlum(x2),') print('"system: GreenPlum(x2),')
print(('"version": "%s",' % '4.3.9.1')) print(('"version": "%s",' % "4.3.9.1"))
print('"data_size": 10000000,') print('"data_size": 10000000,')
print('"time": "",') print('"time": "",')
print('"comments": "",') print('"comments": "",')
print('"result":') print('"result":')
print('[') print("[")
for s in tuples: for s in tuples:
print(s) print(s)
print(']') print("]")
print('}') print("}")
def main(): def main():
( options, arguments ) = parse_args() (options, arguments) = parse_args()
if len(arguments) > 2: if len(arguments) > 2:
gen_pyplot_code(options, arguments) gen_pyplot_code(options, arguments)
else: else:
gen_html_json(options, arguments) gen_html_json(options, arguments)
if __name__ == '__main__':
if __name__ == "__main__":
main() main()

View File

@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Split a binary into a stripped executable and a separate debug-info file.
#
# Usage: strip.sh BINARY_PATH DESTINATION_STRIPPED_DIR [OBJCOPY_PATH] [READELF_PATH]
#
# Output, relative to DESTINATION_STRIPPED_DIR:
#   bin/<binary name>                        stripped copy of the binary, with a
#                                            .gnu_debuglink pointing at the debug file
#   lib/debug/.build-id/<xx>/<rest>.debug    compressed debug info; <xx> is the first
#                                            two characters of the ELF Build ID and
#                                            <rest> is the remainder

BINARY_PATH=$1
BINARY_NAME=$(basename "$BINARY_PATH")
DESTINATION_STRIPPED_DIR=$2
# Use ':-' so the tool names are real defaults. The previous '${3:objcopy}' form
# is a substring expansion and yields an empty string when the argument is omitted.
OBJCOPY_PATH=${3:-objcopy}
READELF_PATH=${4:-readelf}

# The Build ID is taken from the first "Build ID" line of the ELF notes.
BUILD_ID=$("$READELF_PATH" -n "$BINARY_PATH" | sed -n '/Build ID/ { s/.*: //p; q; }')
if [ -z "$BUILD_ID" ]; then
    # Without a Build ID the debug file would be written as ".build-id//.debug".
    echo "Cannot determine Build ID of '$BINARY_PATH'" >&2
    exit 1
fi
BUILD_ID_PREFIX=${BUILD_ID:0:2}
BUILD_ID_SUFFIX=${BUILD_ID:2}

DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
DESTINATION_DEBUG_FILE="$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"

mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"

cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

# Extract the debug sections before stripping the copy.
"$OBJCOPY_PATH" --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_FILE"
chmod 0644 "$DESTINATION_DEBUG_FILE"
chown 0:0 "$DESTINATION_DEBUG_FILE"

strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

# Record the debug file's location in the stripped binary.
"$OBJCOPY_PATH" --add-gnu-debuglink "$DESTINATION_DEBUG_FILE" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

View File

@ -11,16 +11,43 @@ macro(clickhouse_strip_binary)
message(FATAL_ERROR "A binary path name must be provided for stripping binary") message(FATAL_ERROR "A binary path name must be provided for stripping binary")
endif() endif()
if (NOT DEFINED STRIP_DESTINATION_DIR) if (NOT DEFINED STRIP_DESTINATION_DIR)
message(FATAL_ERROR "Destination directory for stripped binary must be provided") message(FATAL_ERROR "Destination directory for stripped binary must be provided")
endif() endif()
add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH} COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
COMMENT "Stripping clickhouse binary" VERBATIM COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM
) )
install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse)
endmacro()
# clickhouse_make_empty_debug_info_for_nfpm(TARGET <target> DESTINATION_DIR <dir>)
#
# After <target> is built, creates an empty file
# <dir>/lib/debug/<target>.debug and installs it into
# ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR} (component "clickhouse").
# NOTE(review): presumably this gives the nfpm packaging a debug file to pick up
# when binaries are not stripped - confirm against the packaging config.
macro(clickhouse_make_empty_debug_info_for_nfpm)
    set(oneValueArgs TARGET DESTINATION_DIR)
    cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN})

    if (NOT DEFINED EMPTY_DEBUG_TARGET)
        message(FATAL_ERROR "A target name must be provided for creating empty debug info")
    endif()

    if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR)
        message(FATAL_ERROR "Destination directory for empty debug must be provided")
    endif()

    add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
        COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
        COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
        COMMENT "Adding empty debug info for NFPM" VERBATIM
    )

    install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)
endmacro()

View File

@ -170,32 +170,32 @@ else ()
message (FATAL_ERROR "Cannot find objcopy.") message (FATAL_ERROR "Cannot find objcopy.")
endif () endif ()
# Readelf (FIXME copypaste) # Strip (FIXME copypaste)
if (COMPILER_GCC) if (COMPILER_GCC)
find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf") find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip")
else () else ()
find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf") find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
endif () endif ()
if (NOT READELF_PATH AND OS_DARWIN) if (NOT STRIP_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew") find_program (BREW_PATH NAMES "brew")
if (BREW_PATH) if (BREW_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX) execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
if (LLVM_PREFIX) if (LLVM_PREFIX)
find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
endif () endif ()
if (NOT READELF_PATH) if (NOT STRIP_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX) execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
if (BINUTILS_PREFIX) if (BINUTILS_PREFIX)
find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
endif () endif ()
endif () endif ()
endif () endif ()
endif () endif ()
if (READELF_PATH) if (STRIP_PATH)
message (STATUS "Using readelf: ${READELF_PATH}") message (STATUS "Using strip: ${STRIP_PATH}")
else () else ()
message (FATAL_ERROR "Cannot find readelf.") message (FATAL_ERROR "Cannot find strip.")
endif () endif ()

2
contrib/libxml2 vendored

@ -1 +1 @@
Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca

View File

@ -1,4 +1,3 @@
# rebuild in #33610
# docker build -t clickhouse/docs-check . # docker build -t clickhouse/docs-check .
ARG FROM_TAG=latest ARG FROM_TAG=latest
FROM clickhouse/docs-builder:$FROM_TAG FROM clickhouse/docs-builder:$FROM_TAG

View File

@ -11,7 +11,7 @@ def removesuffix(text, suffix):
https://www.python.org/dev/peps/pep-0616/ https://www.python.org/dev/peps/pep-0616/
""" """
if suffix and text.endswith(suffix): if suffix and text.endswith(suffix):
return text[:-len(suffix)] return text[: -len(suffix)]
else: else:
return text[:] return text[:]

View File

@ -3,55 +3,55 @@ import subprocess
import datetime import datetime
from flask import Flask, flash, request, redirect, url_for from flask import Flask, flash, request, redirect, url_for
def run_command(command, wait=False): def run_command(command, wait=False):
print("{} - execute shell command:{}".format(datetime.datetime.now(), command)) print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
lines = [] lines = []
p = subprocess.Popen(command, p = subprocess.Popen(
stdout=subprocess.PIPE, command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
stderr=subprocess.STDOUT, )
shell=True)
if wait: if wait:
for l in iter(p.stdout.readline, b''): for l in iter(p.stdout.readline, b""):
lines.append(l) lines.append(l)
p.poll() p.poll()
return (lines, p.returncode) return (lines, p.returncode)
else: else:
return(iter(p.stdout.readline, b''), 0) return (iter(p.stdout.readline, b""), 0)
UPLOAD_FOLDER = './' UPLOAD_FOLDER = "./"
ALLOWED_EXTENSIONS = {'txt', 'sh'} ALLOWED_EXTENSIONS = {"txt", "sh"}
app = Flask(__name__) app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
@app.route('/')
@app.route("/")
def hello_world(): def hello_world():
return 'Hello World' return "Hello World"
def allowed_file(filename): def allowed_file(filename):
return '.' in filename and \ return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
@app.route('/upload', methods=['GET', 'POST']) @app.route("/upload", methods=["GET", "POST"])
def upload_file(): def upload_file():
if request.method == 'POST': if request.method == "POST":
# check if the post request has the file part # check if the post request has the file part
if 'file' not in request.files: if "file" not in request.files:
flash('No file part') flash("No file part")
return redirect(request.url) return redirect(request.url)
file = request.files['file'] file = request.files["file"]
# If the user does not select a file, the browser submits an # If the user does not select a file, the browser submits an
# empty file without a filename. # empty file without a filename.
if file.filename == '': if file.filename == "":
flash('No selected file') flash("No selected file")
return redirect(request.url) return redirect(request.url)
if file and allowed_file(file.filename): if file and allowed_file(file.filename):
filename = file.filename filename = file.filename
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
return redirect(url_for('upload_file', name=filename)) return redirect(url_for("upload_file", name=filename))
return ''' return """
<!doctype html> <!doctype html>
<title>Upload new File</title> <title>Upload new File</title>
<h1>Upload new File</h1> <h1>Upload new File</h1>
@ -59,12 +59,15 @@ def upload_file():
<input type=file name=file> <input type=file name=file>
<input type=submit value=Upload> <input type=submit value=Upload>
</form> </form>
''' """
@app.route('/run', methods=['GET', 'POST'])
@app.route("/run", methods=["GET", "POST"])
def parse_request(): def parse_request():
data = request.data # data is empty data = request.data # data is empty
run_command(data, wait=True) run_command(data, wait=True)
return 'Ok' return "Ok"
if __name__ == '__main__':
app.run(port=5011) if __name__ == "__main__":
app.run(port=5011)

View File

@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
from threading import Thread from threading import Thread
from scipy import stats from scipy import stats
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING') logging.basicConfig(
format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
)
total_start_seconds = time.perf_counter() total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds stage_start_seconds = total_start_seconds
def reportStageEnd(stage): def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds global stage_start_seconds, total_start_seconds
current = time.perf_counter() current = time.perf_counter()
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}') print(
f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
)
stage_start_seconds = current stage_start_seconds = current
def tsv_escape(s): def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') return (
s.replace("\\", "\\\\")
.replace("\t", "\\t")
.replace("\n", "\\n")
.replace("\r", "")
)
parser = argparse.ArgumentParser(description='Run performance test.') parser = argparse.ArgumentParser(description="Run performance test.")
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set. # Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') parser.add_argument(
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.") "file",
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.") metavar="FILE",
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.') type=argparse.FileType("r", encoding="utf-8"),
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.') nargs=1,
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.') help="test description file",
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.') )
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.') parser.add_argument(
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.') "--host",
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') nargs="*",
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') default=["localhost"],
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.') help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.") )
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.") parser.add_argument(
"--port",
nargs="*",
default=[9000],
help="Space-separated list of server port(s). Corresponds to '--host' options.",
)
parser.add_argument(
"--runs", type=int, default=1, help="Number of query runs per server."
)
parser.add_argument(
"--max-queries",
type=int,
default=None,
help="Test no more than this number of queries, chosen at random.",
)
parser.add_argument(
"--queries-to-run",
nargs="*",
type=int,
default=None,
help="Space-separated list of indexes of queries to test.",
)
parser.add_argument(
"--max-query-seconds",
type=int,
default=15,
help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--prewarm-max-query-seconds",
type=int,
default=180,
help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--profile-seconds",
type=int,
default=0,
help="For how many seconds to profile a query for which the performance has changed.",
)
parser.add_argument(
"--long", action="store_true", help="Do not skip the tests tagged as long."
)
parser.add_argument(
"--print-queries", action="store_true", help="Print test queries and exit."
)
parser.add_argument(
"--print-settings", action="store_true", help="Print test settings and exit."
)
parser.add_argument(
"--keep-created-tables",
action="store_true",
help="Don't drop the created tables after the test.",
)
parser.add_argument(
"--use-existing-tables",
action="store_true",
help="Don't create or drop the tables, use the existing ones instead.",
)
args = parser.parse_args() args = parser.parse_args()
reportStageEnd('start') reportStageEnd("start")
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0] test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0]) tree = et.parse(args.file[0])
root = tree.getroot() root = tree.getroot()
reportStageEnd('parse') reportStageEnd("parse")
# Process query parameters # Process query parameters
subst_elems = root.findall('substitutions/substitution') subst_elems = root.findall("substitutions/substitution")
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems: for e in subst_elems:
name = e.find('name').text name = e.find("name").text
values = [v.text for v in e.findall('values/value')] values = [v.text for v in e.findall("values/value")]
if not values: if not values:
raise Exception(f'No values given for substitution {{{name}}}') raise Exception(f"No values given for substitution {{{name}}}")
available_parameters[name] = values available_parameters[name] = values
@ -78,7 +146,7 @@ for e in subst_elems:
# parameters. The set of parameters is determined based on the first list. # parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS # Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters. # followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates, other_templates = []): def substitute_parameters(query_templates, other_templates=[]):
query_results = [] query_results = []
other_results = [[]] * (len(other_templates)) other_results = [[]] * (len(other_templates))
for i, q in enumerate(query_templates): for i, q in enumerate(query_templates):
@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
# and reporting the queries marked as short. # and reporting the queries marked as short.
test_queries = [] test_queries = []
is_short = [] is_short = []
for e in root.findall('query'): for e in root.findall("query"):
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]]) new_queries, [new_is_short] = substitute_parameters(
[e.text], [[e.attrib.get("short", "0")]]
)
test_queries += new_queries test_queries += new_queries
is_short += [eval(s) for s in new_is_short] is_short += [eval(s) for s in new_is_short]
assert(len(test_queries) == len(is_short)) assert len(test_queries) == len(is_short)
# If we're given a list of queries to run, check that it makes sense. # If we're given a list of queries to run, check that it makes sense.
for i in args.queries_to_run or []: for i in args.queries_to_run or []:
if i < 0 or i >= len(test_queries): if i < 0 or i >= len(test_queries):
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present') print(
f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
)
exit(1) exit(1)
# If we're only asked to print the queries, do that and exit. # If we're only asked to print the queries, do that and exit.
@ -125,60 +197,65 @@ if args.print_queries:
# Print short queries # Print short queries
for i, s in enumerate(is_short): for i, s in enumerate(is_short):
if s: if s:
print(f'short\t{i}') print(f"short\t{i}")
# If we're only asked to print the settings, do that and exit. These are settings # If we're only asked to print the settings, do that and exit. These are settings
# for clickhouse-benchmark, so we print them as command line arguments, e.g. # for clickhouse-benchmark, so we print them as command line arguments, e.g.
# '--max_memory_usage=10000000'. # '--max_memory_usage=10000000'.
if args.print_settings: if args.print_settings:
for s in root.findall('settings/*'): for s in root.findall("settings/*"):
print(f'--{s.tag}={s.text}') print(f"--{s.tag}={s.text}")
exit(0) exit(0)
# Skip long tests # Skip long tests
if not args.long: if not args.long:
for tag in root.findall('.//tag'): for tag in root.findall(".//tag"):
if tag.text == 'long': if tag.text == "long":
print('skipped\tTest is tagged as long.') print("skipped\tTest is tagged as long.")
sys.exit(0) sys.exit(0)
# Print report threshold for the test if it is set. # Print report threshold for the test if it is set.
ignored_relative_change = 0.05 ignored_relative_change = 0.05
if 'max_ignored_relative_change' in root.attrib: if "max_ignored_relative_change" in root.attrib:
ignored_relative_change = float(root.attrib["max_ignored_relative_change"]) ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
print(f'report-threshold\t{ignored_relative_change}') print(f"report-threshold\t{ignored_relative_change}")
reportStageEnd('before-connect') reportStageEnd("before-connect")
# Open connections # Open connections
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)] servers = [
{"host": host or args.host[0], "port": port or args.port[0]}
for (host, port) in itertools.zip_longest(args.host, args.port)
]
# Force settings_is_important to fail queries on unknown settings. # Force settings_is_important to fail queries on unknown settings.
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers] all_connections = [
clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
]
for i, s in enumerate(servers): for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}') print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
reportStageEnd('connect') reportStageEnd("connect")
if not args.use_existing_tables: if not args.use_existing_tables:
# Run drop queries, ignoring errors. Do this before all other activity, # Run drop queries, ignoring errors. Do this before all other activity,
# because clickhouse_driver disconnects on error (this is not configurable), # because clickhouse_driver disconnects on error (this is not configurable),
# and the new connection loses the changes in settings. # and the new connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')] drop_query_templates = [q.text for q in root.findall("drop_query")]
drop_queries = substitute_parameters(drop_query_templates) drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections): for conn_index, c in enumerate(all_connections):
for q in drop_queries: for q in drop_queries:
try: try:
c.execute(q) c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
except: except:
pass pass
reportStageEnd('drop-1') reportStageEnd("drop-1")
# Apply settings. # Apply settings.
settings = root.findall('settings/*') settings = root.findall("settings/*")
for conn_index, c in enumerate(all_connections): for conn_index, c in enumerate(all_connections):
for s in settings: for s in settings:
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings # requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
# the test, which is wrong. # the test, which is wrong.
c.execute("select 1") c.execute("select 1")
reportStageEnd('settings') reportStageEnd("settings")
# Check tables that should exist. If they don't exist, just skip this test. # Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')] tables = [e.text for e in root.findall("preconditions/table_exists")]
for t in tables: for t in tables:
for c in all_connections: for c in all_connections:
try: try:
res = c.execute("select 1 from {} limit 1".format(t)) res = c.execute("select 1 from {} limit 1".format(t))
except: except:
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1] exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2]) skipped_message = " ".join(exception_message.split("\n")[:2])
print(f'skipped\t{tsv_escape(skipped_message)}') print(f"skipped\t{tsv_escape(skipped_message)}")
sys.exit(0) sys.exit(0)
reportStageEnd('preconditions') reportStageEnd("preconditions")
if not args.use_existing_tables: if not args.use_existing_tables:
# Run create and fill queries. We will run them simultaneously for both # Run create and fill queries. We will run them simultaneously for both
# servers, to save time. The weird XML search + filter is because we want to # servers, to save time. The weird XML search + filter is because we want to
# keep the relative order of elements, and etree doesn't support the # keep the relative order of elements, and etree doesn't support the
# appropriate xpath query. # appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*') create_query_templates = [
if q.tag in ('create_query', 'fill_query')] q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
]
create_queries = substitute_parameters(create_query_templates) create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on # Disallow temporary tables, because the clickhouse_driver reconnects on
# errors, and temporary tables are destroyed. We want to be able to continue # errors, and temporary tables are destroyed. We want to be able to continue
# after some errors. # after some errors.
for q in create_queries: for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE): if re.search("create temporary table", q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'", print(
file = sys.stderr) f"Temporary tables are not allowed in performance tests: '{q}'",
file=sys.stderr,
)
sys.exit(1) sys.exit(1)
def do_create(connection, index, queries): def do_create(connection, index, queries):
for q in queries: for q in queries:
connection.execute(q) connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}') print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")
threads = [ threads = [
Thread(target = do_create, args = (connection, index, create_queries)) Thread(target=do_create, args=(connection, index, create_queries))
for index, connection in enumerate(all_connections)] for index, connection in enumerate(all_connections)
]
for t in threads: for t in threads:
t.start() t.start()
@ -238,14 +319,16 @@ if not args.use_existing_tables:
for t in threads: for t in threads:
t.join() t.join()
reportStageEnd('create') reportStageEnd("create")
# By default, test all queries. # By default, test all queries.
queries_to_run = range(0, len(test_queries)) queries_to_run = range(0, len(test_queries))
if args.max_queries: if args.max_queries:
# If specified, test a limited number of queries chosen at random. # If specified, test a limited number of queries chosen at random.
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries)) queries_to_run = random.sample(
range(0, len(test_queries)), min(len(test_queries), args.max_queries)
)
if args.queries_to_run: if args.queries_to_run:
# Run the specified queries. # Run the specified queries.
@ -255,16 +338,16 @@ if args.queries_to_run:
profile_total_seconds = 0 profile_total_seconds = 0
for query_index in queries_to_run: for query_index in queries_to_run:
q = test_queries[query_index] q = test_queries[query_index]
query_prefix = f'{test_name}.query{query_index}' query_prefix = f"{test_name}.query{query_index}"
# We have some crazy long queries (about 100kB), so trim them to a sane # We have some crazy long queries (about 100kB), so trim them to a sane
# length. This means we can't use query text as an identifier and have to # length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index. # use the test name + the test-wide query index.
query_display_name = q query_display_name = q
if len(query_display_name) > 1000: if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({query_index})' query_display_name = f"{query_display_name[:1000]}...({query_index})"
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}') print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")
# Prewarm: run once on both servers. Helps to bring the data into memory, # Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc. # precompile the queries, etc.
@ -272,10 +355,10 @@ for query_index in queries_to_run:
# new one. We want to run them on the new server only, so that the PR author # new one. We want to run them on the new server only, so that the PR author
# can ensure that the test works properly. Remember the errors we had on # can ensure that the test works properly. Remember the errors we had on
# each server. # each server.
query_error_on_connection = [None] * len(all_connections); query_error_on_connection = [None] * len(all_connections)
for conn_index, c in enumerate(all_connections): for conn_index, c in enumerate(all_connections):
try: try:
prewarm_id = f'{query_prefix}.prewarm0' prewarm_id = f"{query_prefix}.prewarm0"
try: try:
# During the warmup runs, we will also: # During the warmup runs, we will also:
@ -283,25 +366,30 @@ for query_index in queries_to_run:
# * collect profiler traces, which might be helpful for analyzing # * collect profiler traces, which might be helpful for analyzing
# test coverage. We disable profiler for normal runs because # test coverage. We disable profiler for normal runs because
# it makes the results unstable. # it makes the results unstable.
res = c.execute(q, query_id = prewarm_id, res = c.execute(
settings = { q,
'max_execution_time': args.prewarm_max_query_seconds, query_id=prewarm_id,
'query_profiler_real_time_period_ns': 10000000, settings={
'memory_profiler_step': '4Mi', "max_execution_time": args.prewarm_max_query_seconds,
}) "query_profiler_real_time_period_ns": 10000000,
"memory_profiler_step": "4Mi",
},
)
except clickhouse_driver.errors.Error as e: except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier. # Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args) e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message e.message = prewarm_id + ": " + e.message
raise raise
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') print(
f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except KeyboardInterrupt: except KeyboardInterrupt:
raise raise
except: except:
# FIXME the driver reconnects on error and we lose settings, so this # FIXME the driver reconnects on error and we lose settings, so this
# might lead to further errors or unexpected behavior. # might lead to further errors or unexpected behavior.
query_error_on_connection[conn_index] = traceback.format_exc(); query_error_on_connection[conn_index] = traceback.format_exc()
continue continue
# Report all errors that ocurred during prewarm and decide what to do next. # Report all errors that ocurred during prewarm and decide what to do next.
@ -311,14 +399,14 @@ for query_index in queries_to_run:
no_errors = [] no_errors = []
for i, e in enumerate(query_error_on_connection): for i, e in enumerate(query_error_on_connection):
if e: if e:
print(e, file = sys.stderr) print(e, file=sys.stderr)
else: else:
no_errors.append(i) no_errors.append(i)
if len(no_errors) == 0: if len(no_errors) == 0:
continue continue
elif len(no_errors) < len(all_connections): elif len(no_errors) < len(all_connections):
print(f'partial\t{query_index}\t{no_errors}') print(f"partial\t{query_index}\t{no_errors}")
this_query_connections = [all_connections[index] for index in no_errors] this_query_connections = [all_connections[index] for index in no_errors]
@ -337,27 +425,34 @@ for query_index in queries_to_run:
all_server_times.append([]) all_server_times.append([])
while True: while True:
run_id = f'{query_prefix}.run{run}' run_id = f"{query_prefix}.run{run}"
for conn_index, c in enumerate(this_query_connections): for conn_index, c in enumerate(this_query_connections):
try: try:
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds}) res = c.execute(
q,
query_id=run_id,
settings={"max_execution_time": args.max_query_seconds},
)
except clickhouse_driver.errors.Error as e: except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier. # Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args) e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message e.message = run_id + ": " + e.message
raise raise
elapsed = c.last_query.elapsed elapsed = c.last_query.elapsed
all_server_times[conn_index].append(elapsed) all_server_times[conn_index].append(elapsed)
server_seconds += elapsed server_seconds += elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")
if elapsed > args.max_query_seconds: if elapsed > args.max_query_seconds:
# Do not stop processing pathologically slow queries, # Do not stop processing pathologically slow queries,
# since this may hide errors in other queries. # since this may hide errors in other queries.
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr) print(
f"The query no. {query_index} is taking too long to run ({elapsed} s)",
file=sys.stderr,
)
# Be careful with the counter, after this line it's the next iteration # Be careful with the counter, after this line it's the next iteration
# already. # already.
@ -386,7 +481,7 @@ for query_index in queries_to_run:
break break
client_seconds = time.perf_counter() - start_seconds client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
# Run additional profiling queries to collect profile data, but only if test times appeared to be different. # Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much # We have to do it after normal runs because otherwise it will affect test statistics too much
@ -397,13 +492,15 @@ for query_index in queries_to_run:
# Don't fail if for some reason there are not enough measurements. # Don't fail if for some reason there are not enough measurements.
continue continue
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue pvalue = stats.ttest_ind(
all_server_times[0], all_server_times[1], equal_var=False
).pvalue
median = [statistics.median(t) for t in all_server_times] median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move # Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times" # to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)). # difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0] relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}') print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05: if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
continue continue
@ -412,25 +509,31 @@ for query_index in queries_to_run:
profile_start_seconds = time.perf_counter() profile_start_seconds = time.perf_counter()
run = 0 run = 0
while time.perf_counter() - profile_start_seconds < args.profile_seconds: while time.perf_counter() - profile_start_seconds < args.profile_seconds:
run_id = f'{query_prefix}.profile{run}' run_id = f"{query_prefix}.profile{run}"
for conn_index, c in enumerate(this_query_connections): for conn_index, c in enumerate(this_query_connections):
try: try:
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000}) res = c.execute(
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') q,
query_id=run_id,
settings={"query_profiler_real_time_period_ns": 10000000},
)
print(
f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except clickhouse_driver.errors.Error as e: except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier. # Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args) e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message e.message = run_id + ": " + e.message
raise raise
run += 1 run += 1
profile_total_seconds += time.perf_counter() - profile_start_seconds profile_total_seconds += time.perf_counter() - profile_start_seconds
print(f'profile-total\t{profile_total_seconds}') print(f"profile-total\t{profile_total_seconds}")
reportStageEnd('run') reportStageEnd("run")
# Run drop queries # Run drop queries
if not args.keep_created_tables and not args.use_existing_tables: if not args.keep_created_tables and not args.use_existing_tables:
@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
for conn_index, c in enumerate(all_connections): for conn_index, c in enumerate(all_connections):
for q in drop_queries: for q in drop_queries:
c.execute(q) c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
reportStageEnd('drop-2') reportStageEnd("drop-2")

View File

@ -12,9 +12,13 @@ import pprint
import sys import sys
import traceback import traceback
parser = argparse.ArgumentParser(description='Create performance test report') parser = argparse.ArgumentParser(description="Create performance test report")
parser.add_argument('--report', default='main', choices=['main', 'all-queries'], parser.add_argument(
help='Which report to build') "--report",
default="main",
choices=["main", "all-queries"],
help="Which report to build",
)
args = parser.parse_args() args = parser.parse_args()
tables = [] tables = []
@ -31,8 +35,8 @@ unstable_partial_queries = 0
# max seconds to run one query by itself, not counting preparation # max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2 allowed_single_run_time = 2
color_bad='#ffb0c0' color_bad = "#ffb0c0"
color_good='#b0d050' color_good = "#b0d050"
header_template = """ header_template = """
<!DOCTYPE html> <!DOCTYPE html>
@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
table_anchor = 0 table_anchor = 0
row_anchor = 0 row_anchor = 0
def currentTableAnchor(): def currentTableAnchor():
global table_anchor global table_anchor
return f'{table_anchor}' return f"{table_anchor}"
def newTableAnchor(): def newTableAnchor():
global table_anchor global table_anchor
table_anchor += 1 table_anchor += 1
return currentTableAnchor() return currentTableAnchor()
def currentRowAnchor(): def currentRowAnchor():
global row_anchor global row_anchor
global table_anchor global table_anchor
return f'{table_anchor}.{row_anchor}' return f"{table_anchor}.{row_anchor}"
def nextRowAnchor(): def nextRowAnchor():
global row_anchor global row_anchor
global table_anchor global table_anchor
return f'{table_anchor}.{row_anchor + 1}' return f"{table_anchor}.{row_anchor + 1}"
def advanceRowAnchor(): def advanceRowAnchor():
global row_anchor global row_anchor
@ -178,43 +187,58 @@ def advanceRowAnchor():
def tr(x, anchor=None): def tr(x, anchor=None):
#return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x)) # return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
anchor = anchor if anchor else advanceRowAnchor() anchor = anchor if anchor else advanceRowAnchor()
return f'<tr id={anchor}>{x}</tr>' return f"<tr id={anchor}>{x}</tr>"
def td(value, cell_attributes = ''):
return '<td {cell_attributes}>{value}</td>'.format(
cell_attributes = cell_attributes,
value = value)
def th(value, cell_attributes = ''): def td(value, cell_attributes=""):
return '<th {cell_attributes}>{value}</th>'.format( return "<td {cell_attributes}>{value}</td>".format(
cell_attributes = cell_attributes, cell_attributes=cell_attributes, value=value
value = value) )
def tableRow(cell_values, cell_attributes = [], anchor=None):
def th(value, cell_attributes=""):
return "<th {cell_attributes}>{value}</th>".format(
cell_attributes=cell_attributes, value=value
)
def tableRow(cell_values, cell_attributes=[], anchor=None):
return tr( return tr(
''.join([td(v, a) "".join(
for v, a in itertools.zip_longest( [
cell_values, cell_attributes, td(v, a)
fillvalue = '') for v, a in itertools.zip_longest(
if a is not None and v is not None]), cell_values, cell_attributes, fillvalue=""
anchor) )
if a is not None and v is not None
]
),
anchor,
)
def tableHeader(cell_values, cell_attributes = []):
def tableHeader(cell_values, cell_attributes=[]):
return tr( return tr(
''.join([th(v, a) "".join(
for v, a in itertools.zip_longest( [
cell_values, cell_attributes, th(v, a)
fillvalue = '') for v, a in itertools.zip_longest(
if a is not None and v is not None])) cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
)
)
def tableStart(title): def tableStart(title):
cls = '-'.join(title.lower().split(' ')[:3]); cls = "-".join(title.lower().split(" ")[:3])
global table_anchor global table_anchor
table_anchor = cls table_anchor = cls
anchor = currentTableAnchor() anchor = currentTableAnchor()
help_anchor = '-'.join(title.lower().split(' ')); help_anchor = "-".join(title.lower().split(" "))
return f""" return f"""
<h2 id="{anchor}"> <h2 id="{anchor}">
<a class="cancela" href="#{anchor}">{title}</a> <a class="cancela" href="#{anchor}">{title}</a>
@ -223,12 +247,14 @@ def tableStart(title):
<table class="{cls}"> <table class="{cls}">
""" """
def tableEnd(): def tableEnd():
return '</table>' return "</table>"
def tsvRows(n): def tsvRows(n):
try: try:
with open(n, encoding='utf-8') as fd: with open(n, encoding="utf-8") as fd:
result = [] result = []
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE): for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
new_row = [] new_row = []
@ -237,27 +263,32 @@ def tsvRows(n):
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so # The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
# 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'. # 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'.
new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8')) new_row.append(
e.encode("utf-8")
.decode("unicode-escape")
.encode("latin1")
.decode("utf-8")
)
result.append(new_row) result.append(new_row)
return result return result
except: except:
report_errors.append( report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass pass
return [] return []
def htmlRows(n): def htmlRows(n):
rawRows = tsvRows(n) rawRows = tsvRows(n)
result = '' result = ""
for row in rawRows: for row in rawRows:
result += tableRow(row) result += tableRow(row)
return result return result
def addSimpleTable(caption, columns, rows, pos=None): def addSimpleTable(caption, columns, rows, pos=None):
global tables global tables
text = '' text = ""
if not rows: if not rows:
return return
@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None):
text += tableEnd() text += tableEnd()
tables.insert(pos if pos else len(tables), text) tables.insert(pos if pos else len(tables), text)
def add_tested_commits(): def add_tested_commits():
global report_errors global report_errors
try: try:
addSimpleTable('Tested Commits', ['Old', 'New'], addSimpleTable(
[['<pre>{}</pre>'.format(x) for x in "Tested Commits",
[open('left-commit.txt').read(), ["Old", "New"],
open('right-commit.txt').read()]]]) [
[
"<pre>{}</pre>".format(x)
for x in [
open("left-commit.txt").read(),
open("right-commit.txt").read(),
]
]
],
)
except: except:
# Don't fail if no commit info -- maybe it's a manual run. # Don't fail if no commit info -- maybe it's a manual run.
report_errors.append( report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass pass
def add_report_errors(): def add_report_errors():
global tables global tables
global report_errors global report_errors
# Add the errors reported by various steps of comparison script # Add the errors reported by various steps of comparison script
try: try:
report_errors += [l.strip() for l in open('report/errors.log')] report_errors += [l.strip() for l in open("report/errors.log")]
except: except:
report_errors.append( report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass pass
if not report_errors: if not report_errors:
return return
text = tableStart('Errors while Building the Report') text = tableStart("Errors while Building the Report")
text += tableHeader(['Error']) text += tableHeader(["Error"])
for x in report_errors: for x in report_errors:
text += tableRow([x]) text += tableRow([x])
text += tableEnd() text += tableEnd()
# Insert after Tested Commits # Insert after Tested Commits
tables.insert(1, text) tables.insert(1, text)
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']); errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
]
)
def add_errors_explained(): def add_errors_explained():
if not errors_explained: if not errors_explained:
return return
text = '<a name="fail1"/>' text = '<a name="fail1"/>'
text += tableStart('Error Summary') text += tableStart("Error Summary")
text += tableHeader(['Description']) text += tableHeader(["Description"])
for row in errors_explained: for row in errors_explained:
text += tableRow(row) text += tableRow(row)
text += tableEnd() text += tableEnd()
@ -321,59 +364,81 @@ def add_errors_explained():
tables.insert(1, text) tables.insert(1, text)
if args.report == 'main': if args.report == "main":
print((header_template.format())) print((header_template.format()))
add_tested_commits() add_tested_commits()
run_error_rows = tsvRows("run-errors.tsv")
run_error_rows = tsvRows('run-errors.tsv')
error_tests += len(run_error_rows) error_tests += len(run_error_rows)
addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows) addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
if run_error_rows: if run_error_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']); errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
]
)
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows) error_tests += len(slow_on_client_rows)
addSimpleTable('Slow on Client', addSimpleTable(
['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'], "Slow on Client",
slow_on_client_rows) ["Client time,&nbsp;s", "Server time,&nbsp;s", "Ratio", "Test", "Query"],
slow_on_client_rows,
)
if slow_on_client_rows: if slow_on_client_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']); errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
]
)
unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv') unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
error_tests += len(unmarked_short_rows) error_tests += len(unmarked_short_rows)
addSimpleTable('Unexpected Query Duration', addSimpleTable(
['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'], "Unexpected Query Duration",
unmarked_short_rows) ["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
unmarked_short_rows,
)
if unmarked_short_rows: if unmarked_short_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>']); errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
]
)
def add_partial(): def add_partial():
rows = tsvRows('report/partial-queries-report.tsv') rows = tsvRows("report/partial-queries-report.tsv")
if not rows: if not rows:
return return
global unstable_partial_queries, slow_average_tests, tables global unstable_partial_queries, slow_average_tests, tables
text = tableStart('Partial Queries') text = tableStart("Partial Queries")
columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query'] columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
text += tableHeader(columns) text += tableHeader(columns)
attrs = ['' for c in columns] attrs = ["" for c in columns]
for row in rows: for row in rows:
anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}' anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
if float(row[1]) > 0.10: if float(row[1]) > 0.10:
attrs[1] = f'style="background: {color_bad}"' attrs[1] = f'style="background: {color_bad}"'
unstable_partial_queries += 1 unstable_partial_queries += 1
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>']) errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
]
)
else: else:
attrs[1] = '' attrs[1] = ""
if float(row[0]) > allowed_single_run_time: if float(row[0]) > allowed_single_run_time:
attrs[0] = f'style="background: {color_bad}"' attrs[0] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>']) errors_explained.append(
[
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
]
)
slow_average_tests += 1 slow_average_tests += 1
else: else:
attrs[0] = '' attrs[0] = ""
text += tableRow(row, attrs, anchor) text += tableRow(row, attrs, anchor)
text += tableEnd() text += tableEnd()
tables.append(text) tables.append(text)
@ -381,41 +446,45 @@ if args.report == 'main':
add_partial() add_partial()
def add_changes(): def add_changes():
rows = tsvRows('report/changed-perf.tsv') rows = tsvRows("report/changed-perf.tsv")
if not rows: if not rows:
return return
global faster_queries, slower_queries, tables global faster_queries, slower_queries, tables
text = tableStart('Changes in Performance') text = tableStart("Changes in Performance")
columns = [ columns = [
'Old,&nbsp;s', # 0 "Old,&nbsp;s", # 0
'New,&nbsp;s', # 1 "New,&nbsp;s", # 1
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', # 2 "Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 2
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 3 "Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 3
'p&nbsp;<&nbsp;0.01 threshold', # 4 "p&nbsp;<&nbsp;0.01 threshold", # 4
'', # Failed # 5 "", # Failed # 5
'Test', # 6 "Test", # 6
'#', # 7 "#", # 7
'Query', # 8 "Query", # 8
] ]
attrs = ['' for c in columns] attrs = ["" for c in columns]
attrs[5] = None attrs[5] = None
text += tableHeader(columns, attrs) text += tableHeader(columns, attrs)
for row in rows: for row in rows:
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}' anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
if int(row[5]): if int(row[5]):
if float(row[3]) < 0.: if float(row[3]) < 0.0:
faster_queries += 1 faster_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_good}"' attrs[2] = attrs[3] = f'style="background: {color_good}"'
else: else:
slower_queries += 1 slower_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_bad}"' attrs[2] = attrs[3] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>']) errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
]
)
else: else:
attrs[2] = attrs[3] = '' attrs[2] = attrs[3] = ""
text += tableRow(row, attrs, anchor) text += tableRow(row, attrs, anchor)
@ -427,35 +496,35 @@ if args.report == 'main':
def add_unstable_queries(): def add_unstable_queries():
global unstable_queries, very_unstable_queries, tables global unstable_queries, very_unstable_queries, tables
unstable_rows = tsvRows('report/unstable-queries.tsv') unstable_rows = tsvRows("report/unstable-queries.tsv")
if not unstable_rows: if not unstable_rows:
return return
unstable_queries += len(unstable_rows) unstable_queries += len(unstable_rows)
columns = [ columns = [
'Old,&nbsp;s', #0 "Old,&nbsp;s", # 0
'New,&nbsp;s', #1 "New,&nbsp;s", # 1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2 "Relative difference (new&nbsp;-&nbsp;old)/old", # 2
'p&nbsp;&lt;&nbsp;0.01 threshold', #3 "p&nbsp;&lt;&nbsp;0.01 threshold", # 3
'', # Failed #4 "", # Failed #4
'Test', #5 "Test", # 5
'#', #6 "#", # 6
'Query' #7 "Query", # 7
] ]
attrs = ['' for c in columns] attrs = ["" for c in columns]
attrs[4] = None attrs[4] = None
text = tableStart('Unstable Queries') text = tableStart("Unstable Queries")
text += tableHeader(columns, attrs) text += tableHeader(columns, attrs)
for r in unstable_rows: for r in unstable_rows:
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}' anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
if int(r[4]): if int(r[4]):
very_unstable_queries += 1 very_unstable_queries += 1
attrs[3] = f'style="background: {color_bad}"' attrs[3] = f'style="background: {color_bad}"'
else: else:
attrs[3] = '' attrs[3] = ""
# Just don't add the slightly unstable queries we don't consider # Just don't add the slightly unstable queries we don't consider
# errors. It's not clear what the user should do with them. # errors. It's not clear what the user should do with them.
continue continue
@ -470,53 +539,70 @@ if args.report == 'main':
add_unstable_queries() add_unstable_queries()
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv') skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows) addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
addSimpleTable('Test Performance Changes', addSimpleTable(
['Test', 'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'], "Test Performance Changes",
tsvRows('report/test-perf-changes.tsv')) [
"Test",
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)",
"Queries",
"Total not OK",
"Changed perf",
"Unstable",
],
tsvRows("report/test-perf-changes.tsv"),
)
def add_test_times(): def add_test_times():
global slow_average_tests, tables global slow_average_tests, tables
rows = tsvRows('report/test-times.tsv') rows = tsvRows("report/test-times.tsv")
if not rows: if not rows:
return return
columns = [ columns = [
'Test', #0 "Test", # 0
'Wall clock time, entire test,&nbsp;s', #1 "Wall clock time, entire test,&nbsp;s", # 1
'Total client time for measured query runs,&nbsp;s', #2 "Total client time for measured query runs,&nbsp;s", # 2
'Queries', #3 "Queries", # 3
'Longest query, total for measured runs,&nbsp;s', #4 "Longest query, total for measured runs,&nbsp;s", # 4
'Wall clock time per query,&nbsp;s', #5 "Wall clock time per query,&nbsp;s", # 5
'Shortest query, total for measured runs,&nbsp;s', #6 "Shortest query, total for measured runs,&nbsp;s", # 6
'', # Runs #7 "", # Runs #7
] ]
attrs = ['' for c in columns] attrs = ["" for c in columns]
attrs[7] = None attrs[7] = None
text = tableStart('Test Times') text = tableStart("Test Times")
text += tableHeader(columns, attrs) text += tableHeader(columns, attrs)
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
for r in rows: for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}' anchor = f"{currentTableAnchor()}.{r[0]}"
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs: if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
# FIXME should be 15s max -- investigate parallel_insert # FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1 slow_average_tests += 1
attrs[5] = f'style="background: {color_bad}"' attrs[5] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up']) errors_explained.append(
[
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
]
)
else: else:
attrs[5] = '' attrs[5] = ""
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs: if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
slow_average_tests += 1 slow_average_tests += 1
attrs[4] = f'style="background: {color_bad}"' attrs[4] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report']) errors_explained.append(
[
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
]
)
else: else:
attrs[4] = '' attrs[4] = ""
text += tableRow(r, attrs, anchor) text += tableRow(r, attrs, anchor)
@ -525,10 +611,17 @@ if args.report == 'main':
add_test_times() add_test_times()
addSimpleTable('Metric Changes', addSimpleTable(
['Metric', 'Old median value', 'New median value', "Metric Changes",
'Relative difference', 'Times difference'], [
tsvRows('metrics/changes.tsv')) "Metric",
"Old median value",
"New median value",
"Relative difference",
"Times difference",
],
tsvRows("metrics/changes.tsv"),
)
add_report_errors() add_report_errors()
add_errors_explained() add_errors_explained()
@ -536,7 +629,8 @@ if args.report == 'main':
for t in tables: for t in tables:
print(t) print(t)
print(f""" print(
f"""
</div> </div>
<p class="links"> <p class="links">
<a href="all-queries.html">All queries</a> <a href="all-queries.html">All queries</a>
@ -546,104 +640,111 @@ if args.report == 'main':
</p> </p>
</body> </body>
</html> </html>
""") """
)
status = 'success' status = "success"
message = 'See the report' message = "See the report"
message_array = [] message_array = []
if slow_average_tests: if slow_average_tests:
status = 'failure' status = "failure"
message_array.append(str(slow_average_tests) + ' too long') message_array.append(str(slow_average_tests) + " too long")
if faster_queries: if faster_queries:
message_array.append(str(faster_queries) + ' faster') message_array.append(str(faster_queries) + " faster")
if slower_queries: if slower_queries:
if slower_queries > 3: if slower_queries > 3:
status = 'failure' status = "failure"
message_array.append(str(slower_queries) + ' slower') message_array.append(str(slower_queries) + " slower")
if unstable_partial_queries: if unstable_partial_queries:
very_unstable_queries += unstable_partial_queries very_unstable_queries += unstable_partial_queries
status = 'failure' status = "failure"
# Don't show mildly unstable queries, only the very unstable ones we # Don't show mildly unstable queries, only the very unstable ones we
# treat as errors. # treat as errors.
if very_unstable_queries: if very_unstable_queries:
if very_unstable_queries > 5: if very_unstable_queries > 5:
error_tests += very_unstable_queries error_tests += very_unstable_queries
status = 'failure' status = "failure"
message_array.append(str(very_unstable_queries) + ' unstable') message_array.append(str(very_unstable_queries) + " unstable")
error_tests += slow_average_tests error_tests += slow_average_tests
if error_tests: if error_tests:
status = 'failure' status = "failure"
message_array.insert(0, str(error_tests) + ' errors') message_array.insert(0, str(error_tests) + " errors")
if message_array: if message_array:
message = ', '.join(message_array) message = ", ".join(message_array)
if report_errors: if report_errors:
status = 'failure' status = "failure"
message = 'Errors while building the report.' message = "Errors while building the report."
print((""" print(
(
"""
<!--status: {status}--> <!--status: {status}-->
<!--message: {message}--> <!--message: {message}-->
""".format(status=status, message=message))) """.format(
status=status, message=message
)
)
)
elif args.report == 'all-queries': elif args.report == "all-queries":
print((header_template.format())) print((header_template.format()))
add_tested_commits() add_tested_commits()
def add_all_queries(): def add_all_queries():
rows = tsvRows('report/all-queries.tsv') rows = tsvRows("report/all-queries.tsv")
if not rows: if not rows:
return return
columns = [ columns = [
'', # Changed #0 "", # Changed #0
'', # Unstable #1 "", # Unstable #1
'Old,&nbsp;s', #2 "Old,&nbsp;s", # 2
'New,&nbsp;s', #3 "New,&nbsp;s", # 3
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', #4 "Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 4
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #5 "Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 5
'p&nbsp;&lt;&nbsp;0.01 threshold', #6 "p&nbsp;&lt;&nbsp;0.01 threshold", # 6
'Test', #7 "Test", # 7
'#', #8 "#", # 8
'Query', #9 "Query", # 9
] ]
attrs = ['' for c in columns] attrs = ["" for c in columns]
attrs[0] = None attrs[0] = None
attrs[1] = None attrs[1] = None
text = tableStart('All Query Times') text = tableStart("All Query Times")
text += tableHeader(columns, attrs) text += tableHeader(columns, attrs)
for r in rows: for r in rows:
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}' anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
if int(r[1]): if int(r[1]):
attrs[6] = f'style="background: {color_bad}"' attrs[6] = f'style="background: {color_bad}"'
else: else:
attrs[6] = '' attrs[6] = ""
if int(r[0]): if int(r[0]):
if float(r[5]) > 0.: if float(r[5]) > 0.0:
attrs[4] = attrs[5] = f'style="background: {color_bad}"' attrs[4] = attrs[5] = f'style="background: {color_bad}"'
else: else:
attrs[4] = attrs[5] = f'style="background: {color_good}"' attrs[4] = attrs[5] = f'style="background: {color_good}"'
else: else:
attrs[4] = attrs[5] = '' attrs[4] = attrs[5] = ""
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time: if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = f'style="background: {color_bad}"' attrs[2] = f'style="background: {color_bad}"'
attrs[3] = f'style="background: {color_bad}"' attrs[3] = f'style="background: {color_bad}"'
else: else:
attrs[2] = '' attrs[2] = ""
attrs[3] = '' attrs[3] = ""
text += tableRow(r, attrs, anchor) text += tableRow(r, attrs, anchor)
@ -655,7 +756,8 @@ elif args.report == 'all-queries':
for t in tables: for t in tables:
print(t) print(t)
print(f""" print(
f"""
</div> </div>
<p class="links"> <p class="links">
<a href="report.html">Main report</a> <a href="report.html">Main report</a>
@ -665,4 +767,5 @@ elif args.report == 'all-queries':
</p> </p>
</body> </body>
</html> </html>
""") """
)

View File

@ -7,18 +7,19 @@ import csv
RESULT_LOG_NAME = "run.log" RESULT_LOG_NAME = "run.log"
def process_result(result_folder): def process_result(result_folder):
status = "success" status = "success"
description = 'Server started and responded' description = "Server started and responded"
summary = [("Smoke test", "OK")] summary = [("Smoke test", "OK")]
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log: with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
lines = run_log.read().split('\n') lines = run_log.read().split("\n")
if not lines or lines[0].strip() != 'OK': if not lines or lines[0].strip() != "OK":
status = "failure" status = "failure"
logging.info("Lines is not ok: %s", str('\n'.join(lines))) logging.info("Lines is not ok: %s", str("\n".join(lines)))
summary = [("Smoke test", "FAIL")] summary = [("Smoke test", "FAIL")]
description = 'Server failed to respond, see result in logs' description = "Server failed to respond, see result in logs"
result_logs = [] result_logs = []
server_log_path = os.path.join(result_folder, "clickhouse-server.log") server_log_path = os.path.join(result_folder, "clickhouse-server.log")
@ -38,20 +39,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='/test_output/') description="ClickHouse script for parsing results of split build smoke test"
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') )
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir) state, description, test_results, logs = process_result(args.in_results_dir)

View File

@ -10,11 +10,18 @@ def process_result(result_folder):
status = "success" status = "success"
summary = [] summary = []
paths = [] paths = []
tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"] tests = [
"TLPWhere",
"TLPGroupBy",
"TLPHaving",
"TLPWhereGroupBy",
"TLPDistinct",
"TLPAggregate",
]
for test in tests: for test in tests:
err_path = '{}/{}.err'.format(result_folder, test) err_path = "{}/{}.err".format(result_folder, test)
out_path = '{}/{}.out'.format(result_folder, test) out_path = "{}/{}.out".format(result_folder, test)
if not os.path.exists(err_path): if not os.path.exists(err_path):
logging.info("No output err on path %s", err_path) logging.info("No output err on path %s", err_path)
summary.append((test, "SKIPPED")) summary.append((test, "SKIPPED"))
@ -23,24 +30,24 @@ def process_result(result_folder):
else: else:
paths.append(err_path) paths.append(err_path)
paths.append(out_path) paths.append(out_path)
with open(err_path, 'r') as f: with open(err_path, "r") as f:
if 'AssertionError' in f.read(): if "AssertionError" in f.read():
summary.append((test, "FAIL")) summary.append((test, "FAIL"))
status = 'failure' status = "failure"
else: else:
summary.append((test, "OK")) summary.append((test, "OK"))
logs_path = '{}/logs.tar.gz'.format(result_folder) logs_path = "{}/logs.tar.gz".format(result_folder)
if not os.path.exists(logs_path): if not os.path.exists(logs_path):
logging.info("No logs tar on path %s", logs_path) logging.info("No logs tar on path %s", logs_path)
else: else:
paths.append(logs_path) paths.append(logs_path)
stdout_path = '{}/stdout.log'.format(result_folder) stdout_path = "{}/stdout.log".format(result_folder)
if not os.path.exists(stdout_path): if not os.path.exists(stdout_path):
logging.info("No stdout log on path %s", stdout_path) logging.info("No stdout log on path %s", stdout_path)
else: else:
paths.append(stdout_path) paths.append(stdout_path)
stderr_path = '{}/stderr.log'.format(result_folder) stderr_path = "{}/stderr.log".format(result_folder)
if not os.path.exists(stderr_path): if not os.path.exists(stderr_path):
logging.info("No stderr log on path %s", stderr_path) logging.info("No stderr log on path %s", stderr_path)
else: else:
@ -52,20 +59,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='/test_output/') description="ClickHouse script for parsing results of sqlancer test"
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') )
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir) state, description, test_results, logs = process_result(args.in_results_dir)

View File

@ -131,9 +131,6 @@ function start()
# use root to match with current uid # use root to match with current uid
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
sleep 0.5 sleep 0.5
cat /var/log/clickhouse-server/stdout.log
tail -n200 /var/log/clickhouse-server/stderr.log
tail -n200 /var/log/clickhouse-server/clickhouse-server.log
counter=$((counter + 1)) counter=$((counter + 1))
done done
@ -211,14 +208,12 @@ stop
start start
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|| echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
&& grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
# Print Fatal log messages to stdout
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
# Grep logs for sanitizer asserts, crashes and other critical errors # Grep logs for sanitizer asserts, crashes and other critical errors
# Sanitizer asserts # Sanitizer asserts
@ -235,20 +230,26 @@ zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Logical errors # Logical errors
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /test_output/logical_errors.txt \
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
# Remove file logical_errors.txt if it's empty
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
# Crash # Crash
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog) # It also checks for crash without stacktrace (printed by watchdog)
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file fatal_messages.txt if it's empty
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
zgrep -Fa "########################################" /test_output/* > /dev/null \ zgrep -Fa "########################################" /test_output/* > /dev/null \
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
@ -259,12 +260,12 @@ echo -e "Backward compatibility check\n"
echo "Download previous release server" echo "Download previous release server"
mkdir previous_release_package_folder mkdir previous_release_package_folder
clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \ clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then then
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv
stop stop
# Uninstall current packages # Uninstall current packages
@ -290,8 +291,8 @@ then
mkdir tmp_stress_output mkdir tmp_stress_output
./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ ./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \ && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
rm -rf tmp_stress_output rm -rf tmp_stress_output
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
@ -301,8 +302,9 @@ then
# Start new server # Start new server
configure configure
start 500 start 500
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \ clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|| echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \
&& grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt)
clickhouse-client --query="SELECT 'Server version: ', version()" clickhouse-client --query="SELECT 'Server version: ', version()"
@ -312,10 +314,12 @@ then
stop stop
# Error messages (we should ignore some errors) # Error messages (we should ignore some errors)
echo "Check for Error messages in server log:"
zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Code: 236. DB::Exception: Cancelled mutating parts" \ -e "Code: 236. DB::Exception: Cancelled mutating parts" \
-e "REPLICA_IS_ALREADY_ACTIVE" \ -e "REPLICA_IS_ALREADY_ACTIVE" \
-e "REPLICA_IS_ALREADY_EXIST" \ -e "REPLICA_IS_ALREADY_EXIST" \
-e "ALL_REPLICAS_LOST" \
-e "DDLWorker: Cannot parse DDL task query" \ -e "DDLWorker: Cannot parse DDL task query" \
-e "RaftInstance: failed to accept a rpc connection due to error 125" \ -e "RaftInstance: failed to accept a rpc connection due to error 125" \
-e "UNKNOWN_DATABASE" \ -e "UNKNOWN_DATABASE" \
@ -328,47 +332,53 @@ then
-e "Code: 1000, e.code() = 111, Connection refused" \ -e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \ -e "UNFINISHED" \
-e "Renaming unexpected part" \ -e "Renaming unexpected part" \
/var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /dev/null \ /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_error_messages.txt if it's empty
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
# Sanitizer asserts # Sanitizer asserts
zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \ zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
rm -f /test_output/tmp rm -f /test_output/tmp
# OOM # OOM
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Logical errors # Logical errors
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ echo "Check for Logical errors in server log:"
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \
|| echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_logical_errors.txt if it's empty
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
# Crash # Crash
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog) # It also checks for crash without stacktrace (printed by watchdog)
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ echo "Check for Fatal message in server log:"
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_fatal_messages.txt if it's empty
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
else else
echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv
fi fi
zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \
&& echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv
# Put logs into /test_output/ # Put logs into /test_output/
for log_file in /var/log/clickhouse-server/clickhouse-server.log* for log_file in /var/log/clickhouse-server/clickhouse-server.log*
do do

View File

@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \ python3-pip \
shellcheck \ shellcheck \
yamllint \ yamllint \
&& pip3 install codespell PyGithub boto3 unidiff dohq-artifactory && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff
# Architecture of the image when BuildKit/buildx is used # Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH ARG TARGETARCH

View File

@ -14,6 +14,7 @@ def process_result(result_folder):
("header duplicates", "duplicate_output.txt"), ("header duplicates", "duplicate_output.txt"),
("shellcheck", "shellcheck_output.txt"), ("shellcheck", "shellcheck_output.txt"),
("style", "style_output.txt"), ("style", "style_output.txt"),
("black", "black_output.txt"),
("typos", "typos_output.txt"), ("typos", "typos_output.txt"),
("whitespaces", "whitespaces_output.txt"), ("whitespaces", "whitespaces_output.txt"),
("workflows", "workflows_output.txt"), ("workflows", "workflows_output.txt"),

View File

@ -7,6 +7,8 @@ echo "Check duplicates" | ts
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
echo "Check style" | ts echo "Check style" | ts
./check-style -n |& tee /test_output/style_output.txt ./check-style -n |& tee /test_output/style_output.txt
echo "Check python formatting with black" | ts
./check-black -n |& tee /test_output/black_output.txt
echo "Check typos" | ts echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt ./check-typos |& tee /test_output/typos_output.txt
echo "Check whitespaces" | ts echo "Check whitespaces" | ts

View File

@ -22,9 +22,9 @@ def process_result(result_folder):
total_other = 0 total_other = 0
test_results = [] test_results = []
for test in results["tests"]: for test in results["tests"]:
test_name = test['test']['test_name'] test_name = test["test"]["test_name"]
test_result = test['result']['result_type'].upper() test_result = test["result"]["result_type"].upper()
test_time = str(test['result']['message_rtime']) test_time = str(test["result"]["message_rtime"])
total_tests += 1 total_tests += 1
if test_result == "OK": if test_result == "OK":
total_ok += 1 total_ok += 1
@ -39,24 +39,29 @@ def process_result(result_folder):
else: else:
status = "success" status = "success"
description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other) description = "failed: {}, passed: {}, other: {}".format(
total_fail, total_ok, total_other
)
return status, description, test_results, [json_path, test_binary_log] return status, description, test_results, [json_path, test_binary_log]
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='./') description="ClickHouse script for parsing results of Testflows tests"
parser.add_argument("--out-results-file", default='./test_results.tsv') )
parser.add_argument("--out-status-file", default='./check_status.tsv') parser.add_argument("--in-results-dir", default="./")
parser.add_argument("--out-results-file", default="./test_results.tsv")
parser.add_argument("--out-status-file", default="./check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir) state, description, test_results, logs = process_result(args.in_results_dir)
@ -64,4 +69,3 @@ if __name__ == "__main__":
status = (state, description) status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status) write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written") logging.info("Result written")

View File

@ -5,24 +5,26 @@ import logging
import argparse import argparse
import csv import csv
OK_SIGN = 'OK ]' OK_SIGN = "OK ]"
FAILED_SIGN = 'FAILED ]' FAILED_SIGN = "FAILED ]"
SEGFAULT = 'Segmentation fault' SEGFAULT = "Segmentation fault"
SIGNAL = 'received signal SIG' SIGNAL = "received signal SIG"
PASSED = 'PASSED' PASSED = "PASSED"
def get_test_name(line): def get_test_name(line):
elements = reversed(line.split(' ')) elements = reversed(line.split(" "))
for element in elements: for element in elements:
if '(' not in element and ')' not in element: if "(" not in element and ")" not in element:
return element return element
raise Exception("No test name in line '{}'".format(line)) raise Exception("No test name in line '{}'".format(line))
def process_result(result_folder): def process_result(result_folder):
summary = [] summary = []
total_counter = 0 total_counter = 0
failed_counter = 0 failed_counter = 0
result_log_path = '{}/test_result.txt'.format(result_folder) result_log_path = "{}/test_result.txt".format(result_folder)
if not os.path.exists(result_log_path): if not os.path.exists(result_log_path):
logging.info("No output log on path %s", result_log_path) logging.info("No output log on path %s", result_log_path)
return "exception", "No output log", [] return "exception", "No output log", []
@ -30,7 +32,7 @@ def process_result(result_folder):
status = "success" status = "success"
description = "" description = ""
passed = False passed = False
with open(result_log_path, 'r') as test_result: with open(result_log_path, "r") as test_result:
for line in test_result: for line in test_result:
if OK_SIGN in line: if OK_SIGN in line:
logging.info("Found ok line: '%s'", line) logging.info("Found ok line: '%s'", line)
@ -38,7 +40,7 @@ def process_result(result_folder):
logging.info("Test name: '%s'", test_name) logging.info("Test name: '%s'", test_name)
summary.append((test_name, "OK")) summary.append((test_name, "OK"))
total_counter += 1 total_counter += 1
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line: elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
logging.info("Found fail line: '%s'", line) logging.info("Found fail line: '%s'", line)
test_name = get_test_name(line.strip()) test_name = get_test_name(line.strip())
logging.info("Test name: '%s'", test_name) logging.info("Test name: '%s'", test_name)
@ -67,25 +69,30 @@ def process_result(result_folder):
status = "failure" status = "failure"
if not description: if not description:
description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter) description += "fail: {}, passed: {}".format(
failed_counter, total_counter - failed_counter
)
return status, description, summary return status, description, summary
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='/test_output/') description="ClickHouse script for parsing results of unit tests"
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') )
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir) state, description, test_results = process_result(args.in_results_dir)
@ -93,4 +100,3 @@ if __name__ == "__main__":
status = (state, description) status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status) write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written") logging.info("Result written")

View File

@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
RETRIES_SIGN = "Some tests were restarted" RETRIES_SIGN = "Some tests were restarted"
def process_test_log(log_path): def process_test_log(log_path):
total = 0 total = 0
skipped = 0 skipped = 0
@ -26,7 +27,7 @@ def process_test_log(log_path):
retries = False retries = False
task_timeout = True task_timeout = True
test_results = [] test_results = []
with open(log_path, 'r') as test_file: with open(log_path, "r") as test_file:
for line in test_file: for line in test_file:
original_line = line original_line = line
line = line.strip() line = line.strip()
@ -36,12 +37,15 @@ def process_test_log(log_path):
hung = True hung = True
if RETRIES_SIGN in line: if RETRIES_SIGN in line:
retries = True retries = True
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)): if any(
test_name = line.split(' ')[2].split(':')[0] sign in line
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
):
test_name = line.split(" ")[2].split(":")[0]
test_time = '' test_time = ""
try: try:
time_token = line.split(']')[1].strip().split()[0] time_token = line.split("]")[1].strip().split()[0]
float(time_token) float(time_token)
test_time = time_token test_time = time_token
except: except:
@ -66,9 +70,22 @@ def process_test_log(log_path):
elif len(test_results) > 0 and test_results[-1][1] == "FAIL": elif len(test_results) > 0 and test_results[-1][1] == "FAIL":
test_results[-1][3].append(original_line) test_results[-1][3].append(original_line)
test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results] test_results = [
(test[0], test[1], test[2], "".join(test[3])) for test in test_results
]
return (
total,
skipped,
unknown,
failed,
success,
hung,
task_timeout,
retries,
test_results,
)
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
def process_result(result_path): def process_result(result_path):
test_results = [] test_results = []
@ -76,16 +93,26 @@ def process_result(result_path):
description = "" description = ""
files = os.listdir(result_path) files = os.listdir(result_path)
if files: if files:
logging.info("Find files in result folder %s", ','.join(files)) logging.info("Find files in result folder %s", ",".join(files))
result_path = os.path.join(result_path, 'test_result.txt') result_path = os.path.join(result_path, "test_result.txt")
else: else:
result_path = None result_path = None
description = "No output log" description = "No output log"
state = "error" state = "error"
if result_path and os.path.exists(result_path): if result_path and os.path.exists(result_path):
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path) (
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1)) total,
skipped,
unknown,
failed,
success,
hung,
task_timeout,
retries,
test_results,
) = process_test_log(result_path)
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
logging.info("Is flacky check: %s", is_flacky_check) logging.info("Is flacky check: %s", is_flacky_check)
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately) # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped. # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
@ -120,20 +147,22 @@ def process_result(result_path):
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='/test_output/') description="ClickHouse script for parsing results of functional tests"
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') )
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir) state, description, test_results = process_result(args.in_results_dir)

View File

@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally). Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md). If it fails, fix the style errors following the [code style guide](style.md).
Python code is checked with [black](https://github.com/psf/black/).
### Report Details ### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html) - [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt). - `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).

View File

@ -137,7 +137,7 @@ CREATE TABLE test.test_orc
`f_array_array_float` Array(Array(Float32)), `f_array_array_float` Array(Array(Float32)),
`day` String `day` String
) )
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day PARTITION BY day
``` ```

View File

@ -195,5 +195,6 @@ toc_title: Adopters
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | | <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | | <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://magenta-technology.ru/sistema-upravleniya-marshrutami-inkassacii-as-strela/" class="favicon">АС "Стрела"</a> | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) | | <a href="https://magenta-technology.ru/sistema-upravleniya-marshrutami-inkassacii-as-strela/" class="favicon">АС "Стрела"</a> | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) |
| <a href="https://piwik.pro/" class="favicon">Piwik PRO</a> | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide--> [Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: Caches
# Cache Types {#cache-types} # Cache Types {#cache-types}
When performing queries, ClichHouse uses different caches. When performing queries, ClickHouse uses different caches.
Main cache types: Main cache types:

View File

@ -15,24 +15,24 @@ import website
def prepare_amp_html(lang, args, root, site_temp, main_site_dir): def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
src_path = root src_path = root
src_index = os.path.join(src_path, 'index.html') src_index = os.path.join(src_path, "index.html")
rel_path = os.path.relpath(src_path, site_temp) rel_path = os.path.relpath(src_path, site_temp)
dst_path = os.path.join(main_site_dir, rel_path, 'amp') dst_path = os.path.join(main_site_dir, rel_path, "amp")
dst_index = os.path.join(dst_path, 'index.html') dst_index = os.path.join(dst_path, "index.html")
logging.debug(f'Generating AMP version for {rel_path} ({lang})') logging.debug(f"Generating AMP version for {rel_path} ({lang})")
os.makedirs(dst_path) os.makedirs(dst_path)
with open(src_index, 'r') as f: with open(src_index, "r") as f:
content = f.read() content = f.read()
css_in = ' '.join(website.get_css_in(args)) css_in = " ".join(website.get_css_in(args))
command = f"purifycss --min {css_in} '{src_index}'" command = f"purifycss --min {css_in} '{src_index}'"
logging.debug(command) logging.debug(command)
inline_css = subprocess.check_output(command, shell=True).decode('utf-8') inline_css = subprocess.check_output(command, shell=True).decode("utf-8")
inline_css = inline_css.replace('!important', '').replace('/*!', '/*') inline_css = inline_css.replace("!important", "").replace("/*!", "/*")
inline_css = cssmin.cssmin(inline_css) inline_css = cssmin.cssmin(inline_css)
content = content.replace('CUSTOM_CSS_PLACEHOLDER', inline_css) content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css)
with open(dst_index, 'w') as f: with open(dst_index, "w") as f:
f.write(content) f.write(content)
return dst_index return dst_index
@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
def build_amp(lang, args, cfg): def build_amp(lang, args, cfg):
# AMP docs: https://amp.dev/documentation/ # AMP docs: https://amp.dev/documentation/
logging.info(f'Building AMP version for {lang}') logging.info(f"Building AMP version for {lang}")
with util.temp_dir() as site_temp: with util.temp_dir() as site_temp:
extra = cfg.data['extra'] extra = cfg.data["extra"]
main_site_dir = cfg.data['site_dir'] main_site_dir = cfg.data["site_dir"]
extra['is_amp'] = True extra["is_amp"] = True
cfg.load_dict({ cfg.load_dict({"site_dir": site_temp, "extra": extra})
'site_dir': site_temp,
'extra': extra
})
try: try:
mkdocs.commands.build.build(cfg) mkdocs.commands.build.build(cfg)
@ -60,50 +57,49 @@ def build_amp(lang, args, cfg):
paths = [] paths = []
for root, _, filenames in os.walk(site_temp): for root, _, filenames in os.walk(site_temp):
if 'index.html' in filenames: if "index.html" in filenames:
paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir)) paths.append(
logging.info(f'Finished building AMP version for {lang}') prepare_amp_html(lang, args, root, site_temp, main_site_dir)
)
logging.info(f"Finished building AMP version for {lang}")
def html_to_amp(content): def html_to_amp(content):
soup = bs4.BeautifulSoup( soup = bs4.BeautifulSoup(content, features="html.parser")
content,
features='html.parser'
)
for tag in soup.find_all(): for tag in soup.find_all():
if tag.attrs.get('id') == 'tostring': if tag.attrs.get("id") == "tostring":
tag.attrs['id'] = '_tostring' tag.attrs["id"] = "_tostring"
if tag.name == 'img': if tag.name == "img":
tag.name = 'amp-img' tag.name = "amp-img"
tag.attrs['layout'] = 'responsive' tag.attrs["layout"] = "responsive"
src = tag.attrs['src'] src = tag.attrs["src"]
if not (src.startswith('/') or src.startswith('http')): if not (src.startswith("/") or src.startswith("http")):
tag.attrs['src'] = f'../{src}' tag.attrs["src"] = f"../{src}"
if not tag.attrs.get('width'): if not tag.attrs.get("width"):
tag.attrs['width'] = '640' tag.attrs["width"] = "640"
if not tag.attrs.get('height'): if not tag.attrs.get("height"):
tag.attrs['height'] = '320' tag.attrs["height"] = "320"
if tag.name == 'iframe': if tag.name == "iframe":
tag.name = 'amp-iframe' tag.name = "amp-iframe"
tag.attrs['layout'] = 'responsive' tag.attrs["layout"] = "responsive"
del tag.attrs['alt'] del tag.attrs["alt"]
del tag.attrs['allowfullscreen'] del tag.attrs["allowfullscreen"]
if not tag.attrs.get('width'): if not tag.attrs.get("width"):
tag.attrs['width'] = '640' tag.attrs["width"] = "640"
if not tag.attrs.get('height'): if not tag.attrs.get("height"):
tag.attrs['height'] = '320' tag.attrs["height"] = "320"
elif tag.name == 'a': elif tag.name == "a":
href = tag.attrs.get('href') href = tag.attrs.get("href")
if href: if href:
if not (href.startswith('/') or href.startswith('http')): if not (href.startswith("/") or href.startswith("http")):
if '#' in href: if "#" in href:
href, anchor = href.split('#') href, anchor = href.split("#")
else: else:
anchor = None anchor = None
href = f'../{href}amp/' href = f"../{href}amp/"
if anchor: if anchor:
href = f'{href}#{anchor}' href = f"{href}#{anchor}"
tag.attrs['href'] = href tag.attrs["href"] = href
content = str(soup) content = str(soup)
return website.minify_html(content) return website.minify_html(content)

View File

@ -17,54 +17,52 @@ import util
def build_for_lang(lang, args): def build_for_lang(lang, args):
logging.info(f'Building {lang} blog') logging.info(f"Building {lang} blog")
try: try:
theme_cfg = { theme_cfg = {
'name': None, "name": None,
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
'language': lang, "language": lang,
'direction': 'ltr', "direction": "ltr",
'static_templates': ['404.html'], "static_templates": ["404.html"],
'extra': { "extra": {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching "now": int(
} time.mktime(datetime.datetime.now().timetuple())
) # TODO better way to avoid caching
},
} }
# the following list of languages is sorted according to # the following list of languages is sorted according to
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
languages = { languages = {"en": "English"}
'en': 'English'
}
site_names = { site_names = {"en": "ClickHouse Blog"}
'en': 'ClickHouse Blog'
}
assert len(site_names) == len(languages) assert len(site_names) == len(languages)
site_dir = os.path.join(args.blog_output_dir, lang) site_dir = os.path.join(args.blog_output_dir, lang)
plugins = ['macros'] plugins = ["macros"]
if args.htmlproofer: if args.htmlproofer:
plugins.append('htmlproofer') plugins.append("htmlproofer")
website_url = 'https://clickhouse.com' website_url = "https://clickhouse.com"
site_name = site_names.get(lang, site_names['en']) site_name = site_names.get(lang, site_names["en"])
blog_nav, post_meta = nav.build_blog_nav(lang, args) blog_nav, post_meta = nav.build_blog_nav(lang, args)
raw_config = dict( raw_config = dict(
site_name=site_name, site_name=site_name,
site_url=f'{website_url}/blog/{lang}/', site_url=f"{website_url}/blog/{lang}/",
docs_dir=os.path.join(args.blog_dir, lang), docs_dir=os.path.join(args.blog_dir, lang),
site_dir=site_dir, site_dir=site_dir,
strict=True, strict=True,
theme=theme_cfg, theme=theme_cfg,
nav=blog_nav, nav=blog_nav,
copyright='©20162022 ClickHouse, Inc.', copyright="©20162022 ClickHouse, Inc.",
use_directory_urls=True, use_directory_urls=True,
repo_name='ClickHouse/ClickHouse', repo_name="ClickHouse/ClickHouse",
repo_url='https://github.com/ClickHouse/ClickHouse/', repo_url="https://github.com/ClickHouse/ClickHouse/",
edit_uri=f'edit/master/website/blog/{lang}', edit_uri=f"edit/master/website/blog/{lang}",
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
plugins=plugins, plugins=plugins,
extra=dict( extra=dict(
@ -75,12 +73,12 @@ def build_for_lang(lang, args):
website_url=website_url, website_url=website_url,
events=args.events, events=args.events,
languages=languages, languages=languages,
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
is_amp=False, is_amp=False,
is_blog=True, is_blog=True,
post_meta=post_meta, post_meta=post_meta,
today=datetime.date.today().isoformat() today=datetime.date.today().isoformat(),
) ),
) )
cfg = config.load_config(**raw_config) cfg = config.load_config(**raw_config)
@ -89,21 +87,28 @@ def build_for_lang(lang, args):
redirects.build_blog_redirects(args) redirects.build_blog_redirects(args)
env = util.init_jinja2_env(args) env = util.init_jinja2_env(args)
with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f: with open(
rss_template_string = f.read().decode('utf-8').strip() os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb"
) as f:
rss_template_string = f.read().decode("utf-8").strip()
rss_template = env.from_string(rss_template_string) rss_template = env.from_string(rss_template_string)
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f: with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f:
f.write(rss_template.render({'config': raw_config})) f.write(rss_template.render({"config": raw_config}))
logging.info(f'Finished building {lang} blog') logging.info(f"Finished building {lang} blog")
except exceptions.ConfigurationError as e: except exceptions.ConfigurationError as e:
raise SystemExit('\n' + str(e)) raise SystemExit("\n" + str(e))
def build_blog(args): def build_blog(args):
tasks = [] tasks = []
for lang in args.blog_lang.split(','): for lang in args.blog_lang.split(","):
if lang: if lang:
tasks.append((lang, args,)) tasks.append(
(
lang,
args,
)
)
util.run_function_in_parallel(build_for_lang, tasks, threads=False) util.run_function_in_parallel(build_for_lang, tasks, threads=False)

View File

@ -30,76 +30,76 @@ import website
from cmake_in_clickhouse_generator import generate_cmake_flags_files from cmake_in_clickhouse_generator import generate_cmake_flags_files
class ClickHouseMarkdown(markdown.extensions.Extension): class ClickHouseMarkdown(markdown.extensions.Extension):
class ClickHousePreprocessor(markdown.util.Processor): class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines): def run(self, lines):
for line in lines: for line in lines:
if '<!--hide-->' not in line: if "<!--hide-->" not in line:
yield line yield line
def extendMarkdown(self, md): def extendMarkdown(self, md):
md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31) md.preprocessors.register(
self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31
)
markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown
def build_for_lang(lang, args): def build_for_lang(lang, args):
logging.info(f'Building {lang} docs') logging.info(f"Building {lang} docs")
os.environ['SINGLE_PAGE'] = '0' os.environ["SINGLE_PAGE"] = "0"
try: try:
theme_cfg = { theme_cfg = {
'name': None, "name": None,
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
'language': lang, "language": lang,
'direction': 'rtl' if lang == 'fa' else 'ltr', "direction": "rtl" if lang == "fa" else "ltr",
'static_templates': ['404.html'], "static_templates": ["404.html"],
'extra': { "extra": {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching "now": int(
} time.mktime(datetime.datetime.now().timetuple())
) # TODO better way to avoid caching
},
} }
# the following list of languages is sorted according to # the following list of languages is sorted according to
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
languages = { languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"}
'en': 'English',
'zh': '中文',
'ru': 'Русский',
'ja': '日本語'
}
site_names = { site_names = {
'en': 'ClickHouse %s Documentation', "en": "ClickHouse %s Documentation",
'zh': 'ClickHouse文档 %s', "zh": "ClickHouse文档 %s",
'ru': 'Документация ClickHouse %s', "ru": "Документация ClickHouse %s",
'ja': 'ClickHouseドキュメント %s' "ja": "ClickHouseドキュメント %s",
} }
assert len(site_names) == len(languages) assert len(site_names) == len(languages)
site_dir = os.path.join(args.docs_output_dir, lang) site_dir = os.path.join(args.docs_output_dir, lang)
plugins = ['macros'] plugins = ["macros"]
if args.htmlproofer: if args.htmlproofer:
plugins.append('htmlproofer') plugins.append("htmlproofer")
website_url = 'https://clickhouse.com' website_url = "https://clickhouse.com"
site_name = site_names.get(lang, site_names['en']) % '' site_name = site_names.get(lang, site_names["en"]) % ""
site_name = site_name.replace(' ', ' ') site_name = site_name.replace(" ", " ")
raw_config = dict( raw_config = dict(
site_name=site_name, site_name=site_name,
site_url=f'{website_url}/docs/{lang}/', site_url=f"{website_url}/docs/{lang}/",
docs_dir=os.path.join(args.docs_dir, lang), docs_dir=os.path.join(args.docs_dir, lang),
site_dir=site_dir, site_dir=site_dir,
strict=True, strict=True,
theme=theme_cfg, theme=theme_cfg,
copyright='©20162022 ClickHouse, Inc.', copyright="©20162022 ClickHouse, Inc.",
use_directory_urls=True, use_directory_urls=True,
repo_name='ClickHouse/ClickHouse', repo_name="ClickHouse/ClickHouse",
repo_url='https://github.com/ClickHouse/ClickHouse/', repo_url="https://github.com/ClickHouse/ClickHouse/",
edit_uri=f'edit/master/docs/{lang}', edit_uri=f"edit/master/docs/{lang}",
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
plugins=plugins, plugins=plugins,
extra=dict( extra=dict(
@ -111,16 +111,16 @@ def build_for_lang(lang, args):
website_url=website_url, website_url=website_url,
events=args.events, events=args.events,
languages=languages, languages=languages,
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
is_amp=False, is_amp=False,
is_blog=False is_blog=False,
) ),
) )
# Clean to be safe if last build finished abnormally # Clean to be safe if last build finished abnormally
single_page.remove_temporary_files(lang, args) single_page.remove_temporary_files(lang, args)
raw_config['nav'] = nav.build_docs_nav(lang, args) raw_config["nav"] = nav.build_docs_nav(lang, args)
cfg = config.load_config(**raw_config) cfg = config.load_config(**raw_config)
@ -131,21 +131,28 @@ def build_for_lang(lang, args):
amp.build_amp(lang, args, cfg) amp.build_amp(lang, args, cfg)
if not args.skip_single_page: if not args.skip_single_page:
single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg) single_page.build_single_page_version(
lang, args, raw_config.get("nav"), cfg
)
mdx_clickhouse.PatchedMacrosPlugin.disabled = False mdx_clickhouse.PatchedMacrosPlugin.disabled = False
logging.info(f'Finished building {lang} docs') logging.info(f"Finished building {lang} docs")
except exceptions.ConfigurationError as e: except exceptions.ConfigurationError as e:
raise SystemExit('\n' + str(e)) raise SystemExit("\n" + str(e))
def build_docs(args): def build_docs(args):
tasks = [] tasks = []
for lang in args.lang.split(','): for lang in args.lang.split(","):
if lang: if lang:
tasks.append((lang, args,)) tasks.append(
(
lang,
args,
)
)
util.run_function_in_parallel(build_for_lang, tasks, threads=False) util.run_function_in_parallel(build_for_lang, tasks, threads=False)
redirects.build_docs_redirects(args) redirects.build_docs_redirects(args)
@ -171,56 +178,64 @@ def build(args):
redirects.build_static_redirects(args) redirects.build_static_redirects(args)
if __name__ == '__main__': if __name__ == "__main__":
os.chdir(os.path.join(os.path.dirname(__file__), '..')) os.chdir(os.path.join(os.path.dirname(__file__), ".."))
# A root path to ClickHouse source code. # A root path to ClickHouse source code.
src_dir = '..' src_dir = ".."
website_dir = os.path.join(src_dir, 'website') website_dir = os.path.join(src_dir, "website")
arg_parser = argparse.ArgumentParser() arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--lang', default='en,ru,zh,ja') arg_parser.add_argument("--lang", default="en,ru,zh,ja")
arg_parser.add_argument('--blog-lang', default='en') arg_parser.add_argument("--blog-lang", default="en")
arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument("--docs-dir", default=".")
arg_parser.add_argument('--theme-dir', default=website_dir) arg_parser.add_argument("--theme-dir", default=website_dir)
arg_parser.add_argument('--website-dir', default=website_dir) arg_parser.add_argument("--website-dir", default=website_dir)
arg_parser.add_argument('--src-dir', default=src_dir) arg_parser.add_argument("--src-dir", default=src_dir)
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog')) arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog"))
arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument("--output-dir", default="build")
arg_parser.add_argument('--nav-limit', type=int, default='0') arg_parser.add_argument("--nav-limit", type=int, default="0")
arg_parser.add_argument('--skip-multi-page', action='store_true') arg_parser.add_argument("--skip-multi-page", action="store_true")
arg_parser.add_argument('--skip-single-page', action='store_true') arg_parser.add_argument("--skip-single-page", action="store_true")
arg_parser.add_argument('--skip-amp', action='store_true') arg_parser.add_argument("--skip-amp", action="store_true")
arg_parser.add_argument('--skip-website', action='store_true') arg_parser.add_argument("--skip-website", action="store_true")
arg_parser.add_argument('--skip-blog', action='store_true') arg_parser.add_argument("--skip-blog", action="store_true")
arg_parser.add_argument('--skip-git-log', action='store_true') arg_parser.add_argument("--skip-git-log", action="store_true")
arg_parser.add_argument('--skip-docs', action='store_true') arg_parser.add_argument("--skip-docs", action="store_true")
arg_parser.add_argument('--test-only', action='store_true') arg_parser.add_argument("--test-only", action="store_true")
arg_parser.add_argument('--minify', action='store_true') arg_parser.add_argument("--minify", action="store_true")
arg_parser.add_argument('--htmlproofer', action='store_true') arg_parser.add_argument("--htmlproofer", action="store_true")
arg_parser.add_argument('--no-docs-macros', action='store_true') arg_parser.add_argument("--no-docs-macros", action="store_true")
arg_parser.add_argument('--save-raw-single-page', type=str) arg_parser.add_argument("--save-raw-single-page", type=str)
arg_parser.add_argument('--livereload', type=int, default='0') arg_parser.add_argument("--livereload", type=int, default="0")
arg_parser.add_argument('--verbose', action='store_true') arg_parser.add_argument("--verbose", action="store_true")
args = arg_parser.parse_args() args = arg_parser.parse_args()
args.minify = False # TODO remove args.minify = False # TODO remove
logging.basicConfig( logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO, level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
stream=sys.stderr
) )
logging.getLogger('MARKDOWN').setLevel(logging.INFO) logging.getLogger("MARKDOWN").setLevel(logging.INFO)
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs') args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs")
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog') args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog")
from github import get_events from github import get_events
args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip() args.rev = (
args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}' subprocess.check_output("git rev-parse HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_short = (
subprocess.check_output("git rev-parse --short HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
args.events = get_events(args) args.events = get_events(args)
if args.test_only: if args.test_only:
@ -233,18 +248,20 @@ if __name__ == '__main__':
mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True
from build import build from build import build
build(args) build(args)
if args.livereload: if args.livereload:
new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')] new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
new_args = sys.executable + ' ' + ' '.join(new_args) new_args = sys.executable + " " + " ".join(new_args)
server = livereload.Server() server = livereload.Server()
server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True)) server.watch(
server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True)) args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True)
server.serve(
root=args.output_dir,
host='0.0.0.0',
port=args.livereload
) )
server.watch(
args.website_dir + "**/*",
livereload.shell(new_args, cwd="tools", shell=True),
)
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
sys.exit(0) sys.exit(0)

View File

@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict
Entity = Tuple[str, str, str] Entity = Tuple[str, str, str]
# https://regex101.com/r/R6iogw/12 # https://regex101.com/r/R6iogw/12
cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$" cmake_option_regex: str = (
r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
)
ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/" ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/"
name_str: str = "<a name=\"{anchor}\"></a>[`{name}`](" + ch_master_url + "{path}#L{line})" name_str: str = '<a name="{anchor}"></a>[`{name}`](' + ch_master_url + "{path}#L{line})"
default_anchor_str: str = "[`{name}`](#{anchor})" default_anchor_str: str = "[`{name}`](#{anchor})"
comment_var_regex: str = r"\${(.+)}" comment_var_regex: str = r"\${(.+)}"
@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {}
def make_anchor(t: str) -> str: def make_anchor(t: str) -> str:
return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]) return "".join(
["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]
)
def process_comment(comment: str) -> str: def process_comment(comment: str) -> str:
return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE) return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE)
def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None: def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None:
(line, comment) = line_comment (line, comment) = line_comment
(name, description, default) = entity (name, description, default) = entity
@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
formatted_default: str = "`" + default + "`" formatted_default: str = "`" + default + "`"
formatted_name: str = name_str.format( formatted_name: str = name_str.format(
anchor=make_anchor(name), anchor=make_anchor(name), name=name, path=path, line=line
name=name, )
path=path,
line=line)
formatted_description: str = "".join(description.split("\n")) formatted_description: str = "".join(description.split("\n"))
formatted_comment: str = process_comment(comment) formatted_comment: str = process_comment(comment)
formatted_entity: str = "| {} | {} | {} | {} |".format( formatted_entity: str = "| {} | {} | {} | {} |".format(
formatted_name, formatted_default, formatted_description, formatted_comment) formatted_name, formatted_default, formatted_description, formatted_comment
)
entities[name] = path, formatted_entity entities[name] = path, formatted_entity
def process_file(root_path: str, file_path: str, file_name: str) -> None: def process_file(root_path: str, file_path: str, file_name: str) -> None:
with open(os.path.join(file_path, file_name), 'r') as cmake_file: with open(os.path.join(file_path, file_name), "r") as cmake_file:
contents: str = cmake_file.read() contents: str = cmake_file.read()
def get_line_and_comment(target: str) -> Tuple[int, str]: def get_line_and_comment(target: str) -> Tuple[int, str]:
@ -70,10 +76,10 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
comment: str = "" comment: str = ""
for n, line in enumerate(contents_list): for n, line in enumerate(contents_list):
if 'option' not in line.lower() or target not in line: if "option" not in line.lower() or target not in line:
continue continue
for maybe_comment_line in contents_list[n - 1::-1]: for maybe_comment_line in contents_list[n - 1 :: -1]:
if not re.match("\s*#\s*", maybe_comment_line): if not re.match("\s*#\s*", maybe_comment_line):
break break
@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
# line numbering starts with 1 # line numbering starts with 1
return n + 1, comment return n + 1, comment
matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE) matches: Optional[List[Entity]] = re.findall(
cmake_option_regex, contents, re.MULTILINE
)
file_rel_path_with_name: str = os.path.join(
file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name) file_path[len(root_path) :], file_name
if file_rel_path_with_name.startswith('/'): )
if file_rel_path_with_name.startswith("/"):
file_rel_path_with_name = file_rel_path_with_name[1:] file_rel_path_with_name = file_rel_path_with_name[1:]
if matches: if matches:
for entity in matches: for entity in matches:
build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0])) build_entity(
file_rel_path_with_name, entity, get_line_and_comment(entity[0])
)
def process_folder(root_path: str, name: str) -> None: def process_folder(root_path: str, name: str) -> None:
for root, _, files in os.walk(os.path.join(root_path, name)): for root, _, files in os.walk(os.path.join(root_path, name)):
@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None:
if f == "CMakeLists.txt" or ".cmake" in f: if f == "CMakeLists.txt" or ".cmake" in f:
process_file(root_path, root, f) process_file(root_path, root, f)
def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), '..', '..')
output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md") def generate_cmake_flags_files() -> None:
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md") root_path: str = os.path.join(os.path.dirname(__file__), "..", "..")
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
output_file_name: str = os.path.join(
root_path, "docs/en/development/cmake-in-clickhouse.md"
)
header_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_header.md"
)
footer_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_footer.md"
)
process_file(root_path, root_path, "CMakeLists.txt") process_file(root_path, root_path, "CMakeLists.txt")
process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt") process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt")
@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None:
f.write(entities[k][1] + "\n") f.write(entities[k][1] + "\n")
ignored_keys.append(k) ignored_keys.append(k)
f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" + f.write(
table_header) "\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n"
+ table_header
)
for k in sorted_keys: for k in sorted_keys:
if k.startswith("ENABLE_") and ".cmake" in entities[k][0]: if k.startswith("ENABLE_") and ".cmake" in entities[k][0]:
@ -143,9 +164,11 @@ def generate_cmake_flags_files() -> None:
with open(footer_file_name, "r") as footer: with open(footer_file_name, "r") as footer:
f.write(footer.read()) f.write(footer.read())
other_languages = ["docs/ja/development/cmake-in-clickhouse.md", other_languages = [
"docs/zh/development/cmake-in-clickhouse.md", "docs/ja/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md"] "docs/zh/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md",
]
for lang in other_languages: for lang in other_languages:
other_file_name = os.path.join(root_path, lang) other_file_name = os.path.join(root_path, lang)
@ -153,5 +176,6 @@ def generate_cmake_flags_files() -> None:
os.unlink(other_file_name) os.unlink(other_file_name)
os.symlink(output_file_name, other_file_name) os.symlink(output_file_name, other_file_name)
if __name__ == '__main__':
if __name__ == "__main__":
generate_cmake_flags_files() generate_cmake_flags_files()

View File

@ -8,7 +8,7 @@ import contextlib
from git import cmd from git import cmd
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
SCRIPT_DESCRIPTION = ''' SCRIPT_DESCRIPTION = """
usage: ./easy_diff.py language/document path usage: ./easy_diff.py language/document path
Show the difference between a language document and an English document. Show the difference between a language document and an English document.
@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = '''
OPTIONS: OPTIONS:
-h, --help show this help message and exit -h, --help show this help message and exit
--no-pager use stdout as difference result output --no-pager use stdout as difference result output
''' """
SCRIPT_PATH = os.path.abspath(__file__) SCRIPT_PATH = os.path.abspath(__file__)
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..') CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..")
SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME) SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME)
SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False) SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False)
SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None) SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None)
SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False) SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False)
SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False) SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False)
def execute(commands): def execute(commands):
@ -70,19 +70,41 @@ def execute(commands):
def get_hash(file_name): def get_hash(file_name):
return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name]) return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name])
def diff_file(reference_file, working_file, out): def diff_file(reference_file, working_file, out):
if not os.path.exists(reference_file): if not os.path.exists(reference_file):
raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.') raise RuntimeError(
"reference file [" + os.path.abspath(reference_file) + "] is not exists."
)
if os.path.islink(working_file): if os.path.islink(working_file):
out.writelines(["Need translate document:" + os.path.abspath(reference_file)]) out.writelines(["Need translate document:" + os.path.abspath(reference_file)])
elif not os.path.exists(working_file): elif not os.path.exists(working_file):
out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)]) out.writelines(
[
"Need link document "
+ os.path.abspath(reference_file)
+ " to "
+ os.path.abspath(working_file)
]
)
elif get_hash(working_file) != get_hash(reference_file): elif get_hash(working_file) != get_hash(reference_file):
out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))]) out.writelines(
[
(
execute(
[
"git",
"diff",
get_hash(working_file).strip('"'),
reference_file,
]
).encode("utf-8")
)
]
)
return 0 return 0
@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out):
for list_item in os.listdir(reference_directory): for list_item in os.listdir(reference_directory):
working_item = os.path.join(working_directory, list_item) working_item = os.path.join(working_directory, list_item)
reference_item = os.path.join(reference_directory, list_item) reference_item = os.path.join(reference_directory, list_item)
if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0: if (
diff_file(reference_item, working_item, out)
if os.path.isfile(reference_item)
else diff_directory(reference_item, working_item, out) != 0
):
return 1 return 1
return 0 return 0
def find_language_doc(custom_document, other_language='en', children=[]): def find_language_doc(custom_document, other_language="en", children=[]):
if len(custom_document) == 0: if len(custom_document) == 0:
raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.") raise RuntimeError(
"The "
+ os.path.join(custom_document, *children)
+ " is not in docs directory."
)
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document): if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document):
return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:]) return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:])
children.insert(0, os.path.split(custom_document)[1]) children.insert(0, os.path.split(custom_document)[1])
return find_language_doc(os.path.split(custom_document)[0], other_language, children) return find_language_doc(
os.path.split(custom_document)[0], other_language, children
)
class ToPager: class ToPager:
@ -119,7 +151,7 @@ class ToPager:
def close(self): def close(self):
self.temp_named_file.flush() self.temp_named_file.flush()
git_pager = execute(['git', 'var', 'GIT_PAGER']) git_pager = execute(["git", "var", "GIT_PAGER"])
subprocess.check_call([git_pager, self.temp_named_file.name]) subprocess.check_call([git_pager, self.temp_named_file.name])
self.temp_named_file.close() self.temp_named_file.close()
@ -135,12 +167,20 @@ class ToStdOut:
self.system_stdout_stream = system_stdout_stream self.system_stdout_stream = system_stdout_stream
if __name__ == '__main__': if __name__ == "__main__":
arguments = SCRIPT_COMMAND_PARSER.parse_args() arguments = SCRIPT_COMMAND_PARSER.parse_args()
if arguments.help or not arguments.path: if arguments.help or not arguments.path:
sys.stdout.write(SCRIPT_DESCRIPTION) sys.stdout.write(SCRIPT_DESCRIPTION)
sys.exit(0) sys.exit(0)
working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path) working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path)
with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer: with contextlib.closing(
exit(diff_directory(find_language_doc(working_language), working_language, writer)) ToStdOut(sys.stdout)
if arguments.no_pager
else ToPager(NamedTemporaryFile("r+"))
) as writer:
exit(
diff_directory(
find_language_doc(working_language), working_language, writer
)
)

View File

@ -16,27 +16,26 @@ import util
def get_events(args): def get_events(args):
events = [] events = []
skip = True skip = True
with open(os.path.join(args.docs_dir, '..', 'README.md')) as f: with open(os.path.join(args.docs_dir, "..", "README.md")) as f:
for line in f: for line in f:
if skip: if skip:
if 'Upcoming Events' in line: if "Upcoming Events" in line:
skip = False skip = False
else: else:
if not line: if not line:
continue continue
line = line.strip().split('](') line = line.strip().split("](")
if len(line) == 2: if len(line) == 2:
tail = line[1].split(') ') tail = line[1].split(") ")
events.append({ events.append(
'signup_link': tail[0], {
'event_name': line[0].replace('* [', ''), "signup_link": tail[0],
'event_date': tail[1].replace('on ', '').replace('.', '') "event_name": line[0].replace("* [", ""),
}) "event_date": tail[1].replace("on ", "").replace(".", ""),
}
)
return events return events
if __name__ == '__main__': if __name__ == "__main__":
logging.basicConfig( logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
level=logging.DEBUG,
stream=sys.stderr
)

View File

@ -16,74 +16,79 @@ import slugify as slugify_impl
def slugify(value, separator): def slugify(value, separator):
return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) return slugify_impl.slugify(
value, separator=separator, word_boundary=True, save_order=True
)
MARKDOWN_EXTENSIONS = [ MARKDOWN_EXTENSIONS = [
'mdx_clickhouse', "mdx_clickhouse",
'admonition', "admonition",
'attr_list', "attr_list",
'def_list', "def_list",
'codehilite', "codehilite",
'nl2br', "nl2br",
'sane_lists', "sane_lists",
'pymdownx.details', "pymdownx.details",
'pymdownx.magiclink', "pymdownx.magiclink",
'pymdownx.superfences', "pymdownx.superfences",
'extra', "extra",
{ {"toc": {"permalink": True, "slugify": slugify}},
'toc': {
'permalink': True,
'slugify': slugify
}
}
] ]
class ClickHouseLinkMixin(object): class ClickHouseLinkMixin(object):
def handleMatch(self, m, data): def handleMatch(self, m, data):
single_page = (os.environ.get('SINGLE_PAGE') == '1') single_page = os.environ.get("SINGLE_PAGE") == "1"
try: try:
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data) el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
except IndexError: except IndexError:
return return
if el is not None: if el is not None:
href = el.get('href') or '' href = el.get("href") or ""
is_external = href.startswith('http:') or href.startswith('https:') is_external = href.startswith("http:") or href.startswith("https:")
if is_external: if is_external:
if not href.startswith('https://clickhouse.com'): if not href.startswith("https://clickhouse.com"):
el.set('rel', 'external nofollow noreferrer') el.set("rel", "external nofollow noreferrer")
elif single_page: elif single_page:
if '#' in href: if "#" in href:
el.set('href', '#' + href.split('#', 1)[1]) el.set("href", "#" + href.split("#", 1)[1])
else: else:
el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/')) el.set(
"href", "#" + href.replace("/index.md", "/").replace(".md", "/")
)
return el, start, end return el, start, end
class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor): class ClickHouseAutolinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
):
pass pass
class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor): class ClickHouseLinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
):
pass pass
class ClickHousePreprocessor(markdown.util.Processor): class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines): def run(self, lines):
for line in lines: for line in lines:
if '<!--hide-->' not in line: if "<!--hide-->" not in line:
yield line yield line
class ClickHouseMarkdown(markdown.extensions.Extension): class ClickHouseMarkdown(markdown.extensions.Extension):
def extendMarkdown(self, md, md_globals): def extendMarkdown(self, md, md_globals):
md.preprocessors['clickhouse'] = ClickHousePreprocessor() md.preprocessors["clickhouse"] = ClickHousePreprocessor()
md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md) md.inlinePatterns["link"] = ClickHouseLinkPattern(
md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md) markdown.inlinepatterns.LINK_RE, md
)
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
markdown.inlinepatterns.AUTOLINK_RE, md
)
def makeExtension(**kwargs): def makeExtension(**kwargs):
@ -92,10 +97,8 @@ def makeExtension(**kwargs):
def get_translations(dirname, lang): def get_translations(dirname, lang):
import babel.support import babel.support
return babel.support.Translations.load(
dirname=dirname, return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
locales=[lang, 'en']
)
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
def on_config(self, config): def on_config(self, config):
super(PatchedMacrosPlugin, self).on_config(config) super(PatchedMacrosPlugin, self).on_config(config)
self.env.comment_start_string = '{##' self.env.comment_start_string = "{##"
self.env.comment_end_string = '##}' self.env.comment_end_string = "##}"
self.env.loader = jinja2.FileSystemLoader([ self.env.loader = jinja2.FileSystemLoader(
os.path.join(config.data['site_dir']), [
os.path.join(config.data['extra']['includes_dir']) os.path.join(config.data["site_dir"]),
]) os.path.join(config.data["extra"]["includes_dir"]),
]
)
def on_env(self, env, config, files): def on_env(self, env, config, files):
import util import util
env.add_extension('jinja2.ext.i18n')
dirname = os.path.join(config.data['theme'].dirs[0], 'locale') env.add_extension("jinja2.ext.i18n")
lang = config.data['theme']['language'] dirname = os.path.join(config.data["theme"].dirs[0], "locale")
env.install_gettext_translations( lang = config.data["theme"]["language"]
get_translations(dirname, lang), env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
newstyle=True
)
util.init_jinja2_filters(env) util.init_jinja2_filters(env)
return env return env
@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
return markdown return markdown
def on_page_markdown(self, markdown, page, config, files): def on_page_markdown(self, markdown, page, config, files):
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files) markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
markdown, page, config, files
)
if os.path.islink(page.file.abs_src_path): if os.path.islink(page.file.abs_src_path):
lang = config.data['theme']['language'] lang = config.data["theme"]["language"]
page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1) page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'): if config.data["extra"].get("version_prefix") or config.data["extra"].get(
"single_page"
):
return markdown return markdown
if self.skip_git_log: if self.skip_git_log:
return markdown return markdown

View File

@ -10,57 +10,59 @@ import util
def find_first_header(content): def find_first_header(content):
for line in content.split('\n'): for line in content.split("\n"):
if line.startswith('#'): if line.startswith("#"):
no_hash = line.lstrip('#') no_hash = line.lstrip("#")
return no_hash.split('{', 1)[0].strip() return no_hash.split("{", 1)[0].strip()
def build_nav_entry(root, args): def build_nav_entry(root, args):
if root.endswith('images'): if root.endswith("images"):
return None, None, None return None, None, None
result_items = [] result_items = []
index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md')) index_meta, index_content = util.read_md_file(os.path.join(root, "index.md"))
current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title')) current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title"))
current_title = current_title or index_meta.get('title', find_first_header(index_content)) current_title = current_title or index_meta.get(
"title", find_first_header(index_content)
)
for filename in os.listdir(root): for filename in os.listdir(root):
path = os.path.join(root, filename) path = os.path.join(root, filename)
if os.path.isdir(path): if os.path.isdir(path):
prio, title, payload = build_nav_entry(path, args) prio, title, payload = build_nav_entry(path, args)
if title and payload: if title and payload:
result_items.append((prio, title, payload)) result_items.append((prio, title, payload))
elif filename.endswith('.md'): elif filename.endswith(".md"):
path = os.path.join(root, filename) path = os.path.join(root, filename)
meta = '' meta = ""
content = '' content = ""
try: try:
meta, content = util.read_md_file(path) meta, content = util.read_md_file(path)
except: except:
print('Error in file: {}'.format(path)) print("Error in file: {}".format(path))
raise raise
path = path.split('/', 2)[-1] path = path.split("/", 2)[-1]
title = meta.get('toc_title', find_first_header(content)) title = meta.get("toc_title", find_first_header(content))
if title: if title:
title = title.strip().rstrip('.') title = title.strip().rstrip(".")
else: else:
title = meta.get('toc_folder_title', 'hidden') title = meta.get("toc_folder_title", "hidden")
prio = meta.get('toc_priority', 9999) prio = meta.get("toc_priority", 9999)
logging.debug(f'Nav entry: {prio}, {title}, {path}') logging.debug(f"Nav entry: {prio}, {title}, {path}")
if meta.get('toc_hidden') or not content.strip(): if meta.get("toc_hidden") or not content.strip():
title = 'hidden' title = "hidden"
if title == 'hidden': if title == "hidden":
title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest() title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest()
if args.nav_limit and len(result_items) >= args.nav_limit: if args.nav_limit and len(result_items) >= args.nav_limit:
break break
result_items.append((prio, title, path)) result_items.append((prio, title, path))
result_items = sorted(result_items, key=lambda x: (x[0], x[1])) result_items = sorted(result_items, key=lambda x: (x[0], x[1]))
result = collections.OrderedDict([(item[1], item[2]) for item in result_items]) result = collections.OrderedDict([(item[1], item[2]) for item in result_items])
if index_meta.get('toc_hidden_folder'): if index_meta.get("toc_hidden_folder"):
current_title += '|hidden-folder' current_title += "|hidden-folder"
return index_meta.get('toc_priority', 10000), current_title, result return index_meta.get("toc_priority", 10000), current_title, result
def build_docs_nav(lang, args): def build_docs_nav(lang, args):
@ -70,7 +72,7 @@ def build_docs_nav(lang, args):
index_key = None index_key = None
for key, value in list(nav.items()): for key, value in list(nav.items()):
if key and value: if key and value:
if value == 'index.md': if value == "index.md":
index_key = key index_key = key
continue continue
result.append({key: value}) result.append({key: value})
@ -78,7 +80,7 @@ def build_docs_nav(lang, args):
break break
if index_key: if index_key:
key = list(result[0].keys())[0] key = list(result[0].keys())[0]
result[0][key][index_key] = 'index.md' result[0][key][index_key] = "index.md"
result[0][key].move_to_end(index_key, last=False) result[0][key].move_to_end(index_key, last=False)
return result return result
@ -86,7 +88,7 @@ def build_docs_nav(lang, args):
def build_blog_nav(lang, args): def build_blog_nav(lang, args):
blog_dir = os.path.join(args.blog_dir, lang) blog_dir = os.path.join(args.blog_dir, lang)
years = sorted(os.listdir(blog_dir), reverse=True) years = sorted(os.listdir(blog_dir), reverse=True)
result_nav = [{'hidden': 'index.md'}] result_nav = [{"hidden": "index.md"}]
post_meta = collections.OrderedDict() post_meta = collections.OrderedDict()
for year in years: for year in years:
year_dir = os.path.join(blog_dir, year) year_dir = os.path.join(blog_dir, year)
@ -97,38 +99,53 @@ def build_blog_nav(lang, args):
post_meta_items = [] post_meta_items = []
for post in os.listdir(year_dir): for post in os.listdir(year_dir):
post_path = os.path.join(year_dir, post) post_path = os.path.join(year_dir, post)
if not post.endswith('.md'): if not post.endswith(".md"):
raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}') raise RuntimeError(
f"Unexpected non-md file in posts folder: {post_path}"
)
meta, _ = util.read_md_file(post_path) meta, _ = util.read_md_file(post_path)
post_date = meta['date'] post_date = meta["date"]
post_title = meta['title'] post_title = meta["title"]
if datetime.date.fromisoformat(post_date) > datetime.date.today(): if datetime.date.fromisoformat(post_date) > datetime.date.today():
continue continue
posts.append( posts.append(
(post_date, post_title, os.path.join(year, post),) (
post_date,
post_title,
os.path.join(year, post),
)
) )
if post_title in post_meta: if post_title in post_meta:
raise RuntimeError(f'Duplicate post title: {post_title}') raise RuntimeError(f"Duplicate post title: {post_title}")
if not post_date.startswith(f'{year}-'): if not post_date.startswith(f"{year}-"):
raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}') raise RuntimeError(
post_url_part = post.replace('.md', '') f"Post date {post_date} doesn't match the folder year {year}: {post_title}"
post_meta_items.append((post_date, { )
'date': post_date, post_url_part = post.replace(".md", "")
'title': post_title, post_meta_items.append(
'image': meta.get('image'), (
'url': f'/blog/{lang}/{year}/{post_url_part}/' post_date,
},)) {
"date": post_date,
"title": post_title,
"image": meta.get("image"),
"url": f"/blog/{lang}/{year}/{post_url_part}/",
},
)
)
for _, title, path in sorted(posts, reverse=True): for _, title, path in sorted(posts, reverse=True):
result_nav[-1][year][title] = path result_nav[-1][year][title] = path
for _, post_meta_item in sorted(post_meta_items, for _, post_meta_item in sorted(
reverse=True, post_meta_items, reverse=True, key=lambda item: item[0]
key=lambda item: item[0]): ):
post_meta[post_meta_item['title']] = post_meta_item post_meta[post_meta_item["title"]] = post_meta_item
return result_nav, post_meta return result_nav, post_meta
def _custom_get_navigation(files, config): def _custom_get_navigation(files, config):
nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages()) nav_config = config["nav"] or mkdocs.structure.nav.nest_paths(
f.src_path for f in files.documentation_pages()
)
items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config) items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config)
if not isinstance(items, list): if not isinstance(items, list):
items = [items] items = [items]
@ -138,19 +155,25 @@ def _custom_get_navigation(files, config):
mkdocs.structure.nav._add_previous_and_next_links(pages) mkdocs.structure.nav._add_previous_and_next_links(pages)
mkdocs.structure.nav._add_parent_links(items) mkdocs.structure.nav._add_parent_links(items)
missing_from_config = [file for file in files.documentation_pages() if file.page is None] missing_from_config = [
file for file in files.documentation_pages() if file.page is None
]
if missing_from_config: if missing_from_config:
files._files = [file for file in files._files if file not in missing_from_config] files._files = [
file for file in files._files if file not in missing_from_config
]
links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link) links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link)
for link in links: for link in links:
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url) scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(
link.url
)
if scheme or netloc: if scheme or netloc:
mkdocs.structure.nav.log.debug( mkdocs.structure.nav.log.debug(
"An external link to '{}' is included in " "An external link to '{}' is included in "
"the 'nav' configuration.".format(link.url) "the 'nav' configuration.".format(link.url)
) )
elif link.url.startswith('/'): elif link.url.startswith("/"):
mkdocs.structure.nav.log.debug( mkdocs.structure.nav.log.debug(
"An absolute path to '{}' is included in the 'nav' configuration, " "An absolute path to '{}' is included in the 'nav' configuration, "
"which presumably points to an external resource.".format(link.url) "which presumably points to an external resource.".format(link.url)

View File

@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url):
os.makedirs(out_dir) os.makedirs(out_dir)
except OSError: except OSError:
pass pass
with open(out_path, 'w') as f: with open(out_path, "w") as f:
f.write(f'''<!--[if IE 6]> Redirect: {to_url} <![endif]--> f.write(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML> <!DOCTYPE HTML>
<html lang="en-US"> <html lang="en-US">
<head> <head>
@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url):
<body> <body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>. If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body> </body>
</html>''') </html>"""
)
def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path): def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path):
out_path = os.path.join( out_path = os.path.join(
output_dir, lang, output_dir,
from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') lang,
from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"),
) )
target_path = to_path.replace('/index.md', '/').replace('.md', '/') target_path = to_path.replace("/index.md", "/").replace(".md", "/")
if target_path[0:7] != 'http://' and target_path[0:8] != 'https://': if target_path[0:7] != "http://" and target_path[0:8] != "https://":
to_url = f'/{base_prefix}/{lang}/{target_path}' to_url = f"/{base_prefix}/{lang}/{target_path}"
else: else:
to_url = target_path to_url = target_path
@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)
def build_docs_redirects(args): def build_docs_redirects(args):
with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f: with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f:
for line in f: for line in f:
for lang in args.lang.split(','): for lang in args.lang.split(","):
from_path, to_path = line.split(' ', 1) from_path, to_path = line.split(" ", 1)
build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path) build_redirect_html(
args, "docs", lang, args.docs_output_dir, from_path, to_path
)
def build_blog_redirects(args): def build_blog_redirects(args):
for lang in args.blog_lang.split(','): for lang in args.blog_lang.split(","):
redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt') redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt")
if os.path.exists(redirects_path): if os.path.exists(redirects_path):
with open(redirects_path, 'r') as f: with open(redirects_path, "r") as f:
for line in f: for line in f:
from_path, to_path = line.split(' ', 1) from_path, to_path = line.split(" ", 1)
build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path) build_redirect_html(
args, "blog", lang, args.blog_output_dir, from_path, to_path
)
def build_static_redirects(args): def build_static_redirects(args):
for static_redirect in [ for static_redirect in [
('benchmark.html', '/benchmark/dbms/'), ("benchmark.html", "/benchmark/dbms/"),
('benchmark_hardware.html', '/benchmark/hardware/'), ("benchmark_hardware.html", "/benchmark/hardware/"),
('tutorial.html', '/docs/en/getting_started/tutorial/',), (
('reference_en.html', '/docs/en/single/', ), "tutorial.html",
('reference_ru.html', '/docs/ru/single/',), "/docs/en/getting_started/tutorial/",
('docs/index.html', '/docs/en/',), ),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]: ]:
write_redirect_html( write_redirect_html(
os.path.join(args.output_dir, static_redirect[0]), os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
static_redirect[1]
) )

View File

@ -10,7 +10,7 @@ cssmin==0.2.0
future==0.18.2 future==0.18.2
htmlmin==0.1.12 htmlmin==0.1.12
idna==2.10 idna==2.10
Jinja2>=3.0.3 Jinja2==3.0.3
jinja2-highlight==0.6.1 jinja2-highlight==0.6.1
jsmin==3.0.0 jsmin==3.0.0
livereload==2.6.3 livereload==2.6.3

View File

@ -12,7 +12,8 @@ import test
import util import util
import website import website
TEMPORARY_FILE_NAME = 'single.md' TEMPORARY_FILE_NAME = "single.md"
def recursive_values(item): def recursive_values(item):
if isinstance(item, dict): if isinstance(item, dict):
@ -25,11 +26,14 @@ def recursive_values(item):
yield item yield item
anchor_not_allowed_chars = re.compile(r'[^\w\-]') anchor_not_allowed_chars = re.compile(r"[^\w\-]")
def generate_anchor_from_path(path):
return re.sub(anchor_not_allowed_chars, '-', path)
absolute_link = re.compile(r'^https?://')
def generate_anchor_from_path(path):
return re.sub(anchor_not_allowed_chars, "-", path)
absolute_link = re.compile(r"^https?://")
def replace_link(match, path): def replace_link(match, path):
@ -40,46 +44,55 @@ def replace_link(match, path):
if re.search(absolute_link, link): if re.search(absolute_link, link):
return match.group(0) return match.group(0)
if link.endswith('/'): if link.endswith("/"):
link = link[0:-1] + '.md' link = link[0:-1] + ".md"
return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link)))) return "{}(#{})".format(
title,
generate_anchor_from_path(
os.path.normpath(os.path.join(os.path.dirname(path), link))
),
)
# Concatenates Markdown files to a single file. # Concatenates Markdown files to a single file.
def concatenate(lang, docs_path, single_page_file, nav): def concatenate(lang, docs_path, single_page_file, nav):
lang_path = os.path.join(docs_path, lang) lang_path = os.path.join(docs_path, lang)
proj_config = f'{docs_path}/toc_{lang}.yml' proj_config = f"{docs_path}/toc_{lang}.yml"
if os.path.exists(proj_config): if os.path.exists(proj_config):
with open(proj_config) as cfg_file: with open(proj_config) as cfg_file:
nav = yaml.full_load(cfg_file.read())['nav'] nav = yaml.full_load(cfg_file.read())["nav"]
files_to_concatenate = list(recursive_values(nav)) files_to_concatenate = list(recursive_values(nav))
files_count = len(files_to_concatenate) files_count = len(files_to_concatenate)
logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.') logging.info(
logging.debug('Concatenating: ' + ', '.join(files_to_concatenate)) f"{files_count} files will be concatenated into single md-file for {lang}."
assert files_count > 0, f'Empty single-page for {lang}' )
logging.debug("Concatenating: " + ", ".join(files_to_concatenate))
assert files_count > 0, f"Empty single-page for {lang}"
link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)') link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")
for path in files_to_concatenate: for path in files_to_concatenate:
try: try:
with open(os.path.join(lang_path, path)) as f: with open(os.path.join(lang_path, path)) as f:
# Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file. # Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file.
single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)) single_page_file.write(
'\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)
)
in_metadata = False in_metadata = False
for line in f: for line in f:
# Skip YAML metadata. # Skip YAML metadata.
if line == '---\n': if line == "---\n":
in_metadata = not in_metadata in_metadata = not in_metadata
continue continue
if not in_metadata: if not in_metadata:
# Increase the level of headers. # Increase the level of headers.
if line.startswith('#'): if line.startswith("#"):
line = '#' + line line = "#" + line
# Replace links within the docs. # Replace links within the docs.
@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav):
line = re.sub( line = re.sub(
link_regexp, link_regexp,
lambda match: replace_link(match, path), lambda match: replace_link(match, path),
line) line,
)
# If failed to replace the relative link, print to log # If failed to replace the relative link, print to log
# But with some exceptions: # But with some exceptions:
# - "../src/" -- for cmake-in-clickhouse.md (link to sources) # - "../src/" -- for cmake-in-clickhouse.md (link to sources)
# - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo" # - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
if '../' in line and (not '../usr/share' in line) and (not '../src/' in line): if (
logging.info('Failed to resolve relative link:') "../" in line
and (not "../usr/share" in line)
and (not "../src/" in line)
):
logging.info("Failed to resolve relative link:")
logging.info(path) logging.info(path)
logging.info(line) logging.info(line)
@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav):
single_page_file.flush() single_page_file.flush()
def get_temporary_file_name(lang, args): def get_temporary_file_name(lang, args):
return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME) return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME)
def remove_temporary_files(lang, args): def remove_temporary_files(lang, args):
single_md_path = get_temporary_file_name(lang, args) single_md_path = get_temporary_file_name(lang, args)
if os.path.exists(single_md_path): if os.path.exists(single_md_path):
@ -115,14 +135,14 @@ def remove_temporary_files(lang, args):
def build_single_page_version(lang, args, nav, cfg): def build_single_page_version(lang, args, nav, cfg):
logging.info(f'Building single page version for {lang}') logging.info(f"Building single page version for {lang}")
os.environ['SINGLE_PAGE'] = '1' os.environ["SINGLE_PAGE"] = "1"
extra = cfg.data['extra'] extra = cfg.data["extra"]
extra['single_page'] = True extra["single_page"] = True
extra['is_amp'] = False extra["is_amp"] = False
single_md_path = get_temporary_file_name(lang, args) single_md_path = get_temporary_file_name(lang, args)
with open(single_md_path, 'w') as single_md: with open(single_md_path, "w") as single_md:
concatenate(lang, args.docs_dir, single_md, nav) concatenate(lang, args.docs_dir, single_md, nav)
with util.temp_dir() as site_temp: with util.temp_dir() as site_temp:
@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg):
shutil.copytree(docs_src_lang, docs_temp_lang) shutil.copytree(docs_src_lang, docs_temp_lang)
for root, _, filenames in os.walk(docs_temp_lang): for root, _, filenames in os.walk(docs_temp_lang):
for filename in filenames: for filename in filenames:
if filename != 'single.md' and filename.endswith('.md'): if filename != "single.md" and filename.endswith(".md"):
os.unlink(os.path.join(root, filename)) os.unlink(os.path.join(root, filename))
cfg.load_dict({ cfg.load_dict(
'docs_dir': docs_temp_lang, {
'site_dir': site_temp, "docs_dir": docs_temp_lang,
'extra': extra, "site_dir": site_temp,
'nav': [ "extra": extra,
{cfg.data.get('site_name'): 'single.md'} "nav": [{cfg.data.get("site_name"): "single.md"}],
] }
}) )
if not args.test_only: if not args.test_only:
mkdocs.commands.build.build(cfg) mkdocs.commands.build.build(cfg)
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single') single_page_output_path = os.path.join(
args.docs_dir, args.docs_output_dir, lang, "single"
)
if os.path.exists(single_page_output_path): if os.path.exists(single_page_output_path):
shutil.rmtree(single_page_output_path) shutil.rmtree(single_page_output_path)
shutil.copytree( shutil.copytree(
os.path.join(site_temp, 'single'), os.path.join(site_temp, "single"), single_page_output_path
single_page_output_path
) )
single_page_index_html = os.path.join(single_page_output_path, 'index.html') single_page_index_html = os.path.join(
single_page_content_js = os.path.join(single_page_output_path, 'content.js') single_page_output_path, "index.html"
)
single_page_content_js = os.path.join(
single_page_output_path, "content.js"
)
with open(single_page_index_html, 'r') as f: with open(single_page_index_html, "r") as f:
sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->') sp_prefix, sp_js, sp_suffix = f.read().split("<!-- BREAK -->")
with open(single_page_index_html, 'w') as f: with open(single_page_index_html, "w") as f:
f.write(sp_prefix) f.write(sp_prefix)
f.write(sp_suffix) f.write(sp_suffix)
with open(single_page_content_js, 'w') as f: with open(single_page_content_js, "w") as f:
if args.minify: if args.minify:
import jsmin import jsmin
sp_js = jsmin.jsmin(sp_js) sp_js = jsmin.jsmin(sp_js)
f.write(sp_js) f.write(sp_js)
logging.info(f'Re-building single page for {lang} pdf/test') logging.info(f"Re-building single page for {lang} pdf/test")
with util.temp_dir() as test_dir: with util.temp_dir() as test_dir:
extra['single_page'] = False extra["single_page"] = False
cfg.load_dict({ cfg.load_dict(
'docs_dir': docs_temp_lang, {
'site_dir': test_dir, "docs_dir": docs_temp_lang,
'extra': extra, "site_dir": test_dir,
'nav': [ "extra": extra,
{cfg.data.get('site_name'): 'single.md'} "nav": [{cfg.data.get("site_name"): "single.md"}],
] }
}) )
mkdocs.commands.build.build(cfg) mkdocs.commands.build.build(cfg)
css_in = ' '.join(website.get_css_in(args)) css_in = " ".join(website.get_css_in(args))
js_in = ' '.join(website.get_js_in(args)) js_in = " ".join(website.get_js_in(args))
subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True) subprocess.check_call(
subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True) f"cat {css_in} > {test_dir}/css/base.css", shell=True
)
subprocess.check_call(
f"cat {js_in} > {test_dir}/js/base.js", shell=True
)
if args.save_raw_single_page: if args.save_raw_single_page:
shutil.copytree(test_dir, args.save_raw_single_page) shutil.copytree(test_dir, args.save_raw_single_page)
logging.info(f'Running tests for {lang}') logging.info(f"Running tests for {lang}")
test.test_single_page( test.test_single_page(
os.path.join(test_dir, 'single', 'index.html'), lang) os.path.join(test_dir, "single", "index.html"), lang
)
logging.info(f'Finished building single page version for {lang}') logging.info(f"Finished building single page version for {lang}")
remove_temporary_files(lang, args) remove_temporary_files(lang, args)

View File

@ -8,14 +8,11 @@ import subprocess
def test_single_page(input_path, lang): def test_single_page(input_path, lang):
if not (lang == 'en'): if not (lang == "en"):
return return
with open(input_path) as f: with open(input_path) as f:
soup = bs4.BeautifulSoup( soup = bs4.BeautifulSoup(f, features="html.parser")
f,
features='html.parser'
)
anchor_points = set() anchor_points = set()
@ -23,30 +20,27 @@ def test_single_page(input_path, lang):
links_to_nowhere = 0 links_to_nowhere = 0
for tag in soup.find_all(): for tag in soup.find_all():
for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]: for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]:
if anchor_point: if anchor_point:
anchor_points.add(anchor_point) anchor_points.add(anchor_point)
for tag in soup.find_all(): for tag in soup.find_all():
href = tag.attrs.get('href') href = tag.attrs.get("href")
if href and href.startswith('#') and href != '#': if href and href.startswith("#") and href != "#":
if href[1:] not in anchor_points: if href[1:] not in anchor_points:
links_to_nowhere += 1 links_to_nowhere += 1
logging.info("Tag %s", tag) logging.info("Tag %s", tag)
logging.info('Link to nowhere: %s' % href) logging.info("Link to nowhere: %s" % href)
if links_to_nowhere: if links_to_nowhere:
logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}') logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}")
sys.exit(1) sys.exit(1)
if len(anchor_points) <= 10: if len(anchor_points) <= 10:
logging.error('Html parsing is probably broken') logging.error("Html parsing is probably broken")
sys.exit(1) sys.exit(1)
if __name__ == '__main__': if __name__ == "__main__":
logging.basicConfig( logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
level=logging.DEBUG,
stream=sys.stderr
)
test_single_page(sys.argv[1], sys.argv[2]) test_single_page(sys.argv[1], sys.argv[2])

View File

@ -15,7 +15,7 @@ import yaml
@contextlib.contextmanager @contextlib.contextmanager
def temp_dir(): def temp_dir():
path = tempfile.mkdtemp(dir=os.environ.get('TEMP')) path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
try: try:
yield path yield path
finally: finally:
@ -34,7 +34,7 @@ def cd(new_cwd):
def get_free_port(): def get_free_port():
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(('', 0)) s.bind(("", 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
return s.getsockname()[1] return s.getsockname()[1]
@ -61,12 +61,12 @@ def read_md_file(path):
meta_text = [] meta_text = []
content = [] content = []
if os.path.exists(path): if os.path.exists(path):
with open(path, 'r') as f: with open(path, "r") as f:
for line in f: for line in f:
if line.startswith('---'): if line.startswith("---"):
if in_meta: if in_meta:
in_meta = False in_meta = False
meta = yaml.full_load(''.join(meta_text)) meta = yaml.full_load("".join(meta_text))
else: else:
in_meta = True in_meta = True
else: else:
@ -74,7 +74,7 @@ def read_md_file(path):
meta_text.append(line) meta_text.append(line)
else: else:
content.append(line) content.append(line)
return meta, ''.join(content) return meta, "".join(content)
def write_md_file(path, meta, content): def write_md_file(path, meta, content):
@ -82,13 +82,13 @@ def write_md_file(path, meta, content):
if not os.path.exists(dirname): if not os.path.exists(dirname):
os.makedirs(dirname) os.makedirs(dirname)
with open(path, 'w') as f: with open(path, "w") as f:
if meta: if meta:
print('---', file=f) print("---", file=f)
yaml.dump(meta, f) yaml.dump(meta, f)
print('---', file=f) print("---", file=f)
if not content.startswith('\n'): if not content.startswith("\n"):
print('', file=f) print("", file=f)
f.write(content) f.write(content)
@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data):
value.append((node_key, node_value)) value.append((node_key, node_value))
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value) return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
yaml.add_representer(collections.OrderedDict, represent_ordereddict) yaml.add_representer(collections.OrderedDict, represent_ordereddict)
@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict)
def init_jinja2_filters(env): def init_jinja2_filters(env):
import amp import amp
import website import website
chunk_size = 10240 chunk_size = 10240
env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)] env.filters["chunks"] = lambda line: [
env.filters['html_to_amp'] = amp.html_to_amp line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
env.filters['adjust_markdown_html'] = website.adjust_markdown_html ]
env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT') env.filters["html_to_amp"] = amp.html_to_amp
env.filters["adjust_markdown_html"] = website.adjust_markdown_html
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
d, "%Y-%m-%d"
).strftime("%a, %d %b %Y %H:%M:%S GMT")
def init_jinja2_env(args):
    """Create the Jinja2 environment used to render the website templates.

    Templates are looked up in ``args.website_dir`` and in the ``_includes``
    directory under ``args.docs_dir``. The i18n and highlight extensions are
    enabled, English translations are loaded from ``<website_dir>/locale``,
    and the project's custom filters are registered before the environment is
    returned.

    :param args: object with ``website_dir`` and ``docs_dir`` attributes.
    :return: the configured ``jinja2.Environment``.
    """
    import mdx_clickhouse

    template_dirs = [
        args.website_dir,
        os.path.join(args.docs_dir, "_includes"),
    ]
    enabled_extensions = [
        "jinja2.ext.i18n",
        "jinja2_highlight.HighlightExtension",
    ]
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(template_dirs),
        extensions=enabled_extensions,
    )
    env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")

    translations = mdx_clickhouse.get_translations(
        os.path.join(args.website_dir, "locale"), "en"
    )
    env.install_gettext_translations(translations, newstyle=True)

    init_jinja2_filters(env)
    return env

View File

@ -17,108 +17,112 @@ import util
def handle_iframe(iframe, soup):
    """Wrap ``iframe`` in a responsive 16:9 container and normalise its attributes.

    The frame's ``src`` must start with one of the allowed domains. The frame
    is then moved into a freshly created ``<div>``, loses any fixed
    ``width``/``height`` and receives the embed attributes set below.

    :param iframe: tag-like object with ``attrs``, ``insert_before`` and
        ``extract`` (a BeautifulSoup tag in the callers visible in this file).
    :param soup: document object providing ``new_tag``.
    :raises RuntimeError: if ``src`` is missing or not on an allowed domain.
    """
    allowed_domains = ("https://www.youtube.com/", "https://datalens.yandex/")

    # A frame without "src" used to fail with a bare KeyError; report it the
    # same way as any other disallowed source instead.
    iframe_src = iframe.attrs.get("src")
    # str.startswith accepts a tuple, which replaces the flag-and-break loop.
    if not iframe_src or not iframe_src.startswith(allowed_domains):
        raise RuntimeError(f"iframe from illegal domain: {iframe_src}")

    wrapper = soup.new_tag("div")
    wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
    # Put the wrapper where the frame was, then move the frame inside it.
    iframe.insert_before(wrapper)
    iframe.extract()
    wrapper.insert(0, iframe)

    # Fixed dimensions are dropped; sizing is left to the wrapper's classes.
    iframe.attrs.pop("width", None)
    iframe.attrs.pop("height", None)

    iframe.attrs[
        "allow"
    ] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
    iframe.attrs["class"] = "embed-responsive-item"
    iframe.attrs["frameborder"] = "0"
    iframe.attrs["allowfullscreen"] = "1"
def _append_css_class(tag, css_class):
    """Add ``css_class`` to the ``class`` attribute of ``tag``.

    A non-empty existing class list is extended with the new name; otherwise
    the attribute is set to the bare string.
    """
    current = tag.attrs.get("class")
    tag.attrs["class"] = current + [css_class] if current else css_class


def adjust_markdown_html(content):
    """Post-process rendered Markdown HTML and return the adjusted markup.

    The following adjustments are applied, in this order:

    * ``headerlink`` anchors get a non-breaking space as their text, and links
      whose ``href`` starts with ``http`` get ``target="_blank"``;
    * ``<code>`` gets the ``syntax`` class;
    * ``<iframe>`` tags, and ``<img alt="iframe">`` placeholders (turned into
      iframes), go through ``handle_iframe``; other images get ``img-fluid``;
    * a ``<summary>`` nested deeper inside ``<details>`` is moved to be its
      first child;
    * ``<dd>`` gets ``pl-3``;
    * ``admonition`` divs get ``role="alert"``, the ``alert``/``pb-0``/``mb-4``
      classes and a mode class chosen from their existing classes.

    :param content: HTML text to adjust.
    :return: the adjusted document serialised with ``str``.
    """
    soup = bs4.BeautifulSoup(content, features="html.parser")

    for a in soup.find_all("a"):
        a_class = a.attrs.get("class")
        a_href = a.attrs.get("href")
        if a_class and "headerlink" in a_class:
            a.string = "\xa0"
        if a_href and a_href.startswith("http"):
            a.attrs["target"] = "_blank"

    for code in soup.find_all("code"):
        _append_css_class(code, "syntax")

    for iframe in soup.find_all("iframe"):
        handle_iframe(iframe, soup)

    for img in soup.find_all("img"):
        if img.attrs.get("alt") == "iframe":
            # Images with alt="iframe" are placeholders for embedded frames.
            img.name = "iframe"
            img.string = ""
            handle_iframe(img, soup)
            continue
        _append_css_class(img, "img-fluid")

    for details in soup.find_all("details"):
        for summary in details.find_all("summary"):
            if summary.parent != details:
                summary.extract()
                details.insert(0, summary)

    for dd in soup.find_all("dd"):
        _append_css_class(dd, "pl-3")

    for div in soup.find_all("div"):
        div_class = div.attrs.get("class")
        # Only admonition blocks are restyled; every other div is left alone.
        if not (div_class and "admonition" in div_class):
            continue

        for a in div.find_all("a"):
            _append_css_class(a, "alert-link")

        for p in div.find_all("p"):
            p_class = p.attrs.get("class")
            if p_class and ("admonition-title" in p_class):
                p.attrs["class"] = p_class + [
                    "alert-heading",
                    "display-4",
                    "text-reset",
                    "mb-2",
                ]

        div.attrs["role"] = "alert"
        # The first matching group wins, so the order of these checks matters.
        if ("info" in div_class) or ("note" in div_class):
            mode = "alert-primary"
        elif ("attention" in div_class) or ("warning" in div_class):
            mode = "alert-warning"
        elif "important" in div_class:
            mode = "alert-danger"
        elif "tip" in div_class:
            mode = "alert-info"
        else:
            mode = "alert-secondary"
        div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]

    return str(soup)
@ -128,61 +132,63 @@ def minify_html(content):
def build_website(args):
    """Copy the website sources to the output directory and render its HTML.

    Steps:

    1. copy ``args.website_dir`` to ``args.output_dir``, leaving out the
       patterns listed below;
    2. copy the ``images`` directory to ``<output_dir>/docs/images``;
    3. copy ``version_date.tsv`` and the universal install script into
       ``<output_dir>/data``;
    4. render every ``*.html`` file under the output directory, except those
       named ``main.html``, as a Jinja2 template with the attributes of
       ``args`` as context, rewriting the file in place as UTF-8.

    :param args: object with ``website_dir``, ``output_dir`` and ``src_dir``
        attributes (plus whatever ``util.init_jinja2_env`` reads).
    """
    logging.info("Building website")
    env = util.init_jinja2_env(args)

    not_copied = shutil.ignore_patterns(
        "*.md",
        "*.sh",
        "*.css",
        "*.json",
        "js/*.js",
        "build",
        "docs",
        "public",
        "node_modules",
        "src",
        "templates",
        "locale",
        ".gitkeep",
    )
    shutil.copytree(args.website_dir, args.output_dir, ignore=not_copied)

    images_src = os.path.join(args.website_dir, "images")
    images_dst = os.path.join(args.output_dir, "docs", "images")
    shutil.copytree(images_src, images_dst)

    data_files = [
        # This file can be requested to check for available ClickHouse releases.
        (
            os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
            os.path.join(args.output_dir, "data", "version_date.tsv"),
        ),
        # This file can be requested to install ClickHouse.
        (
            os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
            os.path.join(args.output_dir, "data", "install.sh"),
        ),
    ]
    for source, destination in data_files:
        shutil.copy2(source, destination)

    for root, _, filenames in os.walk(args.output_dir):
        for filename in filenames:
            if filename == "main.html" or not filename.endswith(".html"):
                continue

            path = os.path.join(root, filename)
            logging.info("Processing %s", path)
            with open(path, "rb") as src:
                raw = src.read().decode("utf-8")

            rendered = env.from_string(raw).render(args.__dict__)

            with open(path, "wb") as dst:
                dst.write(rendered.encode("utf-8"))
def get_css_in(args): def get_css_in(args):
@ -193,7 +199,7 @@ def get_css_in(args):
f"'{args.website_dir}/css/blog.css'", f"'{args.website_dir}/css/blog.css'",
f"'{args.website_dir}/css/docs.css'", f"'{args.website_dir}/css/docs.css'",
f"'{args.website_dir}/css/highlight.css'", f"'{args.website_dir}/css/highlight.css'",
f"'{args.website_dir}/css/main.css'" f"'{args.website_dir}/css/main.css'",
] ]
@ -207,42 +213,41 @@ def get_js_in(args):
f"'{args.website_dir}/js/index.js'", f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'", f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'", f"'{args.website_dir}/js/docs.js'",
f"'{args.website_dir}/js/main.js'" f"'{args.website_dir}/js/main.js'",
] ]
def minify_file(path, css_digest, js_digest):
    """Rewrite one ``.html`` or ``.css`` file of the generated site in place.

    HTML files are passed through ``minify_html`` and then have the
    ``base.css?css_digest`` / ``base.js?js_digest`` placeholders replaced with
    the given digests. CSS files are read and written back unchanged (CSS
    minification is currently disabled). Any other path is ignored.

    :param path: file to process.
    :param css_digest: value substituted after ``base.css?``.
    :param js_digest: value substituted after ``base.js?``.
    """
    is_html = path.endswith(".html")
    if not is_html and not path.endswith(".css"):
        return

    logging.info("Minifying %s", path)
    with open(path, "rb") as src:
        text = src.read().decode("utf-8")

    if is_html:
        text = (
            minify_html(text)
            .replace("base.css?css_digest", f"base.css?{css_digest}")
            .replace("base.js?js_digest", f"base.js?{js_digest}")
        )
    # TODO: restore cssmin
    # elif path.endswith('.css'):
    #     content = cssmin.cssmin(content)
    # TODO: restore jsmin
    # elif path.endswith('.js'):
    #     content = jsmin.jsmin(content)

    with open(path, "wb") as dst:
        dst.write(text.encode("utf-8"))
def minify_website(args): def minify_website(args):
css_in = ' '.join(get_css_in(args)) css_in = " ".join(get_css_in(args))
css_out = f'{args.output_dir}/docs/css/base.css' css_out = f"{args.output_dir}/docs/css/base.css"
os.makedirs(f'{args.output_dir}/docs/css') os.makedirs(f"{args.output_dir}/docs/css")
if args.minify and False: # TODO: return closure if args.minify and False: # TODO: return closure
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \ command = (
f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' "
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}" f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
)
logging.info(css_in) logging.info(css_in)
logging.info(command) logging.info(command)
output = subprocess.check_output(command, shell=True) output = subprocess.check_output(command, shell=True)
@ -251,51 +256,60 @@ def minify_website(args):
else: else:
command = f"cat {css_in}" command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True) output = subprocess.check_output(command, shell=True)
with open(css_out, 'wb+') as f: with open(css_out, "wb+") as f:
f.write(output) f.write(output)
with open(css_out, 'rb') as f: with open(css_out, "rb") as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = ' '.join(get_js_in(args)) js_in = " ".join(get_js_in(args))
js_out = f'{args.output_dir}/docs/js/base.js' js_out = f"{args.output_dir}/docs/js/base.js"
os.makedirs(f'{args.output_dir}/docs/js') os.makedirs(f"{args.output_dir}/docs/js")
if args.minify and False: # TODO: return closure if args.minify and False: # TODO: return closure
js_in = [js[1:-1] for js in js_in] js_in = [js[1:-1] for js in js_in]
closure_args = [ closure_args = [
'--js', *js_in, '--js_output_file', js_out, "--js",
'--compilation_level', 'SIMPLE', *js_in,
'--dependency_mode', 'NONE', "--js_output_file",
'--third_party', '--use_types_for_optimization', js_out,
'--isolation_mode', 'IIFE' "--compilation_level",
"SIMPLE",
"--dependency_mode",
"NONE",
"--third_party",
"--use_types_for_optimization",
"--isolation_mode",
"IIFE",
] ]
logging.info(closure_args) logging.info(closure_args)
if closure.run(*closure_args): if closure.run(*closure_args):
raise RuntimeError('failed to run closure compiler') raise RuntimeError("failed to run closure compiler")
with open(js_out, 'r') as f: with open(js_out, "r") as f:
js_content = jsmin.jsmin(f.read()) js_content = jsmin.jsmin(f.read())
with open(js_out, 'w') as f: with open(js_out, "w") as f:
f.write(js_content) f.write(js_content)
else: else:
command = f"cat {js_in}" command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True) output = subprocess.check_output(command, shell=True)
with open(js_out, 'wb+') as f: with open(js_out, "wb+") as f:
f.write(output) f.write(output)
with open(js_out, 'rb') as f: with open(js_out, "rb") as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest) logging.info(js_digest)
if args.minify: if args.minify:
logging.info('Minifying website') logging.info("Minifying website")
with concurrent.futures.ThreadPoolExecutor() as executor: with concurrent.futures.ThreadPoolExecutor() as executor:
futures = [] futures = []
for root, _, filenames in os.walk(args.output_dir): for root, _, filenames in os.walk(args.output_dir):
for filename in filenames: for filename in filenames:
path = os.path.join(root, filename) path = os.path.join(root, filename)
futures.append(executor.submit(minify_file, path, css_digest, js_digest)) futures.append(
executor.submit(minify_file, path, css_digest, js_digest)
)
for future in futures: for future in futures:
exc = future.exception() exc = future.exception()
if exc: if exc:
@ -304,24 +318,28 @@ def minify_website(args):
def process_benchmark_results(args):
    """Merge the per-system benchmark JSON files into one ``results.js`` per kind.

    For each kind (``dbms`` and ``hardware``) every file under
    ``<website_dir>/benchmark/<kind>/results`` is read in sorted name order,
    each entry is checked for the keys required for that kind, and all entries
    are concatenated. The combined list is written to
    ``<output_dir>/benchmark/<kind>/results.js`` as ``var results = [...];``.

    :param args: object with ``website_dir`` and ``output_dir`` attributes.
    :raises AssertionError: if an entry lacks one of the required keys.
    """
    benchmark_root = os.path.join(args.website_dir, "benchmark")
    required_keys = {
        "dbms": ["result"],
        "hardware": ["result", "system", "system_full", "kind"],
    }
    for benchmark_kind in ["dbms", "hardware"]:
        results = []
        results_root = os.path.join(benchmark_root, benchmark_kind, "results")
        for result_name in sorted(os.listdir(results_root)):
            result_file = os.path.join(results_root, result_name)
            logging.debug("Reading benchmark result from %s", result_file)
            # JSON is UTF-8; do not depend on the locale's default encoding.
            with open(result_file, "r", encoding="utf-8") as f:
                entries = json.load(f)
            for item in entries:
                for required_key in required_keys[benchmark_kind]:
                    # Raised explicitly (same exception type as the former
                    # `assert`) so the check also runs under `python -O`.
                    if required_key not in item:
                        raise AssertionError(
                            f'No "{required_key}" in {result_file}'
                        )
            results += entries

        results_js = os.path.join(
            args.output_dir, "benchmark", benchmark_kind, "results.js"
        )
        with open(results_js, "w", encoding="utf-8") as f:
            data = json.dumps(results)
            f.write(f"var results = {data};")

View File

@ -42,6 +42,8 @@ git push
使用`utils/check-style/check-style`二进制文件执行一些简单的基于正则表达式的代码样式检查(注意, 它可以在本地运行). 使用`utils/check-style/check-style`二进制文件执行一些简单的基于正则表达式的代码样式检查(注意, 它可以在本地运行).
如果失败, 按照[代码样式指南](./style.md)修复样式错误. 如果失败, 按照[代码样式指南](./style.md)修复样式错误.
使用 [black](https://github.com/psf/black/) 检查 python 代码.
### 报告详情 {#report-details} ### 报告详情 {#report-details}
- [状态页示例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html) - [状态页示例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `docs_output.txt`记录了查结果错误(无效表格等), 空白页表示没有错误. [成功结果案例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt) - `docs_output.txt`记录了查结果错误(无效表格等), 空白页表示没有错误. [成功结果案例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)

View File

@ -140,7 +140,7 @@ CREATE TABLE test.test_orc
`f_array_array_float` Array(Array(Float32)), `f_array_array_float` Array(Array(Float32)),
`day` String `day` String
) )
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day PARTITION BY day
``` ```

View File

@ -15,7 +15,7 @@
``` ```
┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐ ┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
│ sumburConsistentHash │ 0 │ 0 │ │ │ sumburConsistentHash │ 0 │ 0 │ │
yandexConsistentHash │ 0 │ 0 │ │ kostikConsistentHash │ 0 │ 0 │ │
│ demangle │ 0 │ 0 │ │ │ demangle │ 0 │ 0 │ │
│ addressToLine │ 0 │ 0 │ │ │ addressToLine │ 0 │ 0 │ │
│ JSONExtractRaw │ 0 │ 0 │ │ │ JSONExtractRaw │ 0 │ 0 │ │

View File

@ -21,8 +21,12 @@ description: |
This package contains the debugging symbols for clickhouse-common. This package contains the debugging symbols for clickhouse-common.
contents: contents:
- src: root/usr/lib/debug - src: root/usr/lib/debug/usr/bin/clickhouse.debug
dst: /usr/lib/debug dst: /usr/lib/debug/usr/bin/clickhouse.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
# docs # docs
- src: ../AUTHORS - src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS

View File

@ -473,18 +473,11 @@ else ()
if (INSTALL_STRIPPED_BINARIES) if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse) clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
else() else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT})
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif() endif()
endif() endif()
if (NOT INSTALL_STRIPPED_BINARIES)
# Install dunny debug directory
# TODO: move logic to every place where clickhouse_strip_binary is used
add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
endif()
if (ENABLE_TESTS) if (ENABLE_TESTS)
set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms) set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})

View File

@ -131,5 +131,10 @@ if (BUILD_STANDALONE_KEEPER)
add_dependencies(clickhouse-keeper clickhouse_keeper_configs) add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif() endif()

View File

@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT
if (INSTALL_STRIPPED_BINARIES) if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge) clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
else() else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif() endif()

View File

@ -42,6 +42,7 @@ endif()
if (INSTALL_STRIPPED_BINARIES) if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge) clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
else() else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif() endif()

View File

@ -45,6 +45,7 @@
#include <Core/ServerUUID.h> #include <Core/ServerUUID.h>
#include <IO/HTTPCommon.h> #include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/IOThreadPool.h>
#include <IO/UseSSL.h> #include <IO/UseSSL.h>
#include <Interpreters/AsynchronousMetrics.h> #include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h> #include <Interpreters/DDLWorker.h>
@ -554,6 +555,10 @@ if (ThreadFuzzer::instance().isEffective())
config().getUInt("thread_pool_queue_size", 10000) config().getUInt("thread_pool_queue_size", 10000)
); );
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
/// Initialize global local cache for remote filesystem. /// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs")) if (config().has("local_cache_for_remote_fs"))
@ -1022,8 +1027,8 @@ if (ThreadFuzzer::instance().isEffective())
std::make_unique<TCPServer>( std::make_unique<TCPServer>(
new KeeperTCPHandlerFactory( new KeeperTCPHandlerFactory(
config_getter, global_context->getKeeperDispatcher(), config_getter, global_context->getKeeperDispatcher(),
global_context->getSettingsRef().receive_timeout, global_context->getSettingsRef().receive_timeout.totalSeconds(),
global_context->getSettingsRef().send_timeout, global_context->getSettingsRef().send_timeout.totalSeconds(),
false), server_pool, socket)); false), server_pool, socket));
}); });
@ -1045,8 +1050,8 @@ if (ThreadFuzzer::instance().isEffective())
std::make_unique<TCPServer>( std::make_unique<TCPServer>(
new KeeperTCPHandlerFactory( new KeeperTCPHandlerFactory(
config_getter, global_context->getKeeperDispatcher(), config_getter, global_context->getKeeperDispatcher(),
global_context->getSettingsRef().receive_timeout, global_context->getSettingsRef().receive_timeout.totalSeconds(),
global_context->getSettingsRef().send_timeout, true), server_pool, socket)); global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket));
#else #else
UNUSED(port); UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",

View File

@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
time_t decrease_error_period_, time_t decrease_error_period_,
size_t max_error_cap_) size_t max_error_cap_)
: Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover")) : Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover"))
, default_load_balancing(load_balancing) , get_priority_load_balancing(load_balancing)
{ {
const std::string & local_hostname = getFQDNOrHostName(); const std::string & local_hostname = getFQDNOrHostName();
hostname_differences.resize(nested_pools.size()); get_priority_load_balancing.hostname_differences.resize(nested_pools.size());
for (size_t i = 0; i < nested_pools.size(); ++i) for (size_t i = 0; i < nested_pools.size(); ++i)
{ {
ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]); ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost()); get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
} }
} }
@ -51,36 +51,15 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
}; };
size_t offset = 0; size_t offset = 0;
LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
if (settings) if (settings)
offset = settings->load_balancing_first_offset % nested_pools.size();
GetPriorityFunc get_priority;
switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
{ {
case LoadBalancing::NEAREST_HOSTNAME: offset = settings->load_balancing_first_offset % nested_pools.size();
get_priority = [&](size_t i) { return hostname_differences[i]; }; load_balancing = LoadBalancing(settings->load_balancing);
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return i; };
break;
case LoadBalancing::RANDOM:
break;
case LoadBalancing::FIRST_OR_RANDOM:
get_priority = [offset](size_t i) -> size_t { return i != offset; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= nested_pools.size())
last_used = 0;
++last_used;
/* Consider nested_pools.size() equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
break;
} }
GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0; UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true; bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;
@ -173,38 +152,14 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
/// Builds the priority function for choosing among the nested pools.
/// Without `settings` the policy stored in `get_priority_load_balancing` is used with a zero offset;
/// with `settings` both the policy and the first offset (taken modulo the number of nested pools)
/// come from the settings.
ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings)
{
    const size_t first_offset = settings ? settings->load_balancing_first_offset % nested_pools.size() : 0;
    const LoadBalancing policy = settings ? LoadBalancing(settings->load_balancing) : get_priority_load_balancing.load_balancing;

    return get_priority_load_balancing.getPriorityFunc(policy, first_offset, nested_pools.size());
}
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl( std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <Common/PoolWithFailoverBase.h> #include <Common/PoolWithFailoverBase.h>
#include <Common/GetPriorityForLoadBalancing.h>
#include <Client/ConnectionPool.h> #include <Client/ConnectionPool.h>
#include <chrono> #include <chrono>
@ -109,9 +110,7 @@ private:
GetPriorityFunc makeGetPriorityFunc(const Settings * settings); GetPriorityFunc makeGetPriorityFunc(const Settings * settings);
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools. GetPriorityForLoadBalancing get_priority_load_balancing;
size_t last_used = 0; /// Last used for round_robin policy.
LoadBalancing default_load_balancing;
}; };
using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>; using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;

View File

@ -83,9 +83,20 @@ size_t extractMaskNumericImpl(
const PaddedPODArray<UInt8> * null_bytemap, const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls) PaddedPODArray<UInt8> * nulls)
{ {
if constexpr (!column_is_short)
{
if (data.size() != mask.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
}
size_t ones_count = 0; size_t ones_count = 0;
size_t data_index = 0; size_t data_index = 0;
for (size_t i = 0; i != mask.size(); ++i)
size_t mask_size = mask.size();
size_t data_size = data.size();
size_t i = 0;
for (; i != mask_size && data_index != data_size; ++i)
{ {
// Change mask only where value is 1. // Change mask only where value is 1.
if (!mask[i]) if (!mask[i])
@ -118,6 +129,13 @@ size_t extractMaskNumericImpl(
mask[i] = value; mask[i] = value;
} }
if constexpr (column_is_short)
{
if (data_index != data_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
}
return ones_count; return ones_count;
} }

View File

@ -113,5 +113,35 @@ public:
} }
}; };
class SynchronizedArenaWithFreeLists : private ArenaWithFreeLists
{
public:
explicit SynchronizedArenaWithFreeLists(
const size_t initial_size = 4096, const size_t growth_factor = 2,
const size_t linear_growth_threshold = 128 * 1024 * 1024)
: ArenaWithFreeLists{initial_size, growth_factor, linear_growth_threshold}
{}
char * alloc(const size_t size)
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::alloc(size);
}
void free(char * ptr, const size_t size)
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::free(ptr, size);
}
/// Size of the allocated pool in bytes
size_t size() const
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::size();
}
private:
mutable std::mutex mutex;
};
} }

View File

@ -31,8 +31,8 @@ public:
/// probably it worth to try to increase stack size for coroutines. /// probably it worth to try to increase stack size for coroutines.
/// ///
/// Current value is just enough for all tests in our CI. It's not selected in some special /// Current value is just enough for all tests in our CI. It's not selected in some special
/// way. We will have 40 pages with 4KB page size. /// way. We will have 80 pages with 4KB page size.
static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests
explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_) explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
{ {

View File

@ -0,0 +1,49 @@
#include <Common/GetPriorityForLoadBalancing.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Builds the function that maps a host index to its priority for the given policy.
///
/// @param load_balance  policy to build the function for.
/// @param offset        index that gets priority 0 under FIRST_OR_RANDOM (all others get 1).
/// @param pool_size     number of hosts; used by ROUND_ROBIN to wrap `last_used` and to rank hosts.
/// @return an empty std::function for RANDOM (callers must check it before calling),
///         otherwise a callable returning the priority of host `i`.
/// @throws Exception(LOGICAL_ERROR) for NEAREST_HOSTNAME when hostname_differences is empty.
///
/// Every ROUND_ROBIN call advances the mutable member `last_used`.
/// The NEAREST_HOSTNAME and ROUND_ROBIN functions read members through `this`,
/// so the returned function must not outlive this object.
std::function<size_t(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
{
    std::function<size_t(size_t index)> get_priority;
    switch (load_balance)
    {
        case LoadBalancing::NEAREST_HOSTNAME:
            if (hostname_differences.empty())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
            get_priority = [this](size_t i) { return hostname_differences[i]; };
            break;
        case LoadBalancing::IN_ORDER:
            get_priority = [](size_t i) { return i; };
            break;
        case LoadBalancing::RANDOM:
            /// No priorities: get_priority stays empty.
            break;
        case LoadBalancing::FIRST_OR_RANDOM:
            get_priority = [offset](size_t i) -> size_t { return i != offset; };
            break;
        case LoadBalancing::ROUND_ROBIN:
            if (last_used >= pool_size)
                last_used = 0;
            ++last_used;
            /* Consider pool_size equals to 5
             * last_used = 1 -> get_priority: 0 1 2 3 4
             * last_used = 2 -> get_priority: 4 0 1 2 3
             * last_used = 3 -> get_priority: 4 3 0 1 2
             * ...
             * */
            /// pool_size is a by-value parameter: it must be copied into the closure,
            /// because the returned function is called after this frame is gone.
            get_priority = [this, pool_size](size_t i)
            {
                ++i;
                return i < last_used ? pool_size - i : i - last_used;
            };
            break;
    }
    return get_priority;
}
}

View File

@ -0,0 +1,34 @@
#pragma once
#include <Core/SettingsEnums.h>
namespace DB
{
/// Holds the state needed to build host-priority functions for a LoadBalancing policy:
/// the configured policy, the per-host hostname distances and the round-robin cursor.
class GetPriorityForLoadBalancing
{
public:
    GetPriorityForLoadBalancing() = default;
    GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}

    /// Equality looks only at the policy and the hostname distances;
    /// the round-robin cursor `last_used` is not compared.
    bool operator == (const GetPriorityForLoadBalancing & other) const
    {
        if (load_balancing != other.load_balancing)
            return false;
        return hostname_differences == other.hostname_differences;
    }

    bool operator != (const GetPriorityForLoadBalancing & other) const
    {
        return load_balancing != other.load_balancing || hostname_differences != other.hostname_differences;
    }

    /// Returns a function mapping a host index to its priority for `load_balance`.
    std::function<size_t(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;

    std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.

    LoadBalancing load_balancing = LoadBalancing::RANDOM;

private:
    /// Mutable because the const method getPriorityFunc advances it.
    mutable size_t last_used = 0; /// Last used for round_robin policy.
};
}

View File

@ -13,6 +13,9 @@ Int32 IntervalKind::toAvgSeconds() const
{ {
switch (kind) switch (kind)
{ {
case IntervalKind::Nanosecond: return 0; /// fractional parts of seconds have 0 seconds
case IntervalKind::Microsecond: return 0;
case IntervalKind::Millisecond: return 0;
case IntervalKind::Second: return 1; case IntervalKind::Second: return 1;
case IntervalKind::Minute: return 60; case IntervalKind::Minute: return 60;
case IntervalKind::Hour: return 3600; case IntervalKind::Hour: return 3600;
@ -52,6 +55,9 @@ const char * IntervalKind::toKeyword() const
{ {
switch (kind) switch (kind)
{ {
case IntervalKind::Nanosecond: return "NANOSECOND";
case IntervalKind::Microsecond: return "MICROSECOND";
case IntervalKind::Millisecond: return "MILLISECOND";
case IntervalKind::Second: return "SECOND"; case IntervalKind::Second: return "SECOND";
case IntervalKind::Minute: return "MINUTE"; case IntervalKind::Minute: return "MINUTE";
case IntervalKind::Hour: return "HOUR"; case IntervalKind::Hour: return "HOUR";
@ -69,6 +75,9 @@ const char * IntervalKind::toLowercasedKeyword() const
{ {
switch (kind) switch (kind)
{ {
case IntervalKind::Nanosecond: return "nanosecond";
case IntervalKind::Microsecond: return "microsecond";
case IntervalKind::Millisecond: return "millisecond";
case IntervalKind::Second: return "second"; case IntervalKind::Second: return "second";
case IntervalKind::Minute: return "minute"; case IntervalKind::Minute: return "minute";
case IntervalKind::Hour: return "hour"; case IntervalKind::Hour: return "hour";
@ -86,6 +95,12 @@ const char * IntervalKind::toDateDiffUnit() const
{ {
switch (kind) switch (kind)
{ {
case IntervalKind::Nanosecond:
return "nanosecond";
case IntervalKind::Microsecond:
return "microsecond";
case IntervalKind::Millisecond:
return "millisecond";
case IntervalKind::Second: case IntervalKind::Second:
return "second"; return "second";
case IntervalKind::Minute: case IntervalKind::Minute:
@ -111,6 +126,12 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const
{ {
switch (kind) switch (kind)
{ {
case IntervalKind::Nanosecond:
return "toIntervalNanosecond";
case IntervalKind::Microsecond:
return "toIntervalMicrosecond";
case IntervalKind::Millisecond:
return "toIntervalMillisecond";
case IntervalKind::Second: case IntervalKind::Second:
return "toIntervalSecond"; return "toIntervalSecond";
case IntervalKind::Minute: case IntervalKind::Minute:
@ -136,6 +157,12 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
{ {
switch (kind) switch (kind)
{ {
case IntervalKind::Nanosecond:
return "toNanosecond";
case IntervalKind::Microsecond:
return "toMicrosecond";
case IntervalKind::Millisecond:
return "toMillisecond";
case IntervalKind::Second: case IntervalKind::Second:
return "toSecond"; return "toSecond";
case IntervalKind::Minute: case IntervalKind::Minute:
@ -162,6 +189,21 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
bool IntervalKind::tryParseString(const std::string & kind, IntervalKind::Kind & result) bool IntervalKind::tryParseString(const std::string & kind, IntervalKind::Kind & result)
{ {
if ("nanosecond" == kind)
{
result = IntervalKind::Nanosecond;
return true;
}
if ("microsecond" == kind)
{
result = IntervalKind::Microsecond;
return true;
}
if ("millisecond" == kind)
{
result = IntervalKind::Millisecond;
return true;
}
if ("second" == kind) if ("second" == kind)
{ {
result = IntervalKind::Second; result = IntervalKind::Second;

View File

@ -10,6 +10,9 @@ struct IntervalKind
{ {
enum Kind enum Kind
{ {
Nanosecond,
Microsecond,
Millisecond,
Second, Second,
Minute, Minute,
Hour, Hour,
@ -61,6 +64,9 @@ struct IntervalKind
/// NOLINTNEXTLINE /// NOLINTNEXTLINE
#define FOR_EACH_INTERVAL_KIND(M) \ #define FOR_EACH_INTERVAL_KIND(M) \
M(Nanosecond) \
M(Microsecond) \
M(Millisecond) \
M(Second) \ M(Second) \
M(Minute) \ M(Minute) \
M(Hour) \ M(Hour) \

View File

@ -515,6 +515,11 @@ public:
radixSortLSDInternal<false>(arr, size, false, nullptr); radixSortLSDInternal<false>(arr, size, false, nullptr);
} }
/// Overload of executeLSD that lets the caller choose the `reverse` flag.
/// Forwards to radixSortLSDInternal<false> with a null last argument, exactly like
/// the three-line overload above it, except that `reverse` is passed through
/// instead of being fixed to false.
/// NOTE(review): `reverse` presumably selects descending order - confirm against radixSortLSDInternal.
static void executeLSD(Element * arr, size_t size, bool reverse)
{
radixSortLSDInternal<false>(arr, size, reverse, nullptr);
}
/** This function will start to sort inplace (modify 'arr') /** This function will start to sort inplace (modify 'arr')
* but on the last step it will write result directly to the destination * but on the last step it will write result directly to the destination
* instead of finishing sorting 'arr'. * instead of finishing sorting 'arr'.

View File

@ -22,7 +22,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log
PRIVATE PRIVATE
string_utils string_utils
) )
if (ENABLE_EXAMPLES) if (ENABLE_EXAMPLES)
add_subdirectory(examples) add_subdirectory(examples)
endif() endif()

View File

@ -5,15 +5,15 @@
#include <functional> #include <functional>
#include <filesystem> #include <filesystem>
#include <pcg-random/pcg_random.hpp>
#include <base/logger_useful.h>
#include <base/find_symbols.h> #include <base/find_symbols.h>
#include <Common/randomSeed.h> #include <base/getFQDNOrHostName.h>
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/isLocalAddress.h>
#include <Poco/Net/NetException.h> #include <Poco/Net/NetException.h>
#include <Poco/Net/DNS.h>
#define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000 #define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000
@ -48,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path)
void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_) int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{ {
log = &Poco::Logger::get("ZooKeeper"); log = &Poco::Logger::get("ZooKeeper");
hosts = hosts_; hosts = hosts_;
@ -57,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
operation_timeout_ms = operation_timeout_ms_; operation_timeout_ms = operation_timeout_ms_;
chroot = chroot_; chroot = chroot_;
implementation = implementation_; implementation = implementation_;
get_priority_load_balancing = get_priority_load_balancing_;
if (implementation == "zookeeper") if (implementation == "zookeeper")
{ {
@ -66,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
Coordination::ZooKeeper::Nodes nodes; Coordination::ZooKeeper::Nodes nodes;
nodes.reserve(hosts.size()); nodes.reserve(hosts.size());
Strings shuffled_hosts = hosts;
/// Shuffle the hosts to distribute the load among ZooKeeper nodes. /// Shuffle the hosts to distribute the load among ZooKeeper nodes.
pcg64 generator(randomSeed()); std::vector<ShuffleHost> shuffled_hosts = shuffleHosts();
std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator);
bool dns_error = false; bool dns_error = false;
for (auto & host_string : shuffled_hosts) for (auto & host : shuffled_hosts)
{ {
auto & host_string = host.host;
try try
{ {
bool secure = bool(startsWith(host_string, "secure://")); bool secure = bool(startsWith(host_string, "secure://"));
@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
if (secure) if (secure)
host_string.erase(0, strlen("secure://")); host_string.erase(0, strlen("secure://"));
LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString());
nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
} }
catch (const Poco::Net::HostNotFoundException & e) catch (const Poco::Net::HostNotFoundException & e)
@ -154,23 +155,47 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
} }
} }
std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
{
std::function<size_t(size_t index)> get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size());
std::vector<ShuffleHost> shuffle_hosts;
for (size_t i = 0; i < hosts.size(); ++i)
{
ShuffleHost shuffle_host;
shuffle_host.host = hosts[i];
if (get_priority)
shuffle_host.priority = get_priority(i);
shuffle_host.randomize();
shuffle_hosts.emplace_back(shuffle_host);
}
std::sort(
shuffle_hosts.begin(), shuffle_hosts.end(),
[](const ShuffleHost & lhs, const ShuffleHost & rhs)
{
return ShuffleHost::compare(lhs, rhs);
});
return shuffle_hosts;
}
ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_,
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
std::shared_ptr<DB::ZooKeeperLog> zk_log_) std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{ {
zk_log = std::move(zk_log_); zk_log = std::move(zk_log_);
Strings hosts_strings; Strings hosts_strings;
splitInto<','>(hosts_strings, hosts_string); splitInto<','>(hosts_strings, hosts_string);
init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
} }
ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_,
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
std::shared_ptr<DB::ZooKeeperLog> zk_log_) std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{ {
zk_log = std::move(zk_log_); zk_log = std::move(zk_log_);
init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
} }
struct ZooKeeperArgs struct ZooKeeperArgs
@ -213,6 +238,15 @@ struct ZooKeeperArgs
{ {
implementation = config.getString(config_name + "." + key); implementation = config.getString(config_name + "." + key);
} }
else if (key == "zookeeper_load_balancing")
{
String load_balancing_str = config.getString(config_name + "." + key);
/// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
if (!load_balancing)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
get_priority_load_balancing.load_balancing = *load_balancing;
}
else else
throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS);
} }
@ -224,6 +258,15 @@ struct ZooKeeperArgs
if (chroot.back() == '/') if (chroot.back() == '/')
chroot.pop_back(); chroot.pop_back();
} }
/// init get_priority_load_balancing
get_priority_load_balancing.hostname_differences.resize(hosts.size());
const String & local_hostname = getFQDNOrHostName();
for (size_t i = 0; i < hosts.size(); ++i)
{
const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
}
} }
Strings hosts; Strings hosts;
@ -232,13 +275,14 @@ struct ZooKeeperArgs
int operation_timeout_ms; int operation_timeout_ms;
std::string chroot; std::string chroot;
std::string implementation; std::string implementation;
GetPriorityForLoadBalancing get_priority_load_balancing;
}; };
ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_) ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_)
: zk_log(std::move(zk_log_)) : zk_log(std::move(zk_log_))
{ {
ZooKeeperArgs args(config, config_name); ZooKeeperArgs args(config, config_name);
init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot); init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing);
} }
bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const
@ -249,8 +293,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config,
if (args.implementation == implementation && implementation == "testkeeper") if (args.implementation == implementation && implementation == "testkeeper")
return false; return false;
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot) if (args.get_priority_load_balancing != get_priority_load_balancing)
!= std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot); return true;
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing)
!= std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, args.get_priority_load_balancing);
} }
@ -757,7 +804,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
ZooKeeperPtr ZooKeeper::startNewSession() const ZooKeeperPtr ZooKeeper::startNewSession() const
{ {
return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log); return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing);
} }

View File

@ -13,7 +13,10 @@
#include <Common/Stopwatch.h> #include <Common/Stopwatch.h>
#include <Common/ZooKeeper/IKeeper.h> #include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/ZooKeeperConstants.h> #include <Common/ZooKeeper/ZooKeeperConstants.h>
#include <Common/GetPriorityForLoadBalancing.h>
#include <Common/thread_local_rng.h>
#include <unistd.h> #include <unistd.h>
#include <random>
namespace ProfileEvents namespace ProfileEvents
@ -37,6 +40,25 @@ namespace zkutil
/// Preferred size of multi() command (in number of ops) /// Preferred size of multi() command (in number of ops)
constexpr size_t MULTI_BATCH_SIZE = 100; constexpr size_t MULTI_BATCH_SIZE = 100;
/// A host string together with the two keys used to order connection attempts.
struct ShuffleHost
{
    String host;
    Int64 priority = 0;
    UInt32 random = 0;

    /// Assigns a new value taken from thread_local_rng to the `random` key.
    void randomize()
    {
        random = thread_local_rng();
    }

    /// Ordering predicate: lower priority first; equal priorities are ordered by `random`.
    static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs)
    {
        if (lhs.priority != rhs.priority)
            return lhs.priority < rhs.priority;
        return lhs.random < rhs.random;
    }
};
using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing;
/// ZooKeeper session. The interface is substantially different from the usual libzookeeper API. /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API.
/// ///
@ -58,14 +80,16 @@ public:
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
const std::string & chroot_ = "", const std::string & chroot_ = "",
const std::string & implementation_ = "zookeeper", const std::string & implementation_ = "zookeeper",
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr); std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});
explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "", explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "",
int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS,
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
const std::string & chroot_ = "", const std::string & chroot_ = "",
const std::string & implementation_ = "zookeeper", const std::string & implementation_ = "zookeeper",
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr); std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});
/** Config of the form: /** Config of the form:
<zookeeper> <zookeeper>
@ -91,6 +115,8 @@ public:
*/ */
ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_); ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_);
std::vector<ShuffleHost> shuffleHosts() const;
/// Creates a new session with the same parameters. This method can be used for reconnecting /// Creates a new session with the same parameters. This method can be used for reconnecting
/// after the session has expired. /// after the session has expired.
/// This object remains unchanged, and the new session is returned. /// This object remains unchanged, and the new session is returned.
@ -284,7 +310,7 @@ private:
friend class EphemeralNodeHolder; friend class EphemeralNodeHolder;
void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_); int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_);
/// The following methods don't throw any exceptions but return error codes. /// The following methods don't throw any exceptions but return error codes.
Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
@ -311,6 +337,8 @@ private:
Poco::Logger * log = nullptr; Poco::Logger * log = nullptr;
std::shared_ptr<DB::ZooKeeperLog> zk_log; std::shared_ptr<DB::ZooKeeperLog> zk_log;
GetPriorityForLoadBalancing get_priority_load_balancing;
AtomicStopwatch session_uptime; AtomicStopwatch session_uptime;
}; };

View File

@ -451,7 +451,7 @@ void ZooKeeper::connect(
} }
else else
{ {
LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id); LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str());
} }
} }

View File

@ -11,7 +11,7 @@
constexpr size_t IPV4_BINARY_LENGTH = 4; constexpr size_t IPV4_BINARY_LENGTH = 4;
constexpr size_t IPV6_BINARY_LENGTH = 16; constexpr size_t IPV6_BINARY_LENGTH = 16;
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte. constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39; constexpr size_t IPV6_MAX_TEXT_LENGTH = 45; /// Does not count tail zero byte.
namespace DB namespace DB
{ {

View File

@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_
size_t getHostNameDifference(const std::string & local_hostname, const std::string & host) size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
{ {
/// FIXME should we replace it with Levenshtein distance? (we already have it in NamePrompter)
size_t hostname_difference = 0; size_t hostname_difference = 0;
for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i) for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
if (local_hostname[i] != host[i]) if (local_hostname[i] != host[i])

View File

@ -13,6 +13,7 @@
#include <iterator> #include <iterator>
#include <base/sort.h> #include <base/sort.h>
#include <boost/algorithm/string.hpp>
namespace DB namespace DB
@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
} }
const ColumnWithTypeAndName * Block::findByName(const std::string & name) const const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
{ {
if (case_insensitive)
{
auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); });
if (found == data.end())
{
return nullptr;
}
return &*found;
}
auto it = index_by_name.find(name); auto it = index_by_name.find(name);
if (index_by_name.end() == it) if (index_by_name.end() == it)
{ {
@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
} }
const ColumnWithTypeAndName & Block::getByName(const std::string & name) const const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const
{ {
const auto * result = findByName(name); const auto * result = findByName(name, case_insensitive);
if (!result) if (!result)
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() throw Exception(
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
return *result; return *result;
} }
bool Block::has(const std::string & name) const bool Block::has(const std::string & name, bool case_insensitive) const
{ {
if (case_insensitive)
return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); })
!= data.end();
return index_by_name.end() != index_by_name.find(name); return index_by_name.end() != index_by_name.find(name);
} }
@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const
{ {
auto it = index_by_name.find(name); auto it = index_by_name.find(name);
if (index_by_name.end() == it) if (index_by_name.end() == it)
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() throw Exception(
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
return it->second; return it->second;
} }

View File

@ -60,21 +60,21 @@ public:
ColumnWithTypeAndName & safeGetByPosition(size_t position); ColumnWithTypeAndName & safeGetByPosition(size_t position);
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const; const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
ColumnWithTypeAndName* findByName(const std::string & name) ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false)
{ {
return const_cast<ColumnWithTypeAndName *>( return const_cast<ColumnWithTypeAndName *>(
const_cast<const Block *>(this)->findByName(name)); const_cast<const Block *>(this)->findByName(name, case_insensitive));
} }
const ColumnWithTypeAndName * findByName(const std::string & name) const; const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;
ColumnWithTypeAndName & getByName(const std::string & name) ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false)
{ {
return const_cast<ColumnWithTypeAndName &>( return const_cast<ColumnWithTypeAndName &>(
const_cast<const Block *>(this)->getByName(name)); const_cast<const Block *>(this)->getByName(name, case_insensitive));
} }
const ColumnWithTypeAndName & getByName(const std::string & name) const; const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const;
Container::iterator begin() { return data.begin(); } Container::iterator begin() { return data.begin(); }
Container::iterator end() { return data.end(); } Container::iterator end() { return data.end(); }
@ -83,7 +83,7 @@ public:
Container::const_iterator cbegin() const { return data.cbegin(); } Container::const_iterator cbegin() const { return data.cbegin(); }
Container::const_iterator cend() const { return data.cend(); } Container::const_iterator cend() const { return data.cend(); }
bool has(const std::string & name) const; bool has(const std::string & name, bool case_insensitive = false) const;
size_t getPositionByName(const std::string & name) const; size_t getPositionByName(const std::string & name) const;

View File

@ -47,6 +47,8 @@ class IColumn;
M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \ M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \
M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \ M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \ M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \ M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \
M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \ M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \
@ -614,11 +616,13 @@ class IColumn;
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \
M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \
M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \
M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \ M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \

View File

@ -149,4 +149,5 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
{"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"str", FormatSettings::MsgPackUUIDRepresentation::STR},
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
} }

View File

@ -13,6 +13,9 @@ bool DataTypeInterval::equals(const IDataType & rhs) const
void registerDataTypeInterval(DataTypeFactory & factory) void registerDataTypeInterval(DataTypeFactory & factory)
{ {
factory.registerSimpleDataType("IntervalNanosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond)); });
factory.registerSimpleDataType("IntervalMicrosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Microsecond)); });
factory.registerSimpleDataType("IntervalMillisecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Millisecond)); });
factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Second)); }); factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Second)); });
factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Minute)); }); factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Minute)); });
factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Hour)); }); factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Hour)); });

View File

@ -15,6 +15,8 @@
#include <Parsers/IAST.h> #include <Parsers/IAST.h>
#include <boost/algorithm/string/case_conv.hpp>
namespace DB namespace DB
{ {
@ -227,14 +229,17 @@ void validateArraySizes(const Block & block)
} }
std::unordered_set<String> getAllTableNames(const Block & block) std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case)
{ {
std::unordered_set<String> nested_table_names; std::unordered_set<String> nested_table_names;
for (auto & name : block.getNames()) for (const auto & name : block.getNames())
{ {
auto nested_table_name = Nested::extractTableName(name); auto nested_table_name = Nested::extractTableName(name);
if (to_lower_case)
boost::to_lower(nested_table_name);
if (!nested_table_name.empty()) if (!nested_table_name.empty())
nested_table_names.insert(nested_table_name); nested_table_names.insert(std::move(nested_table_name));
} }
return nested_table_names; return nested_table_names;
} }

View File

@ -32,7 +32,7 @@ namespace Nested
void validateArraySizes(const Block & block); void validateArraySizes(const Block & block);
/// Get all nested tables names from a block. /// Get all nested tables names from a block.
std::unordered_set<String> getAllTableNames(const Block & block); std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
} }
} }

View File

@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated(
/// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it. /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
if (zookeeper_path.front() != '/') if (zookeeper_path.front() != '/')
zookeeper_path = "/" + zookeeper_path; zookeeper_path = "/" + zookeeper_path;
if (!db_settings.collection_name.value.empty())
fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef());
} }
String DatabaseReplicated::getFullReplicaName() const String DatabaseReplicated::getFullReplicaName() const
@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
shards.back().emplace_back(unescapeForFileName(host_port)); shards.back().emplace_back(unescapeForFileName(host_port));
} }
String username = db_settings.cluster_username;
String password = db_settings.cluster_password;
UInt16 default_port = getContext()->getTCPPort(); UInt16 default_port = getContext()->getTCPPort();
bool secure = db_settings.cluster_secure_connection;
bool treat_local_as_remote = false; bool treat_local_as_remote = false;
bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL; bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
return std::make_shared<Cluster>( return std::make_shared<Cluster>(
getContext()->getSettingsRef(), getContext()->getSettingsRef(),
shards, shards,
username, cluster_auth_info.cluster_username,
password, cluster_auth_info.cluster_password,
default_port, default_port,
treat_local_as_remote, treat_local_as_remote,
treat_local_port_as_remote, treat_local_port_as_remote,
secure); cluster_auth_info.cluster_secure_connection,
/*priority=*/1,
database_name,
cluster_auth_info.cluster_secret);
}
void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
{
const auto & config_prefix = fmt::format("named_collections.{}", collection_name);
if (!config_ref.has(config_prefix))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name);
cluster_auth_info.cluster_username = config_ref.getString(config_prefix + ".cluster_username", "");
cluster_auth_info.cluster_password = config_ref.getString(config_prefix + ".cluster_password", "");
cluster_auth_info.cluster_secret = config_ref.getString(config_prefix + ".cluster_secret", "");
cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false);
} }
void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach) void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach)

View File

@ -75,6 +75,16 @@ private:
bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper); bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper); void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
/// Credentials and connection options used when building the Cluster for this database.
/// The values below are the defaults; fillClusterAuthInfo() overwrites all four fields
/// from the named collection in the server config.
struct
{
    /// User name passed to the Cluster constructor.
    String cluster_username = "default";
    /// Password passed to the Cluster constructor; empty by default.
    String cluster_password;
    /// Cluster secret passed to the Cluster constructor; empty by default.
    String cluster_secret;
    /// Passed to the Cluster constructor as the "secure" flag.
    bool cluster_secure_connection = false;
} cluster_auth_info;
void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config);
void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const; void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;
void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr); void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr);

View File

@ -8,12 +8,11 @@ namespace DB
class ASTStorage; class ASTStorage;
#define LIST_OF_DATABASE_REPLICATED_SETTINGS(M) \ #define LIST_OF_DATABASE_REPLICATED_SETTINGS(M) \
M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \ M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \ M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \ M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \
M(Bool, cluster_secure_connection, false, "Enable TLS when connecting to hosts of cluster", 0) \
DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS) DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)

View File

@ -20,6 +20,7 @@
#include <Common/getRandomASCIIString.h> #include <Common/getRandomASCIIString.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <IO/ReadBufferFromS3.h> #include <IO/ReadBufferFromS3.h>
#include <IO/ReadBufferFromString.h> #include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
@ -264,32 +265,6 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", LOG_TRACE(log, "{} to file by path: {}. S3 path: {}",
mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name);
ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback)
{
pool->scheduleOrThrow([callback = std::move(callback), thread_group]()
{
if (thread_group)
CurrentThread::attachTo(thread_group);
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
/// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent.
/// Typically, it may be changes from Process to User.
/// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed.
/// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well.
/// When, finally, we destroy the thread (and the ThreadStatus),
/// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\
/// and by this time user-level memory tracker may be already destroyed.
///
/// As a work-around, reset memory tracker to total, which is always alive.
CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
);
callback();
});
};
auto s3_buffer = std::make_unique<WriteBufferFromS3>( auto s3_buffer = std::make_unique<WriteBufferFromS3>(
settings->client, settings->client,
bucket, bucket,
@ -299,7 +274,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_upload_part_size_multiply_parts_count_threshold,
settings->s3_max_single_part_upload_size, settings->s3_max_single_part_upload_size,
std::move(object_metadata), std::move(object_metadata),
buf_size, std::move(schedule)); buf_size, threadPoolCallbackRunner(getThreadPoolWriter()));
auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
{ {

View File

@ -89,10 +89,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.null_as_default = settings.input_format_null_as_default; format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.color = settings.output_format_pretty_color;
@ -123,9 +123,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.import_nested = settings.input_format_orc_import_nested;
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.seekable_read = settings.input_format_allow_seeks; format_settings.seekable_read = settings.input_format_allow_seeks;

View File

@ -32,7 +32,6 @@ struct FormatSettings
bool null_as_default = true; bool null_as_default = true;
bool decimal_trailing_zeros = false; bool decimal_trailing_zeros = false;
bool defaults_for_omitted_fields = true; bool defaults_for_omitted_fields = true;
bool use_lowercase_column_name = false;
bool seekable_read = true; bool seekable_read = true;
UInt64 max_rows_to_read_for_schema_inference = 100; UInt64 max_rows_to_read_for_schema_inference = 100;
@ -75,6 +74,7 @@ struct FormatSettings
bool low_cardinality_as_dictionary = false; bool low_cardinality_as_dictionary = false;
bool import_nested = false; bool import_nested = false;
bool allow_missing_columns = false; bool allow_missing_columns = false;
bool case_insensitive_column_matching = false;
} arrow; } arrow;
struct struct
@ -137,6 +137,7 @@ struct FormatSettings
UInt64 row_group_size = 1000000; UInt64 row_group_size = 1000000;
bool import_nested = false; bool import_nested = false;
bool allow_missing_columns = false; bool allow_missing_columns = false;
bool case_insensitive_column_matching = false;
} parquet; } parquet;
struct Pretty struct Pretty
@ -217,6 +218,7 @@ struct FormatSettings
bool import_nested = false; bool import_nested = false;
bool allow_missing_columns = false; bool allow_missing_columns = false;
int64_t row_batch_size = 100'000; int64_t row_batch_size = 100'000;
bool case_insensitive_column_matching = false;
} orc; } orc;
/// For capnProto format we should determine how to /// For capnProto format we should determine how to

View File

@ -41,6 +41,11 @@ namespace ErrorCodes
throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
} }
/// Always throws ILLEGAL_TYPE_OF_ARGUMENT, reporting that function `name` does not accept DateTime.
/// Never returns; the UInt32 return type only allows the call to be used in a `return` statement.
static inline UInt32 dateTimeIsNotSupported(const char * name)
{
    const std::string function_name(name);
    throw Exception("Illegal type DateTime of argument for function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
/// This factor transformation will say that the function is monotone everywhere. /// This factor transformation will say that the function is monotone everywhere.
struct ZeroTransform struct ZeroTransform
{ {
@ -311,6 +316,133 @@ struct ToStartOfSecondImpl
using FactorTransform = ZeroTransform; using FactorTransform = ZeroTransform;
}; };
/// Transform behind toStartOfMillisecond: truncates a DateTime64 to the start of its millisecond.
/// Only DateTime64 is supported; the DateTime, Date32 and Date overloads throw.
struct ToStartOfMillisecondImpl
{
    static constexpr auto name = "toStartOfMillisecond";

    /// `scale_multiplier` is the decimal multiplier of the argument's scale
    /// (1000000 for scale 6, as in the example below).
    ///  - multiplier == 1000: the value is already in milliseconds and is returned as is;
    ///  - multiplier <  1000: the value is multiplied up so that it is expressed in milliseconds;
    ///  - multiplier >  1000: the sub-millisecond digits are dropped, rounding down
    ///    (towards negative infinity); the value stays in the argument's scale.
    static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
    {
        // given that scale is 6, scale_multiplier is 1000000
        // for DateTime64 value of 123.456789:
        // 123456789 - 789 = 123456000
        // for DateTime64 value of -123.456789:
        // -123456789 - (1000 + (-789)) = -123457000

        if (scale_multiplier == 1000)
        {
            return datetime64;
        }
        else if (scale_multiplier < 1000)
        {
            return datetime64 * (1000 / scale_multiplier);
        }
        else
        {
            /// How many units of the argument's scale make up one millisecond.
            const Int64 units_per_millisecond = scale_multiplier / 1000;

            auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier<DateTime64, true>(datetime64, units_per_millisecond);

            /// For negative values the remainder is negative. Shift it by one millisecond
            /// (not by a whole second) so that the subtraction below rounds down to the
            /// millisecond boundary, exactly as in the worked example above.
            if (droppable_part_with_sign < 0)
                droppable_part_with_sign += units_per_millisecond;

            return datetime64 - droppable_part_with_sign;
        }
    }

    /// DateTime has no sub-second part, so the function is rejected for it.
    static inline UInt32 execute(UInt32, const DateLUTImpl &)
    {
        throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// Date32 is rejected.
    static inline UInt32 execute(Int32, const DateLUTImpl &)
    {
        return dateIsNotSupported(name);
    }

    /// Date is rejected.
    static inline UInt32 execute(UInt16, const DateLUTImpl &)
    {
        return dateIsNotSupported(name);
    }

    using FactorTransform = ZeroTransform;
};
/// Transform behind toStartOfMicrosecond: truncates a DateTime64 to the start of its microsecond.
/// Only DateTime64 is supported; the DateTime, Date32 and Date overloads throw.
struct ToStartOfMicrosecondImpl
{
    static constexpr auto name = "toStartOfMicrosecond";

    /// `scale_multiplier` is the decimal multiplier of the argument's scale.
    ///  - multiplier == 1000000: the value is already in microseconds and is returned as is;
    ///  - multiplier <  1000000: the value is multiplied up so that it is expressed in microseconds;
    ///  - multiplier >  1000000: the sub-microsecond digits are dropped, rounding down
    ///    (towards negative infinity); the value stays in the argument's scale.
    static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
    {
        // @see ToStartOfMillisecondImpl

        if (scale_multiplier == 1000000)
        {
            return datetime64;
        }
        else if (scale_multiplier < 1000000)
        {
            return datetime64 * (1000000 / scale_multiplier);
        }
        else
        {
            /// How many units of the argument's scale make up one microsecond.
            const Int64 units_per_microsecond = scale_multiplier / 1000000;

            auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier<DateTime64, true>(datetime64, units_per_microsecond);

            /// For negative values the remainder is negative. Shift it by one microsecond
            /// (not by a whole second) so that the subtraction below rounds down to the
            /// microsecond boundary.
            if (droppable_part_with_sign < 0)
                droppable_part_with_sign += units_per_microsecond;

            return datetime64 - droppable_part_with_sign;
        }
    }

    /// DateTime has no sub-second part, so the function is rejected for it.
    static inline UInt32 execute(UInt32, const DateLUTImpl &)
    {
        throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// Date32 is rejected.
    static inline UInt32 execute(Int32, const DateLUTImpl &)
    {
        return dateIsNotSupported(name);
    }

    /// Date is rejected.
    static inline UInt32 execute(UInt16, const DateLUTImpl &)
    {
        return dateIsNotSupported(name);
    }

    using FactorTransform = ZeroTransform;
};
/// Transform behind toStartOfNanosecond.
/// Only DateTime64 is supported; the DateTime, Date32 and Date overloads throw.
struct ToStartOfNanosecondImpl
{
    static constexpr auto name = "toStartOfNanosecond";

    /// Returns the value unchanged when its multiplier is already 10^9, multiplies it up to
    /// nanoseconds when the multiplier is smaller, and throws when the multiplier is larger.
    static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
    {
        // @see ToStartOfMillisecondImpl
        constexpr Int64 nanosecond_multiplier = 1000000000;

        if (scale_multiplier > nanosecond_multiplier)
            throw Exception("Illegal type of argument for function " + std::string(name) + ", DateTime64 expected", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        if (scale_multiplier == nanosecond_multiplier)
            return datetime64;

        return datetime64 * (nanosecond_multiplier / scale_multiplier);
    }

    /// DateTime is rejected.
    static inline UInt32 execute(UInt32, const DateLUTImpl &)
    {
        const std::string function_name(name);
        throw Exception("Illegal type DateTime of argument for function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// Date32 is rejected.
    static inline UInt32 execute(Int32, const DateLUTImpl &)
    {
        return dateIsNotSupported(name);
    }

    /// Date is rejected.
    static inline UInt32 execute(UInt16, const DateLUTImpl &)
    {
        return dateIsNotSupported(name);
    }

    using FactorTransform = ZeroTransform;
};
struct ToStartOfFiveMinuteImpl struct ToStartOfFiveMinuteImpl
{ {
static constexpr auto name = "toStartOfFiveMinute"; static constexpr auto name = "toStartOfFiveMinute";

View File

@ -40,26 +40,158 @@ namespace ErrorCodes
/// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime'
/// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime'
/// Adds `delta` nanoseconds to a date-time value. The Date and Date32 overloads throw.
struct AddNanosecondsImpl
{
    static constexpr auto name = "addNanoseconds";

    /// Variant working on separate whole/fractional components.
    /// NOTE(review): the whole part is multiplied by the scale factor and the fractional part
    /// is returned without taking the remainder of the division - confirm this is intended.
    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
    {
        const Int64 to_nanoseconds = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
        const auto shifted_fractional = t.fractional * to_nanoseconds + delta;
        const auto carry = std::div(shifted_fractional, static_cast<Int64>(1000000000)).quot;
        return {t.whole * to_nanoseconds + carry, shifted_fractional};
    }

    /// Variant working on the raw DateTime64 value: rescales it by 10^(9 - scale), then adds `delta`.
    static inline NO_SANITIZE_UNDEFINED DateTime64
    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
    {
        const Int64 to_nanoseconds = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
        return t * to_nanoseconds + delta;
    }

    /// DateTime variant: the 32-bit value is multiplied by 10^9 before `delta` is added.
    /// NOTE(review): the declared return type is UInt32, so the result is narrowed - confirm callers expect that.
    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        const Int64 to_nanoseconds = DecimalUtils::scaleMultiplier<DateTime64>(9);
        return t * to_nanoseconds + delta;
    }

    /// Date is not supported.
    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
    {
        throw Exception("addNanoSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// Date32 is not supported.
    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
    {
        throw Exception("addNanoSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }
};
/// Adds `delta` microseconds to a date-time value. The Date and Date32 overloads throw.
struct AddMicrosecondsImpl
{
    static constexpr auto name = "addMicroseconds";

    /// Variant working on separate whole/fractional components.
    /// The carry into the whole part is computed by dividing by the number of fractional
    /// units in one second: 1000000 microseconds, or 1000000 * multiplier (== 10^scale)
    /// when the value is finer than microseconds.
    /// NOTE(review): in the `scale <= 6` branch the whole part is multiplied by the scale
    /// factor while the fractional part is not rescaled - confirm this is intended.
    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
    {
        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
        if (scale <= 6)
        {
            auto division = std::div((t.fractional + delta), static_cast<Int64>(1000000));
            return {t.whole * multiplier + division.quot, division.rem};
        }
        else
        {
            auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(1000000 * multiplier));
            return {t.whole + division.quot, division.rem};
        }
    }

    /// Variant working on the raw DateTime64 value. For scale <= 6 the value is rescaled to
    /// microseconds before `delta` is added; otherwise `delta` is rescaled to the value's scale.
    static inline NO_SANITIZE_UNDEFINED DateTime64
    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
    {
        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
        return scale <= 6 ? t * multiplier + delta : t + delta * multiplier;
    }

    /// DateTime variant: the 32-bit value is multiplied by 10^6 before `delta` is added.
    /// NOTE(review): the declared return type is UInt32, so the result is narrowed - confirm callers expect that.
    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(6);
        return t * multiplier + delta;
    }

    /// Date is not supported.
    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
    {
        throw Exception("addMicroSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// Date32 is not supported.
    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
    {
        throw Exception("addMicroSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }
};
/// Adds `delta` milliseconds to a date-time value. The Date and Date32 overloads throw.
struct AddMillisecondsImpl
{
    static constexpr auto name = "addMilliseconds";

    /// Variant working on separate whole/fractional components; the quotient of the division
    /// is carried into the whole part and the remainder becomes the new fractional part.
    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
    {
        const Int64 scale_gap = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));

        if (scale > 3)
        {
            /// Value is finer than milliseconds: bring `delta` to the value's scale.
            const auto parts = std::div((t.fractional + delta * scale_gap), static_cast<Int64>(1000 * scale_gap));
            return {t.whole + parts.quot, parts.rem};
        }

        const auto parts = std::div((t.fractional + delta), static_cast<Int64>(1000));
        return {t.whole * scale_gap + parts.quot, parts.rem};
    }

    /// Variant working on the raw DateTime64 value. For scale <= 3 the value is rescaled to
    /// milliseconds before `delta` is added; otherwise `delta` is rescaled to the value's scale.
    static inline NO_SANITIZE_UNDEFINED DateTime64
    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
    {
        const Int64 scale_gap = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));

        if (scale <= 3)
            return t * scale_gap + delta;

        return t + delta * scale_gap;
    }

    /// DateTime variant: the 32-bit value is multiplied by 10^3 before `delta` is added.
    /// NOTE(review): the declared return type is UInt32, so the result is narrowed - confirm callers expect that.
    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        const Int64 to_milliseconds = DecimalUtils::scaleMultiplier<DateTime64>(3);
        return t * to_milliseconds + delta;
    }

    /// Date is not supported.
    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
    {
        throw Exception("addMilliSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// Date32 is not supported.
    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
    {
        throw Exception("addMilliSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }
};
struct AddSecondsImpl struct AddSecondsImpl
{ {
static constexpr auto name = "addSeconds"; static constexpr auto name = "addSeconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &) execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return {t.whole + delta, t.fractional}; return {t.whole + delta, t.fractional};
} }
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return t + delta; return t + delta;
} }
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
// use default datetime64 scale // use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000;
} }
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.fromDayNum(DayNum(d)) + delta; return time_zone.fromDayNum(DayNum(d)) + delta;
} }
@ -70,21 +202,29 @@ struct AddMinutesImpl
static constexpr auto name = "addMinutes"; static constexpr auto name = "addMinutes";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &) execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return {t.whole + delta * 60, t.fractional}; return {t.whole + delta * 60, t.fractional};
} }
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
return t + 60 * delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return t + delta * 60; return t + delta * 60;
} }
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
// use default datetime64 scale // use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000;
} }
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.fromDayNum(DayNum(d)) + delta * 60; return time_zone.fromDayNum(DayNum(d)) + delta * 60;
} }
@ -95,20 +235,29 @@ struct AddHoursImpl
static constexpr auto name = "addHours"; static constexpr auto name = "addHours";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &) execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return {t.whole + delta * 3600, t.fractional}; return {t.whole + delta * 3600, t.fractional};
} }
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
return t + 3600 * delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return t + delta * 3600; return t + delta * 3600;
} }
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
// use default datetime64 scale // use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000;
} }
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.fromDayNum(DayNum(d)) + delta * 3600; return time_zone.fromDayNum(DayNum(d)) + delta * 3600;
} }
@ -119,22 +268,30 @@ struct AddDaysImpl
static constexpr auto name = "addDays"; static constexpr auto name = "addDays";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone) execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return {time_zone.addDays(t.whole, delta), t.fractional}; return {time_zone.addDays(t.whole, delta), t.fractional};
} }
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addDays(d.quot, delta) * multiplier + d.rem;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addDays(t, delta); return time_zone.addDays(t, delta);
} }
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &) static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return d + delta; return d + delta;
} }
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &) static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return d + delta; return d + delta;
} }
@ -145,22 +302,30 @@ struct AddWeeksImpl
static constexpr auto name = "addWeeks"; static constexpr auto name = "addWeeks";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone) execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return {time_zone.addWeeks(t.whole, delta), t.fractional}; return {time_zone.addWeeks(t.whole, delta), t.fractional};
} }
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addWeeks(t, delta); return time_zone.addWeeks(t, delta);
} }
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &) static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return d + delta * 7; return d + delta * 7;
} }
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &) static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
{ {
return d + delta * 7; return d + delta * 7;
} }
@ -170,23 +335,31 @@ struct AddMonthsImpl
{ {
static constexpr auto name = "addMonths"; static constexpr auto name = "addMonths";
static inline DecimalUtils::DecimalComponents<DateTime64> static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone) execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return {time_zone.addMonths(t.whole, delta), t.fractional}; return {time_zone.addMonths(t.whole, delta), t.fractional};
} }
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addMonths(d.quot, delta) * multiplier + d.rem;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addMonths(t, delta); return time_zone.addMonths(t, delta);
} }
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addMonths(DayNum(d), delta); return time_zone.addMonths(DayNum(d), delta);
} }
static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addMonths(ExtendedDayNum(d), delta); return time_zone.addMonths(ExtendedDayNum(d), delta);
} }
@ -197,22 +370,30 @@ struct AddQuartersImpl
static constexpr auto name = "addQuarters"; static constexpr auto name = "addQuarters";
static inline DecimalUtils::DecimalComponents<DateTime64> static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone) execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return {time_zone.addQuarters(t.whole, delta), t.fractional}; return {time_zone.addQuarters(t.whole, delta), t.fractional};
} }
static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem;
}
static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addQuarters(t, delta); return time_zone.addQuarters(t, delta);
} }
static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone) static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addQuarters(DayNum(d), delta); return time_zone.addQuarters(DayNum(d), delta);
} }
static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone) static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addQuarters(ExtendedDayNum(d), delta); return time_zone.addQuarters(ExtendedDayNum(d), delta);
} }
@ -222,23 +403,31 @@ struct AddYearsImpl
{ {
static constexpr auto name = "addYears"; static constexpr auto name = "addYears";
static inline DecimalUtils::DecimalComponents<DateTime64> static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone) execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return {time_zone.addYears(t.whole, delta), t.fractional}; return {time_zone.addYears(t.whole, delta), t.fractional};
} }
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addYears(d.quot, delta) * multiplier + d.rem;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addYears(t, delta); return time_zone.addYears(t, delta);
} }
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addYears(DayNum(d), delta); return time_zone.addYears(DayNum(d), delta);
} }
static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{ {
return time_zone.addYears(ExtendedDayNum(d), delta); return time_zone.addYears(ExtendedDayNum(d), delta);
} }
@ -250,13 +439,16 @@ struct SubtractIntervalImpl : public Transform
using Transform::Transform; using Transform::Transform;
template <typename T> template <typename T>
inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone) const inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
{ {
/// Signed integer overflow is Ok. /// Signed integer overflow is Ok.
return Transform::execute(t, -delta, time_zone); return Transform::execute(t, -delta, time_zone, scale);
} }
}; };
struct SubtractNanosecondsImpl : SubtractIntervalImpl<AddNanosecondsImpl> { static constexpr auto name = "subtractNanoseconds"; };
struct SubtractMicrosecondsImpl : SubtractIntervalImpl<AddMicrosecondsImpl> { static constexpr auto name = "subtractMicroseconds"; };
struct SubtractMillisecondsImpl : SubtractIntervalImpl<AddMillisecondsImpl> { static constexpr auto name = "subtractMilliseconds"; };
struct SubtractSecondsImpl : SubtractIntervalImpl<AddSecondsImpl> { static constexpr auto name = "subtractSeconds"; }; struct SubtractSecondsImpl : SubtractIntervalImpl<AddSecondsImpl> { static constexpr auto name = "subtractSeconds"; };
struct SubtractMinutesImpl : SubtractIntervalImpl<AddMinutesImpl> { static constexpr auto name = "subtractMinutes"; }; struct SubtractMinutesImpl : SubtractIntervalImpl<AddMinutesImpl> { static constexpr auto name = "subtractMinutes"; };
struct SubtractHoursImpl : SubtractIntervalImpl<AddHoursImpl> { static constexpr auto name = "subtractHours"; }; struct SubtractHoursImpl : SubtractIntervalImpl<AddHoursImpl> { static constexpr auto name = "subtractHours"; };
@ -277,17 +469,17 @@ struct Adder
{} {}
template <typename FromVectorType, typename ToVectorType> template <typename FromVectorType, typename ToVectorType>
void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone) const void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
{ {
size_t size = vec_from.size(); size_t size = vec_from.size();
vec_to.resize(size); vec_to.resize(size);
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone); vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, scale);
} }
template <typename FromVectorType, typename ToVectorType> template <typename FromVectorType, typename ToVectorType>
void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
{ {
size_t size = vec_from.size(); size_t size = vec_from.size();
vec_to.resize(size); vec_to.resize(size);
@ -296,11 +488,11 @@ struct Adder
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
ColumnFloat32, ColumnFloat64>( ColumnFloat32, ColumnFloat64>(
&delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, size); return true; }); &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, scale, size); return true; });
} }
template <typename FromType, typename ToVectorType> template <typename FromType, typename ToVectorType>
void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
{ {
size_t size = delta.size(); size_t size = delta.size();
vec_to.resize(size); vec_to.resize(size);
@ -309,7 +501,7 @@ struct Adder
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
ColumnFloat32, ColumnFloat64>( ColumnFloat32, ColumnFloat64>(
&delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, size); return true; }); &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, scale, size); return true; });
} }
private: private:
@ -325,18 +517,18 @@ private:
template <typename FromVectorType, typename ToVectorType, typename DeltaColumnType> template <typename FromVectorType, typename ToVectorType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector( NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(
const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const
{ {
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone); vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, scale);
} }
template <typename FromType, typename ToVectorType, typename DeltaColumnType> template <typename FromType, typename ToVectorType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector( NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(
const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const
{ {
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone); vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, scale);
} }
}; };
@ -344,7 +536,7 @@ private:
template <typename FromDataType, typename ToDataType, typename Transform> template <typename FromDataType, typename ToDataType, typename Transform>
struct DateTimeAddIntervalImpl struct DateTimeAddIntervalImpl
{ {
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0)
{ {
using FromValueType = typename FromDataType::FieldType; using FromValueType = typename FromDataType::FieldType;
using FromColumnType = typename FromDataType::ColumnType; using FromColumnType = typename FromDataType::ColumnType;
@ -363,16 +555,15 @@ struct DateTimeAddIntervalImpl
if (const auto * sources = checkAndGetColumn<FromColumnType>(source_col.get())) if (const auto * sources = checkAndGetColumn<FromColumnType>(source_col.get()))
{ {
if (const auto * delta_const_column = typeid_cast<const ColumnConst *>(&delta_column)) if (const auto * delta_const_column = typeid_cast<const ColumnConst *>(&delta_column))
op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone); op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone, scale);
else else
op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone); op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone, scale);
} }
else if (const auto * sources_const = checkAndGetColumnConst<FromColumnType>(source_col.get())) else if (const auto * sources_const = checkAndGetColumnConst<FromColumnType>(source_col.get()))
{ {
op.constantVector( op.constantVector(
sources_const->template getValue<FromValueType>(), sources_const->template getValue<FromValueType>(),
col_to->getData(), col_to->getData(), delta_column, time_zone, scale);
delta_column, time_zone);
} }
else else
{ {
@ -463,18 +654,10 @@ public:
} }
} }
// TransformDateTime64 helps choosing correct overload of exec and does some transformations
// on input and output parameters to simplify support of DateTime64 in concrete Transform.
template <typename FieldType>
using TransformType = std::conditional_t<
std::is_same_v<FieldType, DateTime64>,
TransformDateTime64<Transform>,
Transform>;
/// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types, /// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types,
/// e.g. addSeconds(Date, 1) => DateTime /// e.g. addSeconds(Date, 1) => DateTime
template <typename FieldType> template <typename FieldType>
using TransformExecuteReturnType = decltype(std::declval<TransformType<FieldType>>().execute(FieldType(), 0, std::declval<DateLUTImpl>())); using TransformExecuteReturnType = decltype(std::declval<Transform>().execute(FieldType(), 0, std::declval<DateLUTImpl>(), 0));
// Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl). // Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl).
// e.g. for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return, // e.g. for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return,
@ -500,11 +683,33 @@ public:
if (typeid_cast<const DataTypeDateTime64 *>(arguments[0].type.get())) if (typeid_cast<const DataTypeDateTime64 *>(arguments[0].type.get()))
{ {
const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(*arguments[0].type); const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(*arguments[0].type);
return std::make_shared<DataTypeDateTime64>(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
auto from_scale = datetime64_type.getScale();
auto scale = from_scale;
if (std::is_same_v<Transform, AddNanosecondsImpl>)
scale = 9;
else if (std::is_same_v<Transform, AddMicrosecondsImpl>)
scale = 6;
else if (std::is_same_v<Transform, AddMillisecondsImpl>)
scale = 3;
scale = std::max(scale, from_scale);
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
} }
else else
{ {
return std::make_shared<DataTypeDateTime64>(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); auto scale = DataTypeDateTime64::default_scale;
if (std::is_same_v<Transform, AddNanosecondsImpl>)
scale = 9;
else if (std::is_same_v<Transform, AddMicrosecondsImpl>)
scale = 6;
else if (std::is_same_v<Transform, AddMillisecondsImpl>)
scale = 3;
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
} }
} }
else else
@ -541,9 +746,9 @@ public:
} }
else if (const auto * datetime64_type = assert_cast<const DataTypeDateTime64 *>(from_type)) else if (const auto * datetime64_type = assert_cast<const DataTypeDateTime64 *>(from_type))
{ {
using WrappedTransformType = TransformType<typename DataTypeDateTime64::FieldType>; auto from_scale = datetime64_type->getScale();
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, WrappedTransformType>::execute( return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, Transform>::execute(
WrappedTransformType{datetime64_type->getScale()}, arguments, result_type); Transform{}, arguments, result_type, from_scale);
} }
else else
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(), throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(),

View File

@ -88,6 +88,20 @@ public:
Int64 scale = DataTypeDateTime64::default_scale; Int64 scale = DataTypeDateTime64::default_scale;
if (const auto * dt64 = checkAndGetDataType<DataTypeDateTime64>(arguments[0].type.get())) if (const auto * dt64 = checkAndGetDataType<DataTypeDateTime64>(arguments[0].type.get()))
scale = dt64->getScale(); scale = dt64->getScale();
auto source_scale = scale;
if constexpr (std::is_same_v<ToStartOfMillisecondImpl, Transform>)
{
scale = std::max(source_scale, static_cast<Int64>(3));
}
else if constexpr (std::is_same_v<ToStartOfMicrosecondImpl, Transform>)
{
scale = std::max(source_scale, static_cast<Int64>(6));
}
else if constexpr (std::is_same_v<ToStartOfNanosecondImpl, Transform>)
{
scale = std::max(source_scale, static_cast<Int64>(9));
}
return std::make_shared<ToDataType>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); return std::make_shared<ToDataType>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
} }

View File

@ -112,6 +112,9 @@ void registerFunctionsConversion(FunctionFactory & factory)
factory.registerFunction<FunctionParseDateTime64BestEffortOrZero>(); factory.registerFunction<FunctionParseDateTime64BestEffortOrZero>();
factory.registerFunction<FunctionParseDateTime64BestEffortOrNull>(); factory.registerFunction<FunctionParseDateTime64BestEffortOrNull>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalNanosecond, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMicrosecond, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMillisecond, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalSecond, PositiveMonotonicity>>(); factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalSecond, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMinute, PositiveMonotonicity>>(); factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMinute, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalHour, PositiveMonotonicity>>(); factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalHour, PositiveMonotonicity>>();

View File

@ -1487,6 +1487,9 @@ struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; };
static constexpr auto kind = IntervalKind::INTERVAL_KIND; \ static constexpr auto kind = IntervalKind::INTERVAL_KIND; \
}; };
DEFINE_NAME_TO_INTERVAL(Nanosecond)
DEFINE_NAME_TO_INTERVAL(Microsecond)
DEFINE_NAME_TO_INTERVAL(Millisecond)
DEFINE_NAME_TO_INTERVAL(Second) DEFINE_NAME_TO_INTERVAL(Second)
DEFINE_NAME_TO_INTERVAL(Minute) DEFINE_NAME_TO_INTERVAL(Minute)
DEFINE_NAME_TO_INTERVAL(Hour) DEFINE_NAME_TO_INTERVAL(Hour)
@ -2703,13 +2706,10 @@ private:
return createWrapper<ToDataType>(from_type, to_type, requested_result_is_nullable); return createWrapper<ToDataType>(from_type, to_type, requested_result_is_nullable);
} }
WrapperType createUInt8ToUInt8Wrapper(const DataTypePtr from_type, const DataTypePtr to_type) const WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const
{ {
return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr
{ {
if (isBool(from_type) || !isBool(to_type))
return arguments.front().column;
/// Special case when we convert UInt8 column to Bool column. /// Special case when we convert UInt8 column to Bool column.
/// both columns have type UInt8, but we shouldn't use identity wrapper, /// both columns have type UInt8, but we shouldn't use identity wrapper,
/// because Bool column can contain only 0 and 1. /// because Bool column can contain only 0 and 1.
@ -3506,15 +3506,19 @@ private:
/// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested.
WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const
{ {
bool convert_to_ipv6 = to_type->getCustomName() && to_type->getCustomName()->getName() == "IPv6"; if (isUInt8(from_type) && isBool(to_type))
return createUInt8ToBoolWrapper(from_type, to_type);
if (from_type->equals(*to_type) && !convert_to_ipv6) /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast
{ bool safe_convert_custom_types = true;
if (isUInt8(from_type))
return createUInt8ToUInt8Wrapper(from_type, to_type);
if (const auto * to_type_custom_name = to_type->getCustomName())
safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName();
else if (const auto * from_type_custom_name = from_type->getCustomName())
safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName();
if (from_type->equals(*to_type) && safe_convert_custom_types)
return createIdentityWrapper(from_type); return createIdentityWrapper(from_type);
}
else if (WhichDataType(from_type).isNothing()) else if (WhichDataType(from_type).isNothing())
return createNothingWrapper(to_type.get()); return createNothingWrapper(to_type.get());

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND; extern const int ARGUMENT_OUT_OF_BOUND;
extern const int SYNTAX_ERROR;
} }
namespace namespace
@ -167,6 +168,13 @@ struct TimeWindowImpl<TUMBLE>
switch (std::get<0>(interval)) switch (std::get<0>(interval))
{ {
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeTumble<UInt32, IntervalKind::Nanosecond>(*time_column_vec, std::get<1>(interval), time_zone);
// case IntervalKind::Microsecond:
// return executeTumble<UInt32, IntervalKind::Microsecond>(*time_column_vec, std::get<1>(interval), time_zone);
// case IntervalKind::Millisecond:
// return executeTumble<UInt32, IntervalKind::Millisecond>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Second: case IntervalKind::Second:
return executeTumble<UInt32, IntervalKind::Second>(*time_column_vec, std::get<1>(interval), time_zone); return executeTumble<UInt32, IntervalKind::Second>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Minute: case IntervalKind::Minute:
@ -183,6 +191,8 @@ struct TimeWindowImpl<TUMBLE>
return executeTumble<UInt16, IntervalKind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone); return executeTumble<UInt16, IntervalKind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Year: case IntervalKind::Year:
return executeTumble<UInt16, IntervalKind::Year>(*time_column_vec, std::get<1>(interval), time_zone); return executeTumble<UInt16, IntervalKind::Year>(*time_column_vec, std::get<1>(interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
} }
__builtin_unreachable(); __builtin_unreachable();
} }
@ -350,6 +360,16 @@ struct TimeWindowImpl<HOP>
switch (std::get<0>(window_interval)) switch (std::get<0>(window_interval))
{ {
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeHop<UInt32, IntervalKind::Nanosecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Microsecond:
// return executeHop<UInt32, IntervalKind::Microsecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Millisecond:
// return executeHop<UInt32, IntervalKind::Millisecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Second: case IntervalKind::Second:
return executeHop<UInt32, IntervalKind::Second>( return executeHop<UInt32, IntervalKind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
@ -374,6 +394,8 @@ struct TimeWindowImpl<HOP>
case IntervalKind::Year: case IntervalKind::Year:
return executeHop<UInt16, IntervalKind::Year>( return executeHop<UInt16, IntervalKind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
} }
__builtin_unreachable(); __builtin_unreachable();
} }
@ -487,6 +509,16 @@ struct TimeWindowImpl<WINDOW_ID>
switch (std::get<0>(window_interval)) switch (std::get<0>(window_interval))
{ {
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeHopSlice<UInt32, IntervalKind::Nanosecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Microsecond:
// return executeHopSlice<UInt32, IntervalKind::Microsecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Millisecond:
// return executeHopSlice<UInt32, IntervalKind::Millisecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Second: case IntervalKind::Second:
return executeHopSlice<UInt32, IntervalKind::Second>( return executeHopSlice<UInt32, IntervalKind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
@ -511,6 +543,8 @@ struct TimeWindowImpl<WINDOW_ID>
case IntervalKind::Year: case IntervalKind::Year:
return executeHopSlice<UInt16, IntervalKind::Year>( return executeHopSlice<UInt16, IntervalKind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
} }
__builtin_unreachable(); __builtin_unreachable();
} }

View File

@ -80,7 +80,32 @@ struct ToStartOfTransform;
TRANSFORM_TIME(Hour) TRANSFORM_TIME(Hour)
TRANSFORM_TIME(Minute) TRANSFORM_TIME(Minute)
TRANSFORM_TIME(Second) TRANSFORM_TIME(Second)
#undef TRANSFORM_DATE #undef TRANSFORM_TIME
/// Generates the ToStartOfTransform specialization for one sub-second interval kind.
/// DEF_SCALE is the decimal scale paired with that kind below (3, 6 and 9).
///  - scale >  DEF_SCALE: `t` is kept in its own scale and reduced by its remainder modulo
///    `delta` units expressed in that scale.
///  - scale <= DEF_SCALE: `t` is first multiplied up to DEF_SCALE, then reduced by its remainder
///    modulo `delta`; the modulo is skipped for `delta == 1`, where it would subtract nothing.
#define TRANSFORM_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \
template<> \
    struct ToStartOfTransform<IntervalKind::INTERVAL_KIND> \
    { \
        static Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \
        { \
            if (scale > DEF_SCALE) \
                return t - (t % (delta * DecimalUtils::scaleMultiplier<DateTime64>(scale - DEF_SCALE))); \
            auto upscaled = t * DecimalUtils::scaleMultiplier<DateTime64>(DEF_SCALE - scale); \
            if (delta != 1) \
                return upscaled - (upscaled % delta); \
            return upscaled; \
        } \
    };
    TRANSFORM_SUBSECONDS(Millisecond, 3)
    TRANSFORM_SUBSECONDS(Microsecond, 6)
    TRANSFORM_SUBSECONDS(Nanosecond, 9)
#undef TRANSFORM_SUBSECONDS
template <IntervalKind::Kind unit> template <IntervalKind::Kind unit>
struct AddTime; struct AddTime;
@ -117,6 +142,25 @@ struct ToStartOfTransform;
ADD_TIME(Second, 1) ADD_TIME(Second, 1)
#undef ADD_TIME #undef ADD_TIME
/// Generates the AddTime specialization for one sub-second interval kind.
/// The result is `t + delta * 10^|scale - DEF_SCALE|`: the distance between the input scale and
/// DEF_SCALE is used as the power-of-ten multiplier for `delta`, whichever of the two is larger.
/// NOTE(review): when scale < DEF_SCALE the delta is still multiplied rather than left as-is;
/// confirm against the callers which scale `t` is expected to be in for that case.
#define ADD_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \
template <> \
    struct AddTime<IntervalKind::INTERVAL_KIND> \
    { \
        static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \
        { \
            const auto scale_gap = (scale < DEF_SCALE) ? (DEF_SCALE - scale) : (scale - DEF_SCALE); \
            return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale_gap); \
        } \
    };
    ADD_SUBSECONDS(Millisecond, 3)
    ADD_SUBSECONDS(Microsecond, 6)
    ADD_SUBSECONDS(Nanosecond, 9)
#undef ADD_SUBSECONDS
template <TimeWindowFunctionName type> template <TimeWindowFunctionName type>
struct TimeWindowImpl struct TimeWindowImpl
{ {

View File

@ -0,0 +1,28 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionDateOrDateTimeAddInterval.h>
namespace DB
{

/// Sub-second "subtract interval" functions: each one is FunctionDateOrDateTimeAddInterval
/// instantiated with the matching Subtract*Impl, plus a hook that registers it in the factory.

using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval<SubtractNanosecondsImpl>;

/// Registers FunctionSubtractNanoseconds in `factory`.
void registerFunctionSubtractNanoseconds(FunctionFactory & factory)
{
    factory.registerFunction<FunctionSubtractNanoseconds>();
}

using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval<SubtractMicrosecondsImpl>;

/// Registers FunctionSubtractMicroseconds in `factory`.
void registerFunctionSubtractMicroseconds(FunctionFactory & factory)
{
    factory.registerFunction<FunctionSubtractMicroseconds>();
}

using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval<SubtractMillisecondsImpl>;

/// Registers FunctionSubtractMilliseconds in `factory`.
void registerFunctionSubtractMilliseconds(FunctionFactory & factory)
{
    factory.registerFunction<FunctionSubtractMilliseconds>();
}

}

View File

@ -13,7 +13,7 @@ namespace DB
* * DateTime64 value and scale factor (2) * * DateTime64 value and scale factor (2)
* * DateTime64 broken down to components, result of execute is then re-assembled back into DateTime64 value (3) * * DateTime64 broken down to components, result of execute is then re-assembled back into DateTime64 value (3)
* *
* Suitable Transfotm-types are commonly used in Date/DateTime manipulation functions, * Suitable Transform-types are commonly used in Date/DateTime manipulation functions,
* and should implement static (or const) function with following signatures: * and should implement static (or const) function with following signatures:
* 1: * 1:
* R execute(Int64 whole_value, ... ) * R execute(Int64 whole_value, ... )

View File

@ -0,0 +1,28 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionDateOrDateTimeAddInterval.h>
namespace DB
{

/// Sub-second "add interval" functions: each one is FunctionDateOrDateTimeAddInterval
/// instantiated with the matching Add*Impl, plus a hook that registers it in the factory.

using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval<AddNanosecondsImpl>;

/// Registers FunctionAddNanoseconds in `factory`.
void registerFunctionAddNanoseconds(FunctionFactory & factory)
{
    factory.registerFunction<FunctionAddNanoseconds>();
}

using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval<AddMicrosecondsImpl>;

/// Registers FunctionAddMicroseconds in `factory`.
void registerFunctionAddMicroseconds(FunctionFactory & factory)
{
    factory.registerFunction<FunctionAddMicroseconds>();
}

using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval<AddMillisecondsImpl>;

/// Registers FunctionAddMilliseconds in `factory`.
void registerFunctionAddMilliseconds(FunctionFactory & factory)
{
    factory.registerFunction<FunctionAddMilliseconds>();
}

}

View File

@ -7,9 +7,9 @@ namespace DB
{ {
/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov /// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
struct YandexConsistentHashImpl struct KostikConsistentHashImpl
{ {
static constexpr auto name = "yandexConsistentHash"; static constexpr auto name = "kostikConsistentHash";
using HashType = UInt64; using HashType = UInt64;
/// Actually it supports UInt64, but it is efficient only if n <= 32768 /// Actually it supports UInt64, but it is efficient only if n <= 32768
@ -23,12 +23,12 @@ struct YandexConsistentHashImpl
} }
}; };
using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>; using FunctionKostikConsistentHash = FunctionConsistentHashImpl<KostikConsistentHashImpl>;
void registerFunctionYandexConsistentHash(FunctionFactory & factory) void registerFunctionKostikConsistentHash(FunctionFactory & factory)
{ {
factory.registerFunction<FunctionYandexConsistentHash>(); factory.registerFunction<FunctionKostikConsistentHash>();
factory.registerAlias("yandexConsistentHash", "kostikConsistentHash");
} }
} }

View File

@ -2,12 +2,12 @@ namespace DB
{ {
class FunctionFactory; class FunctionFactory;
void registerFunctionYandexConsistentHash(FunctionFactory & factory); void registerFunctionKostikConsistentHash(FunctionFactory & factory);
void registerFunctionJumpConsistentHash(FunctionFactory & factory); void registerFunctionJumpConsistentHash(FunctionFactory & factory);
void registerFunctionsConsistentHashing(FunctionFactory & factory) void registerFunctionsConsistentHashing(FunctionFactory & factory)
{ {
registerFunctionYandexConsistentHash(factory); registerFunctionKostikConsistentHash(factory);
registerFunctionJumpConsistentHash(factory); registerFunctionJumpConsistentHash(factory);
} }

View File

@ -11,6 +11,9 @@ void registerFunctionToDayOfWeek(FunctionFactory &);
void registerFunctionToDayOfYear(FunctionFactory &); void registerFunctionToDayOfYear(FunctionFactory &);
void registerFunctionToHour(FunctionFactory &); void registerFunctionToHour(FunctionFactory &);
void registerFunctionToMinute(FunctionFactory &); void registerFunctionToMinute(FunctionFactory &);
void registerFunctionToStartOfNanosecond(FunctionFactory &);
void registerFunctionToStartOfMicrosecond(FunctionFactory &);
void registerFunctionToStartOfMillisecond(FunctionFactory &);
void registerFunctionToStartOfSecond(FunctionFactory &); void registerFunctionToStartOfSecond(FunctionFactory &);
void registerFunctionToSecond(FunctionFactory &); void registerFunctionToSecond(FunctionFactory &);
void registerFunctionToStartOfDay(FunctionFactory &); void registerFunctionToStartOfDay(FunctionFactory &);
@ -47,6 +50,9 @@ void registerFunctionTimeSlots(FunctionFactory &);
void registerFunctionToYYYYMM(FunctionFactory &); void registerFunctionToYYYYMM(FunctionFactory &);
void registerFunctionToYYYYMMDD(FunctionFactory &); void registerFunctionToYYYYMMDD(FunctionFactory &);
void registerFunctionToYYYYMMDDhhmmss(FunctionFactory &); void registerFunctionToYYYYMMDDhhmmss(FunctionFactory &);
void registerFunctionAddNanoseconds(FunctionFactory &);
void registerFunctionAddMicroseconds(FunctionFactory &);
void registerFunctionAddMilliseconds(FunctionFactory &);
void registerFunctionAddSeconds(FunctionFactory &); void registerFunctionAddSeconds(FunctionFactory &);
void registerFunctionAddMinutes(FunctionFactory &); void registerFunctionAddMinutes(FunctionFactory &);
void registerFunctionAddHours(FunctionFactory &); void registerFunctionAddHours(FunctionFactory &);
@ -55,6 +61,9 @@ void registerFunctionAddWeeks(FunctionFactory &);
void registerFunctionAddMonths(FunctionFactory &); void registerFunctionAddMonths(FunctionFactory &);
void registerFunctionAddQuarters(FunctionFactory &); void registerFunctionAddQuarters(FunctionFactory &);
void registerFunctionAddYears(FunctionFactory &); void registerFunctionAddYears(FunctionFactory &);
void registerFunctionSubtractNanoseconds(FunctionFactory &);
void registerFunctionSubtractMicroseconds(FunctionFactory &);
void registerFunctionSubtractMilliseconds(FunctionFactory &);
void registerFunctionSubtractSeconds(FunctionFactory &); void registerFunctionSubtractSeconds(FunctionFactory &);
void registerFunctionSubtractMinutes(FunctionFactory &); void registerFunctionSubtractMinutes(FunctionFactory &);
void registerFunctionSubtractHours(FunctionFactory &); void registerFunctionSubtractHours(FunctionFactory &);
@ -93,6 +102,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionToStartOfMonth(factory); registerFunctionToStartOfMonth(factory);
registerFunctionToStartOfQuarter(factory); registerFunctionToStartOfQuarter(factory);
registerFunctionToStartOfYear(factory); registerFunctionToStartOfYear(factory);
registerFunctionToStartOfNanosecond(factory);
registerFunctionToStartOfMicrosecond(factory);
registerFunctionToStartOfMillisecond(factory);
registerFunctionToStartOfSecond(factory); registerFunctionToStartOfSecond(factory);
registerFunctionToStartOfMinute(factory); registerFunctionToStartOfMinute(factory);
registerFunctionToStartOfFiveMinute(factory); registerFunctionToStartOfFiveMinute(factory);
@ -119,6 +131,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionToYYYYMM(factory); registerFunctionToYYYYMM(factory);
registerFunctionToYYYYMMDD(factory); registerFunctionToYYYYMMDD(factory);
registerFunctionToYYYYMMDDhhmmss(factory); registerFunctionToYYYYMMDDhhmmss(factory);
registerFunctionAddNanoseconds(factory);
registerFunctionAddMicroseconds(factory);
registerFunctionAddMilliseconds(factory);
registerFunctionAddSeconds(factory); registerFunctionAddSeconds(factory);
registerFunctionAddMinutes(factory); registerFunctionAddMinutes(factory);
registerFunctionAddHours(factory); registerFunctionAddHours(factory);
@ -127,6 +142,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionAddMonths(factory); registerFunctionAddMonths(factory);
registerFunctionAddQuarters(factory); registerFunctionAddQuarters(factory);
registerFunctionAddYears(factory); registerFunctionAddYears(factory);
registerFunctionSubtractNanoseconds(factory);
registerFunctionSubtractMicroseconds(factory);
registerFunctionSubtractMilliseconds(factory);
registerFunctionSubtractSeconds(factory); registerFunctionSubtractSeconds(factory);
registerFunctionSubtractMinutes(factory); registerFunctionSubtractMinutes(factory);
registerFunctionSubtractHours(factory); registerFunctionSubtractHours(factory);

View File

@ -33,184 +33,273 @@ namespace
template <> template <>
struct Transform<IntervalKind::Year> struct Transform<IntervalKind::Year>
{ {
static constexpr auto name = function_name; static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64)
static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone)
{ {
return time_zone.toStartOfYearInterval(DayNum(d), years); return time_zone.toStartOfYearInterval(DayNum(d), years);
} }
static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone) static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years);
} }
static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone) static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
} }
static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone) static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{ {
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years);
} }
}; };
template <> template <>
struct Transform<IntervalKind::Quarter> struct Transform<IntervalKind::Quarter>
{ {
static constexpr auto name = function_name; static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64)
static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone)
{ {
return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); return time_zone.toStartOfQuarterInterval(DayNum(d), quarters);
} }
static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone) static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters);
} }
static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone) static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
} }
static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone) static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{ {
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters);
} }
}; };
template <> template <>
struct Transform<IntervalKind::Month> struct Transform<IntervalKind::Month>
{ {
static constexpr auto name = function_name; static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64)
static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone)
{ {
return time_zone.toStartOfMonthInterval(DayNum(d), months); return time_zone.toStartOfMonthInterval(DayNum(d), months);
} }
static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone) static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months);
} }
static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone) static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
} }
static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone) static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{ {
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months);
} }
}; };
template <> template <>
struct Transform<IntervalKind::Week> struct Transform<IntervalKind::Week>
{ {
static constexpr auto name = function_name; static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64)
static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone)
{ {
return time_zone.toStartOfWeekInterval(DayNum(d), weeks); return time_zone.toStartOfWeekInterval(DayNum(d), weeks);
} }
static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone) static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks);
} }
static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone) static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
} }
static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone) static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{ {
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks);
} }
}; };
template <> template <>
struct Transform<IntervalKind::Day> struct Transform<IntervalKind::Day>
{ {
static constexpr auto name = function_name; static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64)
static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone)
{ {
return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
} }
static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone) static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
} }
static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone) static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64)
{ {
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
} }
static UInt32 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone) static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{ {
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days);
} }
}; };
template <> template <>
struct Transform<IntervalKind::Hour> struct Transform<IntervalKind::Hour>
{ {
static constexpr auto name = function_name; static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64)
static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } {
return time_zone.toStartOfHourInterval(t, hours);
}
static UInt32 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfHourInterval(t / scale_multiplier, hours);
}
}; };
template <> template <>
struct Transform<IntervalKind::Minute> struct Transform<IntervalKind::Minute>
{ {
static constexpr auto name = function_name; static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64)
static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone)
{ {
return time_zone.toStartOfMinuteInterval(t, minutes); return time_zone.toStartOfMinuteInterval(t, minutes);
} }
static UInt32 execute(Int64 t, UInt64 minutes, const DateLUTImpl & time_zone) static UInt32 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{ {
return time_zone.toStartOfMinuteInterval(t, minutes); return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes);
} }
}; };
template <> template <>
struct Transform<IntervalKind::Second> struct Transform<IntervalKind::Second>
{ {
static constexpr auto name = function_name; static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64)
static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone)
{ {
return time_zone.toStartOfSecondInterval(t, seconds); return time_zone.toStartOfSecondInterval(t, seconds);
} }
static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) static UInt32 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{ {
return time_zone.toStartOfSecondInterval(t, seconds); return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds);
} }
}; };
/// Rounds a DateTime64 tick count down to a multiple of `milliseconds`, returning milliseconds.
/// Date, Date32 and DateTime inputs are rejected through the *IsNotSupported helpers.
template <>
struct Transform<IntervalKind::Millisecond>
{
    static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

    static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

    static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }

    /// `scale_multiplier` is the number of input ticks per second.
    /// Negative values use `(x + 1) / n - 1` because integer division truncates toward zero.
    static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier)
    {
        constexpr Int64 ticks_per_second = 1000;
        const bool non_negative = t >= 0;

        /// Input is finer than milliseconds: divide the extra precision away.
        if (scale_multiplier > ticks_per_second)
        {
            const Int64 downscale = scale_multiplier / ticks_per_second;
            if (likely(non_negative))
                return t / milliseconds / downscale * milliseconds;
            return ((t + 1) / milliseconds / downscale - 1) * milliseconds;
        }

        /// Input is coarser than or equal to milliseconds; the factor is 1 when the scales match.
        const Int64 upscaled = t * (ticks_per_second / scale_multiplier);
        if (likely(non_negative))
            return upscaled / milliseconds * milliseconds;
        return ((upscaled + 1) / milliseconds - 1) * milliseconds;
    }
};
/// Rounds a DateTime64 tick count down to a multiple of `microseconds`, returning microseconds.
/// Date, Date32 and DateTime inputs are rejected through the *IsNotSupported helpers.
template <>
struct Transform<IntervalKind::Microsecond>
{
    static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

    static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

    static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }

    /// `scale_multiplier` is the number of input ticks per second.
    /// Negative values use `(x + 1) / n - 1` because integer division truncates toward zero.
    static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier)
    {
        constexpr Int64 ticks_per_second = 1000000;
        const bool non_negative = t >= 0;

        /// Input is finer than microseconds: divide the extra precision away.
        if (scale_multiplier > ticks_per_second)
        {
            const Int64 downscale = scale_multiplier / ticks_per_second;
            if (likely(non_negative))
                return t / microseconds / downscale * microseconds;
            return ((t + 1) / microseconds / downscale - 1) * microseconds;
        }

        /// Input is coarser than or equal to microseconds; the factor is 1 when the scales match.
        const Int64 upscaled = t * (ticks_per_second / scale_multiplier);
        if (likely(non_negative))
            return upscaled / microseconds * microseconds;
        return ((upscaled + 1) / microseconds - 1) * microseconds;
    }
};
/// Rounds a DateTime64 tick count down to a multiple of `nanoseconds`, returning nanoseconds.
/// Date, Date32 and DateTime inputs are rejected through the *IsNotSupported helpers.
template <>
struct Transform<IntervalKind::Nanosecond>
{
    static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

    static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

    static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }

    /// `scale_multiplier` is the number of input ticks per second.
    /// Negative values use `(x + 1) / n - 1` because integer division truncates toward zero.
    static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier)
    {
        constexpr Int64 ticks_per_second = 1000000000;

        /// Only a coarser input needs converting; otherwise the value is used unchanged.
        Int64 ticks = t;
        if (scale_multiplier < ticks_per_second)
            ticks = t * (ticks_per_second / scale_multiplier);

        if (likely(t >= 0))
            return ticks / nanoseconds * nanoseconds;
        return ((ticks + 1) / nanoseconds - 1) * nanoseconds;
    }
};
class FunctionToStartOfInterval : public IFunction class FunctionToStartOfInterval : public IFunction
{ {
@ -240,6 +329,7 @@ public:
const DataTypeInterval * interval_type = nullptr; const DataTypeInterval * interval_type = nullptr;
bool result_type_is_date = false; bool result_type_is_date = false;
bool result_type_is_datetime = false;
auto check_interval_argument = [&] auto check_interval_argument = [&]
{ {
interval_type = checkAndGetDataType<DataTypeInterval>(arguments[1].type.get()); interval_type = checkAndGetDataType<DataTypeInterval>(arguments[1].type.get());
@ -251,6 +341,8 @@ public:
result_type_is_date = (interval_type->getKind() == IntervalKind::Year) result_type_is_date = (interval_type->getKind() == IntervalKind::Year)
|| (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month) || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month)
|| (interval_type->getKind() == IntervalKind::Week); || (interval_type->getKind() == IntervalKind::Week);
result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour)
|| (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second);
}; };
auto check_timezone_argument = [&] auto check_timezone_argument = [&]
@ -263,7 +355,7 @@ public:
if (first_argument_is_date && result_type_is_date) if (first_argument_is_date && result_type_is_date)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The timezone argument of function {} with interval type {} is allowed only when the 1st argument " "The timezone argument of function {} with interval type {} is allowed only when the 1st argument "
"has the type DateTime", "has the type DateTime or DateTime64",
getName(), interval_type->getKind().toString()); getName(), interval_type->getKind().toString());
}; };
@ -288,19 +380,33 @@ public:
if (result_type_is_date) if (result_type_is_date)
return std::make_shared<DataTypeDate>(); return std::make_shared<DataTypeDate>();
else else if (result_type_is_datetime)
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
else
{
auto scale = 0;
if (interval_type->getKind() == IntervalKind::Nanosecond)
scale = 9;
else if (interval_type->getKind() == IntervalKind::Microsecond)
scale = 6;
else if (interval_type->getKind() == IntervalKind::Millisecond)
scale = 3;
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
}
} }
bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override
{ {
const auto & time_column = arguments[0]; const auto & time_column = arguments[0];
const auto & interval_column = arguments[1]; const auto & interval_column = arguments[1];
const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0);
auto result_column = dispatchForColumns(time_column, interval_column, time_zone); auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone);
return result_column; return result_column;
} }
@ -316,33 +422,36 @@ public:
private: private:
ColumnPtr dispatchForColumns( ColumnPtr dispatchForColumns(
const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const
{ {
const auto & from_datatype = *time_column.type.get(); const auto & from_datatype = *time_column.type.get();
const auto which_type = WhichDataType(from_datatype); const auto which_type = WhichDataType(from_datatype);
if (which_type.isDateTime64())
{
const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());
auto scale = assert_cast<const DataTypeDateTime64 &>(from_datatype).getScale();
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale);
}
if (which_type.isDateTime()) if (which_type.isDateTime())
{ {
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get()); const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
if (time_column_vec) if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime&>(from_datatype), *time_column_vec, interval_column, time_zone); return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
} }
if (which_type.isDate()) if (which_type.isDate())
{ {
const auto * time_column_vec = checkAndGetColumn<ColumnUInt16>(time_column.column.get()); const auto * time_column_vec = checkAndGetColumn<ColumnUInt16>(time_column.column.get());
if (time_column_vec) if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, time_zone); return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
} }
if (which_type.isDate32()) if (which_type.isDate32())
{ {
const auto * time_column_vec = checkAndGetColumn<ColumnInt32>(time_column.column.get()); const auto * time_column_vec = checkAndGetColumn<ColumnInt32>(time_column.column.get());
if (time_column_vec) if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, time_zone); return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
}
if (which_type.isDateTime64())
{
const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64&>(from_datatype), *time_column_vec, interval_column, time_zone);
} }
throw Exception( throw Exception(
"Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time", "Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time",
@ -351,7 +460,8 @@ private:
template <typename ColumnType, typename FromDataType> template <typename ColumnType, typename FromDataType>
ColumnPtr dispatchForIntervalColumn( ColumnPtr dispatchForIntervalColumn(
const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column,
const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const
{ {
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get()); const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
if (!interval_type) if (!interval_type)
@ -368,49 +478,52 @@ private:
switch (interval_type->getKind()) switch (interval_type->getKind())
{ {
case IntervalKind::Nanosecond:
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Nanosecond>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Microsecond:
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Microsecond>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Millisecond:
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Millisecond>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Second: case IntervalKind::Second:
return execute<FromDataType, UInt32, IntervalKind::Second>(from, time_column, num_units, time_zone); return execute<FromDataType, DataTypeDateTime, IntervalKind::Second>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Minute: case IntervalKind::Minute:
return execute<FromDataType, UInt32, IntervalKind::Minute>(from, time_column, num_units, time_zone); return execute<FromDataType, DataTypeDateTime, IntervalKind::Minute>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Hour: case IntervalKind::Hour:
return execute<FromDataType, UInt32, IntervalKind::Hour>(from, time_column, num_units, time_zone); return execute<FromDataType, DataTypeDateTime, IntervalKind::Hour>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Day: case IntervalKind::Day:
return execute<FromDataType, UInt32, IntervalKind::Day>(from, time_column, num_units, time_zone); return execute<FromDataType, DataTypeDateTime, IntervalKind::Day>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Week: case IntervalKind::Week:
return execute<FromDataType, UInt16, IntervalKind::Week>(from, time_column, num_units, time_zone); return execute<FromDataType, DataTypeDate, IntervalKind::Week>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Month: case IntervalKind::Month:
return execute<FromDataType, UInt16, IntervalKind::Month>(from, time_column, num_units, time_zone); return execute<FromDataType, DataTypeDate, IntervalKind::Month>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Quarter: case IntervalKind::Quarter:
return execute<FromDataType, UInt16, IntervalKind::Quarter>(from, time_column, num_units, time_zone); return execute<FromDataType, DataTypeDate, IntervalKind::Quarter>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Year: case IntervalKind::Year:
return execute<FromDataType, UInt16, IntervalKind::Year>(from, time_column, num_units, time_zone); return execute<FromDataType, DataTypeDate, IntervalKind::Year>(from, time_column, num_units, result_type, time_zone, scale);
} }
__builtin_unreachable(); __builtin_unreachable();
} }
template <typename FromDataType, typename ToDataType, IntervalKind::Kind unit, typename ColumnType>
template <typename FromDataType, typename ToType, IntervalKind::Kind unit, typename ColumnType> ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const
ColumnPtr execute(const FromDataType & from_datatype, const ColumnType & time_column, UInt64 num_units, const DateLUTImpl & time_zone) const
{ {
const auto & time_data = time_column.getData(); using ToColumnType = typename ToDataType::ColumnType;
size_t size = time_column.size();
auto result = ColumnVector<ToType>::create(); const auto & time_data = time_column_type.getData();
auto & result_data = result->getData(); size_t size = time_data.size();
auto result_col = result_type->createColumn();
auto *col_to = assert_cast<ToColumnType *>(result_col.get());
auto & result_data = col_to->getData();
result_data.resize(size); result_data.resize(size);
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>) Int64 scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
{
const auto transform = TransformDateTime64<Transform<unit>>{from_datatype.getScale()}; for (size_t i = 0; i != size; ++i)
for (size_t i = 0; i != size; ++i) result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone, scale_multiplier);
result_data[i] = transform.execute(time_data[i], num_units, time_zone);
} return result_col;
else
{
for (size_t i = 0; i != size; ++i)
result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone);
}
return result;
} }
}; };

View File

@ -0,0 +1,30 @@
#include <Functions/FunctionFactory.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionDateOrDateTimeToSomething.h>
namespace DB
{
/// Each section below follows the same pattern: an alias that instantiates
/// FunctionDateOrDateTimeToSomething with DataTypeDateTime64 and one of the
/// ToStartOf*Impl transforms, followed by a function that registers that alias
/// in the given FunctionFactory.
/// NOTE(review): DataTypeDateTime64 is presumably the result data type of the
/// generated function - confirm against FunctionDateOrDateTimeToSomething.

/// Millisecond variant.
using FunctionToStartOfMillisecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfMillisecondImpl>;
/// Registers FunctionToStartOfMillisecond in `factory`.
void registerFunctionToStartOfMillisecond(FunctionFactory & factory)
{
factory.registerFunction<FunctionToStartOfMillisecond>();
}
/// Microsecond variant.
using FunctionToStartOfMicrosecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfMicrosecondImpl>;
/// Registers FunctionToStartOfMicrosecond in `factory`.
void registerFunctionToStartOfMicrosecond(FunctionFactory & factory)
{
factory.registerFunction<FunctionToStartOfMicrosecond>();
}
/// Nanosecond variant.
using FunctionToStartOfNanosecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfNanosecondImpl>;
/// Registers FunctionToStartOfNanosecond in `factory`.
void registerFunctionToStartOfNanosecond(FunctionFactory & factory)
{
factory.registerFunction<FunctionToStartOfNanosecond>();
}
}

34
src/IO/IOThreadPool.cpp Normal file
View File

@ -0,0 +1,34 @@
#include <IO/IOThreadPool.h>
#include "Core/Field.h"
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
std::unique_ptr<ThreadPool> IOThreadPool::instance;
/// Creates the shared IO thread pool with the given limits.
/// Throws LOGICAL_ERROR if the pool has already been created.
void IOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size)
{
    const bool already_initialized = static_cast<bool>(instance);
    if (already_initialized)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is initialized twice");

    /// The trailing `false` is the pool's shutdown_on_exception flag.
    auto new_pool = std::make_unique<ThreadPool>(max_threads, max_free_threads, queue_size, false);
    instance = std::move(new_pool);
}
/// Returns the shared IO thread pool.
/// Throws LOGICAL_ERROR if initialize() has not been called yet.
ThreadPool & IOThreadPool::get()
{
    if (instance)
        return *instance;

    throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is not initialized");
}
}

20
src/IO/IOThreadPool.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <Common/ThreadPool.h>
namespace DB
{
/*
* ThreadPool used for the IO.
*/
class IOThreadPool
{
public:
    /// Creates the single shared pool instance from the given limits.
    static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size);

    /// Gives access to the shared pool instance.
    static ThreadPool & get();

private:
    /// The one pool instance shared through the static accessors above.
    static std::unique_ptr<ThreadPool> instance;
};
}

View File

@ -0,0 +1,290 @@
#include <IO/ParallelReadBuffer.h>
#include <base/logger_useful.h>
#include <Poco/Logger.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int SEEK_POSITION_OUT_OF_BOUND;
}
/// Stores the reader factory, the thread pool and the per-worker callbacks, then
/// schedules the initial set of readers (bounded by max_working_readers_).
ParallelReadBuffer::ParallelReadBuffer(
    std::unique_ptr<ReadBufferFactory> reader_factory_,
    ThreadPool * pool_,
    size_t max_working_readers_,
    WorkerSetup worker_setup_,
    WorkerCleanup worker_cleanup_)
    : SeekableReadBufferWithSize(nullptr, 0)
    , pool(pool_)
    , max_working_readers(max_working_readers_)
    , reader_factory(std::move(reader_factory_))
    , worker_setup(std::move(worker_setup_))
    , worker_cleanup(std::move(worker_cleanup_))
{
    /// addReaders takes the lock on `mutex` that the caller is holding.
    std::unique_lock<std::mutex> guard{mutex};
    addReaders(guard);
}
bool ParallelReadBuffer::addReaderToPool(std::unique_lock<std::mutex> & /*buffer_lock*/)
{
auto reader = reader_factory->getReader();
if (!reader)
{
return false;
}
auto worker = read_workers.emplace_back(std::make_shared<ReadWorker>(std::move(reader)));
pool->scheduleOrThrow(
[&, this, worker = std::move(worker)]() mutable
{
ThreadStatus thread_status;
{
std::lock_guard lock{mutex};
++active_working_reader;
}
SCOPE_EXIT({
worker_cleanup(thread_status);
std::lock_guard lock{mutex};
--active_working_reader;
if (active_working_reader == 0)
{
readers_done.notify_all();
}
});
worker_setup(thread_status);
readerThreadFunction(std::move(worker));
});
return true;
}
/// Keeps scheduling readers until either max_working_readers workers are queued
/// or addReaderToPool reports that no further reader is available.
void ParallelReadBuffer::addReaders(std::unique_lock<std::mutex> & buffer_lock)
{
    for (;;)
    {
        if (read_workers.size() >= max_working_readers)
            break;

        if (!addReaderToPool(buffer_lock))
            break;
    }
}
/// Moves the read position to the absolute `offset`.
/// Only SEEK_SET is accepted; a negative offset is rejected. Returns `offset`.
/// Three strategies are tried in order:
///  1. the offset lies inside the current working buffer - only `pos` is moved;
///  2. the offset lies inside the range of an already scheduled worker - earlier
///     workers are cancelled and segments are consumed up to the requested offset;
///  3. otherwise all workers are stopped, the factory is repositioned and new readers are started.
off_t ParallelReadBuffer::seek(off_t offset, int whence)
{
if (whence != SEEK_SET)
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
if (offset < 0)
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
/// Case 1: current_position is the offset just past the working buffer, so the buffer
/// covers [current_position - working_buffer.size(), current_position).
if (!working_buffer.empty() && static_cast<size_t>(offset) >= current_position - working_buffer.size() && offset < current_position)
{
pos = working_buffer.end() - (current_position - offset);
assert(pos >= working_buffer.begin());
assert(pos <= working_buffer.end());
return offset;
}
std::unique_lock lock{mutex};
/// Worker ranges are inclusive on both ends: [range.left, *range.right].
const auto offset_is_in_range
= [&](const auto & range) { return static_cast<size_t>(offset) >= range.left && static_cast<size_t>(offset) <= *range.right; };
/// Cancel and drop workers from the front until one covers the offset.
/// A backward seek (offset < current_position) drops every worker.
while (!read_workers.empty() && (offset < current_position || !offset_is_in_range(read_workers.front()->range)))
{
read_workers.front()->cancel = true;
read_workers.pop_front();
}
/// Case 2: a scheduled worker covers the offset.
if (!read_workers.empty())
{
auto & front_worker = read_workers.front();
auto & segments = front_worker->segments;
/// range.left is advanced by nextSegment(), so it is the offset of the next unread segment.
current_position = front_worker->range.left;
while (true)
{
next_condvar.wait(lock, [&] { return emergency_stop || !segments.empty(); });
if (emergency_stop)
handleEmergencyStop();
auto next_segment = front_worker->nextSegment();
/// This segment contains the offset: make it the working buffer and position inside it.
if (static_cast<size_t>(offset) < current_position + next_segment.size())
{
current_segment = std::move(next_segment);
working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
current_position += current_segment.size();
pos = working_buffer.end() - (current_position - offset);
addReaders(lock);
return offset;
}
/// The segment lies entirely before the offset: skip it.
current_position += next_segment.size();
}
}
/// Case 3: finishAndWait() acquires the mutex itself, so it must be released first.
lock.unlock();
finishAndWait();
reader_factory->seek(offset, whence);
all_completed = false;
read_workers.clear();
current_position = offset;
resetWorkingBuffer();
/// finishAndWait() raised the flag to stop the workers; clear it before starting new ones.
emergency_stop = false;
lock.lock();
addReaders(lock);
return offset;
}
/// Forwards to the factory's getTotalSize() while holding the buffer mutex.
std::optional<size_t> ParallelReadBuffer::getTotalSize()
{
    std::lock_guard guard{mutex};
    auto total_size = reader_factory->getTotalSize();
    return total_size;
}
/// Current logical position: current_position minus the bytes still unread
/// in the working buffer (as reported by available()).
off_t ParallelReadBuffer::getPosition()
{
    const auto unread_bytes = available();
    return current_position - unread_bytes;
}
/// True when the front worker has finished or has at least one queued segment.
/// The worker deque must not be empty.
bool ParallelReadBuffer::currentWorkerReady() const
{
    assert(!read_workers.empty());

    const auto & front_worker = *read_workers.front();
    if (front_worker.finished)
        return true;

    return !front_worker.segments.empty();
}
/// True when the front worker has finished and all of its segments were consumed.
/// The worker deque must not be empty.
bool ParallelReadBuffer::currentWorkerCompleted() const
{
    assert(!read_workers.empty());

    const auto & front_worker = *read_workers.front();
    if (!front_worker.finished)
        return false;

    return front_worker.segments.empty();
}
/// Rethrows the exception stored by a background worker, if there is one.
void ParallelReadBuffer::handleEmergencyStop()
{
    // this can only be called from the main thread when there is an exception
    assert(background_exception);

    if (!background_exception)
        return;

    std::rethrow_exception(background_exception);
}
/// Makes the next segment of the first worker the working buffer.
/// Blocks until the first worker has data or has finished; finished workers are removed
/// and replaced with new readers from the factory.
/// Returns false when every reader has been fully consumed.
/// Rethrows the stored exception if a background worker failed.
bool ParallelReadBuffer::nextImpl()
{
    if (all_completed)
        return false;

    while (true)
    {
        std::unique_lock lock(mutex);
        next_condvar.wait(
            lock,
            [this]()
            {
                /// Check if no more readers left or current reader can be processed.
                /// The emptiness check must come first: currentWorkerReady() dereferences
                /// the front worker, which does not exist when the factory produced no readers.
                return emergency_stop || read_workers.empty() || currentWorkerReady();
            });

        bool worker_removed = false;
        /// Remove completed units
        while (!read_workers.empty() && currentWorkerCompleted() && !emergency_stop)
        {
            read_workers.pop_front();
            worker_removed = true;
        }

        if (emergency_stop)
            handleEmergencyStop();

        /// A slot was freed, so more readers can be scheduled.
        if (worker_removed)
            addReaders(lock);

        /// All readers processed, stop
        if (read_workers.empty())
        {
            all_completed = true;
            return false;
        }

        auto & front_worker = read_workers.front();
        /// Read data from first segment of the first reader
        if (!front_worker->segments.empty())
        {
            current_segment = front_worker->nextSegment();
            if (currentWorkerCompleted())
            {
                read_workers.pop_front();
                all_completed = !addReaderToPool(lock) && read_workers.empty();
            }
            break;
        }
        /// Otherwise the new front worker has no data yet: wait again.
    }

    working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
    current_position += working_buffer.size();
    return true;
}
/// Body of a background worker: repeatedly reads from the worker's reader, copies each
/// chunk into an arena-backed Segment and appends it to the worker's queue.
/// Stops when the worker's byte budget is exhausted, when the worker is cancelled or on
/// emergency stop. Any exception is handed over to onBackgroundException().
void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
{
    const auto should_stop = [&] { return emergency_stop || read_worker->cancel; };

    try
    {
        while (!should_stop())
        {
            if (!read_worker->reader->next())
                throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);

            /// The read may have taken a while; re-check before publishing anything.
            if (should_stop())
                break;

            Buffer chunk = read_worker->reader->buffer();
            /// Never take more than what is left of this worker's range.
            const size_t chunk_size = std::min(chunk.size(), read_worker->bytes_left);

            Segment segment(chunk_size, &arena);
            memcpy(segment.data(), chunk.begin(), chunk_size);
            read_worker->reader->ignore(chunk_size);
            read_worker->bytes_left -= chunk_size;

            const bool is_last_chunk = read_worker->bytes_left == 0;
            {
                /// New data ready to be read
                std::lock_guard guard(mutex);
                read_worker->segments.emplace_back(std::move(segment));
                read_worker->finished = is_last_chunk;
                next_condvar.notify_all();
            }

            if (is_last_chunk)
                break;
        }
    }
    catch (...)
    {
        onBackgroundException();
    }
}
/// Called from a catch block of a background worker: stores the in-flight exception
/// (only the first one is kept), raises emergency_stop and wakes up all waiters.
void ParallelReadBuffer::onBackgroundException()
{
    std::lock_guard guard(mutex);

    if (background_exception == nullptr)
        background_exception = std::current_exception();

    emergency_stop = true;
    next_condvar.notify_all();
}
/// Raises emergency_stop and blocks until active_working_reader drops to zero.
void ParallelReadBuffer::finishAndWait()
{
    emergency_stop = true;

    std::unique_lock guard{mutex};
    while (active_working_reader != 0)
        readers_done.wait(guard);
}
}

174
src/IO/ParallelReadBuffer.h Normal file
View File

@ -0,0 +1,174 @@
#pragma once
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <IO/SeekableReadBuffer.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ThreadPool.h>
namespace DB
{
/**
* Reads from multiple ReadBuffers in parallel.
* Preserves the order of the readers obtained from ReadBufferFactory.
*
* It consumes multiple readers and yields their data in the order in which the readers were obtained.
* Each working reader saves segments of data to an internal queue.
*
* In the nextImpl method, ParallelReadBuffer takes the first available segment from the first reader in the deque and feeds it to the user.
* When the first reader finishes reading, it is removed from the worker deque and data from the next reader is consumed.
*
* The number of working readers is limited by max_working_readers.
*/
class ParallelReadBuffer : public SeekableReadBufferWithSize
{
private:
    /// Blocks until data occurred in the first reader or this reader indicate finishing
    /// Finished readers removed from queue and data from next readers processed
    bool nextImpl() override;

    /// Move-only chunk of memory allocated from a SynchronizedArenaWithFreeLists.
    /// The memory is returned to the arena it was allocated from when the segment is destroyed.
    class Segment : private boost::noncopyable
    {
    public:
        Segment(size_t size_, SynchronizedArenaWithFreeLists * arena_) : arena(arena_), m_data(arena->alloc(size_)), m_size(size_) { }

        /// An empty segment: owns nothing and frees nothing.
        Segment() = default;

        /// Takes over the buffer of `other`, which is left empty.
        Segment(Segment && other) noexcept : arena(other.arena)
        {
            std::swap(m_data, other.m_data);
            std::swap(m_size, other.m_size);
        }

        Segment & operator=(Segment && other) noexcept
        {
            /// Swap the arena together with the buffer: `other` ends up owning our previous
            /// buffer and must release it to the arena that buffer was allocated from.
            /// Merely copying `other.arena` would leave `other` with a mismatched
            /// (possibly null) arena for the buffer it now owns.
            std::swap(arena, other.arena);
            std::swap(m_data, other.m_data);
            std::swap(m_size, other.m_size);
            return *this;
        }

        ~Segment()
        {
            if (m_data)
            {
                arena->free(m_data, m_size);
            }
        }

        auto data() const noexcept { return m_data; }
        auto size() const noexcept { return m_size; }

    private:
        SynchronizedArenaWithFreeLists * arena{nullptr};
        char * m_data{nullptr};
        size_t m_size{0};
    };

public:
    /// Source of the readers that are processed in parallel.
    class ReadBufferFactory
    {
    public:
        /// Returns the next reader; a null result is treated as "no more readers".
        virtual SeekableReadBufferPtr getReader() = 0;
        virtual ~ReadBufferFactory() = default;
        virtual off_t seek(off_t off, int whence) = 0;
        virtual std::optional<size_t> getTotalSize() = 0;
    };

    /// Callbacks that receive the ThreadStatus of a worker task:
    /// setup before the reading starts, cleanup when the task exits.
    using WorkerSetup = std::function<void(ThreadStatus &)>;
    using WorkerCleanup = std::function<void(ThreadStatus &)>;

    explicit ParallelReadBuffer(
        std::unique_ptr<ReadBufferFactory> reader_factory_,
        ThreadPool * pool,
        size_t max_working_readers,
        WorkerSetup worker_setup = {},
        WorkerCleanup worker_cleanup = {});

    /// Stops the background workers and waits for them before the members are destroyed.
    ~ParallelReadBuffer() override { finishAndWait(); }

    off_t seek(off_t off, int whence) override;
    std::optional<size_t> getTotalSize() override;
    off_t getPosition() override;

private:
    /// Reader in progress with a list of read segments
    struct ReadWorker
    {
        /// The reader must report a bounded range: `range.right` has to be set.
        explicit ReadWorker(SeekableReadBufferPtr reader_) : reader(std::move(reader_)), range(reader->getRemainingReadRange())
        {
            assert(range.right);
            /// The range is inclusive on both ends, hence the +1.
            bytes_left = *range.right - range.left + 1;
        }

        /// Pops the oldest segment and advances `range.left` past it.
        Segment nextSegment()
        {
            assert(!segments.empty());
            auto next_segment = std::move(segments.front());
            segments.pop_front();
            range.left += next_segment.size();
            return next_segment;
        }

        SeekableReadBufferPtr reader;
        std::deque<Segment> segments;
        bool finished{false};
        SeekableReadBuffer::Range range;
        size_t bytes_left{0};
        std::atomic_bool cancel{false};
    };

    using ReadWorkerPtr = std::shared_ptr<ReadWorker>;

    /// First worker in deque have new data or processed all available amount
    bool currentWorkerReady() const;
    /// First worker in deque processed and flushed all data
    bool currentWorkerCompleted() const;

    void handleEmergencyStop();

    void addReaders(std::unique_lock<std::mutex> & buffer_lock);
    bool addReaderToPool(std::unique_lock<std::mutex> & buffer_lock);

    /// Process read_worker, read data and save into internal segments queue
    void readerThreadFunction(ReadWorkerPtr read_worker);

    void onBackgroundException();
    void finishAndWait();

    SynchronizedArenaWithFreeLists arena;

    Segment current_segment;

    ThreadPool * pool;
    size_t max_working_readers;
    size_t active_working_reader{0};
    // Triggered when all reader workers are done
    std::condition_variable readers_done;

    std::unique_ptr<ReadBufferFactory> reader_factory;

    WorkerSetup worker_setup;
    WorkerCleanup worker_cleanup;

    /**
     * FIFO queue of readers.
     * Each worker contains reader itself and downloaded segments.
     * When reader read all available data it will be removed from
     * deque and data from next reader will be consumed to user.
     */
    std::deque<ReadWorkerPtr> read_workers;

    std::mutex mutex;
    /// Triggered when new data available
    std::condition_variable next_condvar;

    std::exception_ptr background_exception = nullptr;
    std::atomic_bool emergency_stop{false};

    off_t current_position{0};

    bool all_completed{false};
};
}

View File

@ -1,32 +1,33 @@
#pragma once #pragma once
#include <functional> #include <functional>
#include <base/types.h>
#include <base/sleep.h>
#include <IO/ConnectionTimeouts.h> #include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h> #include <IO/HTTPCommon.h>
#include <IO/ParallelReadBuffer.h>
#include <IO/ReadBuffer.h> #include <IO/ReadBuffer.h>
#include <IO/ReadBufferFromIStream.h> #include <IO/ReadBufferFromIStream.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/ReadSettings.h> #include <IO/ReadSettings.h>
#include <base/logger_useful.h>
#include <base/sleep.h>
#include <base/types.h>
#include <Poco/Any.h> #include <Poco/Any.h>
#include <Poco/Net/HTTPBasicCredentials.h> #include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPClientSession.h> #include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h> #include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h> #include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h> #include <Poco/URI.h>
#include <Poco/URIStreamFactory.h>
#include <Poco/Version.h> #include <Poco/Version.h>
#include <Common/DNSResolver.h> #include <Common/DNSResolver.h>
#include <Common/RemoteHostFilter.h> #include <Common/RemoteHostFilter.h>
#include <Common/config.h> #include <Common/config.h>
#include <Common/config_version.h> #include <Common/config_version.h>
#include <base/logger_useful.h>
#include <Poco/URIStreamFactory.h>
namespace ProfileEvents namespace ProfileEvents
{ {
extern const Event ReadBufferSeekCancelConnection; extern const Event ReadBufferSeekCancelConnection;
} }
namespace DB namespace DB
@ -48,7 +49,7 @@ class UpdatableSessionBase
{ {
protected: protected:
SessionPtr session; SessionPtr session;
UInt64 redirects { 0 }; UInt64 redirects{0};
Poco::URI initial_uri; Poco::URI initial_uri;
ConnectionTimeouts timeouts; ConnectionTimeouts timeouts;
UInt64 max_redirects; UInt64 max_redirects;
@ -56,19 +57,12 @@ protected:
public: public:
virtual void buildNewSession(const Poco::URI & uri) = 0; virtual void buildNewSession(const Poco::URI & uri) = 0;
explicit UpdatableSessionBase(const Poco::URI uri, explicit UpdatableSessionBase(const Poco::URI uri, const ConnectionTimeouts & timeouts_, UInt64 max_redirects_)
const ConnectionTimeouts & timeouts_, : initial_uri{uri}, timeouts{timeouts_}, max_redirects{max_redirects_}
UInt64 max_redirects_)
: initial_uri { uri }
, timeouts { timeouts_ }
, max_redirects { max_redirects_ }
{ {
} }
SessionPtr getSession() SessionPtr getSession() { return session; }
{
return session;
}
void updateSession(const Poco::URI & uri) void updateSession(const Poco::URI & uri)
{ {
@ -99,7 +93,7 @@ namespace detail
/// HTTP range, including right bound [begin, end]. /// HTTP range, including right bound [begin, end].
struct Range struct Range
{ {
size_t begin = 0; std::optional<size_t> begin;
std::optional<size_t> end; std::optional<size_t> end;
}; };
@ -144,10 +138,9 @@ namespace detail
return read_range.begin || read_range.end || retry_with_range_header; return read_range.begin || read_range.end || retry_with_range_header;
} }
size_t getOffset() const size_t getRangeBegin() const { return read_range.begin.value_or(0); }
{
return read_range.begin + offset_from_begin_pos; size_t getOffset() const { return getRangeBegin() + offset_from_begin_pos; }
}
std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_) std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
{ {
@ -161,7 +154,7 @@ namespace detail
if (out_stream_callback) if (out_stream_callback)
request.setChunkedTransferEncoding(true); request.setChunkedTransferEncoding(true);
for (auto & http_header_entry: http_header_entries) for (auto & http_header_entry : http_header_entries)
request.set(std::get<0>(http_header_entry), std::get<1>(http_header_entry)); request.set(std::get<0>(http_header_entry), std::get<1>(http_header_entry));
if (withPartialContent()) if (withPartialContent())
@ -207,26 +200,14 @@ namespace detail
std::optional<size_t> getTotalSize() override std::optional<size_t> getTotalSize() override
{ {
if (read_range.end) if (read_range.end)
return *read_range.end - read_range.begin; return *read_range.end - getRangeBegin();
Poco::Net::HTTPResponse response; Poco::Net::HTTPResponse response;
for (size_t i = 0; i < 10; ++i) for (size_t i = 0; i < 10; ++i)
{ {
try try
{ {
call(response, Poco::Net::HTTPRequest::HTTP_HEAD); callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD);
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
session->updateSession(uri_redirect);
istr = callImpl(uri_redirect, response, method);
}
break; break;
} }
catch (const Poco::Exception & e) catch (const Poco::Exception & e)
@ -236,7 +217,7 @@ namespace detail
} }
if (response.hasContentLength()) if (response.hasContentLength())
read_range.end = read_range.begin + response.getContentLength(); read_range.end = getRangeBegin() + response.getContentLength();
return read_range.end; return read_range.end;
} }
@ -252,6 +233,21 @@ namespace detail
InitializeError initialization_error = InitializeError::NONE; InitializeError initialization_error = InitializeError::NONE;
private:
void setupExternalBuffer()
{
/**
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data,
* each nextImpl() call we can fill a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
}
public: public:
using NextCallback = std::function<void(size_t)>; using NextCallback = std::function<void(size_t)>;
using OutStreamCallback = std::function<void(std::ostream &)>; using OutStreamCallback = std::function<void(std::ostream &)>;
@ -276,7 +272,7 @@ namespace detail
, session {session_} , session {session_}
, out_stream_callback {out_stream_callback_} , out_stream_callback {out_stream_callback_}
, credentials {credentials_} , credentials {credentials_}
, http_header_entries {http_header_entries_} , http_header_entries {std::move(http_header_entries_)}
, remote_host_filter {remote_host_filter_} , remote_host_filter {remote_host_filter_}
, buffer_size {buffer_size_} , buffer_size {buffer_size_}
, use_external_buffer {use_external_buffer_} , use_external_buffer {use_external_buffer_}
@ -287,18 +283,21 @@ namespace detail
{ {
if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0 if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0
|| settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms) || settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms)
throw Exception(ErrorCodes::BAD_ARGUMENTS, throw Exception(
"Invalid setting for http backoff, " ErrorCodes::BAD_ARGUMENTS,
"must be http_max_tries >= 1 (current is {}) and " "Invalid setting for http backoff, "
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})", "must be http_max_tries >= 1 (current is {}) and "
settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms); "0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
settings.http_max_tries,
settings.http_retry_initial_backoff_ms,
settings.http_retry_max_backoff_ms);
// Configure User-Agent if it not already set. // Configure User-Agent if it not already set.
const std::string user_agent = "User-Agent"; const std::string user_agent = "User-Agent";
auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), [&user_agent](const HTTPHeaderEntry & entry) auto iter = std::find_if(
{ http_header_entries.begin(),
return std::get<0>(entry) == user_agent; http_header_entries.end(),
}); [&user_agent](const HTTPHeaderEntry & entry) { return std::get<0>(entry) == user_agent; });
if (iter == http_header_entries.end()) if (iter == http_header_entries.end())
{ {
@ -313,7 +312,36 @@ namespace detail
} }
} }
void call(Poco::Net::HTTPResponse & response, const String & method_) static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept
{
constexpr std::array non_retriable_errors{
Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED};
return std::all_of(
non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; });
}
void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
{
call(response, method_, throw_on_all_errors);
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
session->updateSession(uri_redirect);
istr = callImpl(uri_redirect, response, method);
}
}
void call(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
{ {
try try
{ {
@ -321,18 +349,18 @@ namespace detail
} }
catch (...) catch (...)
{ {
if (throw_on_all_errors)
{
throw;
}
auto http_status = response.getStatus(); auto http_status = response.getStatus();
if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url)
&& http_skip_not_found_url)
{ {
initialization_error = InitializeError::SKIP_NOT_FOUND_URL; initialization_error = InitializeError::SKIP_NOT_FOUND_URL;
} }
else if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST else if (!isRetriableError(http_status))
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED)
{ {
initialization_error = InitializeError::NON_RETRIABLE_ERROR; initialization_error = InitializeError::NON_RETRIABLE_ERROR;
exception = std::current_exception(); exception = std::current_exception();
@ -372,12 +400,14 @@ namespace detail
if (withPartialContent() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT) if (withPartialContent() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
{ {
/// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0. /// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0.
if (read_range.begin) if (read_range.begin && *read_range.begin != 0)
{ {
if (!exception) if (!exception)
exception = std::make_exception_ptr( exception = std::make_exception_ptr(Exception(
Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
"Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-')); "Cannot read with range: [{}, {}]",
*read_range.begin,
read_range.end ? *read_range.end : '-'));
initialization_error = InitializeError::NON_RETRIABLE_ERROR; initialization_error = InitializeError::NON_RETRIABLE_ERROR;
return; return;
@ -386,12 +416,12 @@ namespace detail
{ {
/// We could have range.begin == 0 and range.end != 0 in case of DiskWeb and failing to read with partial content /// We could have range.begin == 0 and range.end != 0 in case of DiskWeb and failing to read with partial content
/// will affect only performance, so a warning is enough. /// will affect only performance, so a warning is enough.
LOG_WARNING(log, "Unable to read with range header: [{}, {}]", read_range.begin, *read_range.end); LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end);
} }
} }
if (!offset_from_begin_pos && !read_range.end && response.hasContentLength()) if (!offset_from_begin_pos && !read_range.end && response.hasContentLength())
read_range.end = read_range.begin + response.getContentLength(); read_range.end = getRangeBegin() + response.getContentLength();
try try
{ {
@ -399,12 +429,7 @@ namespace detail
if (use_external_buffer) if (use_external_buffer)
{ {
/** setupExternalBuffer();
* See comment 30 lines below.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
} }
} }
catch (const Poco::Exception & e) catch (const Poco::Exception & e)
@ -426,23 +451,17 @@ namespace detail
if (next_callback) if (next_callback)
next_callback(count()); next_callback(count());
if (read_range.end && getOffset() == read_range.end.value()) if (read_range.end && getOffset() > read_range.end.value())
{
assert(getOffset() == read_range.end.value() + 1);
return false; return false;
}
if (impl) if (impl)
{ {
if (use_external_buffer) if (use_external_buffer)
{ {
/** setupExternalBuffer();
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data,
* each nextImpl() call we can fill a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
} }
else else
{ {
@ -477,10 +496,7 @@ namespace detail
if (use_external_buffer) if (use_external_buffer)
{ {
/// See comment 40 lines above. setupExternalBuffer();
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
} }
} }
@ -498,13 +514,18 @@ namespace detail
if (!can_retry_request) if (!can_retry_request)
throw; throw;
LOG_ERROR(log, LOG_ERROR(
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. " log,
"Error: {}. (Current backoff wait is {}/{} ms)", "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
uri.toString(), i + 1, settings.http_max_tries, "Error: {}. (Current backoff wait is {}/{} ms)",
getOffset(), read_range.end ? toString(*read_range.end) : "unknown", uri.toString(),
e.displayText(), i + 1,
milliseconds_to_wait, settings.http_retry_max_backoff_ms); settings.http_max_tries,
getOffset(),
read_range.end ? toString(*read_range.end) : "unknown",
e.displayText(),
milliseconds_to_wait,
settings.http_retry_max_backoff_ms);
retry_with_range_header = true; retry_with_range_header = true;
exception = std::current_exception(); exception = std::current_exception();
@ -529,10 +550,7 @@ namespace detail
return true; return true;
} }
off_t getPosition() override off_t getPosition() override { return getOffset() - available(); }
{
return getOffset() - available();
}
off_t seek(off_t offset_, int whence) override off_t seek(off_t offset_, int whence) override
{ {
@ -540,12 +558,11 @@ namespace detail
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
if (offset_ < 0) if (offset_ < 0)
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); throw Exception(
"Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
off_t current_offset = getOffset(); off_t current_offset = getOffset();
if (!working_buffer.empty() if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset)
&& size_t(offset_) >= current_offset - working_buffer.size()
&& offset_ < current_offset)
{ {
pos = working_buffer.end() - (current_offset - offset_); pos = working_buffer.end() - (current_offset - offset_);
assert(pos >= working_buffer.begin()); assert(pos >= working_buffer.begin());
@ -567,7 +584,6 @@ namespace detail
if (impl) if (impl)
{ {
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection); ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
impl.reset(); impl.reset();
} }
@ -580,6 +596,8 @@ namespace detail
return offset_; return offset_;
} }
SeekableReadBuffer::Range getRemainingReadRange() const override { return {getOffset(), read_range.end}; }
std::string getResponseCookie(const std::string & name, const std::string & def) const std::string getResponseCookie(const std::string & name, const std::string & def) const
{ {
for (const auto & cookie : cookies) for (const auto & cookie : cookies)
@ -599,10 +617,7 @@ namespace detail
next_callback(count()); next_callback(count());
} }
const std::string & getCompressionMethod() const const std::string & getCompressionMethod() const { return content_encoding; }
{
return content_encoding;
}
}; };
} }
@ -611,19 +626,50 @@ class UpdatableSession : public UpdatableSessionBase<HTTPSessionPtr>
using Parent = UpdatableSessionBase<HTTPSessionPtr>; using Parent = UpdatableSessionBase<HTTPSessionPtr>;
public: public:
UpdatableSession( UpdatableSession(const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_)
const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const UInt64 max_redirects_)
: Parent(uri, timeouts_, max_redirects_) : Parent(uri, timeouts_, max_redirects_)
{ {
session = makeHTTPSession(initial_uri, timeouts); session = makeHTTPSession(initial_uri, timeouts);
} }
void buildNewSession(const Poco::URI & uri) override void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
};
/// Splits the byte interval [range_start, total_size) into consecutive chunks of at most
/// `range_step` bytes and hands them out one at a time via nextRange().
///
/// `range_step` is expected to be positive. A zero step cannot make progress, so in that
/// case no ranges are produced at all (instead of dividing by zero or looping forever).
class RangeGenerator
{
public:
    /// @param total_size_  exclusive upper bound of the interval to split.
    /// @param range_step_  maximum length of a single chunk.
    /// @param range_start  offset the first chunk starts at.
    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
        : from(range_start), range_step(range_step_), total_size(total_size_)
    {
    }

    /// Number of ranges that nextRange() will still return from the current position.
    /// Computed as an integer ceiling division: a trailing chunk shorter than `range_step`
    /// still counts as a full range, and no precision is lost for large sizes.
    size_t totalRanges() const
    {
        if (range_step == 0 || from >= total_size)
            return 0;

        const size_t remaining = total_size - from;
        return remaining / range_step + (remaining % range_step != 0 ? 1 : 0);
    }

    using Range = std::pair<size_t, size_t>;

    /// Returns the next range with an exclusive upper bound, i.e. [first, second),
    /// or std::nullopt once the whole interval has been handed out.
    std::optional<Range> nextRange()
    {
        if (range_step == 0 || from >= total_size)
            return std::nullopt;

        /// Clamp the chunk length against what is left rather than computing
        /// `from + range_step` first, which could wrap around for a huge step.
        const size_t remaining = total_size - from;
        const size_t length = range_step < remaining ? range_step : remaining;

        const size_t begin = from;
        from += length;
        return Range{begin, from};
    }

private:
    size_t from;
    size_t range_step;
    size_t total_size;
};
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>> class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
@ -631,7 +677,7 @@ class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>; using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
public: public:
ReadWriteBufferFromHTTP( ReadWriteBufferFromHTTP(
Poco::URI uri_, Poco::URI uri_,
const std::string & method_, const std::string & method_,
OutStreamCallback out_stream_callback_, OutStreamCallback out_stream_callback_,
@ -646,14 +692,117 @@ public:
bool delay_initialization_ = true, bool delay_initialization_ = true,
bool use_external_buffer_ = false, bool use_external_buffer_ = false,
bool skip_not_found_url_ = false) bool skip_not_found_url_ = false)
: Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects), : Parent(
uri_, credentials_, method_, out_stream_callback_, buffer_size_, std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
settings_, http_header_entries_, read_range_, remote_host_filter_, uri_,
delay_initialization_, use_external_buffer_, skip_not_found_url_) credentials_,
method_,
out_stream_callback_,
buffer_size_,
settings_,
http_header_entries_,
read_range_,
remote_host_filter_,
delay_initialization_,
use_external_buffer_,
skip_not_found_url_)
{ {
} }
}; };
class RangedReadWriteBufferFromHTTPFactory : public ParallelReadBuffer::ReadBufferFactory
{
using OutStreamCallback = ReadWriteBufferFromHTTP::OutStreamCallback;
public:
RangedReadWriteBufferFromHTTPFactory(
size_t total_object_size_,
size_t range_step_,
Poco::URI uri_,
std::string method_,
OutStreamCallback out_stream_callback_,
ConnectionTimeouts timeouts_,
const Poco::Net::HTTPBasicCredentials & credentials_,
UInt64 max_redirects_ = 0,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
ReadSettings settings_ = {},
ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries_ = {},
const RemoteHostFilter * remote_host_filter_ = nullptr,
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false)
: range_generator(total_object_size_, range_step_)
, total_object_size(total_object_size_)
, range_step(range_step_)
, uri(uri_)
, method(std::move(method_))
, out_stream_callback(out_stream_callback_)
, timeouts(std::move(timeouts_))
, credentials(credentials_)
, max_redirects(max_redirects_)
, buffer_size(buffer_size_)
, settings(std::move(settings_))
, http_header_entries(std::move(http_header_entries_))
, remote_host_filter(remote_host_filter_)
, delay_initialization(delay_initialization_)
, use_external_buffer(use_external_buffer_)
, skip_not_found_url(skip_not_found_url_)
{
}
SeekableReadBufferPtr getReader() override
{
const auto next_range = range_generator.nextRange();
if (!next_range)
{
return nullptr;
}
return std::make_shared<ReadWriteBufferFromHTTP>(
uri,
method,
out_stream_callback,
timeouts,
credentials,
max_redirects,
buffer_size,
settings,
http_header_entries,
// HTTP Range has inclusive bounds, i.e. [from, to]
ReadWriteBufferFromHTTP::Range{next_range->first, next_range->second - 1},
remote_host_filter,
delay_initialization,
use_external_buffer,
skip_not_found_url);
}
off_t seek(off_t off, [[maybe_unused]] int whence) override
{
range_generator = RangeGenerator{total_object_size, range_step, static_cast<size_t>(off)};
return off;
}
std::optional<size_t> getTotalSize() override { return total_object_size; }
private:
RangeGenerator range_generator;
size_t total_object_size;
size_t range_step;
Poco::URI uri;
std::string method;
OutStreamCallback out_stream_callback;
ConnectionTimeouts timeouts;
const Poco::Net::HTTPBasicCredentials & credentials;
UInt64 max_redirects;
size_t buffer_size;
ReadSettings settings;
ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries;
const RemoteHostFilter * remote_host_filter;
bool delay_initialization;
bool use_external_buffer;
bool skip_not_found_url;
};
class UpdatablePooledSession : public UpdatableSessionBase<PooledHTTPSessionPtr> class UpdatablePooledSession : public UpdatableSessionBase<PooledHTTPSessionPtr>
{ {
using Parent = UpdatableSessionBase<PooledHTTPSessionPtr>; using Parent = UpdatableSessionBase<PooledHTTPSessionPtr>;
@ -662,20 +811,14 @@ private:
size_t per_endpoint_pool_size; size_t per_endpoint_pool_size;
public: public:
explicit UpdatablePooledSession(const Poco::URI uri, explicit UpdatablePooledSession(
const ConnectionTimeouts & timeouts_, const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_, size_t per_endpoint_pool_size_)
const UInt64 max_redirects_, : Parent(uri, timeouts_, max_redirects_), per_endpoint_pool_size{per_endpoint_pool_size_}
size_t per_endpoint_pool_size_)
: Parent(uri, timeouts_, max_redirects_)
, per_endpoint_pool_size { per_endpoint_pool_size_ }
{ {
session = makePooledHTTPSession(initial_uri, timeouts, per_endpoint_pool_size); session = makePooledHTTPSession(initial_uri, timeouts, per_endpoint_pool_size);
} }
void buildNewSession(const Poco::URI & uri) override void buildNewSession(const Poco::URI & uri) override { session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); }
{
session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size);
}
}; };
class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>> class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>
@ -683,7 +826,8 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>; using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>;
public: public:
explicit PooledReadWriteBufferFromHTTP(Poco::URI uri_, explicit PooledReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_ = {}, const std::string & method_ = {},
OutStreamCallback out_stream_callback_ = {}, OutStreamCallback out_stream_callback_ = {},
const ConnectionTimeouts & timeouts_ = {}, const ConnectionTimeouts & timeouts_ = {},
@ -691,12 +835,13 @@ public:
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const UInt64 max_redirects = 0, const UInt64 max_redirects = 0,
size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT) size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT)
: Parent(std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint), : Parent(
uri_, std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
credentials_, uri_,
method_, credentials_,
out_stream_callback_, method_,
buffer_size_) out_stream_callback_,
buffer_size_)
{ {
} }
}; };

View File

@ -372,8 +372,8 @@ SetPtr makeExplicitSet(
element_type = low_cardinality_type->getDictionaryType(); element_type = low_cardinality_type->getDictionaryType();
auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types); auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);
if (prepared_sets.count(set_key)) if (auto it = prepared_sets.find(set_key); it != prepared_sets.end())
return prepared_sets.at(set_key); /// Already prepared. return it->second; /// Already prepared.
Block block; Block block;
const auto & right_arg_func = std::dynamic_pointer_cast<ASTFunction>(right_arg); const auto & right_arg_func = std::dynamic_pointer_cast<ASTFunction>(right_arg);
@ -388,7 +388,7 @@ SetPtr makeExplicitSet(
set->insertFromBlock(block.getColumnsWithTypeAndName()); set->insertFromBlock(block.getColumnsWithTypeAndName());
set->finishInsert(); set->finishInsert();
prepared_sets[set_key] = set; prepared_sets.emplace(set_key, set);
return set; return set;
} }
@ -707,7 +707,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
if (tid != 0) if (tid != 0)
tuple_ast = tuple_ast->clone(); tuple_ast = tuple_ast->clone();
auto literal = std::make_shared<ASTLiteral>(UInt64(++tid)); auto literal = std::make_shared<ASTLiteral>(UInt64{++tid});
visit(*literal, literal, data); visit(*literal, literal, data);
auto func = makeASTFunction("tupleElement", tuple_ast, literal); auto func = makeASTFunction("tupleElement", tuple_ast, literal);
@ -814,14 +814,13 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (!data.only_consts) if (!data.only_consts)
{ {
/// We are in the part of the tree that we are not going to compute. You just need to define types. /// We are in the part of the tree that we are not going to compute. You just need to define types.
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet". /// Do not evaluate subquery and create sets. We replace "in*" function to "in*IgnoreSet".
auto argument_name = node.arguments->children.at(0)->getColumnName(); auto argument_name = node.arguments->children.at(0)->getColumnName();
data.addFunction( data.addFunction(
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()), FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
{ argument_name, argument_name }, {argument_name, argument_name},
column_name); column_name);
} }
return; return;
} }
@ -1145,8 +1144,8 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
if (no_subqueries) if (no_subqueries)
return {}; return {};
auto set_key = PreparedSetKey::forSubquery(*right_in_operand); auto set_key = PreparedSetKey::forSubquery(*right_in_operand);
if (data.prepared_sets.count(set_key)) if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end())
return data.prepared_sets.at(set_key); return it->second;
/// A special case is if the name of the table is specified on the right side of the IN statement, /// A special case is if the name of the table is specified on the right side of the IN statement,
/// and the table has the type Set (a previously prepared set). /// and the table has the type Set (a previously prepared set).
@ -1160,7 +1159,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get()); StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get());
if (storage_set) if (storage_set)
{ {
data.prepared_sets[set_key] = storage_set->getSet(); data.prepared_sets.emplace(set_key, storage_set->getSet());
return storage_set->getSet(); return storage_set->getSet();
} }
} }
@ -1174,7 +1173,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
/// If you already created a Set with the same subquery / table. /// If you already created a Set with the same subquery / table.
if (subquery_for_set.set) if (subquery_for_set.set)
{ {
data.prepared_sets[set_key] = subquery_for_set.set; data.prepared_sets.emplace(set_key, subquery_for_set.set);
return subquery_for_set.set; return subquery_for_set.set;
} }
@ -1196,7 +1195,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
} }
subquery_for_set.set = set; subquery_for_set.set = set;
data.prepared_sets[set_key] = set; data.prepared_sets.emplace(set_key, set);
return set; return set;
} }
else else

Some files were not shown because too many files have changed in this diff Show More