From e46fdfa807f7a0cda82dbf19aa3807008a799443 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 25 Mar 2019 17:44:17 +0300 Subject: [PATCH 1/8] Upgrade boost to 1.69 --- contrib/boost | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/boost b/contrib/boost index 32abf16beb7..471ea208abb 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit 32abf16beb7bb8b243a4d100ccdd6acb271738c4 +Subproject commit 471ea208abb92a5cba7d3a08a819bb728f27e95f From 268ebcd3da2ff5c9ab124f89d585609bf1069e98 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 25 Mar 2019 18:03:12 +0300 Subject: [PATCH 2/8] Fix in clickhouse-test --binary option can accept path --- dbms/tests/clickhouse-test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index b17f408f959..242dc17de0b 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -404,6 +404,8 @@ def main(args): def find_binary(name): + if os.path.exists(name) and os.access(name, os.X_OK): + return True paths = os.environ.get("PATH").split(':') for path in paths: if os.access(os.path.join(path, name), os.X_OK): @@ -416,7 +418,7 @@ if __name__ == '__main__': parser=ArgumentParser(description='ClickHouse functional tests') parser.add_argument('-q', '--queries', help='Path to queries dir') parser.add_argument('--tmp', help='Path to tmp dir') - parser.add_argument('-b', '--binary', default='clickhouse', help='Main clickhouse binary') + parser.add_argument('-b', '--binary', default='clickhouse', help='Path to clickhouse binary or name of binary in PATH') parser.add_argument('-c', '--client', help='Client program') parser.add_argument('--extract_from_config', help='extract-from-config program') parser.add_argument('--configclient', help='Client config (if you use not default ports)') From 7333170090db8b11bd0b1a35d8823ee5f25dfb8a Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 25 Mar 2019 20:31:45 +0300 Subject: [PATCH 3/8] Fix client detection in ctest (#4795) --- dbms/tests/clickhouse-test-server | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server index b324a270473..80a1db4a153 100755 --- a/dbms/tests/clickhouse-test-server +++ b/dbms/tests/clickhouse-test-server @@ -9,18 +9,18 @@ ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../.. && pwd) DATA_DIR=${DATA_DIR:=`mktemp -d /tmp/clickhouse.test..XXXXX`} DATA_DIR_PATTERN=${DATA_DIR_PATTERN:=/tmp/clickhouse} # path from config file, will be replaced to temporary LOG_DIR=${LOG_DIR:=$DATA_DIR/log} -export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"} -( [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY}-server" ] || [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir +export CLICKHOUSE_BINARY_NAME=${CLICKHOUSE_BINARY_NAME:="clickhouse"} +( [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}-server" ] || [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir [ -d "$ROOT_DIR/build${BUILD_TYPE}" ] && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR/build${BUILD_TYPE}} BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} -[ -x ${CLICKHOUSE_BINARY}-server" ] && [ -x ${CLICKHOUSE_BINARY}-client" ] && BIN_DIR= # Allow run in /usr/bin -( [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY}" ] || [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/programs/} -[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY}-server} -[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY} server} -[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY}-client} -[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY} client} -[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY}-extract-from-config} -[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY} extract-from-config} +[ -x ${CLICKHOUSE_BINARY_NAME}-server" ] && [ -x ${CLICKHOUSE_BINARY_NAME}-client" ] && BIN_DIR= # Allow run in /usr/bin +( [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}" ] || [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/programs/} +[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server} +[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} server} +[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client} +[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} client} +[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config} +[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} extract-from-config} [ -f "$CUR_DIR/server-test.xml" ] && CONFIG_DIR=${CONFIG_DIR=$CUR_DIR}/ CONFIG_CLIENT_DIR=${CONFIG_CLIENT_DIR=$CONFIG_DIR} @@ -131,7 +131,7 @@ else TEST_DICT=${TEST_DICT=1} CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q" $CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;' - CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT" + CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY_NAME} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT" CLICKHOUSE_PERFORMANCE_TEST="${BIN_DIR}clickhouse-performance-test --port $CLICKHOUSE_PORT_TCP --recursive $CUR_DIR/performance --skip-tags=long" if [ "${TEST_RUN_STRESS}" ]; then # Running test in parallel will fail some results (tests can create/fill/drop same tables) From 1584fd436734ca7a4660f7942889fc80f8c70371 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 26 Mar 2019 00:31:00 +0300 Subject: [PATCH 4/8] Fix hyperscan library find and use (#4798) --- cmake/find_hyperscan.cmake | 28 ++++++++++++++++++- contrib/CMakeLists.txt | 2 +- dbms/src/Common/config.h.in | 1 + dbms/src/Functions/FunctionsStringSearch.cpp | 18 ++++++------ dbms/src/Functions/Regexps.h | 14 ++++++---- ...StorageSystemBuildOptions.generated.cpp.in | 1 + utils/build/build_debian_unbundled.sh | 2 +- 7 files changed, 50 insertions(+), 16 deletions(-) diff --git a/cmake/find_hyperscan.cmake b/cmake/find_hyperscan.cmake index 826ee555d53..a3e0b6bc9bc 100644 --- a/cmake/find_hyperscan.cmake +++ b/cmake/find_hyperscan.cmake @@ -1,7 +1,33 @@ if (HAVE_SSSE3) + option (ENABLE_HYPERSCAN "Enable hyperscan" ON) +endif () + +if (ENABLE_HYPERSCAN) + +option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED}) + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt") + if (USE_INTERNAL_HYPERSCAN_LIBRARY) + message (WARNING "submodule contrib/hyperscan is missing. to fix try run: \n git submodule update --init --recursive") + endif () + set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1) + set (USE_INTERNAL_HYPERSCAN_LIBRARY 0) +endif () + +if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY) + find_library (HYPERSCAN_LIBRARY hs) + find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS}) +endif () + +if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR) + set (USE_HYPERSCAN 1) +elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY) set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src) set (HYPERSCAN_LIBRARY hs) set (USE_HYPERSCAN 1) set (USE_INTERNAL_HYPERSCAN_LIBRARY 1) - message (STATUS "Using hyperscan: ${HYPERSCAN_INCLUDE_DIR} " : ${HYPERSCAN_LIBRARY}) endif() + +message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}") + +endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index dee9b3765de..03daaf8907b 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -305,6 +305,6 @@ if (USE_BASE64) add_subdirectory (base64-cmake) endif() -if (USE_HYPERSCAN) +if (USE_INTERNAL_HYPERSCAN_LIBRARY) add_subdirectory (hyperscan) endif() diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 0b31466d522..c323afe369e 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -24,6 +24,7 @@ #cmakedefine01 USE_CPUINFO #cmakedefine01 USE_BROTLI #cmakedefine01 USE_SSL +#cmakedefine01 USE_HYPERSCAN #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 LLVM_HAS_RTTI diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 0af2d3d7007..7c22afc9020 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -1,8 +1,6 @@ -#include +#include "FunctionsStringSearch.h" #include -#include - #include #include #include @@ -11,12 +9,16 @@ #include #include #include - #include #include -#ifdef __SSSE3__ -# include +#include +#if USE_HYPERSCAN +# if __has_include() +# include +# else +# include +# endif #endif #if USE_RE2_ST @@ -617,7 +619,7 @@ struct MultiMatchAnyImpl { (void)FindAny; (void)FindAnyIndex; -#ifdef __SSSE3__ +#if USE_HYPERSCAN using ScratchPtr = std::unique_ptr>; const auto & hyperscan_regex = MultiRegexps::get(needles); @@ -670,7 +672,7 @@ struct MultiMatchAnyImpl res[i] = j + 1; } } -#endif // __SSSE3__ +#endif // USE_HYPERSCAN } }; diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index f5ad738425c..f6a37f94ddc 100644 --- a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -5,13 +5,17 @@ #include #include #include - #include #include #include -#ifdef __SSSE3__ -# include +#include +#if USE_HYPERSCAN +# if __has_include() +# include +# else +# include +# endif #endif namespace ProfileEvents @@ -63,7 +67,7 @@ namespace Regexps } } -#ifdef __SSSE3__ +#if USE_HYPERSCAN namespace MultiRegexps { @@ -139,6 +143,6 @@ namespace MultiRegexps } } -#endif // __SSSE3__ +#endif // USE_HYPERSCAN } diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index e2e4b397d0b..97358ac02c9 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -56,6 +56,7 @@ const char * auto_config_build[] "USE_PROTOBUF", "@USE_PROTOBUF@", "USE_BROTLI", "@USE_BROTLI@", "USE_SSL", "@USE_SSL@", + "USE_HYPERSCAN", "@USE_HYPERSCAN@", nullptr, nullptr }; diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index 0d9ae74f169..41c951c4bae 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT From 333b56ccfa4282b33f532ab40d014f7890ed001c Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 26 Mar 2019 11:05:41 +0300 Subject: [PATCH 5/8] DOCAPI-4177: HTTP compression settings a described. (#4801) --- docs/en/interfaces/http.md | 23 ++++++++++++---- docs/en/operations/settings/settings.md | 35 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 8dece39249f..a72a2e4a06e 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -128,16 +128,29 @@ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @- For successful requests that don't return a data table, an empty response body is returned. -You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special clickhouse-compressor program to work with it (it is installed with the clickhouse-client package). +You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of the data insertion, you may disable the server-side checksum verification with the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting. -If you specified 'compress=1' in the URL, the server will compress the data it sends you. -If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. +If you specified `compress = 1` in the URL, the server compresses the data it sends you. +If you specified `decompress = 1` in the URL, the server decompresses the same data that you pass in the `POST` method. -It is also possible to use the standard gzip-based HTTP compression. To send a POST request compressed using gzip, append the request header `Content-Encoding: gzip`. -In order for ClickHouse to compress the response using gzip, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse setting `enable_http_compression`. +It is also possible to use the standard `gzip`-based [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a `POST` request compressed using `gzip`, append the request header `Content-Encoding: gzip`. +In order for ClickHouse to compress the response using `gzip`, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the compression level of the data with the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting. You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed. +Examples of sending the data with compression: + +```bash +#Sending the data to the server: +curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip' + +#Sending the data to the client: +echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/' +``` + +!!! note "Note" + Some HTTP clients can decompress data (`gzip` and `deflate`) from the server by default and you may get the decompressed data even if you use the compression settings correctly. + You can use the 'database' URL parameter to specify the default database. ```bash diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 3e9ecef9d17..00c5d476771 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -79,6 +79,41 @@ Enable or disable fsync when writing .sql files. Enabled by default. It makes sense to disable it if the server has millions of tiny table chunks that are constantly being created and destroyed. +## enable_http_compression {#settings-enable_http_compression} + +Enables/disables compression of the data in the response to an HTTP request. + +For more information, read the [HTTP interface description](../../interfaces/http.md). + +Possible values: + +- 0 — The functionality is disabled. +- 1 — The functionality is enabled. + +Default value: 0. + +## http_zlib_compression_level {#settings-http_zlib_compression_level} + +Sets the level of the compression of the data in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression). + +Possible values: numbers from 1 to 9. + +Default value: 3. + + +## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress} + +Enables/disables the verification of the checksum when uncompressing the HTTP POST data from the client. Used only for ClickHouse native format of compression (neither `gzip` nor `deflate`). + +For more information, read the [HTTP interface description](../../interfaces/http.md). + +Possible values: + +- 0 — The functionality is disabled. +- 1 — The functionality is enabled. + +Default value: 0. + ## input_format_allow_errors_num Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). From 9cdbdf7cf511a9b09295b17a7bcc6d11b67a9f5b Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 26 Mar 2019 12:50:28 +0300 Subject: [PATCH 6/8] CLICKHOUSE-3006: get rid of nodejs during website build (#4803) * CLICKHOUSE-3006: get rid of nodejs during website build * webassets is not actually used --- docs/tools/README.md | 2 +- docs/tools/build.py | 18 +++- {website => docs/tools}/release.sh | 10 +- docs/tools/requirements.txt | 5 +- docs/tools/website.py | 45 +++++++++ website/Dockerfile | 2 +- website/README.md | 15 +-- website/gulpfile.js | 154 ----------------------------- website/setup_gulp.sh | 3 - 9 files changed, 72 insertions(+), 182 deletions(-) rename {website => docs/tools}/release.sh (87%) create mode 100644 docs/tools/website.py delete mode 100644 website/gulpfile.js delete mode 100755 website/setup_gulp.sh diff --git a/docs/tools/README.md b/docs/tools/README.md index 9b4dd26dd14..6332a22f164 100644 --- a/docs/tools/README.md +++ b/docs/tools/README.md @@ -2,7 +2,7 @@ ClickHouse documentation is built using [build.py](build.py) script that uses [mkdocs](https://www.mkdocs.org) library and it's dependencies to separately build all version of documentations (all languages in either single and multi page mode) as static HTMLs. The results are then put in correct directory structure. It can also generate PDF version. -Finally [the infrustructure](../website) that builds ClickHouse [official website](https://clickhouse.yandex) just puts that directory structure into the same Docker container together with rest of website and deploys it to Yandex private cloud. +[release.sh](release.sh) also pulls static files needed for [official ClickHouse website](https://clickhouse.yandex) from [../../website](../../website) folder, packs them alongside docs into Docker container and tries to deploy it (possible only from Yandex private network). ## How to check if the documentation will look fine? diff --git a/docs/tools/build.py b/docs/tools/build.py index e7e1c777b42..ff89b437ffc 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -21,6 +21,7 @@ from mkdocs import exceptions from mkdocs.commands import build as mkdocs_build from concatenate import concatenate +from website import build_website, minify_website import mdx_clickhouse import test @@ -96,7 +97,7 @@ def build_for_lang(lang, args): site_name=site_names.get(lang, site_names['en']), site_url='https://clickhouse.yandex/docs/%s/' % lang, docs_dir=os.path.join(args.docs_dir, lang), - site_dir=os.path.join(args.output_dir, lang), + site_dir=os.path.join(args.docs_output_dir, lang), strict=True, theme=theme_cfg, copyright='©2016–2019 Yandex LLC', @@ -168,7 +169,7 @@ def build_single_page_version(lang, args, cfg): mkdocs_build.build(cfg) - single_page_output_path = os.path.join(args.docs_dir, args.output_dir, lang, 'single') + single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single') if os.path.exists(single_page_output_path): shutil.rmtree(single_page_output_path) @@ -212,29 +213,40 @@ def build_redirects(args): to_path = '/docs/$1/' + to_path.replace('.md', '/') rewrites.append(' '.join(['rewrite', from_path, to_path, 'permanent;'])) - with open(os.path.join(args.output_dir, 'redirects.conf'), 'w') as f: + with open(os.path.join(args.docs_output_dir, 'redirects.conf'), 'w') as f: f.write('\n'.join(rewrites)) def build(args): + if os.path.exists(args.output_dir): + shutil.rmtree(args.output_dir) + + if not args.skip_website: + build_website(args) + for lang in args.lang.split(','): build_for_lang(lang, args) build_redirects(args) + if not args.skip_website: + minify_website(args) if __name__ == '__main__': arg_parser = argparse.ArgumentParser() arg_parser.add_argument('--lang', default='en,ru,zh,fa') arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument('--theme-dir', default='mkdocs-material-theme') + arg_parser.add_argument('--website-dir', default=os.path.join('..', 'website')) arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument('--skip-single-page', action='store_true') arg_parser.add_argument('--skip-pdf', action='store_true') + arg_parser.add_argument('--skip-website', action='store_true') arg_parser.add_argument('--save-raw-single-page', type=str) arg_parser.add_argument('--verbose', action='store_true') args = arg_parser.parse_args() + args.docs_output_dir = os.path.join(args.output_dir, 'docs') os.chdir(os.path.join(os.path.dirname(__file__), '..')) logging.basicConfig( diff --git a/website/release.sh b/docs/tools/release.sh similarity index 87% rename from website/release.sh rename to docs/tools/release.sh index 83e25563a57..e671dd8cea0 100755 --- a/website/release.sh +++ b/docs/tools/release.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -ex BASE_DIR=$(dirname $(readlink -f $0)) -cd "${BASE_DIR}" +BUILD_DIR="${BASE_DIR}/../build" IMAGE="clickhouse/website" if [[ -z "$1" ]] then @@ -12,12 +12,12 @@ fi FULL_NAME="${IMAGE}:${TAG}" REMOTE_NAME="registry.yandex.net/${FULL_NAME}" DOCKER_HASH="$2" -GULP="$BASE_DIR/node_modules/gulp/bin/gulp.js" if [[ -z "$1" ]] then - $GULP clean - $GULP build - docker build -t "${FULL_NAME}" "${BASE_DIR}" + source "${BASE_DIR}/venv/bin/activate" + python "${BASE_DIR}/build.py" + cd "${BUILD_DIR}" + docker build -t "${FULL_NAME}" "${BUILD_DIR}" docker tag "${FULL_NAME}" "${REMOTE_NAME}" DOCKER_HASH=$(docker push "${REMOTE_NAME}" | tail -1 | awk '{print $3;}') docker rmi "${FULL_NAME}" diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 85cd355dbdc..f0df3b8ff36 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -6,11 +6,14 @@ certifi==2017.11.5 chardet==3.0.4 click==6.7 CommonMark==0.5.4 +cssmin==0.2.0 docutils==0.14 futures==3.2.0 +htmlmin==0.1.12 idna==2.6 imagesize==0.7.1 Jinja2==2.10 +jsmin==2.2.2 livereload==2.5.1 Markdown==2.6.11 MarkupSafe==1.0 @@ -18,7 +21,7 @@ mkdocs==1.0.4 Pygments==2.2.0 python-slugify==1.2.6 pytz==2017.3 -PyYAML==4.2b1 +PyYAML==3.12 recommonmark==0.4.0 requests==2.21.0 singledispatch==3.4.0.3 diff --git a/docs/tools/website.py b/docs/tools/website.py new file mode 100644 index 00000000000..0605058a6c6 --- /dev/null +++ b/docs/tools/website.py @@ -0,0 +1,45 @@ +import logging +import os +import shutil + +import cssmin +import htmlmin +import jsmin + +def build_website(args): + logging.info('Building website') + shutil.copytree( + args.website_dir, + args.output_dir, + ignore=shutil.ignore_patterns( + '*.md', + '*.sh', + 'build', + 'docs', + 'public', + 'node_modules' + ) + ) + +def minify_website(args): + for root, _, filenames in os.walk(args.output_dir): + for filename in filenames: + path = os.path.join(root, filename) + if not ( + filename.endswith('.html') or + filename.endswith('.css') or + filename.endswith('.js') + ): + continue + + logging.info('Minifying %s', path) + with open(path, 'r') as f: + content = f.read().decode('utf-8') + if filename.endswith('.html'): + content = htmlmin.minify(content, remove_empty_space=False) + elif filename.endswith('.css'): + content = cssmin.cssmin(content) + elif filename.endswith('.js'): + content = jsmin.jsmin(content) + with open(path, 'w') as f: + f.write(content.encode('utf-8')) diff --git a/website/Dockerfile b/website/Dockerfile index b66e0c8da34..64eb0ce5e33 100644 --- a/website/Dockerfile +++ b/website/Dockerfile @@ -1,4 +1,4 @@ FROM nginx:mainline -COPY public /usr/share/nginx/html +COPY . /usr/share/nginx/html COPY nginx/nginx.conf /etc/nginx/nginx.conf COPY nginx/default.conf /etc/nginx/conf.d/default.conf diff --git a/website/README.md b/website/README.md index d6abca119c2..26bb1dceab5 100644 --- a/website/README.md +++ b/website/README.md @@ -1,15 +1,2 @@ -ClickHouse website quickstart: +ClickHouse website is built alongside it's documentation via [docs/tools](https://github.com/yandex/ClickHouse/tree/master/docs/tools), see [README.md there](https://github.com/yandex/ClickHouse/tree/master/docs/tools/README.md). -On Linux, do the following: -``` -sudo apt-get install nodejs -sudo ln -s /usr/bin/nodejs /usr/bin/node -sudo npm install gulp-cli -g -sudo npm install gulp -D -``` - -1. Make sure you have `npm`, `docker` and `python` installed and available in your `$PATH`. -2. Run `setup\_gulp.sh` once to install build prerequisites via npm. -3. Use `gulp build` to minify website to "public" subfolder or just `gulp` to run local webserver with livereload serving it (note: livereload browser extension is required to make it actually reload pages on edits automatically). -4. There's Dockerfile that can be used to build and run ClickHouse website inside docker. -5. Deployment to https://clickhouse.yandex/ is managed by `release.sh`, but it is only usable from inside Yandex private network. diff --git a/website/gulpfile.js b/website/gulpfile.js deleted file mode 100644 index ca254bf681f..00000000000 --- a/website/gulpfile.js +++ /dev/null @@ -1,154 +0,0 @@ -var gulp = require('gulp'); -var concat = require('gulp-concat'); -var uglify = require('gulp-uglify'); -var cleanCss = require('gulp-clean-css'); -var imagemin = require('gulp-imagemin'); -var sourcemaps = require('gulp-sourcemaps'); -var htmlmin = require('gulp-htmlmin'); -var minifyInline = require('gulp-minify-inline'); -var del = require('del'); -var connect = require('gulp-connect'); -var run = require('gulp-run'); - -var outputDir = 'public'; -var docsDir = '../docs'; - -var paths = { - htmls: [ - '**/*.html', - '!deprecated/reference_ru.html', - '!deprecated/reference_en.html', - '!node_modules/**/*.html', - '!presentations/**/*.html', - '!public/**/*.html'], - reference: ['deprecated/reference_ru.html', 'deprecated/reference_en.html'], - docs: [docsDir + '/build/**/*'], - docstxt: ['docs/**/*.txt', 'docs/redirects.conf'], - docsjson: ['docs/**/*.json'], - docsxml: ['docs/**/*.xml'], - docspdf: ['docs/**/*.pdf'], - docssitemap: ['sitemap.xml', 'sitemap_static.xml'], - scripts: [ - '**/*.js', - '!gulpfile.js', - '!node_modules/**/*.js', - '!presentations/**/*.js', - '!public/**/*.js'], - styles: [ - '**/*.css', - '!node_modules/**/*.css', - '!presentations/**/*.css', - '!public/**/*.css'], - images: [ - '**/*.{jpg,jpeg,png,gif,svg,ico}', - '!node_modules/**/*.{jpg,jpeg,png,gif,svg,ico}', - '!presentations/**/*.{jpg,jpeg,png,gif,svg,ico}', - '!public/**/*.{jpg,jpeg,png,gif,svg,ico}'], - robotstxt: ['robots.txt'], - presentations: ['presentations/**/*'] -}; - -gulp.task('clean', function () { - return del([outputDir + '/**']); -}); - -gulp.task('reference', [], function () { - return gulp.src(paths.reference) - .pipe(minifyInline()) - .pipe(gulp.dest(outputDir + '/deprecated')) -}); - -gulp.task('docs', [], function () { - run('cd ' + docsDir + '/tools; ./build.py'); - return gulp.src(paths.docs) - .pipe(gulp.dest(outputDir + '/../docs')) -}); - -gulp.task('docstxt', ['docs'], function () { - return gulp.src(paths.docstxt) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('docsjson', ['docs'], function () { - return gulp.src(paths.docsjson) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('docsxml', ['docs'], function () { - return gulp.src(paths.docsxml) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('docspdf', ['docs'], function () { - return gulp.src(paths.docspdf) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('docssitemap', [], function () { - return gulp.src(paths.docssitemap) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('presentations', [], function () { - return gulp.src(paths.presentations) - .pipe(gulp.dest(outputDir + '/presentations')) -}); - -gulp.task('robotstxt', [], function () { - return gulp.src(paths.robotstxt) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('htmls', ['docs', 'docstxt', 'docsjson', 'docsxml', 'docspdf', 'docssitemap'], function () { - return gulp.src(paths.htmls) - .pipe(htmlmin({collapseWhitespace: true})) - .pipe(minifyInline()) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('sourcemaps', ['docs'], function () { - return gulp.src(paths.scripts) - .pipe(sourcemaps.init()) - .pipe(uglify()) - .pipe(sourcemaps.write()) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('scripts', ['docs'], function () { - return gulp.src(paths.scripts) - .pipe(uglify()) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('styles', ['docs'], function () { - return gulp.src(paths.styles) - .pipe(cleanCss()) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('images', ['docs'], function () { - return gulp.src(paths.images) - .pipe(imagemin({optimizationLevel: 9})) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('watch', function () { - gulp.watch(paths.htmls, ['htmls']); - gulp.watch(paths.docs, ['docs']); - gulp.watch(paths.reference, ['reference']); - gulp.watch(paths.scripts, ['scripts']); - gulp.watch(paths.images, ['images']); -}); - -gulp.task('connect', function() { - connect.server({ - root: outputDir, - port: 8080, - keepalive: true, - livereload: true - }) -}); - -gulp.task('build', ['htmls', 'robotstxt', 'reference', 'scripts', 'styles', 'images', 'presentations']); - -gulp.task('default', ['build', 'connect']); diff --git a/website/setup_gulp.sh b/website/setup_gulp.sh deleted file mode 100755 index 06398ccc3e4..00000000000 --- a/website/setup_gulp.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -set -ex -grep require gulpfile.js | awk -F\' '{print $2;}' | xargs npm install From efe35835af60eb159786add93f816f90873cff1b Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Tue, 26 Mar 2019 15:41:11 +0300 Subject: [PATCH 7/8] fix issue (#4805) --- docs/en/operations/table_engines/mergetree.md | 4 +--- docs/ru/operations/table_engines/mergetree.md | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index 9846e9fd8e4..ee08913dcd1 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -191,9 +191,7 @@ added dimensions. In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple. -[ALTER of the sorting key](../../query_language/alter.md) is a -lightweight operation because when a new column is simultaneously added to the table and to the sorting key -data parts need not be changed (they remain sorted by the new sorting key expression). +[ALTER of the sorting key](../../query_language/alter.md) is a lightweight operation because when a new column is simultaneously added to the table and to the sorting key, existing data parts don't need to be changed. Since the old sorting key is a prefix of the new sorting key and there is no data in the just added column, the data at the moment of table modification is sorted by both the old and the new sorting key. ### Use of Indexes and Partitions in Queries diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 9182d23a4c8..aeb29c270e1 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -189,7 +189,7 @@ ClickHouse не требует уникального первичного кл В этом сценарии имеет смысл оставить в первичном ключе всего несколько столбцов, которые обеспечат эффективную фильтрацию по индексу, а остальные столбцы-измерения добавить в выражение ключа сортировки. -[ALTER ключа сортировки](../../query_language/alter.md) — легкая операция, так как при одновременном добавлении нового столбца в таблицу и ключ сортировки не нужно изменять +[ALTER ключа сортировки](../../query_language/alter.md) — легкая операция, так как при одновременном добавлении нового столбца в таблицу и в ключ сортировки, не нужно изменять данные кусков (они остаются упорядоченными и по новому выражению ключа). ### Использование индексов и партиций в запросах From 62dde447ab306fea9d8a6f661e2a86e5f41e2562 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Tue, 26 Mar 2019 14:09:04 +0300 Subject: [PATCH 8/8] Better type system in Volnitsky --- dbms/src/Common/Volnitsky.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Common/Volnitsky.h b/dbms/src/Common/Volnitsky.h index d8fc42245bf..bce37e655cd 100644 --- a/dbms/src/Common/Volnitsky.h +++ b/dbms/src/Common/Volnitsky.h @@ -516,7 +516,7 @@ public: template void searchFirstPosition(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const CountCharsCallback & count_chars_callback, ResultType & ans) { - auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> size_t + auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> UInt64 { return this->searchOneFirstPosition(haystack, haystack_end, count_chars_callback); }; @@ -676,11 +676,11 @@ private: } template - inline size_t searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const + inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const { const size_t fallback_size = fallback_needles.size(); - size_t ans = std::numeric_limits::max(); + UInt64 ans = std::numeric_limits::max(); for (size_t i = 0; i < fallback_size; ++i) if (auto pos = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); pos != haystack_end) @@ -705,7 +705,7 @@ private: } } } - if (ans == std::numeric_limits::max()) + if (ans == std::numeric_limits::max()) return 0; return ans; }