From 72bcbc76b1e116aa9e0af9a78618ca827c19f0fe Mon Sep 17 00:00:00 2001 From: hcz Date: Wed, 13 Mar 2019 15:22:57 +0800 Subject: [PATCH 001/102] Add aggregate function leastSqr --- .../AggregateFunctionLeastSqr.cpp | 112 +++++++++++ .../AggregateFunctionLeastSqr.h | 176 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 2 + .../0_stateless/00917_least_sqr.reference | 7 + .../queries/0_stateless/00917_least_sqr.sql | 7 + 5 files changed, 304 insertions(+) create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h create mode 100644 dbms/tests/queries/0_stateless/00917_least_sqr.reference create mode 100644 dbms/tests/queries/0_stateless/00917_least_sqr.sql diff --git a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp new file mode 100644 index 00000000000..1cb213b6360 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp @@ -0,0 +1,112 @@ +#include + +#include +#include + + +namespace DB +{ + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionLeastSqr( + const String & name, + const DataTypes & arguments, + const Array & params +) +{ + assertNoParameters(name, params); + assertBinary(name, arguments); + + const IDataType * x_arg = arguments.front().get(); + + WhichDataType which_x { + x_arg + }; + + if ( + !which_x.isNativeUInt() + && !which_x.isNativeInt() + && !which_x.isFloat() + ) + throw Exception { + "Illegal type " + x_arg->getName() + + " of first argument of aggregate function " + + name + ", must be Native Int, Native UInt or Float", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + + const IDataType * y_arg = arguments.back().get(); + + WhichDataType which_y { + y_arg + }; + + if ( + !which_y.isNativeUInt() + && !which_y.isNativeInt() + && !which_y.isFloat() + ) + throw Exception { + "Illegal type " + y_arg->getName() + + " of second argument of aggregate function " + + name + ", must be Native Int, Native UInt or Float", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + + if (which_x.isNativeUInt() && which_y.isNativeUInt()) + return std::make_shared>( + arguments, + params + ); + else if (which_x.isNativeUInt() && which_y.isNativeInt()) + return std::make_shared>( + arguments, + params + ); + else if (which_x.isNativeUInt() && which_y.isFloat()) + return std::make_shared>( + arguments, + params + ); + else if (which_x.isNativeInt() && which_y.isNativeUInt()) + return std::make_shared>( + arguments, + params + ); + else if (which_x.isNativeInt() && which_y.isNativeInt()) + return std::make_shared>( + arguments, + params + ); + else if (which_x.isNativeInt() && which_y.isFloat()) + return std::make_shared>( + arguments, + params + ); + else if (which_x.isFloat() && which_y.isNativeUInt()) + return std::make_shared>( + arguments, + params + ); + else if (which_x.isFloat() && which_y.isNativeInt()) + return std::make_shared>( + arguments, + params + ); + else // if (which_x.isFloat() && which_y.isFloat()) + return std::make_shared>( + arguments, + params + ); +} + +} + +void registerAggregateFunctionLeastSqr(AggregateFunctionFactory & factory) +{ + factory.registerFunction("leastSqr", createAggregateFunctionLeastSqr); +} + +} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h new file mode 100644 index 00000000000..c527e34588d --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h @@ -0,0 +1,176 @@ 
+#pragma once + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +template +struct AggregateFunctionLeastSqrData +{ + size_t count = 0; + Ret sum_x = 0; + Ret sum_y = 0; + Ret sum_xx = 0; + Ret sum_xy = 0; + + void add(X x, Y y) + { + count += 1; + sum_x += x; + sum_y += y; + sum_xx += x * x; + sum_xy += x * y; + } + + void merge(const AggregateFunctionLeastSqrData & other) + { + count += other.count; + sum_x += other.sum_x; + sum_y += other.sum_y; + sum_xx += other.sum_xx; + sum_xy += other.sum_xy; + } + + void serialize(WriteBuffer & buf) const + { + writeBinary(count, buf); + writeBinary(sum_x, buf); + writeBinary(sum_y, buf); + writeBinary(sum_xx, buf); + writeBinary(sum_xy, buf); + } + + void deserialize(ReadBuffer & buf) + { + readBinary(count, buf); + readBinary(sum_x, buf); + readBinary(sum_y, buf); + readBinary(sum_xx, buf); + readBinary(sum_xy, buf); + } + + Ret getK() const + { + return (sum_xy * count - sum_x * sum_y) + / (sum_xx * count - sum_x * sum_x); + } + + Ret getB(Ret k) const + { + return (sum_y - k * sum_x) / count; + } +}; + +template +class AggregateFunctionLeastSqr final : public IAggregateFunctionDataHelper< + AggregateFunctionLeastSqrData, + AggregateFunctionLeastSqr +> +{ +public: + AggregateFunctionLeastSqr( + const DataTypes & arguments, + const Array & params + ): + IAggregateFunctionDataHelper< + AggregateFunctionLeastSqrData, + AggregateFunctionLeastSqr + > {arguments, params} + { + // notice: arguments has been checked before + } + + void add( + AggregateDataPtr place, + const IColumn ** columns, + size_t row_num, + Arena * + ) const override + { + X x = (*columns[0])[row_num].template get(); + Y y = (*columns[1])[row_num].template get(); + + this->data(place).add(x, y); + } + + void merge( + AggregateDataPtr place, + ConstAggregateDataPtr rhs, Arena * + ) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize( + ConstAggregateDataPtr place, + WriteBuffer & buf + ) const override + { + this->data(place).serialize(buf); + } + + void deserialize( + AggregateDataPtr place, + ReadBuffer & buf, Arena * + ) const override + { + this->data(place).deserialize(buf); + } + + DataTypePtr getReturnType() const override + { + DataTypes types { + std::make_shared( + std::make_shared() + ), + std::make_shared( + std::make_shared() + ), + }; + + Strings names { + "k", + "b", + }; + + return std::make_shared( + std::move(types), + std::move(names) + ); + } + + void insertResultInto( + ConstAggregateDataPtr place, + IColumn & to + ) const override + { + Ret k = this->data(place).getK(); + Ret b = this->data(place).getB(k); + + Tuple result; + result.toUnderType().reserve(2); + + result.toUnderType().emplace_back(k); + result.toUnderType().emplace_back(b); + + to.insert(std::move(result)); + } + + String getName() const override { return "leastSqr"; } + const char * getHeaderFilePath() const override { return __FILE__; } +}; + +} diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp index 0ef138119f9..2d5a0eafc07 100644 --- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -29,6 +29,7 @@ void registerAggregateFunctionsBitwise(AggregateFunctionFactory &); void registerAggregateFunctionsBitmap(AggregateFunctionFactory &); void 
registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &); void registerAggregateFunctionEntropy(AggregateFunctionFactory &); +void registerAggregateFunctionLeastSqr(AggregateFunctionFactory &); void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &); @@ -69,6 +70,7 @@ void registerAggregateFunctions() registerAggregateFunctionHistogram(factory); registerAggregateFunctionRetention(factory); registerAggregateFunctionEntropy(factory); + registerAggregateFunctionLeastSqr(factory); } { diff --git a/dbms/tests/queries/0_stateless/00917_least_sqr.reference b/dbms/tests/queries/0_stateless/00917_least_sqr.reference new file mode 100644 index 00000000000..89d168b03bb --- /dev/null +++ b/dbms/tests/queries/0_stateless/00917_least_sqr.reference @@ -0,0 +1,7 @@ +(10,90) +(10.3,89.5) +(10,-90) +(1,1) +(nan,nan) +(0,3) +(nan,nan) diff --git a/dbms/tests/queries/0_stateless/00917_least_sqr.sql b/dbms/tests/queries/0_stateless/00917_least_sqr.sql new file mode 100644 index 00000000000..80f28a6abd9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00917_least_sqr.sql @@ -0,0 +1,7 @@ +select arrayReduce('leastSqr', [1, 2, 3, 4], [100, 110, 120, 130]); +select arrayReduce('leastSqr', [1, 2, 3, 4], [100, 110, 120, 131]); +select arrayReduce('leastSqr', [-1, -2, -3, -4], [-100, -110, -120, -130]); +select arrayReduce('leastSqr', [5, 5.1], [6, 6.1]); +select arrayReduce('leastSqr', [0], [0]); +select arrayReduce('leastSqr', [3, 4], [3, 3]); +select arrayReduce('leastSqr', [3, 3], [3, 4]); From 06bb9ad833ae69eb561e61eeb7dc6503672ca6d3 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 22 Mar 2019 15:57:17 +0800 Subject: [PATCH 002/102] Update AggregateFunctionLeastSqr.cpp --- .../AggregateFunctionLeastSqr.cpp | 109 +++++++----------- 1 file changed, 41 insertions(+), 68 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp index 1cb213b6360..a76a22b3d92 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp @@ -25,81 +25,54 @@ AggregateFunctionPtr createAggregateFunctionLeastSqr( x_arg }; - if ( - !which_x.isNativeUInt() - && !which_x.isNativeInt() - && !which_x.isFloat() - ) - throw Exception { - "Illegal type " + x_arg->getName() - + " of first argument of aggregate function " - + name + ", must be Native Int, Native UInt or Float", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT - }; - const IDataType * y_arg = arguments.back().get(); WhichDataType which_y { y_arg }; - if ( - !which_y.isNativeUInt() - && !which_y.isNativeInt() - && !which_y.isFloat() - ) - throw Exception { - "Illegal type " + y_arg->getName() - + " of second argument of aggregate function " - + name + ", must be Native Int, Native UInt or Float", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT - }; + #define FOR_LEASTSQR_TYPES_2(M, T) \ + M(T, UInt8) \ + M(T, UInt16) \ + M(T, UInt32) \ + M(T, UInt64) \ + M(T, Int8) \ + M(T, Int16) \ + M(T, Int32) \ + M(T, Int64) \ + M(T, Float32) \ + M(T, Float64) + #define FOR_LEASTSQR_TYPES(M) \ + FOR_LEASTSQR_TYPES_2(M, UInt8) \ + FOR_LEASTSQR_TYPES_2(M, UInt16) \ + FOR_LEASTSQR_TYPES_2(M, UInt32) \ + FOR_LEASTSQR_TYPES_2(M, UInt64) \ + FOR_LEASTSQR_TYPES_2(M, Int8) \ + FOR_LEASTSQR_TYPES_2(M, Int16) \ + FOR_LEASTSQR_TYPES_2(M, Int32) \ + FOR_LEASTSQR_TYPES_2(M, Int64) \ + FOR_LEASTSQR_TYPES_2(M, Float32) \ + FOR_LEASTSQR_TYPES_2(M, 
Float64) + #define DISPATCH(T1, T2) \ + if (which_x.idx == TypeIndex::T1 && which_y.idx == TypeIndex::T2) \ + return std::make_shared>( \ + arguments, \ + params \ + ); - if (which_x.isNativeUInt() && which_y.isNativeUInt()) - return std::make_shared>( - arguments, - params - ); - else if (which_x.isNativeUInt() && which_y.isNativeInt()) - return std::make_shared>( - arguments, - params - ); - else if (which_x.isNativeUInt() && which_y.isFloat()) - return std::make_shared>( - arguments, - params - ); - else if (which_x.isNativeInt() && which_y.isNativeUInt()) - return std::make_shared>( - arguments, - params - ); - else if (which_x.isNativeInt() && which_y.isNativeInt()) - return std::make_shared>( - arguments, - params - ); - else if (which_x.isNativeInt() && which_y.isFloat()) - return std::make_shared>( - arguments, - params - ); - else if (which_x.isFloat() && which_y.isNativeUInt()) - return std::make_shared>( - arguments, - params - ); - else if (which_x.isFloat() && which_y.isNativeInt()) - return std::make_shared>( - arguments, - params - ); - else // if (which_x.isFloat() && which_y.isFloat()) - return std::make_shared>( - arguments, - params - ); + FOR_LEASTSQR_TYPES(DISPATCH) + + #undef FOR_LEASTSQR_TYPES_2 + #undef FOR_LEASTSQR_TYPES + #undef DISPATCH + + throw Exception { + "Illegal types (" + + x_arg->getName() + ", " + y_arg->getName() + + ") of arguments of aggregate function " + name + + ", must be Native Ints, Native UInts or Floats", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; } } From 38ea75748cb3702b28b8691ba2b4a152419b6998 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 22 Mar 2019 15:57:33 +0800 Subject: [PATCH 003/102] Update AggregateFunctionLeastSqr.h --- .../AggregateFunctionLeastSqr.h | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h index c527e34588d..e1a57961af0 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -18,7 +18,7 @@ namespace ErrorCodes } template -struct AggregateFunctionLeastSqrData +struct AggregateFunctionLeastSqrData final { size_t count = 0; Ret sum_x = 0; @@ -93,6 +93,16 @@ public: // notice: arguments has been checked before } + String getName() const override + { + return "leastSqr"; + } + + const char * getHeaderFilePath() const override + { + return __FILE__; + } + void add( AggregateDataPtr place, const IColumn ** columns, @@ -100,8 +110,15 @@ public: Arena * ) const override { - X x = (*columns[0])[row_num].template get(); - Y y = (*columns[1])[row_num].template get(); + auto col_x { + static_cast *>(columns[0]) + }; + auto col_y { + static_cast *>(columns[1]) + }; + + X x = col_x->getData()[row_num]; + Y y = col_y->getData()[row_num]; this->data(place).add(x, y); } @@ -168,9 +185,6 @@ public: to.insert(std::move(result)); } - - String getName() const override { return "leastSqr"; } - const char * getHeaderFilePath() const override { return __FILE__; } }; } From b74d3501e43d2afe3a1fa7774f6744568fa66197 Mon Sep 17 00:00:00 2001 From: hcz Date: Sat, 23 Mar 2019 20:20:40 +0800 Subject: [PATCH 004/102] Fix style error --- dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp 
b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp index a76a22b3d92..18474a7a7d4 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp @@ -66,13 +66,13 @@ AggregateFunctionPtr createAggregateFunctionLeastSqr( #undef FOR_LEASTSQR_TYPES #undef DISPATCH - throw Exception { + throw Exception( "Illegal types (" + x_arg->getName() + ", " + y_arg->getName() + ") of arguments of aggregate function " + name + ", must be Native Ints, Native UInts or Floats", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT - }; + ); } } From 9ee019d9751ca435be970f275dbb3d71ce94d9f0 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 26 Mar 2019 12:28:08 +0300 Subject: [PATCH 005/102] CLICKHOUSE-3006: get rid of nodejs during website build --- docs/tools/README.md | 2 +- docs/tools/build.py | 18 +++- {website => docs/tools}/release.sh | 10 +- docs/tools/requirements.txt | 6 +- docs/tools/website.py | 45 +++++++++ website/Dockerfile | 2 +- website/README.md | 15 +-- website/gulpfile.js | 154 ----------------------------- website/setup_gulp.sh | 3 - 9 files changed, 73 insertions(+), 182 deletions(-) rename {website => docs/tools}/release.sh (87%) create mode 100644 docs/tools/website.py delete mode 100644 website/gulpfile.js delete mode 100755 website/setup_gulp.sh diff --git a/docs/tools/README.md b/docs/tools/README.md index 9b4dd26dd14..6332a22f164 100644 --- a/docs/tools/README.md +++ b/docs/tools/README.md @@ -2,7 +2,7 @@ ClickHouse documentation is built using [build.py](build.py) script that uses [mkdocs](https://www.mkdocs.org) library and it's dependencies to separately build all version of documentations (all languages in either single and multi page mode) as static HTMLs. The results are then put in correct directory structure. It can also generate PDF version. -Finally [the infrustructure](../website) that builds ClickHouse [official website](https://clickhouse.yandex) just puts that directory structure into the same Docker container together with rest of website and deploys it to Yandex private cloud. +[release.sh](release.sh) also pulls static files needed for [official ClickHouse website](https://clickhouse.yandex) from [../../website](../../website) folder, packs them alongside docs into Docker container and tries to deploy it (possible only from Yandex private network). ## How to check if the documentation will look fine? 
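For reference, the regression arithmetic introduced in patches 001–004 reduces to a closed form over the five running sums kept in AggregateFunctionLeastSqrData (`count`, `sum_x`, `sum_y`, `sum_xx`, `sum_xy`). A minimal Python sketch of the same computation (the function and variable names here are illustrative, not ClickHouse code); it reproduces the first line of 00917_least_sqr.reference:

```python
# Ordinary least squares from the same five running sums the aggregate
# state keeps; mirrors getK()/getB() in AggregateFunctionLeastSqr.h.
def least_sqr(xs, ys):
    n = len(xs)
    sum_x, sum_y = sum(xs), sum(ys)
    sum_xx = sum(x * x for x in xs)
    sum_xy = sum(x * y for x, y in zip(xs, ys))
    # With a zero denominator the SQL tests print (nan,nan); plain Python
    # division would raise ZeroDivisionError here instead.
    k = (sum_xy * n - sum_x * sum_y) / float(sum_xx * n - sum_x * sum_x)
    b = (sum_y - k * sum_x) / float(n)
    return k, b

print(least_sqr([1, 2, 3, 4], [100, 110, 120, 130]))  # (10.0, 90.0)
```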
diff --git a/docs/tools/build.py b/docs/tools/build.py index e7e1c777b42..ff89b437ffc 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -21,6 +21,7 @@ from mkdocs import exceptions from mkdocs.commands import build as mkdocs_build from concatenate import concatenate +from website import build_website, minify_website import mdx_clickhouse import test @@ -96,7 +97,7 @@ def build_for_lang(lang, args): site_name=site_names.get(lang, site_names['en']), site_url='https://clickhouse.yandex/docs/%s/' % lang, docs_dir=os.path.join(args.docs_dir, lang), - site_dir=os.path.join(args.output_dir, lang), + site_dir=os.path.join(args.docs_output_dir, lang), strict=True, theme=theme_cfg, copyright='©2016–2019 Yandex LLC', @@ -168,7 +169,7 @@ def build_single_page_version(lang, args, cfg): mkdocs_build.build(cfg) - single_page_output_path = os.path.join(args.docs_dir, args.output_dir, lang, 'single') + single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single') if os.path.exists(single_page_output_path): shutil.rmtree(single_page_output_path) @@ -212,29 +213,40 @@ def build_redirects(args): to_path = '/docs/$1/' + to_path.replace('.md', '/') rewrites.append(' '.join(['rewrite', from_path, to_path, 'permanent;'])) - with open(os.path.join(args.output_dir, 'redirects.conf'), 'w') as f: + with open(os.path.join(args.docs_output_dir, 'redirects.conf'), 'w') as f: f.write('\n'.join(rewrites)) def build(args): + if os.path.exists(args.output_dir): + shutil.rmtree(args.output_dir) + + if not args.skip_website: + build_website(args) + for lang in args.lang.split(','): build_for_lang(lang, args) build_redirects(args) + if not args.skip_website: + minify_website(args) if __name__ == '__main__': arg_parser = argparse.ArgumentParser() arg_parser.add_argument('--lang', default='en,ru,zh,fa') arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument('--theme-dir', default='mkdocs-material-theme') + arg_parser.add_argument('--website-dir', default=os.path.join('..', 'website')) arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument('--skip-single-page', action='store_true') arg_parser.add_argument('--skip-pdf', action='store_true') + arg_parser.add_argument('--skip-website', action='store_true') arg_parser.add_argument('--save-raw-single-page', type=str) arg_parser.add_argument('--verbose', action='store_true') args = arg_parser.parse_args() + args.docs_output_dir = os.path.join(args.output_dir, 'docs') os.chdir(os.path.join(os.path.dirname(__file__), '..')) logging.basicConfig( diff --git a/website/release.sh b/docs/tools/release.sh similarity index 87% rename from website/release.sh rename to docs/tools/release.sh index 83e25563a57..e671dd8cea0 100755 --- a/website/release.sh +++ b/docs/tools/release.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -ex BASE_DIR=$(dirname $(readlink -f $0)) -cd "${BASE_DIR}" +BUILD_DIR="${BASE_DIR}/../build" IMAGE="clickhouse/website" if [[ -z "$1" ]] then @@ -12,12 +12,12 @@ fi FULL_NAME="${IMAGE}:${TAG}" REMOTE_NAME="registry.yandex.net/${FULL_NAME}" DOCKER_HASH="$2" -GULP="$BASE_DIR/node_modules/gulp/bin/gulp.js" if [[ -z "$1" ]] then - $GULP clean - $GULP build - docker build -t "${FULL_NAME}" "${BASE_DIR}" + source "${BASE_DIR}/venv/bin/activate" + python "${BASE_DIR}/build.py" + cd "${BUILD_DIR}" + docker build -t "${FULL_NAME}" "${BUILD_DIR}" docker tag "${FULL_NAME}" "${REMOTE_NAME}" DOCKER_HASH=$(docker push "${REMOTE_NAME}" | tail -1 | awk '{print $3;}') docker rmi "${FULL_NAME}" diff --git 
a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 85cd355dbdc..7976a01043c 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -6,11 +6,14 @@ certifi==2017.11.5 chardet==3.0.4 click==6.7 CommonMark==0.5.4 +cssmin==0.2.0 docutils==0.14 futures==3.2.0 +htmlmin==0.1.12 idna==2.6 imagesize==0.7.1 Jinja2==2.10 +jsmin==2.2.2 livereload==2.5.1 Markdown==2.6.11 MarkupSafe==1.0 @@ -18,7 +21,7 @@ mkdocs==1.0.4 Pygments==2.2.0 python-slugify==1.2.6 pytz==2017.3 -PyYAML==4.2b1 +PyYAML==3.12 recommonmark==0.4.0 requests==2.21.0 singledispatch==3.4.0.3 @@ -30,3 +33,4 @@ tornado==5.1 typing==3.6.2 Unidecode==1.0.23 urllib3==1.23 +webassets==0.12.1 diff --git a/docs/tools/website.py b/docs/tools/website.py new file mode 100644 index 00000000000..0605058a6c6 --- /dev/null +++ b/docs/tools/website.py @@ -0,0 +1,45 @@ +import logging +import os +import shutil + +import cssmin +import htmlmin +import jsmin + +def build_website(args): + logging.info('Building website') + shutil.copytree( + args.website_dir, + args.output_dir, + ignore=shutil.ignore_patterns( + '*.md', + '*.sh', + 'build', + 'docs', + 'public', + 'node_modules' + ) + ) + +def minify_website(args): + for root, _, filenames in os.walk(args.output_dir): + for filename in filenames: + path = os.path.join(root, filename) + if not ( + filename.endswith('.html') or + filename.endswith('.css') or + filename.endswith('.js') + ): + continue + + logging.info('Minifying %s', path) + with open(path, 'r') as f: + content = f.read().decode('utf-8') + if filename.endswith('.html'): + content = htmlmin.minify(content, remove_empty_space=False) + elif filename.endswith('.css'): + content = cssmin.cssmin(content) + elif filename.endswith('.js'): + content = jsmin.jsmin(content) + with open(path, 'w') as f: + f.write(content.encode('utf-8')) diff --git a/website/Dockerfile b/website/Dockerfile index b66e0c8da34..64eb0ce5e33 100644 --- a/website/Dockerfile +++ b/website/Dockerfile @@ -1,4 +1,4 @@ FROM nginx:mainline -COPY public /usr/share/nginx/html +COPY . /usr/share/nginx/html COPY nginx/nginx.conf /etc/nginx/nginx.conf COPY nginx/default.conf /etc/nginx/conf.d/default.conf diff --git a/website/README.md b/website/README.md index d6abca119c2..26bb1dceab5 100644 --- a/website/README.md +++ b/website/README.md @@ -1,15 +1,2 @@ -ClickHouse website quickstart: +ClickHouse website is built alongside it's documentation via [docs/tools](https://github.com/yandex/ClickHouse/tree/master/docs/tools), see [README.md there](https://github.com/yandex/ClickHouse/tree/master/docs/tools/README.md). -On Linux, do the following: -``` -sudo apt-get install nodejs -sudo ln -s /usr/bin/nodejs /usr/bin/node -sudo npm install gulp-cli -g -sudo npm install gulp -D -``` - -1. Make sure you have `npm`, `docker` and `python` installed and available in your `$PATH`. -2. Run `setup\_gulp.sh` once to install build prerequisites via npm. -3. Use `gulp build` to minify website to "public" subfolder or just `gulp` to run local webserver with livereload serving it (note: livereload browser extension is required to make it actually reload pages on edits automatically). -4. There's Dockerfile that can be used to build and run ClickHouse website inside docker. -5. Deployment to https://clickhouse.yandex/ is managed by `release.sh`, but it is only usable from inside Yandex private network. 
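The three minifiers that `minify_website` dispatches on by file extension are ordinary pip packages, pinned above in requirements.txt (htmlmin 0.1.12, cssmin 0.2.0, jsmin 2.2.2), so their effect is easy to sanity-check in isolation. A small sketch using the same calls website.py makes; the exact minified output depends on the pinned versions:

```python
# Same three entry points website.py uses, applied to inline samples.
import cssmin
import htmlmin
import jsmin

print(htmlmin.minify(u"<p>  hello   world </p>", remove_empty_space=False))
print(cssmin.cssmin("body {  color : red ; }"))
print(jsmin.jsmin("var answer = 42;  // the comment goes away\n"))
```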
diff --git a/website/gulpfile.js b/website/gulpfile.js deleted file mode 100644 index ca254bf681f..00000000000 --- a/website/gulpfile.js +++ /dev/null @@ -1,154 +0,0 @@ -var gulp = require('gulp'); -var concat = require('gulp-concat'); -var uglify = require('gulp-uglify'); -var cleanCss = require('gulp-clean-css'); -var imagemin = require('gulp-imagemin'); -var sourcemaps = require('gulp-sourcemaps'); -var htmlmin = require('gulp-htmlmin'); -var minifyInline = require('gulp-minify-inline'); -var del = require('del'); -var connect = require('gulp-connect'); -var run = require('gulp-run'); - -var outputDir = 'public'; -var docsDir = '../docs'; - -var paths = { - htmls: [ - '**/*.html', - '!deprecated/reference_ru.html', - '!deprecated/reference_en.html', - '!node_modules/**/*.html', - '!presentations/**/*.html', - '!public/**/*.html'], - reference: ['deprecated/reference_ru.html', 'deprecated/reference_en.html'], - docs: [docsDir + '/build/**/*'], - docstxt: ['docs/**/*.txt', 'docs/redirects.conf'], - docsjson: ['docs/**/*.json'], - docsxml: ['docs/**/*.xml'], - docspdf: ['docs/**/*.pdf'], - docssitemap: ['sitemap.xml', 'sitemap_static.xml'], - scripts: [ - '**/*.js', - '!gulpfile.js', - '!node_modules/**/*.js', - '!presentations/**/*.js', - '!public/**/*.js'], - styles: [ - '**/*.css', - '!node_modules/**/*.css', - '!presentations/**/*.css', - '!public/**/*.css'], - images: [ - '**/*.{jpg,jpeg,png,gif,svg,ico}', - '!node_modules/**/*.{jpg,jpeg,png,gif,svg,ico}', - '!presentations/**/*.{jpg,jpeg,png,gif,svg,ico}', - '!public/**/*.{jpg,jpeg,png,gif,svg,ico}'], - robotstxt: ['robots.txt'], - presentations: ['presentations/**/*'] -}; - -gulp.task('clean', function () { - return del([outputDir + '/**']); -}); - -gulp.task('reference', [], function () { - return gulp.src(paths.reference) - .pipe(minifyInline()) - .pipe(gulp.dest(outputDir + '/deprecated')) -}); - -gulp.task('docs', [], function () { - run('cd ' + docsDir + '/tools; ./build.py'); - return gulp.src(paths.docs) - .pipe(gulp.dest(outputDir + '/../docs')) -}); - -gulp.task('docstxt', ['docs'], function () { - return gulp.src(paths.docstxt) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('docsjson', ['docs'], function () { - return gulp.src(paths.docsjson) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('docsxml', ['docs'], function () { - return gulp.src(paths.docsxml) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('docspdf', ['docs'], function () { - return gulp.src(paths.docspdf) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('docssitemap', [], function () { - return gulp.src(paths.docssitemap) - .pipe(gulp.dest(outputDir + '/docs')) -}); - -gulp.task('presentations', [], function () { - return gulp.src(paths.presentations) - .pipe(gulp.dest(outputDir + '/presentations')) -}); - -gulp.task('robotstxt', [], function () { - return gulp.src(paths.robotstxt) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('htmls', ['docs', 'docstxt', 'docsjson', 'docsxml', 'docspdf', 'docssitemap'], function () { - return gulp.src(paths.htmls) - .pipe(htmlmin({collapseWhitespace: true})) - .pipe(minifyInline()) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('sourcemaps', ['docs'], function () { - return gulp.src(paths.scripts) - .pipe(sourcemaps.init()) - .pipe(uglify()) - .pipe(sourcemaps.write()) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('scripts', ['docs'], function () { - return gulp.src(paths.scripts) - .pipe(uglify()) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('styles', ['docs'], function () { 
- return gulp.src(paths.styles) - .pipe(cleanCss()) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('images', ['docs'], function () { - return gulp.src(paths.images) - .pipe(imagemin({optimizationLevel: 9})) - .pipe(gulp.dest(outputDir)) -}); - -gulp.task('watch', function () { - gulp.watch(paths.htmls, ['htmls']); - gulp.watch(paths.docs, ['docs']); - gulp.watch(paths.reference, ['reference']); - gulp.watch(paths.scripts, ['scripts']); - gulp.watch(paths.images, ['images']); -}); - -gulp.task('connect', function() { - connect.server({ - root: outputDir, - port: 8080, - keepalive: true, - livereload: true - }) -}); - -gulp.task('build', ['htmls', 'robotstxt', 'reference', 'scripts', 'styles', 'images', 'presentations']); - -gulp.task('default', ['build', 'connect']); diff --git a/website/setup_gulp.sh b/website/setup_gulp.sh deleted file mode 100755 index 06398ccc3e4..00000000000 --- a/website/setup_gulp.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -set -ex -grep require gulpfile.js | awk -F\' '{print $2;}' | xargs npm install From 876cab387272af2f54e7cdd079f5228f9077da61 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 26 Mar 2019 12:49:32 +0300 Subject: [PATCH 006/102] webassets is not actually used --- docs/tools/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 7976a01043c..f0df3b8ff36 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -33,4 +33,3 @@ tornado==5.1 typing==3.6.2 Unidecode==1.0.23 urllib3==1.23 -webassets==0.12.1 From 3edcf1429a3d6403b6b36a20dc47cff125799e6a Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 26 Mar 2019 16:28:09 +0300 Subject: [PATCH 007/102] Website Dockerfile fix --- website/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/Dockerfile b/website/Dockerfile index 64eb0ce5e33..ee4f9ffccdc 100644 --- a/website/Dockerfile +++ b/website/Dockerfile @@ -1,4 +1,4 @@ FROM nginx:mainline -COPY . /usr/share/nginx/html +COPY . /usr/share/nginx/html/public COPY nginx/nginx.conf /etc/nginx/nginx.conf COPY nginx/default.conf /etc/nginx/conf.d/default.conf From 0f4fc1e4f84fa87e5c78fd4287f9bc3913a269a4 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 28 Mar 2019 13:36:49 +0300 Subject: [PATCH 008/102] Try use CMAKE_LINK_DEPENDS_NO_SHARED --- CMakeLists.txt | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 750a3686c1d..b5f2a88f702 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,11 @@ -project (ClickHouse) -cmake_minimum_required (VERSION 3.3) +project(ClickHouse) +cmake_minimum_required(VERSION 3.3) cmake_policy(SET CMP0023 NEW) - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/") +set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json +set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so +set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE) +set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.") # To be consistent with CMakeLists from contrib libs. option(ENABLE_IPO "Enable inter-procedural optimization (aka LTO)" OFF) # need cmake 3.9+ if(ENABLE_IPO) @@ -38,9 +41,6 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURC message (FATAL_ERROR "Submodules are not initialized. 
Run\n\tgit submodule update --init --recursive") endif () -# Write compile_commands.json -set(CMAKE_EXPORT_COMPILE_COMMANDS 1) - include (cmake/find_ccache.cmake) if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None") @@ -50,8 +50,6 @@ endif () string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") -set (CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE) -set (CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.") # To be consistent with CMakeLists from contrib libs. option (USE_STATIC_LIBRARIES "Set to FALSE to use shared libraries" ON) option (MAKE_STATIC_LIBRARIES "Set to FALSE to make shared libraries" ${USE_STATIC_LIBRARIES}) From 8784a4407b52d303dc35b35372207213a15c792e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 Mar 2019 12:05:19 +0300 Subject: [PATCH 009/102] Update roadmap.md --- docs/en/roadmap.md | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/en/roadmap.md b/docs/en/roadmap.md index b59b58ee8dd..34307e519b0 100644 --- a/docs/en/roadmap.md +++ b/docs/en/roadmap.md @@ -1,22 +1,16 @@ # Roadmap -## Q1 2019 - -- JOIN syntax compatible with SQL standard: - - Mutliple `JOIN`s in single `SELECT` -- Protobuf and Parquet input and output formats -- Import/export from HDFS and S3 -- Lower metadata size in ZooKeeper -- Adaptive index granularity for MergeTree engine family - ## Q2 2019 +- DDL for dictionaries +- Integration with S3-like object stores +- Multiple storages for hot/cold data, JBOD support + +## Q3 2019 + - JOIN execution improvements: - Distributed join not limited by memory - Resource pools for more precise distribution of cluster capacity between users - -## Q3 2019 - - Fine-grained authorization - Integration with external authentication services From 1eb5713a74d5376e2913a4d683ef06d1a2e003a7 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 25 Mar 2019 22:56:15 +0800 Subject: [PATCH 010/102] fix crash when predicate optimizer & join on --- dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 2 +- .../queries/0_stateless/00597_push_down_predicate.reference | 2 ++ dbms/tests/queries/0_stateless/00597_push_down_predicate.sql | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 47ab1528c65..1e824d44e52 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -74,7 +74,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, IdentifierSemantic::setMembership(identifier, best_table_pos + 1); /// In case if column from the joined table are in source columns, change it's name to qualified.
- if (best_table_pos && data.source_columns.count(identifier.shortName())) + if (best_table_pos && !data.source_columns.empty() && data.source_columns.count(identifier.shortName())) IdentifierSemantic::setNeedLongName(identifier, true); if (!data.tables.empty()) IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first); diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index 94adaffd52f..5fad6845a28 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -53,3 +53,5 @@ SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.name 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n FROM system.numbers \n LIMIT 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test.test \n WHERE date = toDate(\'2000-01-01\')\n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') 1 2000-01-01 test string 1 1 +SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.id`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.id, \n b.name, \n b.value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) AS a \n ANY LEFT JOIN \n (\n SELECT *\n FROM test.test \n ) AS b ON id = b.id\n WHERE id = 1\n) \nWHERE id = 1 +2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql index 4d3187c6338..c9618becaea 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -108,5 +108,8 @@ SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) ANALYZE SELECT * FROM (SELECT toInt8(1) AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date, id WHERE b.date = toDate('2000-01-01'); SELECT * FROM (SELECT toInt8(1) AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date, id WHERE b.date = toDate('2000-01-01'); +ANALYZE SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a ANY LEFT JOIN (SELECT * FROM test.test) AS b ON a.id = b.id) WHERE id = 1; +SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a ANY LEFT JOIN (SELECT * FROM test.test) AS b ON a.id = b.id) WHERE id = 1; + DROP TABLE IF EXISTS test.test; DROP TABLE IF EXISTS test.test_view; From bdb420cdfdb9c591ee0fed42cf86fb51c9f66553 Mon Sep 17 00:00:00 2001 From: qianlixiang Date: Fri, 29 Mar 2019 18:53:50 +0800 Subject: [PATCH 011/102] Fixed segment fault of arrayIntersect --- dbms/src/Functions/arrayIntersect.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index c6cb02e6caf..51ae95cf707 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -277,7 +277,7 @@ void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & ar const auto & return_type = block.getByPosition(result).type; auto return_type_array = checkAndGetDataType(return_type.get()); - if (!return_type) + if (!return_type_array) throw 
Exception{"Return type for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR}; const auto & nested_return_type = return_type_array->getNestedType(); @@ -393,6 +393,11 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable { bool current_has_nullable = false; size_t off = (*arrays.offsets[arg])[row]; + // const array has only one row + bool const_arg = arrays.is_const[arg]; + if (const_arg) + off = (*arrays.offsets[arg])[0]; + for (auto i : ext::range(prev_off[arg], off)) { if (arrays.null_maps[arg] && (*arrays.null_maps[arg])[i]) @@ -412,6 +417,9 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable } prev_off[arg] = off; + if (const_arg) + prev_off[arg] = 0; + if (!current_has_nullable) all_has_nullable = false; } From 0096ef35247b9bc8a603eaa76a2fa65efe396ce3 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 29 Mar 2019 22:25:37 +0800 Subject: [PATCH 012/102] Try another fix for the predicate optimizer crash with JOIN ON --- dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp | 3 ++- dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index b564c2cd52d..7d4a1437b7a 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -340,8 +340,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); /// TODO: get tables from evaluateAsterisk instead of tablesOnly() to extract asterisks in general way + NameSet source_columns; std::vector tables_with_columns = TranslateQualifiedNamesVisitor::Data::tablesOnly(tables); - TranslateQualifiedNamesVisitor::Data qn_visitor_data({}, tables_with_columns, false); + TranslateQualifiedNamesVisitor::Data qn_visitor_data(source_columns, tables_with_columns, false); TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor::Data query_aliases_data{aliases}; diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 1e824d44e52..01f564c11b3 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB @@ -74,7 +75,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, IdentifierSemantic::setMembership(identifier, best_table_pos + 1); /// In case if column from the joined table are in source columns, change it's name to qualified. - if (best_table_pos && !data.source_columns.empty() && data.source_columns.count(identifier.shortName())) + if (best_table_pos && data.source_columns.count(identifier.shortName())) IdentifierSemantic::setNeedLongName(identifier, true); if (!data.tables.empty()) IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first); From a78e067c7044b480154ee3b55d7ab1047db0702a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 26 Mar 2019 16:05:53 +0300 Subject: [PATCH 013/102] Allow empty row_delimiter when using Kafka.
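The behavioural contract of this patch, in short: a `row_delimiter` given as an empty string now maps to the NUL character, and `DelimitedReadBuffer` skips inserting the delimiter when it is zero. A rough Python model of the new `SettingChar::set(const String &)` rule (illustrative only; the error message is paraphrased — see the diff below for the real C++):

```python
# "" -> '\0' (delimiter disabled), one character -> that character,
# anything longer is still rejected, as before the patch.
def setting_char_from_string(s):
    if len(s) > 1:
        raise ValueError("setting value must be at most one character long")
    return s if s else "\0"

assert setting_char_from_string("") == "\0"   # newly allowed
assert setting_char_from_string("\n") == "\n"
```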
--- dbms/src/Core/SettingsCommon.cpp | 20 +++++++------------- dbms/src/Core/SettingsCommon.h | 3 --- dbms/src/IO/DelimitedReadBuffer.h | 2 +- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/dbms/src/Core/SettingsCommon.cpp b/dbms/src/Core/SettingsCommon.cpp index a44f8bfd20e..950768d21db 100644 --- a/dbms/src/Core/SettingsCommon.cpp +++ b/dbms/src/Core/SettingsCommon.cpp @@ -533,12 +533,6 @@ void SettingString::write(WriteBuffer & buf) const } -void SettingChar::checkStringIsACharacter(const String & x) const -{ - if (x.size() != 1) - throw Exception("A setting's value string has to be an exactly one character long", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH); -} - String SettingChar::toString() const { return String(1, value); @@ -552,9 +546,10 @@ void SettingChar::set(char x) void SettingChar::set(const String & x) { - checkStringIsACharacter(x); - value = x[0]; - changed = true; + if (x.size() > 1) + throw Exception("A setting's value string has to be an exactly one character long", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH); + char c = (x.size() == 1) ? x[0] : '\0'; + set(c); } void SettingChar::set(const Field & x) @@ -565,10 +560,9 @@ void SettingChar::set(const Field & x) void SettingChar::set(ReadBuffer & buf) { - String x; - readBinary(x, buf); - checkStringIsACharacter(x); - set(x); + String s; + readBinary(s, buf); + set(s); } void SettingChar::write(WriteBuffer & buf) const diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index ff2c0cd9339..452161e1f94 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -335,9 +335,6 @@ struct SettingString struct SettingChar { -private: - void checkStringIsACharacter(const String & x) const; - public: char value; bool changed = false; diff --git a/dbms/src/IO/DelimitedReadBuffer.h b/dbms/src/IO/DelimitedReadBuffer.h index 0ad77f0d0ed..332fb4b3f77 100644 --- a/dbms/src/IO/DelimitedReadBuffer.h +++ b/dbms/src/IO/DelimitedReadBuffer.h @@ -36,7 +36,7 @@ protected: return false; BufferBase::set(buffer->position(), buffer->available(), 0); - put_delimiter = true; + put_delimiter = (delimiter != 0); } return true; From 08f85804d0630929efcbdd3f9b0286a611c86069 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 26 Mar 2019 16:07:00 +0300 Subject: [PATCH 014/102] Send query settings before data from client to server. Apply query settings only for the containing query (client). --- dbms/programs/client/Client.cpp | 39 ++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 24336822f88..3786ae7ee80 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -796,14 +796,33 @@ private: written_progress_chars = 0; written_first_block = false; - connection->forceConnected(); + { + /// Temporarily apply query settings to context. 
+ std::optional old_settings; + SCOPE_EXIT({ if (old_settings) context.setSettings(*old_settings); }); + auto apply_query_settings = [&](const IAST & settings_ast) + { + if (!old_settings) + old_settings.emplace(context.getSettingsRef()); + for (const auto & change : settings_ast.as()->changes) + context.setSetting(change.name, change.value); + }; + const auto * insert = parsed_query->as(); + if (insert && insert->settings_ast) + apply_query_settings(*insert->settings_ast); + /// FIXME: try to prettify this cast using `as<>()` + const auto * with_output = dynamic_cast(parsed_query.get()); + if (with_output && with_output->settings_ast) + apply_query_settings(*with_output->settings_ast); - /// INSERT query for which data transfer is needed (not an INSERT SELECT) is processed separately. - const auto * insert_query = parsed_query->as(); - if (insert_query && !insert_query->select) - processInsertQuery(); - else - processOrdinaryQuery(); + connection->forceConnected(); + + /// INSERT query for which data transfer is needed (not an INSERT SELECT) is processed separately. + if (insert && !insert->select) + processInsertQuery(); + else + processOrdinaryQuery(); + } /// Do not change context (current DB, settings) in case of an exception. if (!got_exception) @@ -963,8 +982,6 @@ private: { if (!insert->format.empty()) current_format = insert->format; - if (insert->settings_ast) - InterpreterSetQuery(insert->settings_ast, context).executeForCurrentContext(); } BlockInputStreamPtr block_input = context.getInputFormat( @@ -1247,10 +1264,6 @@ private: const auto & id = query_with_output->format->as(); current_format = id.name; } - if (query_with_output->settings_ast) - { - InterpreterSetQuery(query_with_output->settings_ast, context).executeForCurrentContext(); - } } if (has_vertical_output_suffix) From 378c6f7a238f6d3074437d51100e07dd2b274caa Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 29 Mar 2019 17:37:08 +0300 Subject: [PATCH 015/102] Improve the error message when format schema isn't set. 
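For orientation, the `format_schema` value that `FormatSchemaInfo` parses in the diff below has the shape `schema_file:message_name`, with a missing file extension defaulting per format ('proto' for Protobuf, 'capnp' for CapnProto). A simplified Python model of that parsing (names and error texts are approximate, not the C++ API):

```python
import os

def parse_format_schema(format_schema, format_name):
    # Default extension by format, as in getFormatSchemaDefaultFileExtension().
    default_ext = {"Protobuf": "proto", "CapnProto": "capnp"}.get(format_name, "")
    if not format_schema:
        raise ValueError("The format %s requires a schema. "
                         "The 'format_schema' setting should be set" % format_name)
    path, sep, message_name = format_schema.partition(":")
    if not sep:
        raise ValueError("Expected 'schema_file:message_name', got '%s'" % format_schema)
    if not os.path.splitext(path)[1] and default_ext:
        path += "." + default_ext
    return path, message_name

print(parse_format_schema("schema:Message", "Protobuf"))  # ('schema.proto', 'Message')
```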
--- dbms/src/Formats/CapnProtoRowInputStream.cpp | 2 +- dbms/src/Formats/FormatSchemaInfo.cpp | 38 ++++++++++++-------- dbms/src/Formats/FormatSchemaInfo.h | 6 +--- dbms/src/Formats/ProtobufRowInputStream.cpp | 2 +- dbms/src/Formats/ProtobufRowOutputStream.cpp | 2 +- 5 files changed, 27 insertions(+), 23 deletions(-) diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 692d1ec8dee..de5b54989c4 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -298,7 +298,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) [](ReadBuffer & buf, const Block & sample, const Context & context, UInt64 max_block_size, const FormatSettings & settings) { return std::make_shared( - std::make_shared(buf, sample, FormatSchemaInfo(context, "capnp")), + std::make_shared(buf, sample, FormatSchemaInfo(context, "CapnProto")), sample, max_block_size, settings); diff --git a/dbms/src/Formats/FormatSchemaInfo.cpp b/dbms/src/Formats/FormatSchemaInfo.cpp index 5fcf1f981eb..f01dbe457db 100644 --- a/dbms/src/Formats/FormatSchemaInfo.cpp +++ b/dbms/src/Formats/FormatSchemaInfo.cpp @@ -11,20 +11,29 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -FormatSchemaInfo::FormatSchemaInfo(const Context & context, const String & schema_file_extension, bool schema_required) + +namespace +{ + String getFormatSchemaDefaultFileExtension(const String & format) + { + if (format == "Protobuf") + return "proto"; + else if (format == "CapnProto") + return "capnp"; + else + return ""; + } +} + + +FormatSchemaInfo::FormatSchemaInfo(const Context & context, const String & format) { String format_schema = context.getSettingsRef().format_schema.toString(); if (format_schema.empty()) - { - if (schema_required) - { - throw Exception( - "Format schema requires the 'format_schema' setting to have the 'schema_file:message_name' format" - + (schema_file_extension.empty() ? "" : ", e.g. 'schema." + schema_file_extension + ":Message'"), - ErrorCodes::BAD_ARGUMENTS); - } - return; - } + throw Exception( + "The format " + format + " requires a schema. The 'format_schema' setting should be set", ErrorCodes::BAD_ARGUMENTS); + + String default_file_extension = getFormatSchemaDefaultFileExtension(format); size_t colon_pos = format_schema.find(':'); Poco::Path path; @@ -33,12 +42,11 @@ FormatSchemaInfo::FormatSchemaInfo(const Context & context, const String & schem { throw Exception( "Format schema requires the 'format_schema' setting to have the 'schema_file:message_name' format" - + (schema_file_extension.empty() ? "" : ", e.g. 'schema." + schema_file_extension + ":Message'") + ". Got '" + format_schema + + (default_file_extension.empty() ? "" : ", e.g. 'schema." + default_file_extension + ":Message'") + ". 
Got '" + format_schema + "'", ErrorCodes::BAD_ARGUMENTS); } - is_null = false; message_name = format_schema.substr(colon_pos + 1); auto default_schema_directory = [&context]() @@ -51,8 +59,8 @@ FormatSchemaInfo::FormatSchemaInfo(const Context & context, const String & schem return context.hasGlobalContext() && (context.getGlobalContext().getApplicationType() == Context::ApplicationType::SERVER); }; - if (path.getExtension().empty() && !schema_file_extension.empty()) - path.setExtension(schema_file_extension); + if (path.getExtension().empty() && !default_file_extension.empty()) + path.setExtension(default_file_extension); if (path.isAbsolute()) { diff --git a/dbms/src/Formats/FormatSchemaInfo.h b/dbms/src/Formats/FormatSchemaInfo.h index 6ad9e6fb2b0..f7921b5f8e3 100644 --- a/dbms/src/Formats/FormatSchemaInfo.h +++ b/dbms/src/Formats/FormatSchemaInfo.h @@ -10,10 +10,7 @@ class Context; class FormatSchemaInfo { public: - FormatSchemaInfo() = default; - FormatSchemaInfo(const Context & context, const String & schema_file_extension = String(), bool schema_required = true); - - bool isNull() const { return is_null; } + FormatSchemaInfo(const Context & context, const String & format); /// Returns path to the schema file. const String & schemaPath() const { return schema_path; } @@ -26,7 +23,6 @@ public: const String & messageName() const { return message_name; } private: - bool is_null = true; String schema_path; String schema_directory; String message_name; diff --git a/dbms/src/Formats/ProtobufRowInputStream.cpp b/dbms/src/Formats/ProtobufRowInputStream.cpp index a05ef650b0c..86f2b15fc1c 100644 --- a/dbms/src/Formats/ProtobufRowInputStream.cpp +++ b/dbms/src/Formats/ProtobufRowInputStream.cpp @@ -75,7 +75,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared( - std::make_shared(buf, sample, FormatSchemaInfo(context, "proto")), + std::make_shared(buf, sample, FormatSchemaInfo(context, "Protobuf")), sample, max_block_size, settings); }); } diff --git a/dbms/src/Formats/ProtobufRowOutputStream.cpp b/dbms/src/Formats/ProtobufRowOutputStream.cpp index 3253755d970..791c419e150 100644 --- a/dbms/src/Formats/ProtobufRowOutputStream.cpp +++ b/dbms/src/Formats/ProtobufRowOutputStream.cpp @@ -38,7 +38,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory) "Protobuf", [](WriteBuffer & buf, const Block & header, const Context & context, const FormatSettings &) { return std::make_shared( - std::make_shared(buf, header, FormatSchemaInfo(context, "proto")), header); + std::make_shared(buf, header, FormatSchemaInfo(context, "Protobuf")), header); }); } From ba58fbbbec102405c4908b7c83a0460396f63813 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 29 Mar 2019 16:53:00 +0300 Subject: [PATCH 016/102] Add a kafka test using protobuf format. 
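The Kafka/Protobuf test added below relies on ClickHouse's length-delimited Protobuf convention: each serialized message is prefixed with a varint of its byte length, which is what `kafka_produce_protobuf_messages` builds with `_VarintBytes`. A standalone sketch of that framing (helper name is illustrative):

```python
from google.protobuf.internal.encoder import _VarintBytes

import kafka_pb2  # generated from kafka.proto, as in the test below

def frame_key_value_pairs(pairs):
    # Concatenate varint-length-prefixed KeyValuePair messages — the
    # framing ClickHouse's Protobuf row input expects.
    data = b""
    for key, value in pairs:
        msg = kafka_pb2.KeyValuePair(key=key, value=value)
        body = msg.SerializeToString()
        data += _VarintBytes(len(body)) + body
    return data

payload = frame_key_value_pairs([(0, "0"), (1, "1")])
```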
--- .../helpers/docker_compose_kafka.yml | 19 +++-- dbms/tests/integration/image/Dockerfile | 2 +- .../format_schemas/kafka.proto | 6 ++ .../test_storage_kafka/kafka_pb2.py | 76 +++++++++++++++++++ .../integration/test_storage_kafka/test.py | 68 ++++++++++++++--- 5 files changed, 153 insertions(+), 18 deletions(-) create mode 100755 dbms/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/kafka.proto create mode 100644 dbms/tests/integration/test_storage_kafka/kafka_pb2.py diff --git a/dbms/tests/integration/helpers/docker_compose_kafka.yml b/dbms/tests/integration/helpers/docker_compose_kafka.yml index e896f3009be..bed537a9760 100644 --- a/dbms/tests/integration/helpers/docker_compose_kafka.yml +++ b/dbms/tests/integration/helpers/docker_compose_kafka.yml @@ -15,14 +15,17 @@ services: image: confluentinc/cp-kafka:4.1.0 hostname: kafka1 ports: - - "9092:9092" + - "9092:9092" environment: - KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka1:9092" - KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" - KAFKA_BROKER_ID: 1 - KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092 + KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 depends_on: - - kafka_zookeeper + - kafka_zookeeper security_opt: - - label:disable + - label:disable diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index 1dd5c1713b2..d36f9ef0e7b 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -25,7 +25,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal +RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce diff --git a/dbms/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/kafka.proto b/dbms/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/kafka.proto new file mode 100755 index 00000000000..96b24be4938 --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/kafka.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message KeyValuePair { + uint64 key = 1; + string value = 2; +} \ No newline at end of file diff --git a/dbms/tests/integration/test_storage_kafka/kafka_pb2.py b/dbms/tests/integration/test_storage_kafka/kafka_pb2.py new file mode 100644 index 00000000000..79890682125 --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/kafka_pb2.py @@ -0,0 +1,76 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: clickhouse_path/format_schemas/kafka.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='clickhouse_path/format_schemas/kafka.proto', + package='', + syntax='proto3', + serialized_pb=_b('\n*clickhouse_path/format_schemas/kafka.proto\"*\n\x0cKeyValuePair\x12\x0b\n\x03key\x18\x01 \x01(\x04\x12\r\n\x05value\x18\x02 \x01(\tb\x06proto3') +) +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + + + + +_KEYVALUEPAIR = _descriptor.Descriptor( + name='KeyValuePair', + full_name='KeyValuePair', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='key', full_name='KeyValuePair.key', index=0, + number=1, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='value', full_name='KeyValuePair.value', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=46, + serialized_end=88, +) + +DESCRIPTOR.message_types_by_name['KeyValuePair'] = _KEYVALUEPAIR + +KeyValuePair = _reflection.GeneratedProtocolMessageType('KeyValuePair', (_message.Message,), dict( + DESCRIPTOR = _KEYVALUEPAIR, + __module__ = 'clickhouse_path.format_schemas.kafka_pb2' + # @@protoc_insertion_point(class_scope:KeyValuePair) + )) +_sym_db.RegisterMessage(KeyValuePair) + + +# @@protoc_insertion_point(module_scope) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 27ae3c97536..ed4b6d14d25 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -7,6 +7,17 @@ from helpers.test_tools import TSV import json import subprocess +from kafka import KafkaProducer +from google.protobuf.internal.encoder import _VarintBytes + +""" +protoc --version +libprotoc 3.0.0 + +# to create kafka_pb2.py +protoc --python_out=. kafka.proto +""" +import kafka_pb2 # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. 
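A note on test structure: because Kafka consumption is asynchronous, the assertions below poll `SELECT * FROM test.kafka` up to 50 times rather than reading once. The same pattern, factored into a hypothetical helper (not part of the patch; the delay parameter is an assumption — the loops below retry without sleeping):

```python
import time

def poll_until_complete(fetch, is_complete, attempts=50, delay=0.5):
    # Accumulate partial results until the checker is satisfied or we
    # run out of attempts; mirrors the `for i in range(50)` loops below.
    result = ""
    for _ in range(attempts):
        result += fetch()
        if is_complete(result):
            break
        time.sleep(delay)
    return result
```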
@@ -17,7 +28,8 @@ import subprocess cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', main_configs=['configs/kafka.xml'], - with_kafka=True) + with_kafka=True, + clickhouse_path_dir='clickhouse_path') kafka_id = '' @@ -30,7 +42,7 @@ def check_kafka_is_available(): kafka_id, '/usr/bin/kafka-broker-api-versions', '--bootstrap-server', - 'PLAINTEXT://localhost:9092'), + 'INSIDE://localhost:9092'), stdout=subprocess.PIPE) p.communicate() return p.returncode == 0 @@ -56,7 +68,7 @@ def kafka_produce(topic, messages): kafka_id, '/usr/bin/kafka-console-producer', '--broker-list', - 'localhost:9092', + 'INSIDE://localhost:9092', '--topic', topic, '--sync', @@ -65,7 +77,21 @@ def kafka_produce(topic, messages): stdin=subprocess.PIPE) p.communicate(messages) p.stdin.close() - print("Produced {} messages".format(len(messages.splitlines()))) + print("Produced {} messages for topic {}".format(len(messages.splitlines()), topic)) + + +def kafka_produce_protobuf_messages(topic, start_index, num_messages): + data = '' + for i in range(start_index, start_index + num_messages): + msg = kafka_pb2.KeyValuePair() + msg.key = i + msg.value = str(i) + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + producer = KafkaProducer(bootstrap_servers="localhost:9092") + producer.send(topic=topic, value=data) + producer.flush() + print("Produced {} messages for topic {}".format(num_messages, topic)) # Since everything is async and shaky when receiving messages from Kafka, @@ -110,7 +136,7 @@ def kafka_setup_teardown(): def test_kafka_settings_old_syntax(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka('kafka1:9092', 'old', 'old', 'JSONEachRow', '\\n'); + ENGINE = Kafka('kafka1:19092', 'old', 'old', 'JSONEachRow', '\\n'); ''') # Don't insert malformed messages since old settings syntax @@ -133,7 +159,7 @@ def test_kafka_settings_new_syntax(kafka_cluster): CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS - kafka_broker_list = 'kafka1:9092', + kafka_broker_list = 'kafka1:19092', kafka_topic_list = 'new', kafka_group_name = 'new', kafka_format = 'JSONEachRow', @@ -168,7 +194,7 @@ def test_kafka_csv_with_delimiter(kafka_cluster): CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS - kafka_broker_list = 'kafka1:9092', + kafka_broker_list = 'kafka1:19092', kafka_topic_list = 'csv', kafka_group_name = 'csv', kafka_format = 'CSV', @@ -193,7 +219,7 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS - kafka_broker_list = 'kafka1:9092', + kafka_broker_list = 'kafka1:19092', kafka_topic_list = 'tsv', kafka_group_name = 'tsv', kafka_format = 'TSV', @@ -213,6 +239,30 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): kafka_check_result(result, True) +def test_kafka_protobuf(kafka_cluster): + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'pb', + kafka_group_name = 'pb', + kafka_format = 'Protobuf', + kafka_schema = 'kafka.proto:KeyValuePair'; + ''') + + kafka_produce_protobuf_messages('pb', 0, 20) + kafka_produce_protobuf_messages('pb', 20, 1) + kafka_produce_protobuf_messages('pb', 21, 29) + + result = '' + for i in range(50): + result += instance.query('SELECT * FROM test.kafka') + if kafka_check_result(result): + break + kafka_check_result(result, True) + 
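A note on kafka_produce_protobuf_messages above: ClickHouse's Protobuf input format reads length-delimited messages, so each serialized KeyValuePair is preceded by its byte length encoded as a varint, which is exactly what _VarintBytes emits. A minimal standalone sketch of that framing, detached from the Kafka producer:

```python
# Sketch of the length-delimited framing used by the test: each message is
# prefixed with its size as a protobuf varint before being concatenated.
from google.protobuf.internal.encoder import _VarintBytes
import kafka_pb2

def frame_key_value_pairs(start_index, num_messages):
    data = b''
    for i in range(start_index, start_index + num_messages):
        msg = kafka_pb2.KeyValuePair(key=i, value=str(i))
        payload = msg.SerializeToString()
        data += _VarintBytes(len(payload)) + payload
    return data
```

Because the frames are simply concatenated, a single Kafka message can carry a whole batch of rows, which is why test_kafka_protobuf above covers the 50 rows with just three sends of 20, 1 and 29 messages.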
+ def test_kafka_materialized_view(kafka_cluster): instance.query(''' DROP TABLE IF EXISTS test.view; @@ -220,7 +270,7 @@ def test_kafka_materialized_view(kafka_cluster): CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS - kafka_broker_list = 'kafka1:9092', + kafka_broker_list = 'kafka1:19092', kafka_topic_list = 'json', kafka_group_name = 'json', kafka_format = 'JSONEachRow', From 89515861dff53f74ea2ccd9660bc578373a7f588 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Fri, 29 Mar 2019 21:20:23 +0000 Subject: [PATCH 017/102] asof join without using std::map, but still only on u32 --- dbms/src/Interpreters/Join.cpp | 149 ++++++++++++++++++++++----------- dbms/src/Interpreters/Join.h | 66 ++++++++++++--- 2 files changed, 152 insertions(+), 63 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index ddc2173d75b..f8af06bf6a8 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -298,12 +298,17 @@ void Join::setSampleBlock(const Block & block) if (kind != ASTTableJoin::Kind::Left and kind != ASTTableJoin::Kind::Inner) throw Exception("ASOF only supports LEFT and INNER as base joins", ErrorCodes::NOT_IMPLEMENTED); - if (key_columns.back()->sizeOfValueIfFixed() != sizeof(ASOFTimeType)) + const IColumn * asof_column = key_columns.back(); + + if (auto t = AsofRowRefs::getType(asof_column)) + asof_type = *t; + else { - std::string msg = "ASOF join column needs to have size "; - msg += std::to_string(sizeof(ASOFTimeType)); + std::string msg = "ASOF join not supported for type"; + msg += asof_column->getFamilyName(); throw Exception(msg, ErrorCodes::BAD_TYPE_OF_FIELD); } + key_columns.pop_back(); if (key_columns.empty()) @@ -314,7 +319,7 @@ void Join::setSampleBlock(const Block & block) /// Therefore, add it back in such that it can be extracted appropriately from the full stored /// key_columns and key_sizes init(chooseMethod(key_columns, key_sizes)); - key_sizes.push_back(sizeof(ASOFTimeType)); + key_sizes.push_back(AsofRowRefs::getSize(asof_type)); } else { @@ -357,34 +362,99 @@ void Join::setSampleBlock(const Block & block) convertColumnToNullable(sample_block_with_columns_to_add.getByPosition(i)); } -void Join::TSRowRef::insert(Join::ASOFTimeType t, const Block * block, size_t row_num) +void Join::AsofRowRefs::AsofLookups::create(Join::AsofRowRefs::AsofType which) { - ts.insert(std::pair(t, RowRef(block, row_num))); + switch (which) + { + #define M(NAME, TYPE) \ + case AsofType::NAME: NAME = std::make_unique(); break; + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + } } -std::string Join::TSRowRef::dumpStructure() const +void Join::AsofRowRefs::create(AsofType which) { - std::stringstream ss; + type = which; + lookups.create(which); +} - for (auto const& x : ts) +template +using AsofGetterType = ColumnsHashing::HashMethodOneNumber; + +void Join::AsofRowRefs::insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool) +{ + assert(!sorted); + switch (type) { - ss << "(t=" << x.first << " row_num=" << x.second.row_num << " ptr=" << x.second.block << "),"; + #define M(NAME, TYPE) \ + case AsofType::NAME: { \ + auto asof_getter = AsofGetterType(asof_column); \ + auto entry = AsofEntry(asof_getter.getKey(row_num, pool), RowRef(block, row_num)); \ + lookups.NAME->push_back(entry); \ + break; \ + } + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + } +} + +const Join::RowRef * Join::AsofRowRefs::findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const +{ + if (!sorted) + 
{ + // sort whenever needed + switch (type) + { + #define M(NAME, TYPE) \ + case AsofType::NAME: std::sort(lookups.NAME->begin(), lookups.NAME->end()); break; + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + } + sorted = true; } - return ss.str(); + switch (type) + { + #define M(NAME, TYPE) \ + case AsofType::NAME: { \ + auto asof_getter = AsofGetterType(asof_column); \ + TYPE key = asof_getter.getKey(row_num, pool); \ + auto it = std::upper_bound(lookups.NAME->cbegin(), lookups.NAME->cend(), AsofEntry(key)); \ + if (it == lookups.NAME->cbegin()) \ + return nullptr; \ + return &((--it)->row_ref); \ + } + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + } + + __builtin_unreachable(); } -size_t Join::TSRowRef::size() const + +std::optional Join::AsofRowRefs::getType(const IColumn * asof_column) { - return ts.size(); + #define M(NAME, TYPE) \ + if (strcmp(#TYPE, asof_column->getFamilyName()) == 0) \ + return AsofType::NAME; + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + return {}; } -std::optional> Join::TSRowRef::findAsof(Join::ASOFTimeType t) const + +size_t Join::AsofRowRefs::getSize(Join::AsofRowRefs::AsofType type) { - auto it = ts.upper_bound(t); - if (it == ts.cbegin()) - return {}; - return *(--it); + switch (type) + { + #define M(NAME, TYPE) \ + case AsofType::NAME: return sizeof(TYPE); + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + } + __builtin_unreachable(); } + namespace { /// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. @@ -435,8 +505,7 @@ namespace template struct Inserter { - template - static ALWAYS_INLINE void insert(Map & map, KeyGetter & key_getter, AsofGetter & asof_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, const IColumn * asof_column) { auto emplace_result = key_getter.emplaceKey(map, i, pool); typename Map::mapped_type * time_series_map = &emplace_result.getMapped(); @@ -444,10 +513,11 @@ namespace if (emplace_result.isInserted()) { time_series_map = new (time_series_map) typename Map::mapped_type(); + // TODO extract this from either the column type or from the main join object + time_series_map->create(Join::AsofRowRefs::AsofType::key32); } - auto k = asof_getter.getKey(i, pool); - time_series_map->insert(k, stored_block, i); -// std::cout << "inserted key into time series map=" << k << " result=" << time_series_map->dumpStructure() << std::endl; + + time_series_map->insert(asof_column, stored_block, i, pool); } }; @@ -469,10 +539,8 @@ namespace continue; if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - { - auto asof_getter = Join::AsofGetterType(asof_column); - Inserter::insert(map, key_getter, asof_getter, stored_block, i, pool); - } else + Inserter::insert(map, key_getter, stored_block, i, pool, asof_column); + else Inserter::insert(map, key_getter, stored_block, i, pool); } } @@ -678,20 +746,6 @@ void addFoundRow(const typename Map::mapped_type & mapped, AddedColumns & added, } }; -template -bool addFoundRowAsof(const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset [[maybe_unused]], Join::ASOFTimeType asof_key) -{ - if (auto v = mapped.findAsof(asof_key)) - { - std::pair res = *v; -// std::cout << "Adder::addFound" << " to_add" << num_columns_to_add << " i=" << i << " asof_key=" << asof_key << " found=" << res.first << std::endl; - added.appendFromBlock(*res.second.block, res.second.row_num); - return true; - } -// 
std::cout << "Adder::addFound" << " not found in map" << num_columns_to_add << " i=" << i << " asof_key=" << asof_key << std::endl; - return false; -} - template void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) { @@ -739,19 +793,14 @@ std::unique_ptr NO_INLINE joinRightIndexedColumns( auto & mapped = find_result.getMapped(); if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - { - Join::AsofGetterType asof_getter(asof_column); - auto asof_key = asof_getter.getKey(i, pool); - bool actually_found = addFoundRowAsof(mapped, added_columns, current_offset, asof_key); - - if (actually_found) + if (const Join::RowRef * found = mapped.findAsof(asof_column, i, pool)) { - filter[i] = 1; - mapped.setUsed(); + filter[i] = 1; + mapped.setUsed(); + added_columns.appendFromBlock(*found->block, found->row_num); } else addNotFoundRow<_add_missing>(added_columns, current_offset); - } else { filter[i] = 1; diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 426ea93a365..1b716d422ac 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -151,20 +151,59 @@ public: RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_) {} }; - /// Map for a time series - using ASOFTimeType = UInt32; - using AsofGetterType = ColumnsHashing::HashMethodOneNumber; - struct TSRowRef + struct AsofRowRefs { - // TODO use the arena allocator to get memory for this - // This would require ditching std::map because std::allocator is incompatible with the arena allocator - std::map ts; + /// Different types of asof join keys + #define APPLY_FOR_ASOF_JOIN_VARIANTS(M) \ + M(key32, UInt32) \ + M(key64, UInt64) - TSRowRef() {} - void insert(ASOFTimeType t, const Block * block, size_t row_num); - std::optional> findAsof(ASOFTimeType t) const; - std::string dumpStructure() const; - size_t size() const; + enum class AsofType + { + #define M(NAME, TYPE) NAME, + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + }; + + static std::optional getType(const IColumn * asof_column); + static size_t getSize(AsofType type); + + template + struct AsofEntry + { + T asof_value; + RowRef row_ref; + + AsofEntry(T v) : asof_value(v) {} + AsofEntry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} + + bool operator< (const AsofEntry& o) const + { + return asof_value < o.asof_value; + } + }; + + struct AsofLookups + { + #define M(NAME, TYPE) \ + std::unique_ptr>> NAME; + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + + void create(AsofType which); + }; + + AsofRowRefs() {} + + void create(AsofType which); + void insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool); + + const RowRef * findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const; + + private: + AsofType type; + mutable AsofLookups lookups; + mutable bool sorted = false; }; /** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined). 
@@ -297,7 +336,7 @@ public: using MapsAnyFull = MapsTemplate>; using MapsAnyFullOverwrite = MapsTemplate>; using MapsAllFull = MapsTemplate>; - using MapsAsof = MapsTemplate>; + using MapsAsof = MapsTemplate>; template struct KindTrait @@ -400,6 +439,7 @@ private: private: Type type = Type::EMPTY; + AsofRowRefs::AsofType asof_type; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); From 389f1088076d56a45def73198de8ff0d66974020 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 30 Mar 2019 01:32:08 +0000 Subject: [PATCH 018/102] working multi type asof join columns --- dbms/src/Interpreters/Join.cpp | 62 +++++++++++++++------------------- dbms/src/Interpreters/Join.h | 27 +++++++++------ 2 files changed, 44 insertions(+), 45 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index f8af06bf6a8..d09e0f10a66 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -362,23 +362,18 @@ void Join::setSampleBlock(const Block & block) convertColumnToNullable(sample_block_with_columns_to_add.getByPosition(i)); } -void Join::AsofRowRefs::AsofLookups::create(Join::AsofRowRefs::AsofType which) +void Join::AsofRowRefs::Lookups::create(Join::AsofRowRefs::AsofType which) { switch (which) { + case AsofType::EMPTY: break; #define M(NAME, TYPE) \ - case AsofType::NAME: NAME = std::make_unique(); break; + case AsofType::NAME: NAME = std::make_unique(); break; APPLY_FOR_ASOF_JOIN_VARIANTS(M) #undef M } } -void Join::AsofRowRefs::create(AsofType which) -{ - type = which; - lookups.create(which); -} - template using AsofGetterType = ColumnsHashing::HashMethodOneNumber; @@ -387,13 +382,14 @@ void Join::AsofRowRefs::insert(const IColumn * asof_column, const Block * block, assert(!sorted); switch (type) { + case AsofType::EMPTY: break; #define M(NAME, TYPE) \ - case AsofType::NAME: { \ - auto asof_getter = AsofGetterType(asof_column); \ - auto entry = AsofEntry(asof_getter.getKey(row_num, pool), RowRef(block, row_num)); \ - lookups.NAME->push_back(entry); \ - break; \ - } + case AsofType::NAME: { \ + auto asof_getter = AsofGetterType(asof_column); \ + auto entry = Entry(asof_getter.getKey(row_num, pool), RowRef(block, row_num)); \ + lookups.NAME->push_back(entry); \ + break; \ + } APPLY_FOR_ASOF_JOIN_VARIANTS(M) #undef M } @@ -406,6 +402,7 @@ const Join::RowRef * Join::AsofRowRefs::findAsof(const IColumn * asof_column, si // sort whenever needed switch (type) { + case AsofType::EMPTY: break; #define M(NAME, TYPE) \ case AsofType::NAME: std::sort(lookups.NAME->begin(), lookups.NAME->end()); break; APPLY_FOR_ASOF_JOIN_VARIANTS(M) @@ -416,11 +413,12 @@ const Join::RowRef * Join::AsofRowRefs::findAsof(const IColumn * asof_column, si switch (type) { + case AsofType::EMPTY: return nullptr; #define M(NAME, TYPE) \ case AsofType::NAME: { \ auto asof_getter = AsofGetterType(asof_column); \ TYPE key = asof_getter.getKey(row_num, pool); \ - auto it = std::upper_bound(lookups.NAME->cbegin(), lookups.NAME->cend(), AsofEntry(key)); \ + auto it = std::upper_bound(lookups.NAME->cbegin(), lookups.NAME->cend(), Entry(key)); \ if (it == lookups.NAME->cbegin()) \ return nullptr; \ return &((--it)->row_ref); \ @@ -446,6 +444,7 @@ size_t Join::AsofRowRefs::getSize(Join::AsofRowRefs::AsofType type) { switch (type) { + case AsofType::EMPTY: return 0; #define M(NAME, TYPE) \ case AsofType::NAME: return sizeof(TYPE); APPLY_FOR_ASOF_JOIN_VARIANTS(M) @@ -461,13 +460,13 @@ namespace template struct Inserter { - static void insert(Map & map, 
KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool); + static void insert(const Join *, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool); }; template struct Inserter { - static ALWAYS_INLINE void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insert(const Join *, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); @@ -479,7 +478,7 @@ namespace template struct Inserter { - static ALWAYS_INLINE void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insert(const Join *, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); @@ -505,18 +504,13 @@ namespace template struct Inserter { - static ALWAYS_INLINE void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, const IColumn * asof_column) + static ALWAYS_INLINE void insert(const Join * join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, const IColumn * asof_column) { auto emplace_result = key_getter.emplaceKey(map, i, pool); typename Map::mapped_type * time_series_map = &emplace_result.getMapped(); if (emplace_result.isInserted()) - { - time_series_map = new (time_series_map) typename Map::mapped_type(); - // TODO extract this from either the column type or from the main join object - time_series_map->create(Join::AsofRowRefs::AsofType::key32); - } - + time_series_map = new (time_series_map) typename Map::mapped_type(join->getAsofType()); time_series_map->insert(asof_column, stored_block, i, pool); } }; @@ -524,7 +518,7 @@ namespace template void NO_INLINE insertFromBlockImplTypeCase( - Map & map, size_t rows, const ColumnRawPtrs & key_columns, + const Join * join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { const IColumn * asof_column [[maybe_unused]] = nullptr; @@ -539,28 +533,28 @@ namespace continue; if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - Inserter::insert(map, key_getter, stored_block, i, pool, asof_column); + Inserter::insert(join, map, key_getter, stored_block, i, pool, asof_column); else - Inserter::insert(map, key_getter, stored_block, i, pool); + Inserter::insert(join, map, key_getter, stored_block, i, pool); } } template void insertFromBlockImplType( - Map & map, size_t rows, const ColumnRawPtrs & key_columns, + const Join * join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { if (null_map) - insertFromBlockImplTypeCase(map, rows, key_columns, key_sizes, stored_block, null_map, pool); + insertFromBlockImplTypeCase(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool); else - insertFromBlockImplTypeCase(map, rows, key_columns, key_sizes, stored_block, null_map, pool); + insertFromBlockImplTypeCase(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool); } template void insertFromBlockImpl( - Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, + const Join * join, Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { 
switch (type) @@ -571,7 +565,7 @@ namespace #define M(TYPE) \ case Join::Type::TYPE: \ insertFromBlockImplType>::Type>(\ - *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, pool); \ + join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, pool); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -658,7 +652,7 @@ bool Join::insertFromBlock(const Block & block) { dispatch([&](auto, auto strictness_, auto & map) { - insertFromBlockImpl(type, map, rows, key_columns, key_sizes, stored_block, null_map, pool); + insertFromBlockImpl(this, type, map, rows, key_columns, key_sizes, stored_block, null_map, pool); }); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 1b716d422ac..dcd4ea6e9d4 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -160,6 +160,7 @@ public: enum class AsofType { + EMPTY, #define M(NAME, TYPE) NAME, APPLY_FOR_ASOF_JOIN_VARIANTS(M) #undef M @@ -169,43 +170,47 @@ public: static size_t getSize(AsofType type); template - struct AsofEntry + struct Entry { T asof_value; RowRef row_ref; - AsofEntry(T v) : asof_value(v) {} - AsofEntry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} + Entry(T v) : asof_value(v) {} + Entry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} - bool operator< (const AsofEntry& o) const + bool operator< (const Entry& o) const { return asof_value < o.asof_value; } }; - struct AsofLookups + struct Lookups { #define M(NAME, TYPE) \ - std::unique_ptr>> NAME; + std::unique_ptr>> NAME; APPLY_FOR_ASOF_JOIN_VARIANTS(M) #undef M void create(AsofType which); }; - AsofRowRefs() {} + AsofRowRefs() : type(AsofType::EMPTY) {} + AsofRowRefs(AsofType t) : type(t) { + lookups.create(t); + } - void create(AsofType which); void insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool); - const RowRef * findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const; private: - AsofType type; - mutable AsofLookups lookups; + const AsofType type; + mutable Lookups lookups; mutable bool sorted = false; }; + AsofRowRefs::AsofType getAsofType() const { return asof_type; } + + /** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined). * Depending on template parameter, decide whether to overwrite existing values when encountering the same key again * with_used is for implementation of RIGHT and FULL JOINs. 
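Taken together, these two commits replace the earlier std::map time series with a per-key vector that is sorted lazily: inserts only append, the first findAsof sorts once, and every probe does an upper_bound plus one step back to reach the latest entry whose key does not exceed the probed value. A compact Python model of that behaviour, purely illustrative rather than the ClickHouse code:

```python
import bisect

class AsofRowRefsModel:
    """Per-join-key time series: append rows, then answer ASOF probes."""

    def __init__(self):
        self.keys = []       # asof column values, e.g. timestamps
        self.rows = []       # references to the stored right-side rows
        self.sorted = False

    def insert(self, asof_value, row_ref):
        # The C++ code asserts that no insert happens after sorting.
        self.keys.append(asof_value)
        self.rows.append(row_ref)
        self.sorted = False

    def find_asof(self, asof_value):
        if not self.sorted:  # sorted lazily, on the first lookup
            pairs = sorted(zip(self.keys, self.rows), key=lambda p: p[0])
            self.keys = [k for k, _ in pairs]
            self.rows = [r for _, r in pairs]
            self.sorted = True
        i = bisect.bisect_right(self.keys, asof_value)  # ~ std::upper_bound
        return self.rows[i - 1] if i else None          # ~ &((--it)->row_ref)

refs = AsofRowRefsModel()
refs.insert(3, 'row@t=3'); refs.insert(1, 'row@t=1'); refs.insert(5, 'row@t=5')
assert refs.find_asof(4) == 'row@t=3'   # latest key <= 4
assert refs.find_asof(0) is None        # nothing at or before 0
```

The pay-off matches the assert(!sorted) in insert: all right-side rows arrive while the hash table is built, so the sort cost is paid once per key rather than on every insertion as with std::map.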
From 18d3813ff9e866cfa7ae65516eec1e9126ff91ba Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sat, 30 Mar 2019 12:39:45 +0300 Subject: [PATCH 019/102] Optimize Volnitsky by inlining compare function --- dbms/src/Common/StringSearcher.h | 6 +++--- dbms/src/Functions/FunctionsStringSearch.cpp | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/dbms/src/Common/StringSearcher.h b/dbms/src/Common/StringSearcher.h index da34ccd820a..6d897e4326d 100644 --- a/dbms/src/Common/StringSearcher.h +++ b/dbms/src/Common/StringSearcher.h @@ -156,7 +156,7 @@ public: #endif } - bool compare(const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * pos) const { static const Poco::UTF8Encoding utf8; @@ -374,7 +374,7 @@ public: #endif } - bool compare(const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * pos) const { #ifdef __SSE4_1__ if (pageSafe(pos)) @@ -568,7 +568,7 @@ public: #endif } - bool compare(const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * pos) const { #ifdef __SSE4_1__ if (pageSafe(pos)) diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index d95a50fb39c..2744811d336 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -173,10 +173,7 @@ struct PositionImpl /// We check that the entry does not pass through the boundaries of strings. if (pos + needle.size() < begin + offsets[i]) - { - size_t prev_offset = i != 0 ? offsets[i - 1] : 0; - res[i] = 1 + Impl::countChars(reinterpret_cast(begin + prev_offset), reinterpret_cast(pos)); - } + res[i] = 1 + Impl::countChars(reinterpret_cast(begin + offsets[i - 1]), reinterpret_cast(pos)); else res[i] = 0; @@ -306,7 +303,8 @@ struct MultiSearchAllPositionsImpl const std::vector & needles, PaddedPODArray & res) { - auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { + auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 + { return 1 + Impl::countChars(reinterpret_cast(start), reinterpret_cast(end)); }; Impl::createMultiSearcherInBigHaystack(needles).searchAllPositions(haystack_data, haystack_offsets, res_callback, res); @@ -341,7 +339,8 @@ struct MultiSearchFirstPositionImpl const std::vector & needles, PaddedPODArray & res) { - auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { + auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 + { return 1 + Impl::countChars(reinterpret_cast(start), reinterpret_cast(end)); }; Impl::createMultiSearcherInBigHaystack(needles).searchFirstPosition(haystack_data, haystack_offsets, res_callback, res); From 9b5950f4034d7c2f33399cd519da9118b05865a9 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 21 Feb 2019 12:21:20 +0300 Subject: [PATCH 020/102] Fixed test failures when running clickhouse-server on different host Fixed test in docker: writing to read-only filesystem Multi-stage builds for test-runner and server to allow putting packages directory anywhere Fixed more tests --- dbms/tests/clickhouse-test | 7 +- .../00354_host_command_line_option.sh | 6 +- .../00368_format_option_collision.sh | 2 +- .../0_stateless/00634_logging_shard.sh | 1 + ...4_performance_introspection_and_logging.sh | 1 + .../0_stateless/00646_url_engine.python | 34 ++++--- .../queries/0_stateless/00837_minmax_index.sh | 2 + .../queries/0_stateless/00838_unique_index.sh | 2 + dbms/tests/queries/shell_config.sh | 7 ++ docker/server/local.Dockerfile | 31 ++++--- 
...lickhouse-statelest-test-runner.Dockerfile | 17 ++-- docker/test/test_runner.sh | 89 +++++++++++++++---- docker/test/test_runner_docker_compose.yaml | 30 ++++--- 13 files changed, 163 insertions(+), 66 deletions(-) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index ac9dcde1f36..3969d93e3c0 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -341,7 +341,7 @@ def main(args): if result_is_different: diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0] diff = unicode(diff, errors='replace', encoding='utf-8') - cat = Popen(['cat', '-A'], stdin=PIPE, stdout=PIPE).communicate(input=diff)[0] + cat = Popen(['cat', '-A'], stdin=PIPE, stdout=PIPE).communicate(input=diff.encode(encoding='utf-8', errors='replace'))[0] failure = et.Element("failure", attrib = {"message": "result differs with reference"}) report_testcase.append(failure) @@ -367,12 +367,13 @@ def main(args): print(colored("Break tests execution", "red")) raise e except: - (exc_type, exc_value) = sys.exc_info()[:2] + import traceback + exc_type, exc_value, tb = sys.exc_info() error = et.Element("error", attrib = {"type": exc_type.__name__, "message": str(exc_value)}) report_testcase.append(error) failures += 1 - print("{0} - Test internal error: {1}\n{2}".format(MSG_FAIL, exc_type.__name__, exc_value)) + print("{0} - Test internal error: {1}\n{2}\n{3}".format(MSG_FAIL, exc_type.__name__, exc_value, "\n".join(traceback.format_tb(tb, 10)))) finally: dump_report(args.output, suite, name, report_testcase) diff --git a/dbms/tests/queries/0_stateless/00354_host_command_line_option.sh b/dbms/tests/queries/0_stateless/00354_host_command_line_option.sh index c158e929ec0..a9a510dc45f 100755 --- a/dbms/tests/queries/0_stateless/00354_host_command_line_option.sh +++ b/dbms/tests/queries/0_stateless/00354_host_command_line_option.sh @@ -3,6 +3,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -$CLICKHOUSE_CLIENT --host=localhost --query="SELECT 1"; -$CLICKHOUSE_CLIENT --host localhost --query "SELECT 1"; -$CLICKHOUSE_CLIENT -hlocalhost -q"SELECT 1"; +clickhouse_client_removed_host_parameter --host="${CLICKHOUSE_HOST}" --query="SELECT 1"; +clickhouse_client_removed_host_parameter --host "${CLICKHOUSE_HOST}" --query "SELECT 1"; +clickhouse_client_removed_host_parameter -h"${CLICKHOUSE_HOST}" -q"SELECT 1"; diff --git a/dbms/tests/queries/0_stateless/00368_format_option_collision.sh b/dbms/tests/queries/0_stateless/00368_format_option_collision.sh index a9c1178b495..6d0a355d78a 100755 --- a/dbms/tests/queries/0_stateless/00368_format_option_collision.sh +++ b/dbms/tests/queries/0_stateless/00368_format_option_collision.sh @@ -3,4 +3,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -$CLICKHOUSE_CLIENT --host=localhost --query="SELECT * FROM ext" --format=Vertical --external --file=- --structure="s String" --name=ext --format=JSONEachRow <<< '{"s":"Hello"}' +clickhouse_client_removed_host_parameter --host="${CLICKHOUSE_HOST}" --query="SELECT * FROM ext" --format=Vertical --external --file=- --structure="s String" --name=ext --format=JSONEachRow <<< '{"s":"Hello"}' diff --git a/dbms/tests/queries/0_stateless/00634_logging_shard.sh b/dbms/tests/queries/0_stateless/00634_logging_shard.sh index 3ea0df81bab..7df6e768a8b 100755 --- a/dbms/tests/queries/0_stateless/00634_logging_shard.sh +++ b/dbms/tests/queries/0_stateless/00634_logging_shard.sh @@ -9,6 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) cur_name=$(basename "${BASH_SOURCE[0]}") server_logs_file="${CLICKHOUSE_TMP}/${cur_name}_server.logs" + server_logs="--server_logs_file=$server_logs_file" rm -f "$server_logs_file" diff --git a/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh b/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh index 56a0514e90d..47e9aa07476 100755 --- a/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh +++ b/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh @@ -9,6 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) cur_name=$(basename "${BASH_SOURCE[0]}") server_logs_file=${CLICKHOUSE_TMP}/$cur_name"_server.logs" + server_logs="--server_logs_file=$server_logs_file" rm -f "$server_logs_file" diff --git a/dbms/tests/queries/0_stateless/00646_url_engine.python b/dbms/tests/queries/0_stateless/00646_url_engine.python index fed6b2a4d84..960048dbb8f 100644 --- a/dbms/tests/queries/0_stateless/00646_url_engine.python +++ b/dbms/tests/queries/0_stateless/00646_url_engine.python @@ -5,17 +5,31 @@ import sys import tempfile import threading import os, urllib +import subprocess from io import StringIO from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') -SERVER_ADDRESS = ('127.0.0.1', 51234) -SERVER_ADDRESS_STR = 'http://' + ':'.join(str(s) for s in SERVER_ADDRESS) + "/" +##################################################################################### +# This test starts an HTTP server and serves data to clickhouse url-engine based table. +# In order for it to work ip+port of http server (given below) should be +# accessible from clickhouse server. +##################################################################################### + +# IP-address of this host accessible from outside world. +HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip() +HTTP_SERVER_PORT = int(os.environ.get('CLICKHOUSE_TEST_HOST_EXPOSED_PORT', 51234)) + +# IP address and port of the HTTP server started from this script. 
+HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) +HTTP_SERVER_URL_STR = 'http://' + ':'.join(str(s) for s in HTTP_SERVER_ADDRESS) + "/" CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) - def get_ch_answer(query): - return urllib.urlopen(os.environ.get('CLICKHOUSE_URL', 'http://localhost:' + os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')), data=query).read() + url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + return urllib.urlopen(url, data=query).read() def check_answers(query, answer): ch_answer = get_ch_answer(query) @@ -75,7 +89,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler): return def start_server(requests_amount): - httpd = HTTPServer(SERVER_ADDRESS, CSVHTTPServer) + httpd = HTTPServer(HTTP_SERVER_ADDRESS, CSVHTTPServer) def real_func(): for i in xrange(requests_amount): @@ -96,12 +110,12 @@ def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,do if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, SERVER_ADDRESS_STR)) + get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) for i in xrange(len(requests)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=SERVER_ADDRESS_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) check_answers(requests[i].format(tbl=tbl), answers[i]) if table_name: @@ -113,19 +127,19 @@ def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,do if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, SERVER_ADDRESS_STR)) + get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) for req in requests_insert: tbl = table_name if not tbl: - tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=SERVER_ADDRESS_STR, schema=schema) + tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) get_ch_answer(req.format(tbl=tbl)) for i in xrange(len(requests_select)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=SERVER_ADDRESS_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) check_answers(requests_select[i].format(tbl=tbl), answers[i]) if table_name: diff --git a/dbms/tests/queries/0_stateless/00837_minmax_index.sh b/dbms/tests/queries/0_stateless/00837_minmax_index.sh index d38f7bbabfd..67686475970 100755 --- a/dbms/tests/queries/0_stateless/00837_minmax_index.sh +++ b/dbms/tests/queries/0_stateless/00837_minmax_index.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +CLICKHOUSE_CLIENT_OPT="--allow_experimental_data_skipping_indices=1" + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh

diff --git a/dbms/tests/queries/0_stateless/00838_unique_index.sh b/dbms/tests/queries/0_stateless/00838_unique_index.sh
index f6bea4f083a..c580eb21a9c 100755
--- a/dbms/tests/queries/0_stateless/00838_unique_index.sh
+++ b/dbms/tests/queries/0_stateless/00838_unique_index.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+CLICKHOUSE_CLIENT_OPT="--allow_experimental_data_skipping_indices=1"
+
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh
 
diff --git a/dbms/tests/queries/shell_config.sh b/dbms/tests/queries/shell_config.sh
index f086143db65..a972690e0cf 100644
--- a/dbms/tests/queries/shell_config.sh
+++ b/dbms/tests/queries/shell_config.sh
@@ -39,3 +39,10 @@ export CLICKHOUSE_CURL_COMMAND=${CLICKHOUSE_CURL_COMMAND:="curl"}
 export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} --max-time 10"}
 export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."}
 mkdir -p ${CLICKHOUSE_TMP}
+
+function clickhouse_client_removed_host_parameter()
+{
+    # removing only `--host=value` and `--host value` (removing '-hvalue' feels too dangerous) with a python regex.
+    # bash regex magic is arcane, but version dependent and weak; sed or awk are not really portable.
+    $(echo "$CLICKHOUSE_CLIENT" | python -c "import sys, re; print re.sub('--host(\s+|=)[^\s]+', '', sys.stdin.read())") "$@"
+}
diff --git a/docker/server/local.Dockerfile b/docker/server/local.Dockerfile
index 33d7e11f118..390f840513e 100644
--- a/docker/server/local.Dockerfile
+++ b/docker/server/local.Dockerfile
@@ -1,18 +1,19 @@
-FROM ubuntu:18.04
+# Since right now we can't mount volumes during the docker build, we split building the container into stages:
+# 1. build base container
+# 2. run base container with mounted volumes
+# 3. commit container as image
+# 4. build final container atop that image
+# Middle steps are performed by the bash script.
+FROM ubuntu:18.04 as clickhouse-server-base
 ARG gosu_ver=1.10
 
-ARG CLICKHOUSE_PACKAGES_DIR
-COPY ${CLICKHOUSE_PACKAGES_DIR}/clickhouse-*.deb /packages/
+VOLUME /packages/
 
-# installing via apt to simulate real-world scenario, where user installs deb package and all it's dependecies automatically.
+# update to allow installing dependencies of clickhouse automatically
 RUN apt update; \
     DEBIAN_FRONTEND=noninteractive \
-    apt install -y \
-        /packages/clickhouse-common-static_*.deb \
-        /packages/clickhouse-server_*.deb \
-        locales ;\
-    rm -rf /packages
+    apt install -y locales;
 
 ADD https://github.com/tianon/gosu/releases/download/${gosu_ver}/gosu-amd64 /bin/gosu
 
@@ -21,10 +22,18 @@ ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US:en
 ENV LC_ALL en_US.UTF-8
 
+# installing via apt to simulate the real-world scenario, where the user installs a deb package and all its dependencies automatically.
+CMD DEBIAN_FRONTEND=noninteractive \
+    apt install -y \
+        /packages/clickhouse-common-static_*.deb \
+        /packages/clickhouse-server_*.deb ;
+
+FROM clickhouse-server-base:postinstall as clickhouse-server
+
 RUN mkdir /docker-entrypoint-initdb.d
 
-COPY server/docker_related_config.xml /etc/clickhouse-server/config.d/
-COPY server/entrypoint.sh /entrypoint.sh
+COPY docker_related_config.xml /etc/clickhouse-server/config.d/
+COPY entrypoint.sh /entrypoint.sh
 
 RUN chmod +x \
     /entrypoint.sh \
diff --git a/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile b/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile
index 8aecb7119cc..562141ba147 100644
--- a/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile
+++ b/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile
@@ -1,12 +1,15 @@
-FROM ubuntu:18.10
+# Since right now we can't mount volumes during the docker build, we split building the container into stages:
+# 1. build base container
+# 2. run base container with mounted volumes
+# 3. commit container as image
+FROM ubuntu:18.10 as clickhouse-test-runner-base
 
-ARG CLICKHOUSE_PACKAGES_DIR
-COPY ${CLICKHOUSE_PACKAGES_DIR}/clickhouse-*.deb /packages/
+# A volume where the directory with clickhouse packages is to be mounted,
+# for later installing.
+VOLUME /packages
 
-RUN apt-get update ;\
+CMD apt-get update ;\
     DEBIAN_FRONTEND=noninteractive \
     apt install -y /packages/clickhouse-common-static_*.deb \
-        /packages/clickhouse-client_*.deb \
-        /packages/clickhouse-test_*.deb \
-        wait-for-it; \
-    rm -rf /packages
+        /packages/clickhouse-client_*.deb \
+        /packages/clickhouse-test_*.deb
diff --git a/docker/test/test_runner.sh b/docker/test/test_runner.sh
index afa5c95720b..6e6d4537603 100755
--- a/docker/test/test_runner.sh
+++ b/docker/test/test_runner.sh
@@ -1,31 +1,84 @@
 #!/bin/sh
-set -e
+set -e -x
 
-# Run tests in docker
-# OR
-# Build containers from deb packages, copying the tests from the source directory
+trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT
+
+# CLI option to prevent rebuilding images, just re-run tests with images leftover from a previous run
+readonly NO_REBUILD_FLAG="--no-rebuild"
 
 readonly CLICKHOUSE_DOCKER_DIR="$(realpath ${1})"
-readonly CLICKHOUSE_PACKAGES_DIR="${2}"
+readonly CLICKHOUSE_PACKAGES_ARG="${2}"
 CLICKHOUSE_SERVER_IMAGE="${3}"
 
-# Build test runner image
-docker build \
-    -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \
-    -t clickhouse-statelest-test-runner:local \
-    --build-arg CLICKHOUSE_PACKAGES_DIR="${CLICKHOUSE_PACKAGES_DIR}" \
-    "${CLICKHOUSE_DOCKER_DIR}"
+if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then
+    readonly CLICKHOUSE_PACKAGES_DIR="$(realpath ${2})" # or --no-rebuild
+fi
+
+
+# In order to allow the packages directory to be anywhere, and to reduce the amount of context sent to the docker daemon,
+# all images are built in multiple stages:
+# 1. build base image, install dependencies
+# 2. run image with volume mounted, install what's needed from those volumes
+# 3. tag container as image
+# 4. [optional] build another image atop of tagged.
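The numbered comment above is the core of the workaround: docker build cannot mount volumes, so the packages are installed in a container started from the base image, and the result is committed as a new image. The script drives this through the docker CLI; the same flow expressed with the Docker SDK for Python, purely as an illustration with placeholder paths, might look like:

```python
# Illustration of the build/run/commit flow using the Docker SDK for
# Python; the script itself uses the docker CLI. Paths are placeholders.
import docker

client = docker.from_env()

# 1. build the base image (dependencies only, no packages yet)
client.images.build(
    path='docker/test/stateless',
    dockerfile='clickhouse-statelest-test-runner.Dockerfile',
    target='clickhouse-test-runner-base',
    tag='clickhouse-test-runner-base:preinstall')

# 2. run it with the packages directory mounted; CMD installs the debs
container = client.containers.run(
    'clickhouse-test-runner-base:preinstall',
    volumes={'/path/to/packages': {'bind': '/packages', 'mode': 'rw'}},
    detach=True)
container.wait()

# 3. commit the container's filesystem as the final test-runner image
container.commit(repository='clickhouse-statelest-test-runner', tag='local')
container.remove(force=True)
```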
+ +# TODO: optionally mount most recent clickhouse-test and queries directory from local machine + +if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then + docker build \ + -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \ + --target clickhouse-test-runner-base \ + -t clickhouse-test-runner-base:preinstall \ + "${CLICKHOUSE_DOCKER_DIR}/test/stateless" + + docker rm -f clickhouse-test-runner-installing-packages || true + docker run \ + -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ + --name clickhouse-test-runner-installing-packages \ + clickhouse-test-runner-base:preinstall + docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local + docker rm -f clickhouse-test-runner-installing-packages || true +fi + +# # Create a bind-volume to the clickhouse-test script file +# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/dbms/tests/clickhouse-test --opt o=bind clickhouse-test-script-volume +# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/dbms/tests/queries --opt o=bind clickhouse-test-queries-dir-volume # Build server image (optional) from local packages if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then - CLICKHOUSE_SERVER_IMAGE="yandex/clickhouse_server:local" + CLICKHOUSE_SERVER_IMAGE="yandex/clickhouse-server:local" - docker build \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - -t "${CLICKHOUSE_SERVER_IMAGE}" \ - --build-arg CLICKHOUSE_PACKAGES_DIR=${CLICKHOUSE_PACKAGES_DIR} \ - "${CLICKHOUSE_DOCKER_DIR}" + if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then + docker build \ + -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ + --target clickhouse-server-base \ + -t clickhouse-server-base:preinstall \ + "${CLICKHOUSE_DOCKER_DIR}/server" + + docker rm -f clickhouse_server_base_installing_server || true + docker run -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ + --name clickhouse_server_base_installing_server \ + clickhouse-server-base:preinstall + docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall + + docker build \ + -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ + --target clickhouse-server \ + -t "${CLICKHOUSE_SERVER_IMAGE}" \ + "${CLICKHOUSE_DOCKER_DIR}/server" + fi fi -CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" run test-runner \ No newline at end of file +docker rm -f test-runner || true +docker-compose down +CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ + docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ + create \ + --build --force-recreate + +CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ + docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ + run \ + --name test-runner \ + test-runner \ No newline at end of file diff --git a/docker/test/test_runner_docker_compose.yaml b/docker/test/test_runner_docker_compose.yaml index 281442f26a4..ba2e525b3a5 100644 --- a/docker/test/test_runner_docker_compose.yaml +++ b/docker/test/test_runner_docker_compose.yaml @@ -4,13 +4,13 @@ services: clickhouse-server: image: ${CLICKHOUSE_SERVER_IMAGE} expose: - - "8123" - - "9000" - - "9009" + - "8123" # HTTP + - "9000" # TCP + - "9009" # HTTP-interserver restart: "no" test-runner: - image: yandex/clickhouse-statelest-test-runner:local + image: clickhouse-statelest-test-runner:local 
restart: "no" depends_on: @@ -18,13 +18,17 @@ services: environment: # these are used by clickhouse-test to point clickhouse-client to the right server - CLICKHOUSE_HOST=clickhouse-server - - CLICKHOUSE_PORT=8123 + - CLICKHOUSE_PORT=9009 + - CLICKHOUSE_TEST_HOST_EXPOSED_PORT=51234 + expose: + # port for any test to serve data to clickhouse-server on rare occasion (like URL-engine tables in 00646), + # should match value of CLICKHOUSE_TEST_HOST_EXPOSED_PORT above + - "51234" - entrypoint: - - wait-for-it - - clickhouse-server:8123 - - -- - - clickhouse-test - # - -c - # - `which clickhouse-client` - - ${CLICKHOUSE_TEST_ARGS} + # NOTE: Dev-mode: mount newest versions of the queries and clickhouse-test script into container. + # volumes: + # - /home/enmk/proj/ClickHouse_master/dbms/tests/queries:/usr/share/clickhouse-test/queries:ro + # - /home/enmk/proj/ClickHouse_master/dbms/tests/clickhouse-test:/usr/bin/clickhouse-test:ro + + # String-form instead of list-form to allow multiple arguments in "${CLICKHOUSE_TEST_ARGS}" + entrypoint: "clickhouse-test ${CLICKHOUSE_TEST_ARGS}" From 4a94545882ae9c9e261c87c6a07c91deedbb4249 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 30 Mar 2019 12:52:48 +0000 Subject: [PATCH 021/102] add test for multiple supported asof types --- dbms/src/Interpreters/Join.cpp | 24 +++++-------------- dbms/src/Interpreters/Join.h | 7 +++--- .../00927_asof_join_other_types.reference | 12 ++++++++++ .../00927_asof_join_other_types.sh | 22 +++++++++++++++++ 4 files changed, 44 insertions(+), 21 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00927_asof_join_other_types.reference create mode 100755 dbms/tests/queries/0_stateless/00927_asof_join_other_types.sh diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 234a56f7183..b93dcbf0cab 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -299,9 +299,10 @@ void Join::setSampleBlock(const Block & block) throw Exception("ASOF only supports LEFT and INNER as base joins", ErrorCodes::NOT_IMPLEMENTED); const IColumn * asof_column = key_columns.back(); + size_t asof_size; - if (auto t = AsofRowRefs::getType(asof_column)) - asof_type = *t; + if (auto t = AsofRowRefs::getTypeSize(asof_column)) + std::tie(asof_type, asof_size) = *t; else { std::string msg = "ASOF join not supported for type"; @@ -319,7 +320,7 @@ void Join::setSampleBlock(const Block & block) /// Therefore, add it back in such that it can be extracted appropriately from the full stored /// key_columns and key_sizes init(chooseMethod(key_columns, key_sizes)); - key_sizes.push_back(AsofRowRefs::getSize(asof_type)); + key_sizes.push_back(asof_size); } else { @@ -430,29 +431,16 @@ const Join::RowRef * Join::AsofRowRefs::findAsof(const IColumn * asof_column, si __builtin_unreachable(); } -std::optional Join::AsofRowRefs::getType(const IColumn * asof_column) +std::optional> Join::AsofRowRefs::getTypeSize(const IColumn * asof_column) { #define M(NAME, TYPE) \ if (strcmp(#TYPE, asof_column->getFamilyName()) == 0) \ - return AsofType::NAME; + return std::make_pair(AsofType::NAME,sizeof(TYPE)); APPLY_FOR_ASOF_JOIN_VARIANTS(M) #undef M return {}; } -size_t Join::AsofRowRefs::getSize(Join::AsofRowRefs::AsofType type) -{ - switch (type) - { - case AsofType::EMPTY: return 0; - #define M(NAME, TYPE) \ - case AsofType::NAME: return sizeof(TYPE); - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - } - __builtin_unreachable(); -} - namespace { diff --git a/dbms/src/Interpreters/Join.h 
b/dbms/src/Interpreters/Join.h index dcd4ea6e9d4..d8d54cdac30 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -156,7 +156,9 @@ public: /// Different types of asof join keys #define APPLY_FOR_ASOF_JOIN_VARIANTS(M) \ M(key32, UInt32) \ - M(key64, UInt64) + M(key64, UInt64) \ + M(keyf32, Float32) \ + M(keyf64, Float64) enum class AsofType { @@ -166,8 +168,7 @@ public: #undef M }; - static std::optional getType(const IColumn * asof_column); - static size_t getSize(AsofType type); + static std::optional> getTypeSize(const IColumn * asof_column); template struct Entry diff --git a/dbms/tests/queries/0_stateless/00927_asof_join_other_types.reference b/dbms/tests/queries/0_stateless/00927_asof_join_other_types.reference new file mode 100644 index 00000000000..674df7e4845 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00927_asof_join_other_types.reference @@ -0,0 +1,12 @@ +2 1 1 0 +2 3 3 3 +2 5 5 3 +2 1 1 0 +2 3 3 3 +2 5 5 3 +2 1 1 0 +2 3 3 3 +2 5 5 3 +2 1 1 0 +2 3 3 3 +2 5 5 3 diff --git a/dbms/tests/queries/0_stateless/00927_asof_join_other_types.sh b/dbms/tests/queries/0_stateless/00927_asof_join_other_types.sh new file mode 100755 index 00000000000..3cf1791bcfe --- /dev/null +++ b/dbms/tests/queries/0_stateless/00927_asof_join_other_types.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "USE test;" + +for typename in "UInt32" "UInt64" "Float64" "Float32" +do + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS A;" + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS B;" + + $CLICKHOUSE_CLIENT -q "CREATE TABLE A(k UInt32, t ${typename}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);" + $CLICKHOUSE_CLIENT -q "INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);" + + $CLICKHOUSE_CLIENT -q "CREATE TABLE B(k UInt32, t ${typename}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);" + $CLICKHOUSE_CLIENT -q "INSERT INTO B(k,t,b) VALUES (2,3,3);" + + $CLICKHOUSE_CLIENT -q "SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t);" +done \ No newline at end of file From 20e5fb61c476fffbaeade7891447b31b7460b2a7 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 30 Mar 2019 13:02:30 +0000 Subject: [PATCH 022/102] fix style --- dbms/src/Interpreters/Join.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index d8d54cdac30..910c235c70f 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -196,7 +196,8 @@ public: }; AsofRowRefs() : type(AsofType::EMPTY) {} - AsofRowRefs(AsofType t) : type(t) { + AsofRowRefs(AsofType t) : type(t) + { lookups.create(t); } @@ -210,7 +211,7 @@ public: }; AsofRowRefs::AsofType getAsofType() const { return asof_type; } - + /** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined). 
* Depending on template parameter, decide whether to overwrite existing values when encountering the same key again From 3ac66dfdb656363a0463e94ac9897d8ced09b0a7 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 30 Mar 2019 13:09:03 +0000 Subject: [PATCH 023/102] set default asof type value --- dbms/src/Interpreters/Join.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 910c235c70f..876fd2a70b1 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -446,7 +446,7 @@ private: private: Type type = Type::EMPTY; - AsofRowRefs::AsofType asof_type; + AsofRowRefs::AsofType asof_type = AsofRowRefs::AsofType::EMPTY; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); From 48a590551b2d7b9e07cd05e16e3a9ae4f38839f5 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 30 Mar 2019 21:40:52 +0300 Subject: [PATCH 024/102] Add test checking using format schema via HTTP interface. --- dbms/tests/integration/helpers/cluster.py | 5 +++ .../test_format_schema_on_server/__init__.py | 0 .../format_schemas/simple.proto | 6 +++ .../test_format_schema_on_server/test.py | 40 +++++++++++++++++++ 4 files changed, 51 insertions(+) create mode 100644 dbms/tests/integration/test_format_schema_on_server/__init__.py create mode 100755 dbms/tests/integration/test_format_schema_on_server/clickhouse_path/format_schemas/simple.proto create mode 100644 dbms/tests/integration/test_format_schema_on_server/test.py diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 240cc2c8695..8db5e59eb00 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -17,6 +17,7 @@ import psycopg2 import requests import base64 import pymongo +import urllib import docker from docker.errors import ContainerError @@ -482,6 +483,10 @@ class ClickHouseInstance: def get_query_request(self, *args, **kwargs): return self.client.get_query_request(*args, **kwargs) + # Connects to the instance via HTTP interface, sends a query and returns the answer + def http_query(self, sql, data=None): + return urllib.urlopen("http://"+self.ip_address+":8123/?query="+urllib.quote(sql,safe=''), data).read() + def restart_clickhouse(self, stop_start_wait_sec=5): if not self.stay_alive: raise Exception("clickhouse can be restarted only with stay_alive=True instance") diff --git a/dbms/tests/integration/test_format_schema_on_server/__init__.py b/dbms/tests/integration/test_format_schema_on_server/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_format_schema_on_server/clickhouse_path/format_schemas/simple.proto b/dbms/tests/integration/test_format_schema_on_server/clickhouse_path/format_schemas/simple.proto new file mode 100755 index 00000000000..96b24be4938 --- /dev/null +++ b/dbms/tests/integration/test_format_schema_on_server/clickhouse_path/format_schemas/simple.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message KeyValuePair { + uint64 key = 1; + string value = 2; +} \ No newline at end of file diff --git a/dbms/tests/integration/test_format_schema_on_server/test.py b/dbms/tests/integration/test_format_schema_on_server/test.py new file mode 100644 index 00000000000..9d0f6948aef --- /dev/null +++ b/dbms/tests/integration/test_format_schema_on_server/test.py @@ -0,0 +1,40 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) 
+instance = cluster.add_instance('instance', + clickhouse_path_dir='clickhouse_path') + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + instance.query('CREATE DATABASE test') + yield cluster + + finally: + cluster.shutdown() + + +def create_simple_table(): + instance.query("DROP TABLE IF EXISTS test.simple") + instance.query(''' + CREATE TABLE test.simple (key UInt64, value String) + ENGINE = MergeTree ORDER BY tuple(); + ''') + + +def test_protobuf_format_input(started_cluster): + create_simple_table() + instance.http_query( + "INSERT INTO test.simple FORMAT Protobuf SETTINGS format_schema='simple:KeyValuePair'", + "\x07\x08\x01\x12\x03abc\x07\x08\x02\x12\x03def") + assert instance.query("SELECT * from test.simple") == "1\tabc\n2\tdef\n" + + +def test_protobuf_format_output(started_cluster): + create_simple_table() + instance.query("INSERT INTO test.simple VALUES (1, 'abc'), (2, 'def')"); + assert instance.http_query("SELECT * FROM test.simple FORMAT Protobuf SETTINGS format_schema='simple:KeyValuePair'") == \ + "\x07\x08\x01\x12\x03abc\x07\x08\x02\x12\x03def" From a2e756287e293e9cb1ae3811e96d24ae3cc27bf1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 30 Mar 2019 23:52:36 +0300 Subject: [PATCH 025/102] Whitespaces --- dbms/src/Compression/CompressionCodecDelta.h | 2 ++ dbms/src/Compression/CompressionFactory.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Compression/CompressionCodecDelta.h b/dbms/src/Compression/CompressionCodecDelta.h index ffe2822fa1e..547bfdf2808 100644 --- a/dbms/src/Compression/CompressionCodecDelta.h +++ b/dbms/src/Compression/CompressionCodecDelta.h @@ -1,6 +1,7 @@ #pragma once #include + namespace DB { @@ -22,5 +23,6 @@ protected: private: const UInt8 delta_bytes_size; }; + } diff --git a/dbms/src/Compression/CompressionFactory.h b/dbms/src/Compression/CompressionFactory.h index b36bed1cf8e..4b959cef847 100644 --- a/dbms/src/Compression/CompressionFactory.h +++ b/dbms/src/Compression/CompressionFactory.h @@ -40,7 +40,7 @@ public: /// Get codec by AST and possible column_type /// some codecs can use information about type to improve inner settings /// but every codec should be able to work without information about type - CompressionCodecPtr get(const ASTPtr & ast, DataTypePtr column_type=nullptr) const; + CompressionCodecPtr get(const ASTPtr & ast, DataTypePtr column_type = nullptr) const; /// Get codec by method byte (no params available) CompressionCodecPtr get(const UInt8 byte_code) const; From 4709b744bb8e4d3f9f4493e5c0e6fc35f38c4bee Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 30 Mar 2019 21:30:21 +0000 Subject: [PATCH 026/102] address the code review issues --- dbms/src/Common/SortedLookupPODArray.h | 48 +++++++++++ dbms/src/Interpreters/Join.cpp | 107 ++++--------------------- dbms/src/Interpreters/Join.h | 86 +------------------- dbms/src/Interpreters/RowRefs.cpp | 75 +++++++++++++++++ dbms/src/Interpreters/RowRefs.h | 91 +++++++++++++++++++++ 5 files changed, 232 insertions(+), 175 deletions(-) create mode 100644 dbms/src/Common/SortedLookupPODArray.h create mode 100644 dbms/src/Interpreters/RowRefs.cpp create mode 100644 dbms/src/Interpreters/RowRefs.h diff --git a/dbms/src/Common/SortedLookupPODArray.h b/dbms/src/Common/SortedLookupPODArray.h new file mode 100644 index 00000000000..60b3529c69e --- /dev/null +++ b/dbms/src/Common/SortedLookupPODArray.h @@ -0,0 +1,48 @@ +#pragma once + +#include + +namespace DB { + +/** + * This class is intended to push sortable 
data into. + * When looking up values the container ensures that it is sorted for log(N) lookup + * + * Note, this is only efficient when the insertions happen in one stage, followed by all retrievals + * This way the data only gets sorted once. + */ + +template > +class SortedLookupPODArray : private PaddedPODArray +{ +public: + using Base = PaddedPODArray; + using Base::PODArray; + using Base::cbegin; + using Base::cend; + + template + void insert(U && x, TAllocatorParams &&... allocator_params) + { + Base::push_back(std::forward(x), std::forward(allocator_params)...); + sorted = false; + } + + typename Base::const_iterator upper_bound (const T& k) + { + if (!sorted) + this->sort(); + return std::upper_bound(this->cbegin(), this->cend(), k); + } +private: + void sort() + { + std::sort(this->begin(), this->end()); + sorted = true; + } + + bool sorted = false; +}; + + +} \ No newline at end of file diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index b93dcbf0cab..2d959be98f2 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -363,98 +363,19 @@ void Join::setSampleBlock(const Block & block) convertColumnToNullable(sample_block_with_columns_to_add.getByPosition(i)); } -void Join::AsofRowRefs::Lookups::create(Join::AsofRowRefs::AsofType which) -{ - switch (which) - { - case AsofType::EMPTY: break; - #define M(NAME, TYPE) \ - case AsofType::NAME: NAME = std::make_unique(); break; - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - } -} - -template -using AsofGetterType = ColumnsHashing::HashMethodOneNumber; - -void Join::AsofRowRefs::insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool) -{ - assert(!sorted); - switch (type) - { - case AsofType::EMPTY: break; - #define M(NAME, TYPE) \ - case AsofType::NAME: { \ - auto asof_getter = AsofGetterType(asof_column); \ - auto entry = Entry(asof_getter.getKey(row_num, pool), RowRef(block, row_num)); \ - lookups.NAME->push_back(entry); \ - break; \ - } - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - } -} - -const Join::RowRef * Join::AsofRowRefs::findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const -{ - if (!sorted) - { - // sort whenever needed - switch (type) - { - case AsofType::EMPTY: break; - #define M(NAME, TYPE) \ - case AsofType::NAME: std::sort(lookups.NAME->begin(), lookups.NAME->end()); break; - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - } - sorted = true; - } - - switch (type) - { - case AsofType::EMPTY: return nullptr; - #define M(NAME, TYPE) \ - case AsofType::NAME: { \ - auto asof_getter = AsofGetterType(asof_column); \ - TYPE key = asof_getter.getKey(row_num, pool); \ - auto it = std::upper_bound(lookups.NAME->cbegin(), lookups.NAME->cend(), Entry(key)); \ - if (it == lookups.NAME->cbegin()) \ - return nullptr; \ - return &((--it)->row_ref); \ - } - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - } - - __builtin_unreachable(); -} - -std::optional> Join::AsofRowRefs::getTypeSize(const IColumn * asof_column) -{ - #define M(NAME, TYPE) \ - if (strcmp(#TYPE, asof_column->getFamilyName()) == 0) \ - return std::make_pair(AsofType::NAME,sizeof(TYPE)); - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - return {}; -} - - namespace { /// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. 
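    /// (Clarifying note: three specializations of Inserter follow, one per join
    /// strictness. Any stores a single RowRef per key, All chains duplicate keys
    /// through a RowRefList, and Asof accumulates (asof value, RowRef) entries
    /// into AsofRowRefs for later nearest-predecessor lookup.)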
template struct Inserter { - static void insert(const Join *, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool); + static void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool); }; template struct Inserter { - static ALWAYS_INLINE void insert(const Join *, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); @@ -466,7 +387,7 @@ namespace template struct Inserter { - static ALWAYS_INLINE void insert(const Join *, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); @@ -492,13 +413,13 @@ namespace template struct Inserter { - static ALWAYS_INLINE void insert(const Join * join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, const IColumn * asof_column) + static ALWAYS_INLINE void insert(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, const IColumn * asof_column) { auto emplace_result = key_getter.emplaceKey(map, i, pool); typename Map::mapped_type * time_series_map = &emplace_result.getMapped(); if (emplace_result.isInserted()) - time_series_map = new (time_series_map) typename Map::mapped_type(join->getAsofType()); + time_series_map = new (time_series_map) typename Map::mapped_type(join.getAsofType()); time_series_map->insert(asof_column, stored_block, i, pool); } }; @@ -506,7 +427,7 @@ namespace template void NO_INLINE insertFromBlockImplTypeCase( - const Join * join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, + const Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { const IColumn * asof_column [[maybe_unused]] = nullptr; @@ -530,7 +451,7 @@ namespace template void insertFromBlockImplType( - const Join * join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, + const Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { if (null_map) @@ -542,7 +463,7 @@ namespace template void insertFromBlockImpl( - const Join * join, Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, + const Join & join, Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { switch (type) @@ -640,7 +561,7 @@ bool Join::insertFromBlock(const Block & block) { dispatch([&](auto, auto strictness_, auto & map) { - insertFromBlockImpl(this, type, map, rows, key_columns, key_sizes, stored_block, null_map, pool); + insertFromBlockImpl(*this, type, map, rows, key_columns, key_sizes, stored_block, null_map, pool); }); } @@ -775,14 +696,16 @@ std::unique_ptr NO_INLINE joinRightIndexedColumns( auto & mapped = find_result.getMapped(); if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - if (const Join::RowRef * found = mapped.findAsof(asof_column, i, pool)) + { + if (const RowRef * found = mapped.findAsof(asof_column, i, pool)) { - filter[i] = 1; - 
mapped.setUsed(); - added_columns.appendFromBlock(*found->block, found->row_num); + filter[i] = 1; + mapped.setUsed(); + added_columns.appendFromBlock(*found->block, found->row_num); } else addNotFoundRow<_add_missing>(added_columns, current_offset); + } else { filter[i] = 1; diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 876fd2a70b1..f6ddaf87af0 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -130,88 +131,7 @@ public: size_t getTotalByteCount() const; ASTTableJoin::Kind getKind() const { return kind; } - - - /// Reference to the row in block. - struct RowRef - { - const Block * block = nullptr; - size_t row_num = 0; - - RowRef() {} - RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {} - }; - - /// Single linked list of references to rows. Used for ALL JOINs (non-unique JOINs) - struct RowRefList : RowRef - { - RowRefList * next = nullptr; - - RowRefList() {} - RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_) {} - }; - - struct AsofRowRefs - { - /// Different types of asof join keys - #define APPLY_FOR_ASOF_JOIN_VARIANTS(M) \ - M(key32, UInt32) \ - M(key64, UInt64) \ - M(keyf32, Float32) \ - M(keyf64, Float64) - - enum class AsofType - { - EMPTY, - #define M(NAME, TYPE) NAME, - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - }; - - static std::optional> getTypeSize(const IColumn * asof_column); - - template - struct Entry - { - T asof_value; - RowRef row_ref; - - Entry(T v) : asof_value(v) {} - Entry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} - - bool operator< (const Entry& o) const - { - return asof_value < o.asof_value; - } - }; - - struct Lookups - { - #define M(NAME, TYPE) \ - std::unique_ptr>> NAME; - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - - void create(AsofType which); - }; - - AsofRowRefs() : type(AsofType::EMPTY) {} - AsofRowRefs(AsofType t) : type(t) - { - lookups.create(t); - } - - void insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool); - const RowRef * findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const; - - private: - const AsofType type; - mutable Lookups lookups; - mutable bool sorted = false; - }; - - AsofRowRefs::AsofType getAsofType() const { return asof_type; } - + AsofRowRefs::Type getAsofType() const { return asof_type; } /** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined). 
* Depending on template parameter, decide whether to overwrite existing values when encountering the same key again @@ -446,7 +366,7 @@ private: private: Type type = Type::EMPTY; - AsofRowRefs::AsofType asof_type = AsofRowRefs::AsofType::EMPTY; + AsofRowRefs::Type asof_type = AsofRowRefs::Type::EMPTY; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); diff --git a/dbms/src/Interpreters/RowRefs.cpp b/dbms/src/Interpreters/RowRefs.cpp new file mode 100644 index 00000000000..18b144e984e --- /dev/null +++ b/dbms/src/Interpreters/RowRefs.cpp @@ -0,0 +1,75 @@ +#include + +#include +#include +#include + +#include + +namespace DB +{ + +void AsofRowRefs::Lookups::create(AsofRowRefs::Type which) +{ + switch (which) + { + case Type::EMPTY: break; + #define M(NAME, TYPE) \ + case Type::NAME: NAME = std::make_unique(); break; + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + } +} + +template +using AsofGetterType = ColumnsHashing::HashMethodOneNumber; + +void AsofRowRefs::insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool) +{ + switch (type) + { + case Type::EMPTY: break; + #define M(NAME, TYPE) \ + case Type::NAME: { \ + auto asof_getter = AsofGetterType(asof_column); \ + auto entry = Entry(asof_getter.getKey(row_num, pool), RowRef(block, row_num)); \ + lookups.NAME->insert(entry); \ + break; \ + } + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + } +} + +const RowRef * AsofRowRefs::findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const +{ + switch (type) + { + case Type::EMPTY: return nullptr; + #define M(NAME, TYPE) \ + case Type::NAME: { \ + auto asof_getter = AsofGetterType(asof_column); \ + TYPE key = asof_getter.getKey(row_num, pool); \ + auto it = lookups.NAME->upper_bound(Entry(key)); \ + if (it == lookups.NAME->cbegin()) \ + return nullptr; \ + return &((--it)->row_ref); \ + } + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + } + + __builtin_unreachable(); +} + +std::optional> AsofRowRefs::getTypeSize(const IColumn * asof_column) +{ + #define M(NAME, TYPE) \ + if (strcmp(#TYPE, asof_column->getFamilyName()) == 0) \ + return std::make_pair(Type::NAME,sizeof(TYPE)); + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + return {}; +} + +} \ No newline at end of file diff --git a/dbms/src/Interpreters/RowRefs.h b/dbms/src/Interpreters/RowRefs.h new file mode 100644 index 00000000000..84f2a91af34 --- /dev/null +++ b/dbms/src/Interpreters/RowRefs.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include + +#include + +namespace DB +{ + +class Block; + +/// Reference to the row in block. +struct RowRef +{ + const Block * block = nullptr; + size_t row_num = 0; + + RowRef() {} + RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {} +}; + +/// Single linked list of references to rows. 
Used for ALL JOINs (non-unique JOINs) +struct RowRefList : RowRef +{ + RowRefList * next = nullptr; + + RowRefList() {} + RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_) {} +}; + +class AsofRowRefs +{ +public: + /// Different types of asof join keys + #define APPLY_FOR_ASOF_JOIN_VARIANTS(M) \ + M(key32, UInt32) \ + M(key64, UInt64) \ + M(keyf32, Float32) \ + M(keyf64, Float64) + + enum class Type + { + EMPTY, + #define M(NAME, TYPE) NAME, + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + }; + + static std::optional> getTypeSize(const IColumn * asof_column); + + template + struct Entry + { + T asof_value; + RowRef row_ref; + + Entry(T v) : asof_value(v) {} + Entry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} + + bool operator< (const Entry& o) const + { + return asof_value < o.asof_value; + } + }; + + struct Lookups + { + #define M(NAME, TYPE) \ + std::unique_ptr>> NAME; + APPLY_FOR_ASOF_JOIN_VARIANTS(M) + #undef M + + void create(Type which); + }; + + AsofRowRefs() : type(Type::EMPTY) {} + AsofRowRefs(Type t) : type(t) + { + lookups.create(t); + } + + void insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool); + const RowRef * findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const; + +private: + const Type type; + mutable Lookups lookups; +}; + +} \ No newline at end of file From 6695e304afd49215e9aad226b6025324a0fa2834 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 30 Mar 2019 21:55:40 +0000 Subject: [PATCH 027/102] fix style --- dbms/src/Common/SortedLookupPODArray.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/SortedLookupPODArray.h b/dbms/src/Common/SortedLookupPODArray.h index 60b3529c69e..72cb5648735 100644 --- a/dbms/src/Common/SortedLookupPODArray.h +++ b/dbms/src/Common/SortedLookupPODArray.h @@ -2,7 +2,8 @@ #include -namespace DB { +namespace DB +{ /** * This class is intended to push sortable data into. 
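
For readers following the SortedLookupPODArray changes above, here is a minimal usage
sketch of the container's insert-then-search contract. It is illustrative only and not
part of the patch series; it assumes the header is reachable as
<Common/SortedLookupPODArray.h>, that the template's non-type parameters have defaults,
and it uses a plain int payload instead of the Entry type from RowRefs.h:

    #include <Common/SortedLookupPODArray.h>
    #include <iostream>

    int main()
    {
        DB::SortedLookupPODArray<int> lookup;

        /// Insertions only mark the array as unsorted; each costs one push_back.
        lookup.insert(30);
        lookup.insert(10);
        lookup.insert(20);

        /// The first upper_bound() call sorts once, then binary-searches in log(N).
        auto it = lookup.upper_bound(25);   /// points at 30
        if (it != lookup.cbegin())
            std::cout << *(--it) << '\n';   /// prints 20, the greatest value <= 25,
                                            /// mirroring how findAsof() steps back once
        return 0;
    }
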
From a1d2732d4cd691944b3b54efa96f464c5d28215b Mon Sep 17 00:00:00 2001 From: qianlixiang Date: Sun, 31 Mar 2019 11:44:05 +0800 Subject: [PATCH 028/102] Fixed segment fault of arrayIntersect and add a test --- dbms/src/Functions/arrayIntersect.cpp | 31 +++++++++++- .../00930_arrayIntersect.reference | 48 +++++++++++++++++++ .../0_stateless/00930_arrayIntersect.sql | 27 +++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00930_arrayIntersect.reference create mode 100644 dbms/tests/queries/0_stateless/00930_arrayIntersect.sql diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index 51ae95cf707..51edf1c48b6 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -56,6 +56,7 @@ private: struct UnpackedArrays { + size_t base_rows = 0; std::vector is_const; std::vector null_maps; std::vector offsets; @@ -246,6 +247,8 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays(con arrays.offsets.resize(columns_number); arrays.nested_columns.resize(columns_number); + bool all_const = true; + for (auto i : ext::range(0, columns_number)) { auto argument_column = columns[i].get(); @@ -257,6 +260,9 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays(con if (auto argument_column_array = typeid_cast(argument_column)) { + if (!arrays.is_const[i]) + all_const = false; + arrays.offsets[i] = &argument_column_array->getOffsets(); arrays.nested_columns[i] = &argument_column_array->getData(); if (auto column_nullable = typeid_cast(arrays.nested_columns[i])) @@ -269,6 +275,24 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays(con throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR}; } + if (all_const) + { + arrays.base_rows = arrays.offsets.front()->size(); + } + else + { + for (auto i : ext::range(0, columns_number)) + { + if (arrays.is_const[i]) continue; + + size_t rows = arrays.offsets[i]->size(); + if (arrays.base_rows == 0 && rows > 0) + arrays.base_rows = rows; + else if (arrays.base_rows != rows) + throw Exception("Non-const array columns in function " + getName() + "should have same rows", ErrorCodes::LOGICAL_ERROR); + } + } + return arrays; } @@ -352,7 +376,7 @@ template ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, MutableColumnPtr result_data_ptr) { auto args = arrays.nested_columns.size(); - auto rows = arrays.offsets.front()->size(); + auto rows = arrays.base_rows; bool all_nullable = true; @@ -392,11 +416,14 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable for (auto arg : ext::range(0, args)) { bool current_has_nullable = false; - size_t off = (*arrays.offsets[arg])[row]; + + size_t off; // const array has only one row bool const_arg = arrays.is_const[arg]; if (const_arg) off = (*arrays.offsets[arg])[0]; + else + off = (*arrays.offsets[arg])[row]; for (auto i : ext::range(prev_off[arg], off)) { diff --git a/dbms/tests/queries/0_stateless/00930_arrayIntersect.reference b/dbms/tests/queries/0_stateless/00930_arrayIntersect.reference new file mode 100644 index 00000000000..57cf8b8baf0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00930_arrayIntersect.reference @@ -0,0 +1,48 @@ +[] +[1,2] +[1] +[1,2] +[] +[] +[] +[] +[] +[] +[] +[] +[1,2] +[1] +[1,2] +[] +[1,2] +[1,2] +[1,2] +[1,2] +[] +[] +[] +[] +[1,2] +[1,2] +[1] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[1,2] +[1,2] +[1] 
+[] +[1,2] +[1,2] +[1,2] +[1,2] +[] +[] +[] +[] diff --git a/dbms/tests/queries/0_stateless/00930_arrayIntersect.sql b/dbms/tests/queries/0_stateless/00930_arrayIntersect.sql new file mode 100644 index 00000000000..08c20a517ea --- /dev/null +++ b/dbms/tests/queries/0_stateless/00930_arrayIntersect.sql @@ -0,0 +1,27 @@ +drop table if exists test.array_intersect; + +create table test.array_intersect (date Date, arr Array(UInt8)) engine=MergeTree partition by date order by date; + +insert into test.array_intersect values ('2019-01-01', [1,2,3]); +insert into test.array_intersect values ('2019-01-01', [1,2]); +insert into test.array_intersect values ('2019-01-01', [1]); +insert into test.array_intersect values ('2019-01-01', []); + +select arrayIntersect(arr, [1,2]) from test.array_intersect; +select arrayIntersect(arr, []) from test.array_intersect; +select arrayIntersect([], arr) from test.array_intersect; +select arrayIntersect([1,2], arr) from test.array_intersect; +select arrayIntersect([1,2], [1,2,3,4]) from test.array_intersect; +select arrayIntersect([], []) from test.array_intersect; + +optimize table test.array_intersect; + +select arrayIntersect(arr, [1,2]) from test.array_intersect; +select arrayIntersect(arr, []) from test.array_intersect; +select arrayIntersect([], arr) from test.array_intersect; +select arrayIntersect([1,2], arr) from test.array_intersect; +select arrayIntersect([1,2], [1,2,3,4]) from test.array_intersect; +select arrayIntersect([], []) from test.array_intersect; + +drop table if exists test.array_intersect; + From dad023ea480adfcdcdab53c34e84ea5e9c7cf89e Mon Sep 17 00:00:00 2001 From: qianlixiang Date: Sun, 31 Mar 2019 12:00:11 +0800 Subject: [PATCH 029/102] Fixed style check errors --- dbms/src/Functions/arrayIntersect.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index 51edf1c48b6..0cb941a3d68 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -283,13 +283,14 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays(con { for (auto i : ext::range(0, columns_number)) { - if (arrays.is_const[i]) continue; - + if (arrays.is_const[i]) + continue; + size_t rows = arrays.offsets[i]->size(); if (arrays.base_rows == 0 && rows > 0) arrays.base_rows = rows; else if (arrays.base_rows != rows) - throw Exception("Non-const array columns in function " + getName() + "should have same rows", ErrorCodes::LOGICAL_ERROR); + throw Exception("Non-const array columns in function " + getName() + "should have same rows", ErrorCodes::LOGICAL_ERROR); } } From 32692fb0dd0352ae0a8fd0e4a0d74f0c72c449a9 Mon Sep 17 00:00:00 2001 From: qianlixiang Date: Sun, 31 Mar 2019 12:52:24 +0800 Subject: [PATCH 030/102] Fixed style check errors --- dbms/src/Functions/arrayIntersect.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index 0cb941a3d68..148e781af33 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -284,14 +284,14 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays(con for (auto i : ext::range(0, columns_number)) { if (arrays.is_const[i]) - continue; + continue; size_t rows = arrays.offsets[i]->size(); if (arrays.base_rows == 0 && rows > 0) arrays.base_rows = rows; else if (arrays.base_rows != rows) throw Exception("Non-const array columns 
in function " + getName() + "should have same rows", ErrorCodes::LOGICAL_ERROR); - } + } } return arrays; From bd6bc86ceb943a38ee0b21c6f859e1159c0fc901 Mon Sep 17 00:00:00 2001 From: qianlixiang Date: Sun, 31 Mar 2019 13:03:08 +0800 Subject: [PATCH 031/102] Fixed style check errors --- dbms/src/Functions/arrayIntersect.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index 148e781af33..38de7227325 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -283,7 +283,7 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays(con { for (auto i : ext::range(0, columns_number)) { - if (arrays.is_const[i]) + if (arrays.is_const[i]) continue; size_t rows = arrays.offsets[i]->size(); From 6c77d418388be007df3b6e8c753b1254ee236bd2 Mon Sep 17 00:00:00 2001 From: qianlixiang Date: Sun, 31 Mar 2019 14:59:21 +0800 Subject: [PATCH 032/102] Fixed stateless tests error --- .../00930_arrayIntersect.reference | 44 +++++++++---------- .../0_stateless/00930_arrayIntersect.sql | 24 +++++----- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00930_arrayIntersect.reference b/dbms/tests/queries/0_stateless/00930_arrayIntersect.reference index 57cf8b8baf0..31d8d92cd89 100644 --- a/dbms/tests/queries/0_stateless/00930_arrayIntersect.reference +++ b/dbms/tests/queries/0_stateless/00930_arrayIntersect.reference @@ -1,19 +1,19 @@ [] -[1,2] [1] [1,2] -[] -[] -[] -[] -[] -[] -[] -[] [1,2] +[] +[] +[] +[] +[] +[] +[] +[] +[] [1] [1,2] -[] +[1,2] [1,2] [1,2] [1,2] @@ -22,22 +22,22 @@ [] [] [] -[1,2] -[1,2] +[] [1] -[] -[] -[] -[] -[] -[] -[] -[] -[] [1,2] [1,2] +[] +[] +[] +[] +[] +[] +[] +[] +[] [1] -[] +[1,2] +[1,2] [1,2] [1,2] [1,2] diff --git a/dbms/tests/queries/0_stateless/00930_arrayIntersect.sql b/dbms/tests/queries/0_stateless/00930_arrayIntersect.sql index 08c20a517ea..f034c74a62c 100644 --- a/dbms/tests/queries/0_stateless/00930_arrayIntersect.sql +++ b/dbms/tests/queries/0_stateless/00930_arrayIntersect.sql @@ -7,21 +7,21 @@ insert into test.array_intersect values ('2019-01-01', [1,2]); insert into test.array_intersect values ('2019-01-01', [1]); insert into test.array_intersect values ('2019-01-01', []); -select arrayIntersect(arr, [1,2]) from test.array_intersect; -select arrayIntersect(arr, []) from test.array_intersect; -select arrayIntersect([], arr) from test.array_intersect; -select arrayIntersect([1,2], arr) from test.array_intersect; -select arrayIntersect([1,2], [1,2,3,4]) from test.array_intersect; -select arrayIntersect([], []) from test.array_intersect; +select arrayIntersect(arr, [1,2]) from test.array_intersect order by arr; +select arrayIntersect(arr, []) from test.array_intersect order by arr; +select arrayIntersect([], arr) from test.array_intersect order by arr; +select arrayIntersect([1,2], arr) from test.array_intersect order by arr; +select arrayIntersect([1,2], [1,2,3,4]) from test.array_intersect order by arr; +select arrayIntersect([], []) from test.array_intersect order by arr; optimize table test.array_intersect; -select arrayIntersect(arr, [1,2]) from test.array_intersect; -select arrayIntersect(arr, []) from test.array_intersect; -select arrayIntersect([], arr) from test.array_intersect; -select arrayIntersect([1,2], arr) from test.array_intersect; -select arrayIntersect([1,2], [1,2,3,4]) from test.array_intersect; -select arrayIntersect([], []) from test.array_intersect; 
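-- (Note added for clarity, not part of the original patch: the ORDER BY arr added
-- below makes the result rows deterministic; after OPTIMIZE merges the four inserted
-- parts, the scan order of the rows is otherwise unspecified.)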
+select arrayIntersect(arr, [1,2]) from test.array_intersect order by arr; +select arrayIntersect(arr, []) from test.array_intersect order by arr; +select arrayIntersect([], arr) from test.array_intersect order by arr; +select arrayIntersect([1,2], arr) from test.array_intersect order by arr; +select arrayIntersect([1,2], [1,2,3,4]) from test.array_intersect order by arr; +select arrayIntersect([], []) from test.array_intersect order by arr; drop table if exists test.array_intersect; From 11997ed772081effb7721950b57561615ca74faa Mon Sep 17 00:00:00 2001 From: chertus Date: Sun, 31 Mar 2019 13:56:54 +0300 Subject: [PATCH 033/102] fix build --- dbms/src/Common/SortedLookupPODArray.h | 5 ++--- dbms/src/Interpreters/RowRefs.cpp | 2 +- dbms/src/Interpreters/RowRefs.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/SortedLookupPODArray.h b/dbms/src/Common/SortedLookupPODArray.h index 72cb5648735..c01cfe95605 100644 --- a/dbms/src/Common/SortedLookupPODArray.h +++ b/dbms/src/Common/SortedLookupPODArray.h @@ -18,7 +18,7 @@ class SortedLookupPODArray : private PaddedPODArray { public: using Base = PaddedPODArray; - using Base::PODArray; + using typename Base::PODArray; using Base::cbegin; using Base::cend; @@ -45,5 +45,4 @@ private: bool sorted = false; }; - -} \ No newline at end of file +} diff --git a/dbms/src/Interpreters/RowRefs.cpp b/dbms/src/Interpreters/RowRefs.cpp index 18b144e984e..aca948c9270 100644 --- a/dbms/src/Interpreters/RowRefs.cpp +++ b/dbms/src/Interpreters/RowRefs.cpp @@ -72,4 +72,4 @@ std::optional> AsofRowRefs::getTypeSize(con return {}; } -} \ No newline at end of file +} diff --git a/dbms/src/Interpreters/RowRefs.h b/dbms/src/Interpreters/RowRefs.h index 84f2a91af34..e6ba5daef35 100644 --- a/dbms/src/Interpreters/RowRefs.h +++ b/dbms/src/Interpreters/RowRefs.h @@ -88,4 +88,4 @@ private: mutable Lookups lookups; }; -} \ No newline at end of file +} From bf2d1a854b89e8ff2124852282ca07e129145f5d Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sun, 31 Mar 2019 16:19:54 +0200 Subject: [PATCH 034/102] fix docs Dockerfile --- website/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/Dockerfile b/website/Dockerfile index ee4f9ffccdc..64eb0ce5e33 100644 --- a/website/Dockerfile +++ b/website/Dockerfile @@ -1,4 +1,4 @@ FROM nginx:mainline -COPY . /usr/share/nginx/html/public +COPY . /usr/share/nginx/html COPY nginx/nginx.conf /etc/nginx/nginx.conf COPY nginx/default.conf /etc/nginx/conf.d/default.conf From 4a7393cfc6ffa403b0183fd66bb40886e7b7a680 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sun, 31 Mar 2019 20:03:57 +0100 Subject: [PATCH 035/102] include the asof column in the block stored. 
overall not working yet but at least doesnt crash anymore --- dbms/src/Interpreters/Join.cpp | 39 ++++++++++++++++--- .../00927_asof_join_correct_bt.reference | 5 +++ .../00927_asof_join_correct_bt.sql | 15 +++++++ 3 files changed, 53 insertions(+), 6 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.reference create mode 100644 dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.sql diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 2d959be98f2..cb1be05015a 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -534,8 +534,15 @@ bool Join::insertFromBlock(const Block & block) NameSet erased; /// HOTFIX: there could be duplicates in JOIN ON section /// Remove the key columns from stored_block, as they are not needed. + /// However, do not erase the ASOF column if this is an asof join for (const auto & name : key_names_right) { + if (strictness == ASTTableJoin::Strictness::Asof && name == key_names_right.back()) + { + LOG_DEBUG(log, "preventing removal of ASOF join column with name=" << name); + break; // this is the last column so break is OK + } + if (!erased.count(name)) stored_block->erase(stored_block->getPositionByName(name)); erased.insert(name); @@ -606,8 +613,13 @@ public: void appendFromBlock(const Block & block, size_t row_num) { - for (size_t j = 0; j < right_indexes.size(); ++j) + std::cout << "appendFromBlock block=" << block.dumpStructure() << " row_num=" << row_num << std::endl; + + + for (size_t j = 0; j < right_indexes.size(); ++j) { + std::cout << "right_index=" << right_indexes[j] << std::endl; columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num); + } } @@ -616,19 +628,25 @@ public: for (size_t j = 0; j < right_indexes.size(); ++j) columns[j]->insertDefault(); } + void addExtraColumn(const ColumnWithTypeAndName & src_column) + { + addColumn(src_column, columns.size()); + } + private: - TypeAndNames type_name; - MutableColumns columns; - std::vector right_indexes; - void addColumn(const ColumnWithTypeAndName & src_column, size_t idx) { + std::cout << "adding column from src=" << src_column.dumpStructure() << " idx=" << idx << std::endl; columns.push_back(src_column.column->cloneEmpty()); columns.back()->reserve(src_column.column->size()); type_name.emplace_back(src_column.type, src_column.name); right_indexes.push_back(idx); } + + TypeAndNames type_name; + MutableColumns columns; + std::vector right_indexes; }; template @@ -825,17 +843,26 @@ void Join::joinBlockImpl( num_columns_to_skip = keys_size; /// Add new columns to the block. 
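    /// (Clarifying note: for ASOF joins the hunk below also registers the ASOF key
    /// column itself as an extra added column, so the matched right-hand time value
    /// can be emitted alongside the joined columns.)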
- AddedColumns added(sample_block_with_columns_to_add, block_with_columns_to_add, block, num_columns_to_skip); + if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) + { + // Add the last key column which is the ASOF key + added.addExtraColumn(sample_block_with_keys.safeGetByPosition(sample_block_with_keys.columns()-1)); + } + std::unique_ptr offsets_to_replicate; IColumn::Filter row_filter = switchJoinRightColumns( type, maps_, block.rows(), key_columns, key_sizes, added, null_map, offsets_to_replicate); + LOG_DEBUG(log, "joinBlockImpl - switchJoinRightColumns"); + for (size_t i = 0; i < added.size(); ++i) block.insert(added.moveColumn(i)); + LOG_DEBUG(log, "joinBlockImpl - after insert: " << block.dumpStructure()); + /// Filter & insert missing rows auto right_keys = requiredRightKeys(key_names_right, columns_added_by_join); diff --git a/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.reference b/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.reference new file mode 100644 index 00000000000..2e22280c8da --- /dev/null +++ b/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.reference @@ -0,0 +1,5 @@ +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 diff --git a/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.sql b/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.sql new file mode 100644 index 00000000000..27860bb5d05 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.sql @@ -0,0 +1,15 @@ +USE test; + +DROP TABLE IF EXISTS A; +DROP TABLE IF EXISTS B; + +CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5); + +CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); + +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); + +DROP TABLE A; +DROP TABLE B; From f80fb09b810e46d710bc8e0b5a20f6925f7da417 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sun, 31 Mar 2019 23:19:22 +0300 Subject: [PATCH 036/102] Hyperscan better usage of scratch space and error handling --- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Functions/FunctionsStringRegex.cpp | 11 ++++++---- dbms/src/Functions/Regexps.h | 24 ++++++++++++++++++--- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 141fc9007b8..49e0937282e 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -421,6 +421,7 @@ namespace ErrorCodes extern const int UNKNOWN_PROTOBUF_FORMAT = 444; extern const int CANNOT_MPROTECT = 445; extern const int FUNCTION_NOT_ALLOWED = 446; + extern const int HYPERSCAN_CANNOT_SCAN_TEXT = 447; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Functions/FunctionsStringRegex.cpp b/dbms/src/Functions/FunctionsStringRegex.cpp index a36c2900a60..d1d4a7b246f 100644 --- a/dbms/src/Functions/FunctionsStringRegex.cpp +++ b/dbms/src/Functions/FunctionsStringRegex.cpp @@ -40,6 +40,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int TOO_MANY_BYTES; extern const int NOT_IMPLEMENTED; + extern const int HYPERSCAN_CANNOT_SCAN_TEXT; } /// Is the LIKE expression reduced to finding a substring in a string? 
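// (Illustrative sketch of the pattern applied in the next hunk, with assumed
// surrounding context: the compiled hs_database_t is shared, the scratch is
// allocated once at compile time and cloned per call, and every Hyperscan
// return code is checked instead of being ignored.)
//
//     hs_scratch_t * scratch = nullptr;
//     if (hs_clone_scratch(shared_scratch, &scratch) != HS_SUCCESS)
//         throw Exception("Could not clone scratch space for hyperscan",
//             ErrorCodes::CANNOT_ALLOCATE_MEMORY);
//     if (hs_scan(db, data, length, 0, scratch, on_match, &context) != HS_SUCCESS)
//         throw Exception("Failed to scan with hyperscan",
//             ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);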
@@ -289,10 +290,10 @@ struct MultiMatchAnyImpl #if USE_HYPERSCAN const auto & hyperscan_regex = MultiRegexps::get(needles, edit_distance); hs_scratch_t * scratch = nullptr; - hs_error_t err = hs_alloc_scratch(hyperscan_regex->get(), &scratch); + hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch); if (err != HS_SUCCESS) - throw Exception("Could not allocate scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); MultiRegexps::ScratchPtr smart_scratch(scratch); @@ -316,14 +317,16 @@ struct MultiMatchAnyImpl if (length > std::numeric_limits::max()) throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES); res[i] = 0; - hs_scan( - hyperscan_regex->get(), + err = hs_scan( + hyperscan_regex->getDB(), reinterpret_cast(haystack_data.data()) + offset, length, 0, smart_scratch.get(), on_match, &res[i]); + if (err != HS_SUCCESS) + throw Exception("Failed to scan with hyperscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT); offset = haystack_offsets[i]; } #else diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index c92f739cf82..4ca9072a1f4 100644 --- a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -84,8 +84,20 @@ namespace MultiRegexps }; using CompilerError = std::unique_ptr>; - using ScratchPtr = std::unique_ptr>; - using Regexps = std::unique_ptr>; + using ScratchPtr = std::unique_ptr>; + using DataBasePtr = std::unique_ptr>; + + class Regexps + { + public: + Regexps(hs_database_t * db_, hs_scratch_t * scratch_) : db{db_}, scratch{scratch_} {} + + hs_database_t * getDB() const { return db.get(); }; + hs_scratch_t * getScratch() const { return scratch.get(); }; + private: + DataBasePtr db; + ScratchPtr scratch; + }; using Pool = ObjectPoolMap, std::optional>>; @@ -183,7 +195,13 @@ namespace MultiRegexps ProfileEvents::increment(ProfileEvents::RegexpCreated); - return new Regexps{db}; + hs_scratch_t * scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + + if (err != HS_SUCCESS) + throw Exception("Could not allocate scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + return new Regexps{db, scratch}; }); } } From d509c226ab7bf6082fa33627f303af540a4a252c Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sun, 31 Mar 2019 23:55:36 +0300 Subject: [PATCH 037/102] Pool was not doing what I expected, great improvement of compilation --- dbms/src/Functions/Regexps.h | 198 +++++++++++++++++++---------------- 1 file changed, 108 insertions(+), 90 deletions(-) diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index 4ca9072a1f4..33e4735ed80 100644 --- a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -1,6 +1,8 @@ #pragma once +#include #include +#include #include #include #include @@ -11,6 +13,7 @@ #include #include + #include #if USE_HYPERSCAN # if __has_include() @@ -99,11 +102,107 @@ namespace MultiRegexps ScratchPtr scratch; }; - using Pool = ObjectPoolMap, std::optional>>; + struct Pool + { + std::mutex mutex; + std::map, std::optional>, Regexps> storage; + }; + + template + inline Regexps constructRegexps(const std::vector & str_patterns, std::optional edit_distance) + { + (void)edit_distance; + /// Common pointers + std::vector ptrns; + std::vector flags; + + /// Pointer for external edit distance compilation + std::vector ext_exprs; + std::vector ext_exprs_ptrs; + + ptrns.reserve(str_patterns.size()); + flags.reserve(str_patterns.size()); + + if 
constexpr (CompileForEditDistance) + { + ext_exprs.reserve(str_patterns.size()); + ext_exprs_ptrs.reserve(str_patterns.size()); + } + + for (const StringRef ref : str_patterns) + { + ptrns.push_back(ref.data); + flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH); + if constexpr (CompileForEditDistance) + { + ext_exprs.emplace_back(); + ext_exprs.back().flags = HS_EXT_FLAG_EDIT_DISTANCE; + ext_exprs.back().edit_distance = edit_distance.value(); + ext_exprs_ptrs.push_back(&ext_exprs.back()); + } + } + hs_database_t * db = nullptr; + hs_compile_error_t * compile_error; + + + std::unique_ptr ids; + + if constexpr (FindAnyIndex) + { + ids.reset(new unsigned int[ptrns.size()]); + for (size_t i = 0; i < ptrns.size(); ++i) + ids[i] = i + 1; + } + + hs_error_t err; + if constexpr (!CompileForEditDistance) + err = hs_compile_multi( + ptrns.data(), + flags.data(), + ids.get(), + ptrns.size(), + HS_MODE_BLOCK, + nullptr, + &db, + &compile_error); + else + err = hs_compile_ext_multi( + ptrns.data(), + flags.data(), + ids.get(), + ext_exprs_ptrs.data(), + ptrns.size(), + HS_MODE_BLOCK, + nullptr, + &db, + &compile_error); + + if (err != HS_SUCCESS) + { + CompilerError error(compile_error); + + if (error->expression < 0) + throw Exception(String(error->message), ErrorCodes::LOGICAL_ERROR); + else + throw Exception( + "Pattern '" + str_patterns[error->expression] + "' failed with error '" + String(error->message), + ErrorCodes::LOGICAL_ERROR); + } + + ProfileEvents::increment(ProfileEvents::RegexpCreated); + + hs_scratch_t * scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + + if (err != HS_SUCCESS) + throw Exception("Could not allocate scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + return Regexps{db, scratch}; + } /// If CompileForEditDistance is False, edit_distance must be nullopt template - inline Pool::Pointer get(const std::vector & patterns, std::optional edit_distance) + inline Regexps * get(const std::vector & patterns, std::optional edit_distance) { /// C++11 has thread-safe function-local statics on most modern compilers. static Pool known_regexps; /// Different variables for different pattern parameters. 
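// (Design note added for clarity: get() returns a raw pointer into the std::map node,
// which std::map never relocates on later insertions, so callers may keep using the
// Regexps after the lock is released. The hs_database_t inside is safe to share
// across threads for scanning; only the scratch must be cloned per caller, as done
// in FunctionsStringRegex.cpp above.)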
@@ -113,96 +212,15 @@ namespace MultiRegexps for (const StringRef & ref : patterns) str_patterns.push_back(ref.toString()); - return known_regexps.get({str_patterns, edit_distance}, [&str_patterns, edit_distance] - { - (void)edit_distance; - /// Common pointers - std::vector ptrns; - std::vector flags; + std::unique_lock lock(known_regexps.mutex); - /// Pointer for external edit distance compilation - std::vector ext_exprs; - std::vector ext_exprs_ptrs; + auto it = known_regexps.storage.find(std::pair{str_patterns, edit_distance}); + if (known_regexps.storage.end() == it) + it = known_regexps.storage.emplace( + std::pair{str_patterns, edit_distance}, + constructRegexps(str_patterns, edit_distance)).first; - ptrns.reserve(str_patterns.size()); - flags.reserve(str_patterns.size()); - - if constexpr (CompileForEditDistance) - { - ext_exprs.reserve(str_patterns.size()); - ext_exprs_ptrs.reserve(str_patterns.size()); - } - - for (const StringRef ref : str_patterns) - { - ptrns.push_back(ref.data); - flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH); - if constexpr (CompileForEditDistance) - { - ext_exprs.emplace_back(); - ext_exprs.back().flags = HS_EXT_FLAG_EDIT_DISTANCE; - ext_exprs.back().edit_distance = edit_distance.value(); - ext_exprs_ptrs.push_back(&ext_exprs.back()); - } - } - hs_database_t * db = nullptr; - hs_compile_error_t * compile_error; - - - std::unique_ptr ids; - - if constexpr (FindAnyIndex) - { - ids.reset(new unsigned int[ptrns.size()]); - for (size_t i = 0; i < ptrns.size(); ++i) - ids[i] = i + 1; - } - - hs_error_t err; - if constexpr (!CompileForEditDistance) - err = hs_compile_multi( - ptrns.data(), - flags.data(), - ids.get(), - ptrns.size(), - HS_MODE_BLOCK, - nullptr, - &db, - &compile_error); - else - err = hs_compile_ext_multi( - ptrns.data(), - flags.data(), - ids.get(), - ext_exprs_ptrs.data(), - ptrns.size(), - HS_MODE_BLOCK, - nullptr, - &db, - &compile_error); - - if (err != HS_SUCCESS) - { - CompilerError error(compile_error); - - if (error->expression < 0) - throw Exception(String(error->message), ErrorCodes::LOGICAL_ERROR); - else - throw Exception( - "Pattern '" + str_patterns[error->expression] + "' failed with error '" + String(error->message), - ErrorCodes::LOGICAL_ERROR); - } - - ProfileEvents::increment(ProfileEvents::RegexpCreated); - - hs_scratch_t * scratch = nullptr; - err = hs_alloc_scratch(db, &scratch); - - if (err != HS_SUCCESS) - throw Exception("Could not allocate scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); - - return new Regexps{db, scratch}; - }); + return &it->second; } } From 8edb5c5b2dc85eba7761cc5a882b66bba129eb92 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sun, 31 Mar 2019 23:59:18 +0300 Subject: [PATCH 038/102] Some cosmetics --- dbms/src/Functions/Regexps.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index 33e4735ed80..9e415e39fc6 100644 --- a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -104,7 +104,9 @@ namespace MultiRegexps struct Pool { + /// Mutex for finding in map std::mutex mutex; + /// Patterns + possible edit_distance to database and scratch std::map, std::optional>, Regexps> storage; }; @@ -219,7 +221,7 @@ namespace MultiRegexps it = known_regexps.storage.emplace( std::pair{str_patterns, edit_distance}, constructRegexps(str_patterns, edit_distance)).first; - + lock.unlock(); return &it->second; } } From 3ce9686bc497c9790cbeca4481fb729a91130456 Mon Sep 17 
00:00:00 2001 From: Danila Kutenin Date: Mon, 1 Apr 2019 00:04:45 +0300 Subject: [PATCH 039/102] Some cosmetics --- dbms/src/Functions/Regexps.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index 9e415e39fc6..5a5e7190acc 100644 --- a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -90,6 +90,7 @@ namespace MultiRegexps using ScratchPtr = std::unique_ptr>; using DataBasePtr = std::unique_ptr>; + /// Database is thread safe across multiple threads and Scratch is not but we can copy it whenever we use it in the searcher class Regexps { public: @@ -216,12 +217,15 @@ namespace MultiRegexps std::unique_lock lock(known_regexps.mutex); - auto it = known_regexps.storage.find(std::pair{str_patterns, edit_distance}); + auto it = known_regexps.storage.find({str_patterns, edit_distance}); + if (known_regexps.storage.end() == it) it = known_regexps.storage.emplace( std::pair{str_patterns, edit_distance}, constructRegexps(str_patterns, edit_distance)).first; + lock.unlock(); + return &it->second; } } From 7ea03f6fa78d122a1083a0a8f0700b098c52d2af Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sun, 31 Mar 2019 22:14:43 +0100 Subject: [PATCH 040/102] appears to work and get the correct timestamps --- dbms/src/Interpreters/Join.cpp | 79 +++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index cb1be05015a..a0c667da175 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -420,6 +420,8 @@ namespace if (emplace_result.isInserted()) time_series_map = new (time_series_map) typename Map::mapped_type(join.getAsofType()); + + std::cout << "inserting rhs block=" << stored_block->dumpStructure() << std::endl; time_series_map->insert(asof_column, stored_block, i, pool); } }; @@ -485,7 +487,6 @@ namespace bool Join::insertFromBlock(const Block & block) { std::unique_lock lock(rwlock); - LOG_DEBUG(log, "joinBlock: " << block.dumpStructure()); if (empty()) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); @@ -549,6 +550,8 @@ bool Join::insertFromBlock(const Block & block) } } + LOG_DEBUG(log, "insertFromBlock stored_block=" << stored_block->dumpStructure()); + size_t size = stored_block->columns(); /// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them. @@ -586,7 +589,7 @@ public: AddedColumns(const Block & sample_block_with_columns_to_add, const Block & block_with_columns_to_add, - const Block & block, size_t num_columns_to_skip) + const Block & block) { size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); @@ -599,11 +602,32 @@ public: const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); /// Don't insert column if it's in left block or not explicitly required. 
- if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) - addColumn(src_column, num_columns_to_skip + i); + if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) { + addColumn(src_column); + } } } + void addColumn(const ColumnWithTypeAndName & src_column) + { + std::cout << "adding column from src=" << src_column.dumpStructure() << std::endl; + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name); + } + + void fillRightIndices(const Block& rhs_block) + { + for(auto& tn : type_name) { + right_indexes.push_back(rhs_block.getPositionByName(tn.second)); + } + + for(unsigned i = 0; i < right_indexes.size(); i++) { + std::cout << "ri i=" << i << " ri=" << right_indexes[i] << std::endl; + } + loaded = true; + } + size_t size() const { return columns.size(); } ColumnWithTypeAndName moveColumn(size_t i) @@ -613,39 +637,29 @@ public: void appendFromBlock(const Block & block, size_t row_num) { - std::cout << "appendFromBlock block=" << block.dumpStructure() << " row_num=" << row_num << std::endl; + if(!loaded) + return; + std::cout << "appendFromBlock block=" << block.dumpStructure() << " row_num=" << row_num << " right_indexes=["; - for (size_t j = 0; j < right_indexes.size(); ++j) { - std::cout << "right_index=" << right_indexes[j] << std::endl; + for (size_t j = 0; j < columns.size(); ++j) { + std::cout << right_indexes[j] << " "; columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num); } - } + std::cout << "]" << std::endl; + } void appendDefaultRow() { - for (size_t j = 0; j < right_indexes.size(); ++j) + for (size_t j = 0; j < columns.size(); ++j) columns[j]->insertDefault(); } - void addExtraColumn(const ColumnWithTypeAndName & src_column) - { - addColumn(src_column, columns.size()); - } - private: - void addColumn(const ColumnWithTypeAndName & src_column, size_t idx) - { - std::cout << "adding column from src=" << src_column.dumpStructure() << " idx=" << idx << std::endl; - columns.push_back(src_column.column->cloneEmpty()); - columns.back()->reserve(src_column.column->size()); - type_name.emplace_back(src_column.type, src_column.name); - right_indexes.push_back(idx); - } - TypeAndNames type_name; MutableColumns columns; + bool loaded = false; std::vector right_indexes; }; @@ -838,17 +852,22 @@ void Join::joinBlockImpl( * For FULL/RIGHT JOIN, the saved blocks contain keys; * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. */ - size_t num_columns_to_skip = 0; - if constexpr (right_or_full) - num_columns_to_skip = keys_size; +// size_t num_columns_to_skip = 0; +// if constexpr (right_or_full) +// num_columns_to_skip = keys_size; /// Add new columns to the block. 
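    /// (Clarifying note: num_columns_to_skip can be retired in the hunk below because
    /// fillRightIndices() now resolves each added column by name against the stored
    /// right-hand block, rather than assuming a fixed positional offset.)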
- AddedColumns added(sample_block_with_columns_to_add, block_with_columns_to_add, block, num_columns_to_skip); + LOG_DEBUG(log, "joinBlockImpl - sample_block_with_columns_to_add" << sample_block_with_columns_to_add.dumpStructure()); + + AddedColumns added(sample_block_with_columns_to_add, block_with_columns_to_add, block); if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - { - // Add the last key column which is the ASOF key - added.addExtraColumn(sample_block_with_keys.safeGetByPosition(sample_block_with_keys.columns()-1)); + added.addColumn(sample_block_with_keys.safeGetByPosition(sample_block_with_keys.columns()-1)); + + if(!blocks.empty()) { + added.fillRightIndices(*blocks.begin()); + } else { + LOG_DEBUG(log, "unable to fill right index of added columns"); } std::unique_ptr offsets_to_replicate; From 02320de49c2357f99d8682743024e2604756aa1a Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sun, 31 Mar 2019 22:22:58 +0100 Subject: [PATCH 041/102] fix up the timestamps to match the real timestamps --- .../00927_asof_join_noninclusive.reference | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00927_asof_join_noninclusive.reference b/dbms/tests/queries/0_stateless/00927_asof_join_noninclusive.reference index b4022cef7da..5d19ee97374 100644 --- a/dbms/tests/queries/0_stateless/00927_asof_join_noninclusive.reference +++ b/dbms/tests/queries/0_stateless/00927_asof_join_noninclusive.reference @@ -1,29 +1,29 @@ 1 1970-01-01 00:00:01 1 0 0000-00-00 00:00:00 0 1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 -1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:03 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 -1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:05 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 2 1970-01-01 00:00:01 1 0 0000-00-00 00:00:00 0 2 1970-01-01 00:00:02 2 0 0000-00-00 00:00:00 0 2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 -2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:04 2 -2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:05 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 3 1970-01-01 00:00:01 1 0 0000-00-00 00:00:00 0 3 1970-01-01 00:00:02 2 0 0000-00-00 00:00:00 0 3 1970-01-01 00:00:03 3 0 0000-00-00 00:00:00 0 3 1970-01-01 00:00:04 4 0 0000-00-00 00:00:00 0 3 1970-01-01 00:00:05 5 0 0000-00-00 00:00:00 0 1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 -1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:03 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 -1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:05 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 -2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:04 2 -2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:05 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 -1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:03 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 -1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:05 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 -2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:04 2 -2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:05 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 From 27776ca929c3b529dcb505aff0d00a1a712a4771 
Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sun, 31 Mar 2019 22:56:37 +0100 Subject: [PATCH 042/102] fix up wrong assumption that the sample_block_with_keys has same ordering as key_names_right --- dbms/src/Interpreters/Join.cpp | 8 +++++++- .../00927_asof_join_correct_bt.reference | 10 ++++++++++ .../0_stateless/00927_asof_join_correct_bt.sql | 14 ++++++++++++-- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index a0c667da175..110cce8e7da 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -330,6 +330,7 @@ void Join::setSampleBlock(const Block & block) sample_block_with_columns_to_add = materializeBlock(block); + LOG_DEBUG(log, "setSampleBlock sample_block_with_columns_to_add " << sample_block_with_columns_to_add.dumpStructure()); /// Move from `sample_block_with_columns_to_add` key columns to `sample_block_with_keys`, keeping the order. size_t pos = 0; @@ -361,6 +362,9 @@ void Join::setSampleBlock(const Block & block) if (use_nulls && isLeftOrFull(kind)) for (size_t i = 0; i < num_columns_to_add; ++i) convertColumnToNullable(sample_block_with_columns_to_add.getByPosition(i)); + + LOG_DEBUG(log, "setSampleBlock sample_block_with_keys " << sample_block_with_keys.dumpStructure()); + LOG_DEBUG(log, "setSampleBlock sample_block_with_columns_to_add " << sample_block_with_columns_to_add.dumpStructure()); } namespace @@ -618,6 +622,7 @@ public: void fillRightIndices(const Block& rhs_block) { + std::cout << "rhs_block=" << rhs_block.dumpStructure() << std::endl; for(auto& tn : type_name) { right_indexes.push_back(rhs_block.getPositionByName(tn.second)); } @@ -861,8 +866,9 @@ void Join::joinBlockImpl( AddedColumns added(sample_block_with_columns_to_add, block_with_columns_to_add, block); + // the last column in the key names is the asof column if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - added.addColumn(sample_block_with_keys.safeGetByPosition(sample_block_with_keys.columns()-1)); + added.addColumn(sample_block_with_keys.getByName(key_names_right.back())); if(!blocks.empty()) { added.fillRightIndices(*blocks.begin()); diff --git a/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.reference b/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.reference index 2e22280c8da..bb199d0159a 100644 --- a/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.reference +++ b/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.reference @@ -3,3 +3,13 @@ 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 diff --git a/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.sql b/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.sql index 27860bb5d05..a813f2fa410 100644 --- a/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.sql +++ b/dbms/tests/queries/0_stateless/00927_asof_join_correct_bt.sql @@ -8,8 +8,18 @@ INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5); CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); - SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); +DROP TABLE B; + + +CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); +SELECT A.k, 
A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); +DROP TABLE B; + +CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); +DROP TABLE B; DROP TABLE A; -DROP TABLE B; From a64b8afc7e7511430e603ef653dd80b3a471517e Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Mon, 1 Apr 2019 00:09:00 +0100 Subject: [PATCH 043/102] cleanup --- dbms/src/Interpreters/Join.cpp | 162 +++++++++++++-------------------- dbms/src/Interpreters/Join.h | 4 + 2 files changed, 69 insertions(+), 97 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 110cce8e7da..8a17fd8e22c 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -330,7 +330,9 @@ void Join::setSampleBlock(const Block & block) sample_block_with_columns_to_add = materializeBlock(block); - LOG_DEBUG(log, "setSampleBlock sample_block_with_columns_to_add " << sample_block_with_columns_to_add.dumpStructure()); + + blocklist_sample = Block(block.getColumnsWithTypeAndName()); + prepareBlockListStructure(blocklist_sample); /// Move from `sample_block_with_columns_to_add` key columns to `sample_block_with_keys`, keeping the order. size_t pos = 0; @@ -362,9 +364,6 @@ void Join::setSampleBlock(const Block & block) if (use_nulls && isLeftOrFull(kind)) for (size_t i = 0; i < num_columns_to_add; ++i) convertColumnToNullable(sample_block_with_columns_to_add.getByPosition(i)); - - LOG_DEBUG(log, "setSampleBlock sample_block_with_keys " << sample_block_with_keys.dumpStructure()); - LOG_DEBUG(log, "setSampleBlock sample_block_with_columns_to_add " << sample_block_with_columns_to_add.dumpStructure()); } namespace @@ -424,8 +423,6 @@ namespace if (emplace_result.isInserted()) time_series_map = new (time_series_map) typename Map::mapped_type(join.getAsofType()); - - std::cout << "inserting rhs block=" << stored_block->dumpStructure() << std::endl; time_series_map->insert(asof_column, stored_block, i, pool); } }; @@ -488,6 +485,44 @@ namespace } } +void Join::prepareBlockListStructure(Block& stored_block) +{ + if (isRightOrFull(kind)) + { + /** Move the key columns to the beginning of the block. + * This is where NonJoinedBlockInputStream will expect. + */ + size_t key_num = 0; + for (const auto & name : key_names_right) + { + size_t pos = stored_block.getPositionByName(name); + ColumnWithTypeAndName col = stored_block.safeGetByPosition(pos); + stored_block.erase(pos); + stored_block.insert(key_num, std::move(col)); + ++key_num; + } + } + else + { + NameSet erased; /// HOTFIX: there could be duplicates in JOIN ON section + + /// Remove the key columns from stored_block, as they are not needed. 
+ /// However, do not erase the ASOF column if this is an asof join + for (const auto & name : key_names_right) + { + if (strictness == ASTTableJoin::Strictness::Asof && name == key_names_right.back()) + { + LOG_DEBUG(log, "preventing removal of ASOF join column with name=" << name); + break; // this is the last column so break is OK + } + + if (!erased.count(name)) + stored_block.erase(stored_block.getPositionByName(name)); + erased.insert(name); + } + } +} + bool Join::insertFromBlock(const Block & block) { std::unique_lock lock(rwlock); @@ -519,40 +554,7 @@ bool Join::insertFromBlock(const Block & block) blocks.push_back(block); Block * stored_block = &blocks.back(); - if (isRightOrFull(kind)) - { - /** Move the key columns to the beginning of the block. - * This is where NonJoinedBlockInputStream will expect. - */ - size_t key_num = 0; - for (const auto & name : key_names_right) - { - size_t pos = stored_block->getPositionByName(name); - ColumnWithTypeAndName col = stored_block->safeGetByPosition(pos); - stored_block->erase(pos); - stored_block->insert(key_num, std::move(col)); - ++key_num; - } - } - else - { - NameSet erased; /// HOTFIX: there could be duplicates in JOIN ON section - - /// Remove the key columns from stored_block, as they are not needed. - /// However, do not erase the ASOF column if this is an asof join - for (const auto & name : key_names_right) - { - if (strictness == ASTTableJoin::Strictness::Asof && name == key_names_right.back()) - { - LOG_DEBUG(log, "preventing removal of ASOF join column with name=" << name); - break; // this is the last column so break is OK - } - - if (!erased.count(name)) - stored_block->erase(stored_block->getPositionByName(name)); - erased.insert(name); - } - } + prepareBlockListStructure(*stored_block); LOG_DEBUG(log, "insertFromBlock stored_block=" << stored_block->dumpStructure()); @@ -593,7 +595,9 @@ public: AddedColumns(const Block & sample_block_with_columns_to_add, const Block & block_with_columns_to_add, - const Block & block) + const Block & block, + const Block & blocklist_sample, + const ColumnsWithTypeAndName& extras) { size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); @@ -606,31 +610,15 @@ public: const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); /// Don't insert column if it's in left block or not explicitly required. 
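            /// (Here `block` is the left-hand block being joined: a column already
            /// present there must not be duplicated, and a column missing from
            /// `block_with_columns_to_add` was not requested by the query.)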
- if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) { + if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) addColumn(src_column); - } - } - } - - void addColumn(const ColumnWithTypeAndName & src_column) - { - std::cout << "adding column from src=" << src_column.dumpStructure() << std::endl; - columns.push_back(src_column.column->cloneEmpty()); - columns.back()->reserve(src_column.column->size()); - type_name.emplace_back(src_column.type, src_column.name); - } - - void fillRightIndices(const Block& rhs_block) - { - std::cout << "rhs_block=" << rhs_block.dumpStructure() << std::endl; - for(auto& tn : type_name) { - right_indexes.push_back(rhs_block.getPositionByName(tn.second)); } - for(unsigned i = 0; i < right_indexes.size(); i++) { - std::cout << "ri i=" << i << " ri=" << right_indexes[i] << std::endl; - } - loaded = true; + for (auto& extra : extras) + addColumn(extra); + + for (auto& tn : type_name) + right_indexes.push_back(blocklist_sample.getPositionByName(tn.second)); } size_t size() const { return columns.size(); } @@ -642,30 +630,28 @@ public: void appendFromBlock(const Block & block, size_t row_num) { - if(!loaded) - return; - - std::cout << "appendFromBlock block=" << block.dumpStructure() << " row_num=" << row_num << " right_indexes=["; - - for (size_t j = 0; j < columns.size(); ++j) { - std::cout << right_indexes[j] << " "; + for (size_t j = 0; j < right_indexes.size(); ++j) columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num); - } - - std::cout << "]" << std::endl; } + void appendDefaultRow() { - for (size_t j = 0; j < columns.size(); ++j) + for (size_t j = 0; j < right_indexes.size(); ++j) columns[j]->insertDefault(); } private: TypeAndNames type_name; MutableColumns columns; - bool loaded = false; std::vector right_indexes; + + void addColumn(const ColumnWithTypeAndName & src_column) + { + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name); + } }; template @@ -856,40 +842,22 @@ void Join::joinBlockImpl( /** For LEFT/INNER JOIN, the saved blocks do not contain keys. * For FULL/RIGHT JOIN, the saved blocks contain keys; * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. + * For ASOF, the last column is used as the ASOF column */ -// size_t num_columns_to_skip = 0; -// if constexpr (right_or_full) -// num_columns_to_skip = keys_size; - - /// Add new columns to the block. 
- LOG_DEBUG(log, "joinBlockImpl - sample_block_with_columns_to_add" << sample_block_with_columns_to_add.dumpStructure()); - - AddedColumns added(sample_block_with_columns_to_add, block_with_columns_to_add, block); - - // the last column in the key names is the asof column + ColumnsWithTypeAndName extras; if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - added.addColumn(sample_block_with_keys.getByName(key_names_right.back())); - - if(!blocks.empty()) { - added.fillRightIndices(*blocks.begin()); - } else { - LOG_DEBUG(log, "unable to fill right index of added columns"); - } + extras.push_back(sample_block_with_keys.getByName(key_names_right.back())); + AddedColumns added(sample_block_with_columns_to_add, block_with_columns_to_add, block, blocklist_sample, extras); std::unique_ptr offsets_to_replicate; IColumn::Filter row_filter = switchJoinRightColumns( type, maps_, block.rows(), key_columns, key_sizes, added, null_map, offsets_to_replicate); - LOG_DEBUG(log, "joinBlockImpl - switchJoinRightColumns"); - for (size_t i = 0; i < added.size(); ++i) block.insert(added.moveColumn(i)); - LOG_DEBUG(log, "joinBlockImpl - after insert: " << block.dumpStructure()); - /// Filter & insert missing rows - auto right_keys = requiredRightKeys(key_names_right, columns_added_by_join); if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any || STRICTNESS == ASTTableJoin::Strictness::Asof) diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index f6ddaf87af0..77a2abacb5a 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -377,6 +377,10 @@ private: /// Block with key columns in the same order they appear in the right-side table. Block sample_block_with_keys; + /// Block as it would appear in the BlockList + void prepareBlockListStructure(Block& stored_block); + Block blocklist_sample; + Poco::Logger * log; /// Limits for maximum map size. From fca67c8820316a7289eb9301a2d2e0024f44032d Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Mon, 1 Apr 2019 08:05:52 +0300 Subject: [PATCH 044/102] Fix clang build --- dbms/src/Functions/Regexps.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index 5a5e7190acc..d815d475aa5 100644 --- a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -96,8 +96,8 @@ namespace MultiRegexps public: Regexps(hs_database_t * db_, hs_scratch_t * scratch_) : db{db_}, scratch{scratch_} {} - hs_database_t * getDB() const { return db.get(); }; - hs_scratch_t * getScratch() const { return scratch.get(); }; + hs_database_t * getDB() const { return db.get(); } + hs_scratch_t * getScratch() const { return scratch.get(); } private: DataBasePtr db; ScratchPtr scratch; From 6c71f952bce511e6dcfeac7dc20f58431d2953fe Mon Sep 17 00:00:00 2001 From: levysh <33598492+levysh@users.noreply.github.com> Date: Mon, 1 Apr 2019 11:14:15 +0300 Subject: [PATCH 045/102] removed src directory from test paths --- docs/en/development/tests.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index bef50139083..679b1c79432 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -7,7 +7,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference. -Tests are located in `dbms/src/tests/queries` directory. 
There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests.
+Tests are located in `dbms/tests/queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and are not available to the general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests.

Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself.

To run all tests, use `dbms/tests/clickhouse-test` tool. Look `--help` for the list of possible options.

The most simple way to invoke functional tests is to copy `clickhouse-client` to `/usr/bin/`, run `clickhouse-server` and then run `./clickhouse-test` from its own directory.

-To add new test, create a `.sql` or `.sh` file in `dbms/src/tests/queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
+To add a new test, create a `.sql` or `.sh` file in `dbms/tests/queries/0_stateless` directory, check it manually and then generate the `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.

Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.

If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`.

Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper; `shard` is for tests that requires server to listen `127.0.0.*`; `long` is for tests that run slightly longer that one second.

## Known bugs

-If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `dbms/src/tests/queries/bugs` directory. These tests will be moved to `dbms/src/tests/queries/0_stateless` when bugs are fixed.
+If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `dbms/tests/queries/bugs` directory. These tests will be moved to `dbms/tests/queries/0_stateless` when bugs are fixed.
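As a concrete illustration of the workflow described above, a minimal stateless test could look like this (`00000_test.sql` is the placeholder name already used on this page, and the queries are only an example):

```sql
-- dbms/tests/queries/0_stateless/00000_test.sql (illustrative)
SELECT 1 + 1;
SELECT arraySort([3, 1, 2]);
```

Piping this file through `clickhouse-client -n --testmode` as shown above records `2` and `[1,2,3]` as the reference output; `clickhouse-test` later reruns the queries and compares the result against that file.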
## Integration Tests From d4ec3bbf70b6e4229bbf0574e54e48870dceb4a6 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Mon, 1 Apr 2019 11:35:37 +0100 Subject: [PATCH 046/102] fix style --- dbms/src/Interpreters/Join.cpp | 8 ++++---- dbms/src/Interpreters/Join.h | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 8a17fd8e22c..f19781ca380 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -485,7 +485,7 @@ namespace } } -void Join::prepareBlockListStructure(Block& stored_block) +void Join::prepareBlockListStructure(Block & stored_block) { if (isRightOrFull(kind)) { @@ -597,7 +597,7 @@ public: const Block & block_with_columns_to_add, const Block & block, const Block & blocklist_sample, - const ColumnsWithTypeAndName& extras) + const ColumnsWithTypeAndName & extras) { size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); @@ -614,10 +614,10 @@ public: addColumn(src_column); } - for (auto& extra : extras) + for (auto & extra : extras) addColumn(extra); - for (auto& tn : type_name) + for (auto & tn : type_name) right_indexes.push_back(blocklist_sample.getPositionByName(tn.second)); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 77a2abacb5a..01bd1335cbd 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -378,7 +378,6 @@ private: Block sample_block_with_keys; /// Block as it would appear in the BlockList - void prepareBlockListStructure(Block& stored_block); Block blocklist_sample; Poco::Logger * log; @@ -397,6 +396,11 @@ private: void init(Type type_); + /** Take an inserted block and discard everything that does not need to be stored + * Example, remove the keys as they come from the LHS block, but do keep the ASOF timestamps + */ + void prepareBlockListStructure(Block & stored_block); + /// Throw an exception if blocks have different types of key columns. void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right) const; From e8a8da1a4fe30d417cf1c48bc26d941b41edde14 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 1 Apr 2019 16:27:07 +0300 Subject: [PATCH 047/102] Fix array intersect. --- dbms/src/Functions/arrayIntersect.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index 38de7227325..93c588fc4f9 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -432,15 +432,20 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable current_has_nullable = true; else { + typename Map::mapped_type * value = nullptr; + if constexpr (is_numeric_column) - ++map[columns[arg]->getElement(i)]; + value = &map[columns[arg]->getElement(i)]; else if constexpr (std::is_same::value || std::is_same::value) - ++map[columns[arg]->getDataAt(i)]; + value = &map[columns[arg]->getDataAt(i)]; else { const char * data = nullptr; - ++map[columns[arg]->serializeValueIntoArena(i, arena, data)]; + value = &map[columns[arg]->serializeValueIntoArena(i, arena, data)]; } + + if (*value == arg) + ++(*value); } } From cd7368dc6de0e633328d2afad513bf69b3eb5b0a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 1 Apr 2019 16:29:20 +0300 Subject: [PATCH 048/102] Added tests for arrayIntersect. 
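The previous commit changed the per-element counter in `arrayIntersect` so that a value is counted at most once per argument array (the new `if (*value == arg) ++(*value);` guard); before that, duplicates inside a single array could drive the counter up to the number of arguments, reporting a value as common even though some array never contained it. The queries below are taken from the new test, with the expected results recorded in the reference file that follows:

```sql
SELECT arrayIntersect(['a', 'b', 'c'], ['a', 'a']); -- ['a']: duplicates of 'a' count once
SELECT arrayIntersect([1, 1], [2, 2]);              -- []: 1 never occurs in the second array
SELECT arrayIntersect([1, 1], [1, 2]);              -- [1]
SELECT arrayIntersect([1, 1, 1], [3], [2, 2, 2]);   -- []
```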
--- dbms/tests/queries/00932_array_intersect_bug.reference | 4 ++++ dbms/tests/queries/00932_array_intersect_bug.sql | 5 +++++ 2 files changed, 9 insertions(+) create mode 100644 dbms/tests/queries/00932_array_intersect_bug.reference create mode 100644 dbms/tests/queries/00932_array_intersect_bug.sql diff --git a/dbms/tests/queries/00932_array_intersect_bug.reference b/dbms/tests/queries/00932_array_intersect_bug.reference new file mode 100644 index 00000000000..23b66e9033d --- /dev/null +++ b/dbms/tests/queries/00932_array_intersect_bug.reference @@ -0,0 +1,4 @@ +['a'] +[] +[1] +[] diff --git a/dbms/tests/queries/00932_array_intersect_bug.sql b/dbms/tests/queries/00932_array_intersect_bug.sql new file mode 100644 index 00000000000..4c3c199596d --- /dev/null +++ b/dbms/tests/queries/00932_array_intersect_bug.sql @@ -0,0 +1,5 @@ +SELECT arrayIntersect(['a', 'b', 'c'], ['a', 'a']); +SELECT arrayIntersect([1, 1], [2, 2]); +SELECT arrayIntersect([1, 1], [1, 2]); +SELECT arrayIntersect([1, 1, 1], [3], [2, 2, 2]); + From 821f80e44c7e461394aac1b99cc2da73a10465de Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 1 Apr 2019 16:46:18 +0300 Subject: [PATCH 049/102] Moved tests. --- .../queries/{ => 0_stateless}/00932_array_intersect_bug.reference | 0 .../tests/queries/{ => 0_stateless}/00932_array_intersect_bug.sql | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/{ => 0_stateless}/00932_array_intersect_bug.reference (100%) rename dbms/tests/queries/{ => 0_stateless}/00932_array_intersect_bug.sql (100%) diff --git a/dbms/tests/queries/00932_array_intersect_bug.reference b/dbms/tests/queries/0_stateless/00932_array_intersect_bug.reference similarity index 100% rename from dbms/tests/queries/00932_array_intersect_bug.reference rename to dbms/tests/queries/0_stateless/00932_array_intersect_bug.reference diff --git a/dbms/tests/queries/00932_array_intersect_bug.sql b/dbms/tests/queries/0_stateless/00932_array_intersect_bug.sql similarity index 100% rename from dbms/tests/queries/00932_array_intersect_bug.sql rename to dbms/tests/queries/0_stateless/00932_array_intersect_bug.sql From edaec2353c18c0eb2e246a7bdb4524430f942036 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 1 Apr 2019 17:58:04 +0300 Subject: [PATCH 050/102] CLICKHOUSE-4421 Fix segfault in copier (#4835) * Fix segfault in copier * add test * better runner --- dbms/programs/copier/ClusterCopier.cpp | 3 +- dbms/tests/integration/CMakeLists.txt | 20 +++- .../test_cluster_copier/task_no_index.xml | 109 ++++++++++++++++++ .../integration/test_cluster_copier/test.py | 25 ++++ 4 files changed, 153 insertions(+), 4 deletions(-) create mode 100644 dbms/tests/integration/test_cluster_copier/task_no_index.xml diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 451df591bbd..90cace9aa32 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -1201,7 +1201,8 @@ protected: auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, new_columns); - new_columns_list->set(new_columns_list->indices, query_ast->as()->columns_list->indices->clone()); + if (auto indices = query_ast->as()->columns_list->indices) + new_columns_list->set(new_columns_list->indices, indices->clone()); new_query.replace(new_query.columns_list, new_columns_list); diff --git a/dbms/tests/integration/CMakeLists.txt b/dbms/tests/integration/CMakeLists.txt index e2dae4634ca..54d5f5e727a 100644 --- 
a/dbms/tests/integration/CMakeLists.txt +++ b/dbms/tests/integration/CMakeLists.txt @@ -1,10 +1,24 @@ -if (CLICKHOUSE_SPLIT_BINARY) +if(CLICKHOUSE_SPLIT_BINARY) set (TEST_USE_BINARIES CLICKHOUSE_TESTS_SERVER_BIN_PATH=${ClickHouse_BINARY_DIR}/dbms/programs/clickhouse-server CLICKHOUSE_TESTS_CLIENT_BIN_PATH=${ClickHouse_BINARY_DIR}/dbms/programs/clickhouse-client) else() set (TEST_USE_BINARIES CLICKHOUSE_TESTS_SERVER_BIN_PATH=${ClickHouse_BINARY_DIR}/dbms/programs/clickhouse CLICKHOUSE_TESTS_CLIENT_BIN_PATH=${ClickHouse_BINARY_DIR}/dbms/programs/clickhouse) endif() +find_program(DOCKER_CMD docker) +find_program(DOCKER_COMPOSE_CMD docker-compose) +find_program(PYTEST_CMD pytest) +find_program(SUDO_CMD sudo) + # will mount only one binary to docker container - build with .so cant work -if (MAKE_STATIC_LIBRARIES) - add_test (NAME integration WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND env ${TEST_USE_BINARIES} "CLICKHOUSE_TESTS_BASE_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/dbms/programs/server/" ${PYTEST_STARTER} pytest ${PYTEST_OPT}) +if(MAKE_STATIC_LIBRARIES AND DOCKER_CMD) + if(INTEGRATION_USE_RUNNER AND SUDO_CMD) + add_test(NAME integration-runner WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${SUDO_CMD} ${CMAKE_CURRENT_SOURCE_DIR}/runner --binary ${ClickHouse_BINARY_DIR}/dbms/programs/clickhouse --configs-dir ${ClickHouse_SOURCE_DIR}/dbms/programs/server/) + message(STATUS "Using tests in docker with runner SUDO=${SUDO_CMD}; DOCKER=${DOCKER_CMD};") + endif() + if(NOT INTEGRATION_USE_RUNNER AND DOCKER_COMPOSE_CMD AND PYTEST_CMD) + # To run one test with debug: + # cmake . -DPYTEST_OPT="-ss;test_cluster_copier" + add_test(NAME integration-pytest WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND env ${TEST_USE_BINARIES} "CLICKHOUSE_TESTS_BASE_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/dbms/programs/server/" ${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT}) + message(STATUS "Using tests in docker DOCKER=${DOCKER_CMD}; DOCKER_COMPOSE=${DOCKER_COMPOSE_CMD}; PYTEST=${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT}") + endif() endif() diff --git a/dbms/tests/integration/test_cluster_copier/task_no_index.xml b/dbms/tests/integration/test_cluster_copier/task_no_index.xml new file mode 100644 index 00000000000..c9359aa9278 --- /dev/null +++ b/dbms/tests/integration/test_cluster_copier/task_no_index.xml @@ -0,0 +1,109 @@ + + + + + false + + s0_0_0 + 9000 + + + + + + + false + + s1_1_0 + 9000 + + + + + + + 2 + + + + 1 + + + + + 0 + + + + + 3 + + 1 + + + + + + + + source_cluster + default + ontime + + + + destination_cluster + default + ontime22 + + + + + + + ENGINE = MergeTree() PARTITION BY Year ORDER BY (Year, FlightDate) SETTINGS index_granularity=8192 + + + + + jumpConsistentHash(intHash64(Year), 2) + + + + + + + 2017 + + + + + + + diff --git a/dbms/tests/integration/test_cluster_copier/test.py b/dbms/tests/integration/test_cluster_copier/test.py index 3f3c5f31741..31804c184f8 100644 --- a/dbms/tests/integration/test_cluster_copier/test.py +++ b/dbms/tests/integration/test_cluster_copier/test.py @@ -167,6 +167,28 @@ class Task_test_block_size: ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2) ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1") +class Task_no_index: + + def __init__(self, cluster): + self.cluster = cluster + self.zk_task_path="/clickhouse-copier/task_no_index" + self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task_no_index.xml'), 'r').read() + self.rows = 1000000 + + + def start(self): + instance = 
cluster.instances['s0_0_0']
+        instance.query("create table ontime (Year UInt16, FlightDate String) ENGINE = Memory")
+        instance.query("insert into ontime values (2016, 'test6'), (2017, 'test7'), (2018, 'test8')")
+
+
+    def check(self):
+        assert TSV(self.cluster.instances['s1_1_0'].query("SELECT Year FROM ontime22")) == TSV("2017\n")
+        instance = cluster.instances['s0_0_0']
+        instance.query("DROP TABLE ontime")
+        instance = cluster.instances['s1_1_0']
+        instance.query("DROP TABLE ontime22")
+
 def execute_task(task, cmd_options):
     task.start()
@@ -229,6 +251,9 @@ def test_copy_month_to_week_partition_with_recovering(started_cluster):
 def test_block_size(started_cluster):
     execute_task(Task_test_block_size(started_cluster), [])
+def test_no_index(started_cluster):
+    execute_task(Task_no_index(started_cluster), [])
+
 if __name__ == '__main__':
     with contextmanager(started_cluster)() as cluster:

From e6b50a05db3198b4f8462526f37e51e6a4224eb7 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Mon, 1 Apr 2019 18:05:22 +0300
Subject: [PATCH 051/102] Remove optional argument from codecs documentation

---
 docs/en/query_language/create.md | 2 +-
 docs/ru/query_language/create.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md
index 6af36a1baf0..265883df22b 100644
--- a/docs/en/query_language/create.md
+++ b/docs/en/query_language/create.md
@@ -117,7 +117,7 @@ CREATE TABLE timeseries_example
     dt Date,
     ts DateTime,
     path String,
-    value Float32 CODEC(Delta(2), ZSTD)
+    value Float32 CODEC(Delta, ZSTD)
 )
 ENGINE = MergeTree
 PARTITION BY dt
diff --git a/docs/ru/query_language/create.md b/docs/ru/query_language/create.md
index 4b9bdd6d77a..3484d6f6f31 100644
--- a/docs/ru/query_language/create.md
+++ b/docs/ru/query_language/create.md
@@ -120,7 +120,7 @@ CREATE TABLE timeseries_example
     dt Date,
     ts DateTime,
     path String,
-    value Float32 CODEC(Delta(2), ZSTD)
+    value Float32 CODEC(Delta, ZSTD)
 )
 ENGINE = MergeTree
 PARTITION BY dt

From 966bee4d93607c33dd063fcb96446a5d8e333662 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Mon, 1 Apr 2019 19:28:20 +0300
Subject: [PATCH 052/102] Parse compression codecs from AST

---
 dbms/programs/compressor/Compressor.cpp | 30 ++++++++++++-------------
 dbms/programs/compressor/README.md | 4 ++--
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/dbms/programs/compressor/Compressor.cpp b/dbms/programs/compressor/Compressor.cpp
index de51f16833e..a7389901612 100644
--- a/dbms/programs/compressor/Compressor.cpp
+++ b/dbms/programs/compressor/Compressor.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -9,6 +10,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
@@ -64,7 +67,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
         ("hc", "use LZ4HC instead of LZ4")
         ("zstd", "use ZSTD instead of LZ4")
         ("codec", boost::program_options::value>()->multitoken(), "use codecs combination instead of LZ4")
-        ("level", boost::program_options::value>()->multitoken(), "compression levels for codecs specified via --codec")
+        ("level", boost::program_options::value(), "compression level for codecs specified via flags")
         ("none", "use no compression instead of LZ4")
         ("stat", "print block statistics of compressed data")
     ;
@@ -94,6 +97,9 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
     if ((use_lz4hc || use_zstd || use_none) && !codecs.empty())
         throw DB::Exception("Wrong options, codec flags like --zstd and --codec options are mutually exclusive",
DB::ErrorCodes::BAD_ARGUMENTS); + if (!codecs.empty() && options.count("level")) + throw DB::Exception("Wrong options, --level is not compatible with --codec list", DB::ErrorCodes::BAD_ARGUMENTS); + std::string method_family = "LZ4"; if (use_lz4hc) @@ -103,28 +109,22 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) else if (use_none) method_family = "NONE"; - std::vector levels; + int level; if (options.count("level")) - levels = options["level"].as>(); + levels = options["level"].as(); + DB::CompressionCodecPtr codec; if (!codecs.empty()) { - if (levels.size() > codecs.size()) - throw DB::Exception("Specified more levels than codecs", DB::ErrorCodes::BAD_ARGUMENTS); + DB::ParserCodec codec_parser; - std::vector codec_names; - for (size_t i = 0; i < codecs.size(); ++i) - { - if (i < levels.size()) - codec_names.emplace_back(codecs[i], levels[i]); - else - codec_names.emplace_back(codecs[i], std::nullopt); - } - codec = DB::CompressionCodecFactory::instance().get(codec_names); + std::string codecs_line = boost::algorithm::join(codecs, ","); + auto ast = DB::parseQuery(codec_parser, "(" + codecs_line + ")", 0); + codec = DB::CompressionCodecFactory::instance().get(ast, nullptr); } else - codec = DB::CompressionCodecFactory::instance().get(method_family, levels.empty() ? std::nullopt : std::optional(levels.back())); + codec = DB::CompressionCodecFactory::instance().get(method_family, levels.empty() ? std::nullopt : std::optional(level)); DB::ReadBufferFromFileDescriptor rb(STDIN_FILENO); diff --git a/dbms/programs/compressor/README.md b/dbms/programs/compressor/README.md index 92dfe50cbc1..61e83e85024 100644 --- a/dbms/programs/compressor/README.md +++ b/dbms/programs/compressor/README.md @@ -17,11 +17,11 @@ $ ./clickhouse-compressor --decompress < input_file > output_file Compress data with ZSTD at level 5: ``` -$ ./clickhouse-compressor --codec ZSTD --level 5 < input_file > output_file +$ ./clickhouse-compressor --codec 'ZSTD(5)' < input_file > output_file ``` Compress data with ZSTD level 10, LZ4HC level 7 and LZ4. ``` -$ ./clickhouse-compressor --codec ZSTD --level 5 --codec LZ4HC --level 7 --codec LZ4 < input_file > output_file +$ ./clickhouse-compressor --codec ZSTD --codec LZ4HC --level 7 --codec LZ4 < input_file > output_file ``` From ac24e1ad9079fa61c00e1122c9775141723e7316 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 1 Apr 2019 19:29:41 +0300 Subject: [PATCH 053/102] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 498d9ae743d..59343cd9936 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## ClickHouse release 19.4.2.7, 2019-03-30 + +### Bug Fixes +* Fixed reading from `Array(LowCardinality)` column in rare case when column contained a long sequence of empty arrays. 
[#4850](https://github.com/yandex/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) + ## ClickHouse release 19.4.1.3, 2019-03-19 ### Bug Fixes From 5585eebe014fded69edb6c4c81d9876f4aca19e0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 1 Apr 2019 19:34:27 +0300 Subject: [PATCH 054/102] Fix level parsing --- dbms/programs/compressor/Compressor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/programs/compressor/Compressor.cpp b/dbms/programs/compressor/Compressor.cpp index a7389901612..427d58cbdc6 100644 --- a/dbms/programs/compressor/Compressor.cpp +++ b/dbms/programs/compressor/Compressor.cpp @@ -109,9 +109,9 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) else if (use_none) method_family = "NONE"; - int level; + std::optional level = std::nullopt; if (options.count("level")) - levels = options["level"].as(); + level = options["level"].as(); DB::CompressionCodecPtr codec; @@ -124,7 +124,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) codec = DB::CompressionCodecFactory::instance().get(ast, nullptr); } else - codec = DB::CompressionCodecFactory::instance().get(method_family, levels.empty() ? std::nullopt : std::optional(level)); + codec = DB::CompressionCodecFactory::instance().get(method_family, level); DB::ReadBufferFromFileDescriptor rb(STDIN_FILENO); From 1d031e9695339f83b084e8770f4386211b916a22 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 1 Apr 2019 19:36:46 +0300 Subject: [PATCH 055/102] Fix readme --- dbms/programs/compressor/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/programs/compressor/README.md b/dbms/programs/compressor/README.md index 61e83e85024..44a1f052824 100644 --- a/dbms/programs/compressor/README.md +++ b/dbms/programs/compressor/README.md @@ -20,8 +20,8 @@ Compress data with ZSTD at level 5: $ ./clickhouse-compressor --codec 'ZSTD(5)' < input_file > output_file ``` -Compress data with ZSTD level 10, LZ4HC level 7 and LZ4. +Compress data with Delta of four bytes and ZSTD level 10. 
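+(The codecs are applied in the order they are listed: Delta transforms the input first, ZSTD then compresses the transformed stream, and decompression applies the chain in reverse.)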
``` -$ ./clickhouse-compressor --codec ZSTD --codec LZ4HC --level 7 --codec LZ4 < input_file > output_file +$ ./clickhouse-compressor --codec 'Delta(4)' --codec 'ZSTD(10)' < input_file > output_file ``` From 9ed4ac7e82e75e34a3e98c9257a9933e2dfdecc9 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 1 Apr 2019 19:44:15 +0300 Subject: [PATCH 056/102] get rid of macro in AsofRowRefs --- dbms/src/Interpreters/Join.cpp | 5 +- dbms/src/Interpreters/Join.h | 4 +- dbms/src/Interpreters/RowRefs.cpp | 123 ++++++++++++++++++++---------- dbms/src/Interpreters/RowRefs.h | 57 +++++++------- 4 files changed, 115 insertions(+), 74 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index f19781ca380..aacb9f07420 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -301,9 +301,8 @@ void Join::setSampleBlock(const Block & block) const IColumn * asof_column = key_columns.back(); size_t asof_size; - if (auto t = AsofRowRefs::getTypeSize(asof_column)) - std::tie(asof_type, asof_size) = *t; - else + asof_type = AsofRowRefs::getTypeSize(asof_column, asof_size); + if (!asof_type) { std::string msg = "ASOF join not supported for type"; msg += asof_column->getFamilyName(); diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 01bd1335cbd..7a223f46b35 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -131,7 +131,7 @@ public: size_t getTotalByteCount() const; ASTTableJoin::Kind getKind() const { return kind; } - AsofRowRefs::Type getAsofType() const { return asof_type; } + AsofRowRefs::Type getAsofType() const { return *asof_type; } /** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined). * Depending on template parameter, decide whether to overwrite existing values when encountering the same key again @@ -366,7 +366,7 @@ private: private: Type type = Type::EMPTY; - AsofRowRefs::Type asof_type = AsofRowRefs::Type::EMPTY; + std::optional asof_type; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); diff --git a/dbms/src/Interpreters/RowRefs.cpp b/dbms/src/Interpreters/RowRefs.cpp index aca948c9270..bfb44b99bc5 100644 --- a/dbms/src/Interpreters/RowRefs.cpp +++ b/dbms/src/Interpreters/RowRefs.cpp @@ -1,24 +1,46 @@ #include +#include #include #include #include -#include namespace DB { -void AsofRowRefs::Lookups::create(AsofRowRefs::Type which) +namespace +{ + +/// maps enum values to types +template +void callWithType(AsofRowRefs::Type which, F && f) { switch (which) { - case Type::EMPTY: break; - #define M(NAME, TYPE) \ - case Type::NAME: NAME = std::make_unique(); break; - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M + case AsofRowRefs::Type::key32: return f(AsofRowRefs::LookupTypes()); + case AsofRowRefs::Type::key64: return f(AsofRowRefs::LookupTypes()); + case AsofRowRefs::Type::keyf32: return f(AsofRowRefs::LookupTypes()); + case AsofRowRefs::Type::keyf64: return f(AsofRowRefs::LookupTypes()); } + + __builtin_unreachable(); +} + +} // namespace + + +void AsofRowRefs::createLookup(AsofRowRefs::Type which) +{ + auto call = [&](const auto & types) + { + using Types = std::decay_t; + using SearcherType = typename Types::SearcherType; + + lookups = std::make_unique(); + }; + + callWithType(which, call); } template @@ -26,49 +48,68 @@ using AsofGetterType = ColumnsHashing::HashMethodOneNumber; void AsofRowRefs::insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool) { - switch (type) + auto call 
= [&](const auto & types) { - case Type::EMPTY: break; - #define M(NAME, TYPE) \ - case Type::NAME: { \ - auto asof_getter = AsofGetterType(asof_column); \ - auto entry = Entry(asof_getter.getKey(row_num, pool), RowRef(block, row_num)); \ - lookups.NAME->insert(entry); \ - break; \ - } - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - } + using Types = std::decay_t; + using ElementType = typename Types::ElementType; + using SearcherPtr = typename Types::Ptr; + + auto asof_getter = AsofGetterType(asof_column); + auto entry = Entry(asof_getter.getKey(row_num, pool), RowRef(block, row_num)); + + std::get(lookups)->insert(entry); + }; + + callWithType(*type, call); } const RowRef * AsofRowRefs::findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const { - switch (type) - { - case Type::EMPTY: return nullptr; - #define M(NAME, TYPE) \ - case Type::NAME: { \ - auto asof_getter = AsofGetterType(asof_column); \ - TYPE key = asof_getter.getKey(row_num, pool); \ - auto it = lookups.NAME->upper_bound(Entry(key)); \ - if (it == lookups.NAME->cbegin()) \ - return nullptr; \ - return &((--it)->row_ref); \ - } - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - } + const RowRef * out = nullptr; - __builtin_unreachable(); + auto call = [&](const auto & types) + { + using Types = std::decay_t; + using ElementType = typename Types::ElementType; + using SearcherPtr = typename Types::Ptr; + + auto asof_getter = AsofGetterType(asof_column); + ElementType key = asof_getter.getKey(row_num, pool); + auto & typed_lookup = std::get(lookups); + + auto it = typed_lookup->upper_bound(Entry(key)); + if (it != typed_lookup->cbegin()) + out = &((--it)->row_ref); + }; + + callWithType(*type, call); + return out; } -std::optional> AsofRowRefs::getTypeSize(const IColumn * asof_column) +std::optional AsofRowRefs::getTypeSize(const IColumn * asof_column, size_t & size) { - #define M(NAME, TYPE) \ - if (strcmp(#TYPE, asof_column->getFamilyName()) == 0) \ - return std::make_pair(Type::NAME,sizeof(TYPE)); - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M + if (typeid_cast *>(asof_column)) + { + size = sizeof(UInt32); + return Type::key32; + } + else if (typeid_cast *>(asof_column)) + { + size = sizeof(UInt64); + return Type::key64; + } + else if (typeid_cast *>(asof_column)) + { + size = sizeof(Float32); + return Type::keyf32; + } + else if (typeid_cast *>(asof_column)) + { + size = sizeof(Float64); + return Type::keyf64; + } + + size = 0; return {}; } diff --git a/dbms/src/Interpreters/RowRefs.h b/dbms/src/Interpreters/RowRefs.h index e6ba5daef35..b6dbcc8ceb3 100644 --- a/dbms/src/Interpreters/RowRefs.h +++ b/dbms/src/Interpreters/RowRefs.h @@ -4,6 +4,7 @@ #include #include +#include namespace DB { @@ -32,23 +33,6 @@ struct RowRefList : RowRef class AsofRowRefs { public: - /// Different types of asof join keys - #define APPLY_FOR_ASOF_JOIN_VARIANTS(M) \ - M(key32, UInt32) \ - M(key64, UInt64) \ - M(keyf32, Float32) \ - M(keyf64, Float64) - - enum class Type - { - EMPTY, - #define M(NAME, TYPE) NAME, - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - }; - - static std::optional> getTypeSize(const IColumn * asof_column); - template struct Entry { @@ -64,28 +48,45 @@ public: } }; - struct Lookups + template + struct LookupTypes { - #define M(NAME, TYPE) \ - std::unique_ptr>> NAME; - APPLY_FOR_ASOF_JOIN_VARIANTS(M) - #undef M - - void create(Type which); + using ElementType = T; + using SearcherType = SortedLookupPODArray>; + using Ptr = std::unique_ptr; }; - AsofRowRefs() : type(Type::EMPTY) {} - AsofRowRefs(Type t) : type(t) + 
using Lookups = std::variant< + LookupTypes::Ptr, + LookupTypes::Ptr, + LookupTypes::Ptr, + LookupTypes::Ptr>; + + enum class Type { - lookups.create(t); + key32, + key64, + keyf32, + keyf64, + }; + + static std::optional getTypeSize(const IColumn * asof_column, size_t & type_size); + + AsofRowRefs() = default; + AsofRowRefs(Type t) + : type(t) + { + createLookup(t); } void insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool); const RowRef * findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const; private: - const Type type; + const std::optional type; mutable Lookups lookups; + + void createLookup(Type which); }; } From 886e6883e632555cfcb322ee74e0fd45b06a99d4 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 1 Apr 2019 21:37:52 +0300 Subject: [PATCH 057/102] fix clang-7 build --- dbms/src/Interpreters/RowRefs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/RowRefs.h b/dbms/src/Interpreters/RowRefs.h index b6dbcc8ceb3..74bb2ee0d55 100644 --- a/dbms/src/Interpreters/RowRefs.h +++ b/dbms/src/Interpreters/RowRefs.h @@ -83,7 +83,7 @@ public: const RowRef * findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const; private: - const std::optional type; + const std::optional type = {}; mutable Lookups lookups; void createLookup(Type which); From 4cfcdd0f5269a2dca7d98f3b8c04aba327860e04 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Apr 2019 23:51:32 +0300 Subject: [PATCH 058/102] Removed useless method #4874 --- dbms/src/Compression/CompressionFactory.cpp | 13 +------------ dbms/src/Compression/CompressionFactory.h | 2 -- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/dbms/src/Compression/CompressionFactory.cpp b/dbms/src/Compression/CompressionFactory.cpp index ed34b8817d3..e7827d2ebc5 100644 --- a/dbms/src/Compression/CompressionFactory.cpp +++ b/dbms/src/Compression/CompressionFactory.cpp @@ -42,17 +42,6 @@ CompressionCodecPtr CompressionCodecFactory::get(const String & family_name, std } } -CompressionCodecPtr CompressionCodecFactory::get(const std::vector & codecs) const -{ - Codecs result; - for (const auto & [codec_name, level] : codecs) - result.push_back(get(codec_name, level)); - - if (result.size() == 1) - return result.back(); - - return std::make_shared(result); -} CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr column_type) const { @@ -93,7 +82,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const UInt8 byte_code) const CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const { if (family_name == "Multiple") - throw Exception("Codec MULTIPLE cannot be specified directly", ErrorCodes::UNKNOWN_CODEC); + throw Exception("Codec Multiple cannot be specified directly", ErrorCodes::UNKNOWN_CODEC); const auto family_and_creator = family_name_with_codec.find(family_name); diff --git a/dbms/src/Compression/CompressionFactory.h b/dbms/src/Compression/CompressionFactory.h index 4b959cef847..8ad9ca330a9 100644 --- a/dbms/src/Compression/CompressionFactory.h +++ b/dbms/src/Compression/CompressionFactory.h @@ -48,8 +48,6 @@ public: /// For backward compatibility with config settings CompressionCodecPtr get(const String & family_name, std::optional level) const; - CompressionCodecPtr get(const std::vector & codecs) const; - /// Register codec with parameters and column type void registerCompressionCodecWithType(const String & family_name, 
std::optional byte_code, CreatorWithType creator); /// Register codec with parameters From 45591c9d832ea6588f2d5c8020766206bac97782 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 2 Apr 2019 12:43:53 +0300 Subject: [PATCH 059/102] Build and test fixes (#4873) --- CMakeLists.txt | 2 +- dbms/src/Functions/FunctionsRound.h | 4 ++++ dbms/tests/queries/0_stateless/00823_capnproto_input.sh | 8 ++++---- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b5f2a88f702..8c4802295a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,7 +178,7 @@ include (cmake/use_libcxx.cmake) # This is intended for more control of what we are linking. set (DEFAULT_LIBS "") -if (OS_LINUX AND NOT UNBUNDLED) +if (OS_LINUX AND NOT UNBUNDLED AND (GLIBC_COMPATIBILITY OR USE_LIBCXX)) # Note: this probably has no effect, but I'm not an expert in CMake. set (CMAKE_C_IMPLICIT_LINK_LIBRARIES "") set (CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "") diff --git a/dbms/src/Functions/FunctionsRound.h b/dbms/src/Functions/FunctionsRound.h index bff8248b3ea..d9d0bcae037 100644 --- a/dbms/src/Functions/FunctionsRound.h +++ b/dbms/src/Functions/FunctionsRound.h @@ -119,6 +119,8 @@ struct IntegerRoundingComputation return x; } } + + __builtin_unreachable(); } static ALWAYS_INLINE T compute(T x, T scale) @@ -132,6 +134,8 @@ struct IntegerRoundingComputation case ScaleMode::Negative: return computeImpl(x, scale); } + + __builtin_unreachable(); } static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) diff --git a/dbms/tests/queries/0_stateless/00823_capnproto_input.sh b/dbms/tests/queries/0_stateless/00823_capnproto_input.sh index c33b185311a..cf0e2739abd 100755 --- a/dbms/tests/queries/0_stateless/00823_capnproto_input.sh +++ b/dbms/tests/queries/0_stateless/00823_capnproto_input.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -set -e +set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh @@ -35,7 +35,7 @@ struct CapnProto nestedone @2 : NestedOne; nestedtwo @3 : NestedTwo; nestedthree @4 : NestedNestedTwo; -}" > test.capnp +}" > ${CLICKHOUSE_TMP}/test.capnp $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.capnproto_input" $CLICKHOUSE_CLIENT -q "CREATE TABLE test.capnproto_input @@ -50,10 +50,10 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE test.capnproto_input nestedtwo_nestedtext String ) ENGINE = Memory" -echo -ne '\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x01\x00\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x12\x00\x00\x00\x0c\x00\x00\x00\x01\x00\x02\x00\x20\x00\x00\x00\x00\x00\x03\x00\x34\x00\x00\x00\x00\x00\x01\x00\x32\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x04\x00\x00\x00\x00\x00\x01\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x34\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x01\x00\x00\x00\x08\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x37\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x39\x00\x00\x00\x00\x00\x00\x00' | $CLICKHOUSE_CLIENT --stacktrace --format_schema='test:CapnProto' --query="INSERT INTO test.capnproto_input FORMAT CapnProto"; +echo -ne '\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x01\x00\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x12\x00\x00\x00\x0c\x00\x00\x00\x01\x00\x02\x00\x20\x00\x00\x00\x00\x00\x03\x00\x34\x00\x00\x00\x00\x00\x01\x00\x32\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x04\x00\x00\x00\x00\x00\x01\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x34\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x01\x00\x00\x00\x08\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x37\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x39\x00\x00\x00\x00\x00\x00\x00' | $CLICKHOUSE_CLIENT --stacktrace --format_schema="${CLICKHOUSE_TMP}/test:CapnProto" --query="INSERT INTO test.capnproto_input FORMAT CapnProto"; $CLICKHOUSE_CLIENT -q "SELECT * FROM test.capnproto_input" $CLICKHOUSE_CLIENT -q "DROP TABLE test.capnproto_input" # remove the schema file -rm test.capnp +rm ${CLICKHOUSE_TMP}/test.capnp From 293c70ef02ebef930634e2582fe38fdadae7998e Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 2 Apr 2019 14:24:11 +0300 Subject: [PATCH 060/102] Add more test-cases to cover future refactorings. 
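These cases pin down the multi-argument semantics - a value must occur in every argument array - and the degenerate single-argument form, which simply returns the distinct values in hash-table order rather than input order. Expected results, per the reference file below:

```sql
SELECT arrayIntersect([1, 2], [1, 2], [2]);          -- [2]
SELECT arrayIntersect([1, 1], [2, 1], [2, 2], [1]);  -- []: 1 is missing from the third array
SELECT arrayIntersect([]);                           -- []
SELECT arrayIntersect([1, 2, 3]);                    -- [3,1,2]
```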
--- .../queries/0_stateless/00932_array_intersect_bug.reference | 5 +++++ .../tests/queries/0_stateless/00932_array_intersect_bug.sql | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00932_array_intersect_bug.reference b/dbms/tests/queries/0_stateless/00932_array_intersect_bug.reference index 23b66e9033d..7ac5ecf7ac2 100644 --- a/dbms/tests/queries/0_stateless/00932_array_intersect_bug.reference +++ b/dbms/tests/queries/0_stateless/00932_array_intersect_bug.reference @@ -2,3 +2,8 @@ [] [1] [] +[2] +[] +[] +[3,1,2] +[] diff --git a/dbms/tests/queries/0_stateless/00932_array_intersect_bug.sql b/dbms/tests/queries/0_stateless/00932_array_intersect_bug.sql index 4c3c199596d..6d9d2382ae1 100644 --- a/dbms/tests/queries/0_stateless/00932_array_intersect_bug.sql +++ b/dbms/tests/queries/0_stateless/00932_array_intersect_bug.sql @@ -2,4 +2,8 @@ SELECT arrayIntersect(['a', 'b', 'c'], ['a', 'a']); SELECT arrayIntersect([1, 1], [2, 2]); SELECT arrayIntersect([1, 1], [1, 2]); SELECT arrayIntersect([1, 1, 1], [3], [2, 2, 2]); - +SELECT arrayIntersect([1, 2], [1, 2], [2]); +SELECT arrayIntersect([1, 1], [2, 1], [2, 2], [1]); +SELECT arrayIntersect([]); +SELECT arrayIntersect([1, 2, 3]); +SELECT arrayIntersect([1, 1], [2, 1], [2, 2], [2, 2, 2]); From 42a0c2f1945eb6741763f242c53cd2399ba503a5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Apr 2019 15:51:31 +0300 Subject: [PATCH 061/102] Fix alter modify of codec when column type is not specified --- .../src/Compression/CompressionCodecDelta.cpp | 31 ++++++++++++++----- dbms/src/Compression/CompressionCodecDelta.h | 7 +++-- .../Compression/CompressionCodecMultiple.cpp | 29 ++++++++++------- .../Compression/CompressionCodecMultiple.h | 3 +- dbms/src/Compression/ICompressionCodec.h | 3 ++ dbms/src/Storages/AlterCommands.cpp | 10 ++++-- ...4_test_delta_codec_no_type_alter.reference | 3 ++ .../00804_test_delta_codec_no_type_alter.sql | 10 ++++++ 8 files changed, 72 insertions(+), 24 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00804_test_delta_codec_no_type_alter.reference create mode 100644 dbms/tests/queries/0_stateless/00804_test_delta_codec_no_type_alter.sql diff --git a/dbms/src/Compression/CompressionCodecDelta.cpp b/dbms/src/Compression/CompressionCodecDelta.cpp index 08cc37864dd..1a37b95d712 100644 --- a/dbms/src/Compression/CompressionCodecDelta.cpp +++ b/dbms/src/Compression/CompressionCodecDelta.cpp @@ -125,19 +125,34 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_ } } +namespace +{ + +UInt8 getDeltaBytesSize(DataTypePtr column_type) +{ + UInt8 delta_bytes_size = 1; + if (column_type && column_type->haveMaximumSizeOfValue()) + { + size_t max_size = column_type->getSizeOfValueInMemory(); + if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8) + delta_bytes_size = static_cast(max_size); + } + return delta_bytes_size; +} + +} + +void CompressionCodecDelta::useInfoAboutType(DataTypePtr data_type) +{ + delta_bytes_size = getDeltaBytesSize(data_type); +} + void registerCodecDelta(CompressionCodecFactory & factory) { UInt8 method_code = UInt8(CompressionMethodByte::Delta); factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr { - UInt8 delta_bytes_size = 1; - if (column_type && column_type->haveMaximumSizeOfValue()) - { - size_t max_size = column_type->getSizeOfValueInMemory(); - if (max_size == 1 || max_size == 2 || max_size == 4 || max_size 
== 8) - delta_bytes_size = static_cast(max_size); - } - + UInt8 delta_bytes_size = getDeltaBytesSize(column_type); if (arguments && !arguments->children.empty()) { if (arguments->children.size() > 1) diff --git a/dbms/src/Compression/CompressionCodecDelta.h b/dbms/src/Compression/CompressionCodecDelta.h index 547bfdf2808..05068cd467e 100644 --- a/dbms/src/Compression/CompressionCodecDelta.h +++ b/dbms/src/Compression/CompressionCodecDelta.h @@ -14,15 +14,18 @@ public: String getCodecDesc() const override; + void useInfoAboutType(DataTypePtr data_type) override; + protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; } + + private: - const UInt8 delta_bytes_size; + UInt8 delta_bytes_size; }; } - diff --git a/dbms/src/Compression/CompressionCodecMultiple.cpp b/dbms/src/Compression/CompressionCodecMultiple.cpp index ffb961fb2db..a40fbafefc9 100644 --- a/dbms/src/Compression/CompressionCodecMultiple.cpp +++ b/dbms/src/Compression/CompressionCodecMultiple.cpp @@ -21,16 +21,6 @@ extern const int CORRUPTED_DATA; CompressionCodecMultiple::CompressionCodecMultiple(Codecs codecs) : codecs(codecs) { - std::ostringstream ss; - for (size_t idx = 0; idx < codecs.size(); idx++) - { - if (idx != 0) - ss << ',' << ' '; - - const auto codec = codecs[idx]; - ss << codec->getCodecDesc(); - } - codec_desc = ss.str(); } UInt8 CompressionCodecMultiple::getMethodByte() const @@ -40,7 +30,16 @@ UInt8 CompressionCodecMultiple::getMethodByte() const String CompressionCodecMultiple::getCodecDesc() const { - return codec_desc; + std::ostringstream ss; + for (size_t idx = 0; idx < codecs.size(); idx++) + { + if (idx != 0) + ss << ',' << ' '; + + const auto codec = codecs[idx]; + ss << codec->getCodecDesc(); + } + return ss.str(); } UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_size) const @@ -79,6 +78,14 @@ UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 sour return 1 + codecs.size() + source_size; } +void CompressionCodecMultiple::useInfoAboutType(DataTypePtr data_type) +{ + for (auto & codec : codecs) + { + codec->useInfoAboutType(data_type); + } +} + void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const { UInt8 compression_methods_size = source[0]; diff --git a/dbms/src/Compression/CompressionCodecMultiple.h b/dbms/src/Compression/CompressionCodecMultiple.h index cea67bdb49b..3770266e915 100644 --- a/dbms/src/Compression/CompressionCodecMultiple.h +++ b/dbms/src/Compression/CompressionCodecMultiple.h @@ -17,6 +17,8 @@ public: UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; + void useInfoAboutType(DataTypePtr data_type) override; + protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; @@ -24,7 +26,6 @@ protected: private: Codecs codecs; - String codec_desc; }; diff --git a/dbms/src/Compression/ICompressionCodec.h b/dbms/src/Compression/ICompressionCodec.h index d3e2fab12d8..040cb84c5eb 100644 --- a/dbms/src/Compression/ICompressionCodec.h +++ b/dbms/src/Compression/ICompressionCodec.h @@ -58,6 +58,9 @@ public: /// Read method byte from compressed source static UInt8 readMethod(const char * source); + /// Some codecs may use information 
about column type which appears after codec creation + virtual void useInfoAboutType(DataTypePtr /* data_type */) { } + protected: /// Return size of compressed data without header diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 03a39ba3bd7..cc06624b995 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -59,7 +59,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ } if (ast_col_decl.codec) - command.codec = compression_codec_factory.get(ast_col_decl.codec); + command.codec = compression_codec_factory.get(ast_col_decl.codec, command.data_type); if (command_ast->column) command.after_column = *getIdentifierName(command_ast->column); @@ -105,7 +105,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ } if (ast_col_decl.codec) - command.codec = compression_codec_factory.get(ast_col_decl.codec); + command.codec = compression_codec_factory.get(ast_col_decl.codec, command.data_type); command.if_exists = command_ast->if_exists; @@ -190,7 +190,13 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri ColumnDescription & column = columns_description.get(column_name); if (codec) + { + /// User doesn't specify data type, it means that datatype doesn't change + /// let's use info about old type + if (data_type == nullptr) + codec->useInfoAboutType(column.type); column.codec = codec; + } if (!is_mutable()) { diff --git a/dbms/tests/queries/0_stateless/00804_test_delta_codec_no_type_alter.reference b/dbms/tests/queries/0_stateless/00804_test_delta_codec_no_type_alter.reference new file mode 100644 index 00000000000..87e43ba75a3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00804_test_delta_codec_no_type_alter.reference @@ -0,0 +1,3 @@ +CODEC(Delta(4)) +CODEC(Delta(4), LZ4) +CODEC(Delta(8), LZ4) diff --git a/dbms/tests/queries/0_stateless/00804_test_delta_codec_no_type_alter.sql b/dbms/tests/queries/0_stateless/00804_test_delta_codec_no_type_alter.sql new file mode 100644 index 00000000000..1eda7afe834 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00804_test_delta_codec_no_type_alter.sql @@ -0,0 +1,10 @@ +SET send_logs_level = 'none'; + +DROP TABLE IF EXISTS test.delta_codec_for_alter; +CREATE TABLE test.delta_codec_for_alter (date Date, x UInt32 Codec(Delta), s FixedString(128)) ENGINE = MergeTree ORDER BY tuple(); +SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'delta_codec_for_alter' AND name = 'x'; +ALTER TABLE test.delta_codec_for_alter MODIFY COLUMN x Codec(Delta, LZ4); +SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'delta_codec_for_alter' AND name = 'x'; +ALTER TABLE test.delta_codec_for_alter MODIFY COLUMN x UInt64 Codec(Delta, LZ4); +SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'delta_codec_for_alter' AND name = 'x'; +DROP TABLE IF EXISTS test.delta_codec_for_alter; From 04efcf2bdc5cfa0b09b40cac9402468891c23bd6 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 2 Apr 2019 19:22:14 +0300 Subject: [PATCH 062/102] asof refactoring (searching for the crash reason) --- dbms/src/Common/SortedLookupPODArray.h | 34 +++++++++-------- dbms/src/Interpreters/Join.cpp | 4 +- dbms/src/Interpreters/RowRefs.cpp | 51 ++++++++++++-------------- dbms/src/Interpreters/RowRefs.h | 27 ++++++-------- 4 files changed, 55 insertions(+), 61 deletions(-) diff --git a/dbms/src/Common/SortedLookupPODArray.h b/dbms/src/Common/SortedLookupPODArray.h index 
c01cfe95605..ce96e8e2839 100644 --- a/dbms/src/Common/SortedLookupPODArray.h +++ b/dbms/src/Common/SortedLookupPODArray.h @@ -13,36 +13,38 @@ namespace DB * This way the data only gets sorted once. */ -template > -class SortedLookupPODArray : private PaddedPODArray +template +class SortedLookupPODArray { public: - using Base = PaddedPODArray; - using typename Base::PODArray; - using Base::cbegin; - using Base::cend; + using Base = PaddedPODArray; template void insert(U && x, TAllocatorParams &&... allocator_params) { - Base::push_back(std::forward(x), std::forward(allocator_params)...); + array.push_back(std::forward(x), std::forward(allocator_params)...); sorted = false; } - typename Base::const_iterator upper_bound (const T& k) + typename Base::const_iterator upper_bound(const T & k) { if (!sorted) - this->sort(); - return std::upper_bound(this->cbegin(), this->cend(), k); - } -private: - void sort() - { - std::sort(this->begin(), this->end()); - sorted = true; + sort(); + return std::upper_bound(array.cbegin(), array.cend(), k); } + typename Base::const_iterator cbegin() const { return array.cbegin(); } + typename Base::const_iterator cend() const { return array.cend(); } + +private: + Base array; bool sorted = false; + + void sort() + { + std::sort(array.begin(), array.end()); + sorted = true; + } }; } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index aacb9f07420..be4284004ef 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -422,7 +422,7 @@ namespace if (emplace_result.isInserted()) time_series_map = new (time_series_map) typename Map::mapped_type(join.getAsofType()); - time_series_map->insert(asof_column, stored_block, i, pool); + time_series_map->insert(asof_column, stored_block, i); } }; @@ -719,7 +719,7 @@ std::unique_ptr NO_INLINE joinRightIndexedColumns( if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) { - if (const RowRef * found = mapped.findAsof(asof_column, i, pool)) + if (const RowRef * found = mapped.findAsof(asof_column, i)) { filter[i] = 1; mapped.setUsed(); diff --git a/dbms/src/Interpreters/RowRefs.cpp b/dbms/src/Interpreters/RowRefs.cpp index bfb44b99bc5..95d2c796b23 100644 --- a/dbms/src/Interpreters/RowRefs.cpp +++ b/dbms/src/Interpreters/RowRefs.cpp @@ -18,10 +18,10 @@ void callWithType(AsofRowRefs::Type which, F && f) { switch (which) { - case AsofRowRefs::Type::key32: return f(AsofRowRefs::LookupTypes()); - case AsofRowRefs::Type::key64: return f(AsofRowRefs::LookupTypes()); - case AsofRowRefs::Type::keyf32: return f(AsofRowRefs::LookupTypes()); - case AsofRowRefs::Type::keyf64: return f(AsofRowRefs::LookupTypes()); + case AsofRowRefs::Type::key32: return f(UInt32()); + case AsofRowRefs::Type::key64: return f(UInt64()); + case AsofRowRefs::Type::keyf32: return f(Float32()); + case AsofRowRefs::Type::keyf64: return f(Float64()); } __builtin_unreachable(); @@ -32,52 +32,49 @@ void callWithType(AsofRowRefs::Type which, F && f) void AsofRowRefs::createLookup(AsofRowRefs::Type which) { - auto call = [&](const auto & types) + auto call = [&](const auto & t) { - using Types = std::decay_t; - using SearcherType = typename Types::SearcherType; + using T = std::decay_t; + using LookupType = typename Entry::LookupType; - lookups = std::make_unique(); + lookups = std::make_unique(); }; callWithType(which, call); } -template -using AsofGetterType = ColumnsHashing::HashMethodOneNumber; -void AsofRowRefs::insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool) +void 
AsofRowRefs::insert(const IColumn * asof_column, const Block * block, size_t row_num) { - auto call = [&](const auto & types) + auto call = [&](const auto & t) { - using Types = std::decay_t; - using ElementType = typename Types::ElementType; - using SearcherPtr = typename Types::Ptr; + using T = std::decay_t; + using LookupPtr = typename Entry::LookupPtr; - auto asof_getter = AsofGetterType(asof_column); - auto entry = Entry(asof_getter.getKey(row_num, pool), RowRef(block, row_num)); + auto * column = typeid_cast *>(asof_column); + T key = column->getElement(row_num); + auto entry = Entry(key, RowRef(block, row_num)); - std::get(lookups)->insert(entry); + std::get(lookups)->insert(entry); }; callWithType(*type, call); } -const RowRef * AsofRowRefs::findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const +const RowRef * AsofRowRefs::findAsof(const IColumn * asof_column, size_t row_num) const { const RowRef * out = nullptr; - auto call = [&](const auto & types) + auto call = [&](const auto & t) { - using Types = std::decay_t; - using ElementType = typename Types::ElementType; - using SearcherPtr = typename Types::Ptr; + using T = std::decay_t; + using LookupPtr = typename Entry::LookupPtr; - auto asof_getter = AsofGetterType(asof_column); - ElementType key = asof_getter.getKey(row_num, pool); - auto & typed_lookup = std::get(lookups); + auto * column = typeid_cast *>(asof_column); + T key = column->getElement(row_num); - auto it = typed_lookup->upper_bound(Entry(key)); + auto & typed_lookup = std::get(lookups); + auto it = typed_lookup->upper_bound(Entry(key)); if (it != typed_lookup->cbegin()) out = &((--it)->row_ref); }; diff --git a/dbms/src/Interpreters/RowRefs.h b/dbms/src/Interpreters/RowRefs.h index 74bb2ee0d55..f476b4146e4 100644 --- a/dbms/src/Interpreters/RowRefs.h +++ b/dbms/src/Interpreters/RowRefs.h @@ -33,34 +33,29 @@ struct RowRefList : RowRef class AsofRowRefs { public: - template + template struct Entry { + using LookupType = SortedLookupPODArray>; + using LookupPtr = std::unique_ptr; + T asof_value; RowRef row_ref; Entry(T v) : asof_value(v) {} Entry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} - bool operator< (const Entry& o) const + bool operator < (const Entry & o) const { return asof_value < o.asof_value; } }; - template - struct LookupTypes - { - using ElementType = T; - using SearcherType = SortedLookupPODArray>; - using Ptr = std::unique_ptr; - }; - using Lookups = std::variant< - LookupTypes::Ptr, - LookupTypes::Ptr, - LookupTypes::Ptr, - LookupTypes::Ptr>; + Entry::LookupPtr, + Entry::LookupPtr, + Entry::LookupPtr, + Entry::LookupPtr>; enum class Type { @@ -79,8 +74,8 @@ public: createLookup(t); } - void insert(const IColumn * asof_column, const Block * block, size_t row_num, Arena & pool); - const RowRef * findAsof(const IColumn * asof_column, size_t row_num, Arena & pool) const; + void insert(const IColumn * asof_column, const Block * block, size_t row_num); + const RowRef * findAsof(const IColumn * asof_column, size_t row_num) const; private: const std::optional type = {}; From 55df5b016fcc312b8974ba27ed4a5c9f334ab692 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 2 Apr 2019 19:28:19 +0300 Subject: [PATCH 063/102] Auto version update to [19.6.1.1] [54418] --- dbms/cmake/version.cmake | 10 +++++----- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/cmake/version.cmake 
b/dbms/cmake/version.cmake index 7df40c7c0d4..81876cf5fe6 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,11 +1,11 @@ # This strings autochanged from release_lib.sh: -set(VERSION_REVISION 54417) +set(VERSION_REVISION 54418) set(VERSION_MAJOR 19) -set(VERSION_MINOR 5) +set(VERSION_MINOR 6) set(VERSION_PATCH 1) -set(VERSION_GITHASH 628ed349c335b79a441a1bd6e4bc791d61dfe62c) -set(VERSION_DESCRIBE v19.5.1.1-testing) -set(VERSION_STRING 19.5.1.1) +set(VERSION_GITHASH 30d3496c36cf3945c9828ac0b7cf7d1774a9f845) +set(VERSION_DESCRIBE v19.6.1.1-testing) +set(VERSION_STRING 19.6.1.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index e9bb4c1caa0..48e6a8c19bd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.5.1.1) unstable; urgency=low +clickhouse (19.6.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Sat, 09 Mar 2019 10:45:02 +300 + -- clickhouse-release Tue, 02 Apr 2019 19:28:15 +300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index cbcc52c6f0d..995cdbf29af 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.5.1.1 +ARG version=19.6.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 71cf28082d5..07f49a044c9 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.5.1.1 +ARG version=19.6.1.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 315cb2f958b..08c5014405c 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.5.1.1 +ARG version=19.6.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From d5ffbd9b6d28719174a3658d3589ba7a0ad54ce9 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 2 Apr 2019 21:50:35 +0300 Subject: [PATCH 064/102] fix multithreaded ASOF JOIN crash --- dbms/src/Common/SortedLookupPODArray.h | 6 ++-- dbms/src/Interpreters/Join.cpp | 29 +++++++++-------- dbms/src/Interpreters/Join.h | 3 ++ dbms/src/Interpreters/RowRefs.cpp | 45 +++++++++++++------------- dbms/src/Interpreters/RowRefs.h | 33 +++++++++---------- 5 files changed, 59 insertions(+), 57 deletions(-) diff --git a/dbms/src/Common/SortedLookupPODArray.h b/dbms/src/Common/SortedLookupPODArray.h index ce96e8e2839..d9b03f5704d 100644 --- a/dbms/src/Common/SortedLookupPODArray.h +++ b/dbms/src/Common/SortedLookupPODArray.h @@ -1,6 +1,7 @@ #pragma once -#include +#include +//#include namespace DB { @@ -17,7 +18,8 @@ template class SortedLookupPODArray { public: - using Base = PaddedPODArray; + using Base = std::vector; + //using Base = PaddedPODArray; template void insert(U && x, TAllocatorParams &&... 
allocator_params) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index be4284004ef..7faaac5f607 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -415,21 +415,22 @@ namespace template struct Inserter { - static ALWAYS_INLINE void insert(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, const IColumn * asof_column) + static ALWAYS_INLINE void insert(Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, + const IColumn * asof_column) { auto emplace_result = key_getter.emplaceKey(map, i, pool); typename Map::mapped_type * time_series_map = &emplace_result.getMapped(); if (emplace_result.isInserted()) - time_series_map = new (time_series_map) typename Map::mapped_type(join.getAsofType()); - time_series_map->insert(asof_column, stored_block, i); + time_series_map = new (time_series_map) typename Map::mapped_type(); + time_series_map->insert(join.getAsofType(), join.getAsofData(), asof_column, stored_block, i); } }; template void NO_INLINE insertFromBlockImplTypeCase( - const Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, + Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { const IColumn * asof_column [[maybe_unused]] = nullptr; @@ -453,7 +454,7 @@ namespace template void insertFromBlockImplType( - const Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, + Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { if (null_map) @@ -465,7 +466,7 @@ namespace template void insertFromBlockImpl( - const Join & join, Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, + Join & join, Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { switch (type) @@ -686,7 +687,7 @@ void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & cur /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
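/// The crash fix itself lives in RowRefs.cpp further below: each AsofRowRefs
/// now keeps only a raw pointer into a shared, mutex-guarded std::list of
/// lookups (std::list never invalidates element addresses), allocated lazily
/// under the lock on the first insert. A stripped-down sketch of that scheme —
/// SharedLookups/LazyOwner are illustrative names, and std::vector<int> stands
/// in for the per-type lookup variant:
#include <cassert>
#include <list>
#include <mutex>
#include <vector>

struct SharedLookups
{
    std::mutex mutex;                    /// serializes concurrent inserters
    std::list<std::vector<int>> lookups; /// stable addresses across growth
};

struct LazyOwner
{
    std::vector<int> * lookup = nullptr;

    void insert(SharedLookups & shared, int value)
    {
        std::lock_guard lock(shared.mutex);
        if (!lookup)                     /// first insert allocates the slot
        {
            shared.lookups.emplace_back();
            lookup = &shared.lookups.back();
        }
        lookup->push_back(value);
    }
};

int main()
{
    SharedLookups shared;
    LazyOwner a, b;
    a.insert(shared, 1);
    b.insert(shared, 2);
    a.insert(shared, 3);                 /// reuses a's slot; b's is untouched
    assert(shared.lookups.size() == 2);
    assert(a.lookup->size() == 2 && b.lookup->size() == 1);
}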
template std::unique_ptr NO_INLINE joinRightIndexedColumns( - const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, + const Join & join, const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, AddedColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter) { std::unique_ptr offsets_to_replicate; @@ -719,7 +720,7 @@ std::unique_ptr NO_INLINE joinRightIndexedColumns( if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) { - if (const RowRef * found = mapped.findAsof(asof_column, i)) + if (const RowRef * found = mapped.findAsof(join.getAsofType(), join.getAsofData(), asof_column, i)) { filter[i] = 1; mapped.setUsed(); @@ -748,7 +749,7 @@ std::unique_ptr NO_INLINE joinRightIndexedColumns( template IColumn::Filter joinRightColumns( - const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, + const Join & join, const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, AddedColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & offsets_to_replicate) { constexpr bool left_or_full = static_in_v; @@ -757,17 +758,17 @@ IColumn::Filter joinRightColumns( if (null_map) offsets_to_replicate = joinRightIndexedColumns( - map, rows, key_columns, key_sizes, added_columns, null_map, filter); + join, map, rows, key_columns, key_sizes, added_columns, null_map, filter); else offsets_to_replicate = joinRightIndexedColumns( - map, rows, key_columns, key_sizes, added_columns, null_map, filter); + join, map, rows, key_columns, key_sizes, added_columns, null_map, filter); return filter; } template IColumn::Filter switchJoinRightColumns( - Join::Type type, + Join::Type type, const Join & join, const Maps & maps_, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, AddedColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & offsets_to_replicate) @@ -777,7 +778,7 @@ IColumn::Filter switchJoinRightColumns( #define M(TYPE) \ case Join::Type::TYPE: \ return joinRightColumns>::Type>(\ - *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, offsets_to_replicate); + join, *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, offsets_to_replicate); APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -851,7 +852,7 @@ void Join::joinBlockImpl( std::unique_ptr offsets_to_replicate; IColumn::Filter row_filter = switchJoinRightColumns( - type, maps_, block.rows(), key_columns, key_sizes, added, null_map, offsets_to_replicate); + type, *this, maps_, block.rows(), key_columns, key_sizes, added, null_map, offsets_to_replicate); for (size_t i = 0; i < added.size(); ++i) block.insert(added.moveColumn(i)); diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 7a223f46b35..85255aaaaa0 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -132,6 +132,8 @@ public: ASTTableJoin::Kind getKind() const { return kind; } AsofRowRefs::Type getAsofType() const { return *asof_type; } + AsofRowRefs::LookupLists & getAsofData() { return asof_lookup_lists; } + const AsofRowRefs::LookupLists & getAsofData() const { return asof_lookup_lists; } /** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined). 
* Depending on template parameter, decide whether to overwrite existing values when encountering the same key again @@ -367,6 +369,7 @@ private: private: Type type = Type::EMPTY; std::optional asof_type; + AsofRowRefs::LookupLists asof_lookup_lists; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); diff --git a/dbms/src/Interpreters/RowRefs.cpp b/dbms/src/Interpreters/RowRefs.cpp index 95d2c796b23..9fea9819132 100644 --- a/dbms/src/Interpreters/RowRefs.cpp +++ b/dbms/src/Interpreters/RowRefs.cpp @@ -30,56 +30,55 @@ void callWithType(AsofRowRefs::Type which, F && f) } // namespace -void AsofRowRefs::createLookup(AsofRowRefs::Type which) +void AsofRowRefs::insert(Type type, LookupLists & lookup_data, const IColumn * asof_column, const Block * block, size_t row_num) { auto call = [&](const auto & t) { using T = std::decay_t; using LookupType = typename Entry::LookupType; - lookups = std::make_unique(); - }; - - callWithType(which, call); -} - - -void AsofRowRefs::insert(const IColumn * asof_column, const Block * block, size_t row_num) -{ - auto call = [&](const auto & t) - { - using T = std::decay_t; - using LookupPtr = typename Entry::LookupPtr; - auto * column = typeid_cast *>(asof_column); T key = column->getElement(row_num); auto entry = Entry(key, RowRef(block, row_num)); - std::get(lookups)->insert(entry); + std::lock_guard lock(lookup_data.mutex); + + if (!lookups) + { + lookup_data.lookups.push_back(Lookups()); + lookup_data.lookups.back() = LookupType(); + lookups = &lookup_data.lookups.back(); + } + std::get(*lookups).insert(entry); }; - callWithType(*type, call); + callWithType(type, call); } -const RowRef * AsofRowRefs::findAsof(const IColumn * asof_column, size_t row_num) const +const RowRef * AsofRowRefs::findAsof(Type type, const LookupLists & lookup_data, const IColumn * asof_column, size_t row_num) const { const RowRef * out = nullptr; auto call = [&](const auto & t) { using T = std::decay_t; - using LookupPtr = typename Entry::LookupPtr; + using LookupType = typename Entry::LookupType; auto * column = typeid_cast *>(asof_column); T key = column->getElement(row_num); - auto & typed_lookup = std::get(lookups); - auto it = typed_lookup->upper_bound(Entry(key)); - if (it != typed_lookup->cbegin()) + std::lock_guard lock(lookup_data.mutex); + + if (!lookups) + return; + + auto & typed_lookup = std::get(*lookups); + auto it = typed_lookup.upper_bound(Entry(key)); + if (it != typed_lookup.cbegin()) out = &((--it)->row_ref); }; - callWithType(*type, call); + callWithType(type, call); return out; } diff --git a/dbms/src/Interpreters/RowRefs.h b/dbms/src/Interpreters/RowRefs.h index f476b4146e4..227fba965b3 100644 --- a/dbms/src/Interpreters/RowRefs.h +++ b/dbms/src/Interpreters/RowRefs.h @@ -5,6 +5,8 @@ #include #include +#include +#include namespace DB { @@ -37,7 +39,6 @@ public: struct Entry { using LookupType = SortedLookupPODArray>; - using LookupPtr = std::unique_ptr; T asof_value; RowRef row_ref; @@ -52,10 +53,16 @@ public: }; using Lookups = std::variant< - Entry::LookupPtr, - Entry::LookupPtr, - Entry::LookupPtr, - Entry::LookupPtr>; + Entry::LookupType, + Entry::LookupType, + Entry::LookupType, + Entry::LookupType>; + + struct LookupLists + { + mutable std::mutex mutex; + std::list lookups; + }; enum class Type { @@ -67,21 +74,11 @@ public: static std::optional getTypeSize(const IColumn * asof_column, size_t & type_size); - AsofRowRefs() = default; - AsofRowRefs(Type t) - : type(t) - { - createLookup(t); - } - - void insert(const IColumn * 
asof_column, const Block * block, size_t row_num); - const RowRef * findAsof(const IColumn * asof_column, size_t row_num) const; + void insert(Type type, LookupLists &, const IColumn * asof_column, const Block * block, size_t row_num); + const RowRef * findAsof(Type type, const LookupLists &, const IColumn * asof_column, size_t row_num) const; private: - const std::optional type = {}; - mutable Lookups lookups; - - void createLookup(Type which); + Lookups * lookups = nullptr; }; } From bb2e5e940fe7f5e37adab7ab145f86b69b1202f0 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 3 Apr 2019 00:30:03 +0200 Subject: [PATCH 065/102] Remove outdated link --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 3e840d2cf10..0e9974f763f 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,3 @@ ClickHouse is an open-source column-oriented database management system that all * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events. * [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person. - -## Upcoming Events - -* [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2. From 029dd107e43b8f0e88b5f456b5ab4958abfef482 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Wed, 3 Apr 2019 14:13:22 +0300 Subject: [PATCH 066/102] Typos for many factories --- dbms/src/Common/IFactoryWithAliases.h | 17 ++--------------- dbms/src/Common/NamePrompter.h | 19 +++++++++++++++++++ dbms/src/Storages/StorageFactory.cpp | 10 ++++++++-- dbms/src/Storages/StorageFactory.h | 11 ++++++++++- .../TableFunctions/TableFunctionFactory.cpp | 9 ++++++++- .../src/TableFunctions/TableFunctionFactory.h | 11 ++++++++++- .../00834_hints_for_type_function_typos.sh | 7 +++++++ 7 files changed, 64 insertions(+), 20 deletions(-) diff --git a/dbms/src/Common/IFactoryWithAliases.h b/dbms/src/Common/IFactoryWithAliases.h index df68e7b1255..476c0251be9 100644 --- a/dbms/src/Common/IFactoryWithAliases.h +++ b/dbms/src/Common/IFactoryWithAliases.h @@ -20,7 +20,7 @@ namespace ErrorCodes * template parameter is available as Creator */ template -class IFactoryWithAliases +class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> { protected: using Creator = CreatorFunc; @@ -76,7 +76,7 @@ public: throw Exception(factory_name + ": alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR); } - std::vector getAllRegisteredNames() const + std::vector getAllRegisteredNames() const override { std::vector result; auto getter = [](const auto & pair) { return pair.first; }; @@ -106,12 +106,6 @@ public: return aliases.count(name) || case_insensitive_aliases.count(name); } - std::vector getHints(const String & name) const - { - static const auto registered_names = getAllRegisteredNames(); - return prompter.getHints(name, registered_names); - } - virtual ~IFactoryWithAliases() {} private: @@ -127,13 +121,6 @@ private: /// Case insensitive aliases AliasMap case_insensitive_aliases; - - /** - * prompter for names, if a person makes a typo for some function or type, it - * helps to find best possible match (in particular, edit distance is done like in clang - * (max edit distance is (typo.size() + 2) / 3) - */ - NamePrompter prompter; }; } diff --git 
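/// The hints mechanism being generalized here suggests near-miss names by
/// Levenshtein distance, with the clang-style cutoff (typo.size() + 2) / 3
/// cited in the comment moved out of IFactoryWithAliases above. A
/// self-contained sketch of the idea (editDistance/hintsFor are illustrative
/// names, not the NamePrompter API):
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

/// Classic two-row Levenshtein distance.
size_t editDistance(const std::string & a, const std::string & b)
{
    std::vector<size_t> prev(b.size() + 1), cur(b.size() + 1);
    for (size_t j = 0; j <= b.size(); ++j)
        prev[j] = j;
    for (size_t i = 1; i <= a.size(); ++i)
    {
        cur[0] = i;
        for (size_t j = 1; j <= b.size(); ++j)
            cur[j] = std::min({prev[j] + 1,        /// deletion
                               cur[j - 1] + 1,     /// insertion
                               prev[j - 1] + (a[i - 1] == b[j - 1] ? 0 : 1)});
        std::swap(prev, cur);
    }
    return prev[b.size()];
}

/// Keep only registered names within the distance budget.
std::vector<std::string> hintsFor(const std::string & typo, const std::vector<std::string> & names)
{
    std::vector<std::string> result;
    for (const auto & name : names)
        if (editDistance(typo, name) <= (typo.size() + 2) / 3)
            result.push_back(name);
    return result;
}

int main()
{
    /// "MergeTre" is the typo exercised by the 00834 test above.
    assert(hintsFor("MergeTre", {"MergeTree", "Log"}) == std::vector<std::string>{"MergeTree"});
}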
a/dbms/src/Common/NamePrompter.h b/dbms/src/Common/NamePrompter.h index d37a5946dcd..956a60f9a4e 100644 --- a/dbms/src/Common/NamePrompter.h +++ b/dbms/src/Common/NamePrompter.h @@ -97,4 +97,23 @@ private: } }; +template +class IHints +{ +public: + + virtual std::vector getAllRegisteredNames() const = 0; + + std::vector getHints(const String & name) const + { + static const auto registered_names = getAllRegisteredNames(); + return prompter.getHints(name, registered_names); + } + + virtual ~IHints() = default; + +private: + NamePrompter prompter; +}; + } diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 319258e3adf..1dd49e5c99b 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -4,7 +4,7 @@ #include #include #include - +#include namespace DB { @@ -120,7 +120,13 @@ StoragePtr StorageFactory::get( auto it = storages.find(name); if (it == storages.end()) - throw Exception("Unknown table engine " + name, ErrorCodes::UNKNOWN_STORAGE); + { + auto hints = getHints(name); + if (!hints.empty()) + throw Exception("Unknown table engine " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_STORAGE); + else + throw Exception("Unknown table engine " + name, ErrorCodes::UNKNOWN_STORAGE); + } Arguments arguments { diff --git a/dbms/src/Storages/StorageFactory.h b/dbms/src/Storages/StorageFactory.h index 4addfcd9794..125758cef09 100644 --- a/dbms/src/Storages/StorageFactory.h +++ b/dbms/src/Storages/StorageFactory.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -17,7 +18,7 @@ class ASTStorage; * In 'columns' Nested data structures must be flattened. * You should subsequently call IStorage::startup method to work with table. */ -class StorageFactory : public ext::singleton +class StorageFactory : public ext::singleton, public IHints<1, StorageFactory> { public: struct Arguments @@ -58,6 +59,14 @@ public: return storages; } + std::vector getAllRegisteredNames() const override + { + std::vector result; + auto getter = [](const auto & pair) { return pair.first; }; + std::transform(storages.begin(), storages.end(), std::back_inserter(result), getter); + return result; + } + private: using Storages = std::unordered_map; Storages storages; diff --git a/dbms/src/TableFunctions/TableFunctionFactory.cpp b/dbms/src/TableFunctions/TableFunctionFactory.cpp index 8fb8533176b..7edd445379a 100644 --- a/dbms/src/TableFunctions/TableFunctionFactory.cpp +++ b/dbms/src/TableFunctions/TableFunctionFactory.cpp @@ -4,6 +4,7 @@ #include +#include namespace DB { @@ -32,7 +33,13 @@ TableFunctionPtr TableFunctionFactory::get( auto it = functions.find(name); if (it == functions.end()) - throw Exception("Unknown table function " + name, ErrorCodes::UNKNOWN_FUNCTION); + { + auto hints = getHints(name); + if (!hints.empty()) + throw Exception("Unknown table function " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_FUNCTION); + else + throw Exception("Unknown table function " + name, ErrorCodes::UNKNOWN_FUNCTION); + } return it->second(); } diff --git a/dbms/src/TableFunctions/TableFunctionFactory.h b/dbms/src/TableFunctions/TableFunctionFactory.h index 22bc5cdb99f..acbb6244c4e 100644 --- a/dbms/src/TableFunctions/TableFunctionFactory.h +++ b/dbms/src/TableFunctions/TableFunctionFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -18,7 +19,7 @@ class Context; /** Lets you get a table function by its name. 
*/ -class TableFunctionFactory final: public ext::singleton +class TableFunctionFactory final: public ext::singleton, public IHints<1, TableFunctionFactory> { public: using Creator = std::function; @@ -50,6 +51,14 @@ public: return functions; } + std::vector getAllRegisteredNames() const override + { + std::vector result; + auto getter = [](const auto & pair) { return pair.first; }; + std::transform(functions.begin(), functions.end(), std::back_inserter(result), getter); + return result; + } + private: TableFunctions functions; }; diff --git a/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh index 35adb15e839..5a1b813ecb1 100755 --- a/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh +++ b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh @@ -20,3 +20,10 @@ $CLICKHOUSE_CLIENT -q "select multisearchallposicionutf7('abc');" 2>&1 | grep "M $CLICKHOUSE_CLIENT -q "select multisearchallposicionutf7casesensitive('abc');" 2>&1 | grep "Maybe you meant: \['multiSearchAllPositionsCaseInsensitive','multiSearchAllPositionsCaseInsensitiveUTF8'\]." &>/dev/null; $CLICKHOUSE_CLIENT -q "select multiSearchAllposicionutf7sensitive('abc');" 2>&1 | grep "Maybe you meant: \['multiSearchAllPositionsCaseInsensitive','multiSearchAnyCaseInsensitive'\]." &>/dev/null; $CLICKHOUSE_CLIENT -q "select multiSearchAllPosicionSensitiveUTF8('abc');" 2>&1 | grep "Maybe you meant: \['multiSearchAnyCaseInsensitiveUTF8','multiSearchAllPositionsCaseInsensitiveUTF8'\]." &>/dev/null; + +$CLICKHOUSE_CLIENT -q "select * FROM numberss(10);" 2>&1 | grep "Maybe you meant: \['numbers'\]." &>/dev/null +$CLICKHOUSE_CLIENT -q "select * FROM anothernumbers(10);" 2>&1 | grep -v "Maybe you meant: \['numbers'\]." &>/dev/null +$CLICKHOUSE_CLIENT -q "select * FROM mynumbers(10);" 2>&1 | grep "Maybe you meant: \['numbers'\]." &>/dev/null + +$CLICKHOUSE_CLIENT -q "CREATE TABLE test.stored_aggregates (d Date, Uniq AggregateFunction(uniq, UInt64)) ENGINE = MergeTre(d, d, 8192);" 2>&1 | grep "Maybe you meant: \['MergeTree'\]." &>/dev/null +$CLICKHOUSE_CLIENT -q "CREATE TABLE test.stored_aggregates (d Date, Uniq AgregateFunction(uniq, UInt64)) ENGINE = MergeTree(d, d, 8192);" 2>&1 | grep "Maybe you meant: \['AggregateFunction'\]." &>/dev/null From 842ce6cff8dcaaeaa73673073952f4a214e227d6 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Apr 2019 15:58:38 +0300 Subject: [PATCH 067/102] Fix function cutQueryStringAndFragment(). --- dbms/src/Functions/queryStringAndFragment.h | 8 ++++---- .../queries/0_stateless/00398_url_functions.reference | 2 ++ dbms/tests/queries/0_stateless/00398_url_functions.sql | 2 ++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dbms/src/Functions/queryStringAndFragment.h b/dbms/src/Functions/queryStringAndFragment.h index 33f1a198182..a2aeb5ebbb5 100644 --- a/dbms/src/Functions/queryStringAndFragment.h +++ b/dbms/src/Functions/queryStringAndFragment.h @@ -17,15 +17,15 @@ struct ExtractQueryStringAndFragment res_data = data; res_size = 0; - Pos pos = data; - Pos end = pos + size; + Pos end = data + size; + Pos pos; - if (end != (pos = find_first_symbols<'?'>(pos, end))) + if (end != (pos = find_first_symbols<'?'>(data, end))) { res_data = pos + (without_leading_char ? 
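/// The gist of the fix above: start both searches from `data` rather than
/// from a `pos` already advanced by the first search — '?' wins if present,
/// otherwise fall back to '#' (keeping the '#'). A simplified
/// std::string_view rendering of that logic (this free function is an
/// illustrative stand-in, not the real ExtractQueryStringAndFragment):
#include <cassert>
#include <string_view>

std::string_view queryStringAndFragment(std::string_view url, bool without_leading_char)
{
    size_t q = url.find('?');
    if (q != std::string_view::npos)
        return url.substr(q + (without_leading_char ? 1 : 0));
    size_t h = url.find('#');            /// no query string: keep "#..."
    if (h != std::string_view::npos)
        return url.substr(h);
    return {};
}

int main()
{
    assert(queryStringAndFragment("//paul@example.com/#a=b", false) == "#a=b");
    assert(queryStringAndFragment("/path?q=1#a=b", true) == "q=1#a=b");
}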
1 : 0); res_size = end - res_data; } - else if (end != (pos = find_first_symbols<'#'>(pos, end))) + else if (end != (pos = find_first_symbols<'#'>(data, end))) { res_data = pos; res_size = end - res_data; diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index ddbc98781ff..e4a31f0654a 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -52,6 +52,7 @@ query=hello world+foo+bar query=hello world+foo+bar#a=b query=hello world+foo+bar#a=b query=hello world+foo+bar#a=b +#a=b ====CUT TO FIRST SIGNIFICANT SUBDOMAIN==== example.com example.com @@ -92,3 +93,4 @@ http://www.example.com/a/b/c http://www.example.com/a/b/c http://paul@www.example.com/a/b/c //paul@www.example.com/a/b/c +//paul@www.example.com/a/b/c diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 9bc5043f163..16425dae46d 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -59,6 +59,7 @@ SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/?query=hello% SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); SELECT decodeURLComponent(queryStringAndFragment('http://paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); SELECT decodeURLComponent(queryStringAndFragment('//paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(queryStringAndFragment('//paul@127.0.0.1/#a=b')); SELECT '====CUT TO FIRST SIGNIFICANT SUBDOMAIN===='; SELECT cutToFirstSignificantSubdomain('http://www.example.com'); @@ -104,4 +105,5 @@ SELECT cutQueryStringAndFragment('http://www.example.com/a/b/c?a=b'); SELECT cutQueryStringAndFragment('http://www.example.com/a/b/c?a=b#d=f'); SELECT cutQueryStringAndFragment('http://paul@www.example.com/a/b/c?a=b#d=f'); SELECT cutQueryStringAndFragment('//paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryStringAndFragment('//paul@www.example.com/a/b/c#d=f'); From 5b1bde2e801a49bcd643dd441c5f44ce6ca4964b Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 3 Apr 2019 17:06:59 +0300 Subject: [PATCH 068/102] CLICKHOUSE-4387 Add (official build) to version of yandex builds --- dbms/CMakeLists.txt | 2 +- dbms/cmake/version.cmake | 4 ++++ dbms/programs/client/Client.cpp | 2 +- dbms/programs/local/LocalServer.cpp | 2 +- dbms/programs/server/Server.cpp | 2 +- dbms/src/Common/Exception.cpp | 13 ++++--------- dbms/src/Common/config_version.h.in | 5 +++++ libs/libdaemon/src/BaseDaemon.cpp | 4 ++-- 8 files changed, 19 insertions(+), 15 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 49eec8451ab..1306039e9c3 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -20,7 +20,7 @@ set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config_version.h) set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config.h) include (cmake/version.cmake) -message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION}") +message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION} ${VERSION_OFFICIAL}") configure_file (src/Common/config.h.in ${CONFIG_COMMON}) configure_file (src/Common/config_version.h.in ${CONFIG_VERSION}) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 81876cf5fe6..e3f512f91a1 100644 --- a/dbms/cmake/version.cmake +++ 
b/dbms/cmake/version.cmake @@ -24,3 +24,7 @@ set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}") set (VERSION_SO "${VERSION_STRING}") math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") + +if(YANDEX_OFFICIAL_BUILD) + set(VERSION_OFFICIAL " (official build)") +endif() diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 9d81f96041a..54271996290 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1523,7 +1523,7 @@ private: void showClientVersion() { - std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << std::endl; + std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } public: diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 58e723513a4..9808cadd303 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -369,7 +369,7 @@ void LocalServer::setupUsers() static void showClientVersion() { - std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << '\n'; + std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << '\n'; } std::string LocalServer::getHelpHeader() const diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 2b10c9e3c98..fea06e9506d 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -132,7 +132,7 @@ int Server::run() } if (config().hasOption("version")) { - std::cout << DBMS_NAME << " server version " << VERSION_STRING << "." << std::endl; + std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; return 0; } return Application::run(); diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index a9197d6bc7d..4bb34f99f54 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -21,11 +21,6 @@ namespace ErrorCodes extern const int CANNOT_TRUNCATE_FILE; } -const char * getVersion() -{ - return VERSION_STRING; -} - std::string errnoToString(int code, int e) { const size_t buf_size = 128; @@ -82,13 +77,13 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded } catch (const Exception & e) { - stream << "(version " << getVersion() << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace); + stream << "(version " << VERSION_STRING << VERSION_OFFICIAL << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace); } catch (const Poco::Exception & e) { try { - stream << "(version " << getVersion() << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() + stream << "(version " << VERSION_STRING << VERSION_OFFICIAL << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() << ", e.displayText() = " << e.displayText(); } catch (...) {} @@ -103,7 +98,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "(version " << getVersion() << ") " << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); + stream << "(version " << VERSION_STRING << VERSION_OFFICIAL << ") " << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); } catch (...) 
{} } @@ -117,7 +112,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "(version " << getVersion() << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; + stream << "(version " << VERSION_STRING << VERSION_OFFICIAL << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; } catch (...) {} } diff --git a/dbms/src/Common/config_version.h.in b/dbms/src/Common/config_version.h.in index a90fd77b6a8..bc90e63e39c 100644 --- a/dbms/src/Common/config_version.h.in +++ b/dbms/src/Common/config_version.h.in @@ -20,6 +20,7 @@ #cmakedefine VERSION_MINOR @VERSION_MINOR@ #cmakedefine VERSION_PATCH @VERSION_PATCH@ #cmakedefine VERSION_STRING "@VERSION_STRING@" +#cmakedefine VERSION_OFFICIAL "@VERSION_OFFICIAL@" #cmakedefine VERSION_FULL "@VERSION_FULL@" #cmakedefine VERSION_DESCRIBE "@VERSION_DESCRIBE@" #cmakedefine VERSION_GITHASH "@VERSION_GITHASH@" @@ -42,3 +43,7 @@ #else #define DBMS_VERSION_PATCH 0 #endif + +#if !defined(VERSION_OFFICIAL) +# define VERSION_OFFICIAL "" +#endif diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index c61f74d54d9..cb2346f4379 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -299,13 +299,13 @@ private: private: void onTerminate(const std::string & message, ThreadNumber thread_num) const { - LOG_ERROR(log, "(version " << VERSION_STRING << ") (from thread " << thread_num << ") " << message); + LOG_ERROR(log, "(version " << VERSION_STRING << VERSION_OFFICIAL << ") (from thread " << thread_num << ") " << message); } void onFault(int sig, siginfo_t & info, ucontext_t & context, ThreadNumber thread_num) const { LOG_ERROR(log, "########################################"); - LOG_ERROR(log, "(version " << VERSION_STRING << ") (from thread " << thread_num << ") " + LOG_ERROR(log, "(version " << VERSION_STRING << VERSION_OFFICIAL << ") (from thread " << thread_num << ") " << "Received signal " << strsignal(sig) << " (" << sig << ")" << "."); void * caller_address = nullptr; From 1f245e195946c3dab26a067c59b00af35e367224 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 3 Apr 2019 17:11:59 +0300 Subject: [PATCH 069/102] Tests: fix cat usage --- dbms/tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index ac9dcde1f36..b285b273b2f 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -341,7 +341,7 @@ def main(args): if result_is_different: diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0] diff = unicode(diff, errors='replace', encoding='utf-8') - cat = Popen(['cat', '-A'], stdin=PIPE, stdout=PIPE).communicate(input=diff)[0] + cat = Popen(['cat', '-vet'], stdin=PIPE, stdout=PIPE).communicate(input=diff)[0] failure = et.Element("failure", attrib = {"message": "result differs with reference"}) report_testcase.append(failure) From bee8d61e26d64548ffc639aec96d697b755a6d06 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Apr 2019 17:24:55 +0300 Subject: [PATCH 070/102] Fix seek backwards after eof in ReadBufferAIO --- dbms/src/IO/ReadBufferAIO.cpp | 3 + .../tests/gtest_aio_seek_back_after_eof.cpp | 71 +++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp diff --git a/dbms/src/IO/ReadBufferAIO.cpp 
b/dbms/src/IO/ReadBufferAIO.cpp index ccbc0dfe818..a00ff326d5a 100644 --- a/dbms/src/IO/ReadBufferAIO.cpp +++ b/dbms/src/IO/ReadBufferAIO.cpp @@ -187,6 +187,9 @@ off_t ReadBufferAIO::doSeek(off_t off, int whence) pos = working_buffer.end(); first_unread_pos_in_file = new_pos_in_file; + /// If we goes back, than it's not eof + is_eof = false; + /// We can not use the result of the current asynchronous request. skip(); } diff --git a/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp b/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp new file mode 100644 index 00000000000..66e4a1e5a6d --- /dev/null +++ b/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp @@ -0,0 +1,71 @@ +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma clang diagnostic ignored "-Wundef" +#endif +#include + +#include +#include +#include +#include + +namespace +{ +std::string createTmpFileForEOFtest() +{ + char pattern[] = "/tmp/fileXXXXXX"; + char * dir = ::mkdtemp(pattern); + return std::string(dir) + "/foo"; +} + +void prepare_for_eof(std::string & filename, std::string & buf) +{ + static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + + filename = createTmpFileForEOFtest(); + + size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; + buf.reserve(n); + + for (size_t i = 0; i < n; ++i) + buf += symbols[i % symbols.length()]; + + std::ofstream out(filename.c_str()); + out << buf; +} + + +} +TEST(ReadBufferAIOTest, TestReadAfterAIO) +{ + using namespace DB; + std::string data; + std::string file_path; + prepare_for_eof(file_path, data); + ReadBufferAIO testbuf(file_path); + + std::string newdata; + newdata.resize(data.length()); + + size_t total_read = testbuf.read(newdata.data(), newdata.length()); + EXPECT_EQ(total_read, data.length()); + EXPECT_TRUE(testbuf.eof()); + + + testbuf.seek(data.length() - 100); + + std::string smalldata; + smalldata.resize(100); + size_t read_after_eof = testbuf.read(smalldata.data(), smalldata.size()); + EXPECT_EQ(read_after_eof, 100); + EXPECT_TRUE(testbuf.eof()); + + + testbuf.seek(0); + std::string repeatdata; + repeatdata.resize(data.length()); + size_t read_after_eof_big = testbuf.read(repeatdata.data(), repeatdata.size()); + EXPECT_EQ(read_after_eof_big, data.length()); + EXPECT_TRUE(testbuf.eof()); +} From 804a7612c20e4e4bf9f59e84020f8f3972b6fd03 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Apr 2019 17:28:39 +0300 Subject: [PATCH 071/102] Add comment --- dbms/src/IO/ReadBufferFromFileBase.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/IO/ReadBufferFromFileBase.h b/dbms/src/IO/ReadBufferFromFileBase.h index 461a91102d9..1fddd0c87cb 100644 --- a/dbms/src/IO/ReadBufferFromFileBase.h +++ b/dbms/src/IO/ReadBufferFromFileBase.h @@ -43,6 +43,7 @@ protected: ProfileCallback profile_callback; clockid_t clock_type; + /// Children implementation should be able to seek backwards virtual off_t doSeek(off_t off, int whence) = 0; }; From 5a6c4bf52658615b3355c9b625e157644af3b657 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Apr 2019 17:34:46 +0300 Subject: [PATCH 072/102] Better comment --- dbms/src/IO/ReadBufferAIO.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/IO/ReadBufferAIO.cpp b/dbms/src/IO/ReadBufferAIO.cpp index a00ff326d5a..f47e04bff75 100644 --- a/dbms/src/IO/ReadBufferAIO.cpp +++ b/dbms/src/IO/ReadBufferAIO.cpp @@ -187,7 +187,7 @@ off_t ReadBufferAIO::doSeek(off_t off, int whence) pos = working_buffer.end(); 
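/// A minimal model (not the real ReadBufferAIO) of why a backward seek must
/// clear the EOF flag — it mirrors the gtest added above: read to the end,
/// seek back, read again. Without `eof = false` in seek(), the second read
/// would return 0. ToyBuffer and main() are illustrative only.
#include <algorithm>
#include <cassert>
#include <cstring>
#include <string>

struct ToyBuffer
{
    std::string file;                    /// stand-in for the file contents
    size_t position = 0;
    bool eof = false;

    size_t read(char * to, size_t n)
    {
        if (eof)
            return 0;
        size_t m = std::min(n, file.size() - position);
        std::memcpy(to, file.data() + position, m);
        position += m;
        if (position == file.size())
            eof = true;
        return m;
    }

    void seek(size_t new_position)
    {
        position = new_position;
        eof = false;                     /// the fix: going back is not EOF
    }
};

int main()
{
    ToyBuffer buf{std::string(1000, 'x')};
    char tmp[1000];
    assert(buf.read(tmp, 1000) == 1000); /// drives the buffer to EOF
    buf.seek(900);                       /// seek backwards after EOF
    assert(buf.read(tmp, 100) == 100);   /// succeeds only because eof reset
}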
first_unread_pos_in_file = new_pos_in_file; - /// If we goes back, than it's not eof + /// If we go back, than it's not eof is_eof = false; /// We can not use the result of the current asynchronous request. From 6b49900e1a22ac5110915855f858de68455cbff5 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 3 Apr 2019 19:06:05 +0300 Subject: [PATCH 073/102] Asterisks for multiple joins CLICKHOUSE-4372 --- .../Interpreters/InterpreterSelectQuery.cpp | 2 +- .../JoinToSubqueryTransformVisitor.cpp | 128 +++++++++++++++++- .../JoinToSubqueryTransformVisitor.h | 2 + .../00854_multiple_join_asterisks.reference | 5 + .../00854_multiple_join_asterisks.sql | 9 ++ 5 files changed, 139 insertions(+), 7 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00854_multiple_join_asterisks.reference create mode 100644 dbms/tests/queries/0_stateless/00854_multiple_join_asterisks.sql diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 465a71d8801..8eb8c2d7a22 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -203,7 +203,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (settings.allow_experimental_multiple_joins_emulation) { - JoinToSubqueryTransformVisitor::Data join_to_subs_data; + JoinToSubqueryTransformVisitor::Data join_to_subs_data{context}; JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr); } diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index a99e79b1120..22deeabfcc1 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -1,8 +1,10 @@ #include +#include #include #include #include #include +#include #include #include #include @@ -22,11 +24,113 @@ namespace ErrorCodes extern const int TOO_DEEP_AST; extern const int AMBIGUOUS_COLUMN_NAME; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_IDENTIFIER; } +NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableExpression & table_expression, const Context & context); + namespace { +/// Replace asterisks in select_expression_list with column identifiers +struct ExtractAsterisksMatcher +{ + using Visitor = InDepthNodeVisitor; + + struct Data + { + std::unordered_map table_columns; + std::shared_ptr new_select_expression_list; + + Data(const Context & context, const std::vector & table_expressions) + { + for (const auto & expr : table_expressions) + { + if (expr->subquery) + { + table_columns.clear(); + break; + } + + String table_name = DatabaseAndTableWithAlias(*expr, context.getCurrentDatabase()).getQualifiedNamePrefix(false); + NamesAndTypesList columns = getNamesAndTypeListFromTableExpression(*expr, context); + table_columns.emplace(std::move(table_name), std::move(columns)); + } + } + }; + + static bool needChildVisit(ASTPtr &, const ASTPtr &) { return false; } + + static void visit(ASTPtr & ast, Data & data) + { + if (auto * t = ast->as()) + visit(*t, ast, data); + if (auto * t = ast->as()) + visit(*t, ast, data); + } + + static void visit(ASTSelectQuery & node, ASTPtr &, Data & data) + { + if (data.table_columns.empty()) + return; + + Visitor(data).visit(node.select_expression_list); + if (!data.new_select_expression_list) + return; + + size_t pos = 0; + for (; pos < node.children.size(); ++pos) + if (node.children[pos].get() == node.select_expression_list.get()) + break; + if (pos == node.children.size()) 
+ throw Exception("No select expressions list in select", ErrorCodes::NOT_IMPLEMENTED); + + node.select_expression_list = data.new_select_expression_list; + node.children[pos] = node.select_expression_list; + } + + static void visit(ASTExpressionList & node, ASTPtr &, Data & data) + { + bool has_asterisks = false; + data.new_select_expression_list = std::make_shared(); + data.new_select_expression_list->children.reserve(node.children.size()); + + for (auto & child : node.children) + { + if (child->as()) + { + has_asterisks = true; + + for (auto & pr : data.table_columns) + for (const auto & column : pr.second) + data.new_select_expression_list->children.push_back( + std::make_shared(std::vector{pr.first, column.name})); + } + else if (child->as()) + { + has_asterisks = true; + + if (child->children.size() != 1) + throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); + ASTIdentifier & identifier = child->children[0]->as(); + + auto it = data.table_columns.find(identifier.name); + if (it == data.table_columns.end()) + throw Exception("Unknown qualified identifier: " + identifier.name, ErrorCodes::UNKNOWN_IDENTIFIER); + + for (const auto & column : it->second) + data.new_select_expression_list->children.push_back( + std::make_shared(std::vector{it->first, column.name})); + } + else + data.new_select_expression_list->children.push_back(child); + } + + if (!has_asterisks) + data.new_select_expression_list.reset(); + } +}; + /// Find columns with aliases to push them into rewritten subselects. /// Normalize table aliases: table_name.column_name -> table_alias.column_name /// Make aliases maps (alias -> column_name, column_name -> alias) @@ -41,7 +145,7 @@ struct ColumnAliasesMatcher std::vector> compound_identifiers; std::set allowed_long_names; /// original names allowed as aliases '--t.x as t.x' (select expressions only). 
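/// Sketch of the expansion implemented by ExtractAsterisksMatcher above,
/// stripped of AST types: `*` expands to every column of every table,
/// `t.*` to the columns of one table, anything else passes through.
/// expandAsterisks and the string-based "select list" are illustrative only.
#include <cassert>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using TableColumns = std::map<std::string, std::vector<std::string>>;

std::vector<std::string> expandAsterisks(const std::vector<std::string> & select_list, const TableColumns & tables)
{
    std::vector<std::string> result;
    for (const auto & item : select_list)
    {
        if (item == "*")                                    /// ASTAsterisk
        {
            for (const auto & [table, columns] : tables)
                for (const auto & column : columns)
                    result.push_back(table + "." + column);
        }
        else if (item.size() > 2 && item.compare(item.size() - 2, 2, ".*") == 0)
        {                                          /// ASTQualifiedAsterisk
            auto it = tables.find(item.substr(0, item.size() - 2));
            if (it == tables.end())
                throw std::runtime_error("Unknown qualified identifier: " + item);
            for (const auto & column : it->second)
                result.push_back(it->first + "." + column);
        }
        else
            result.push_back(item);          /// ordinary expression: keep
    }
    return result;
}

int main()
{
    TableColumns tables{{"t1", {"a"}}, {"t2", {"a", "b"}}};
    auto cols = expandAsterisks({"*", "t2.*", "t1.a"}, tables);
    assert(cols.size() == 6);            /// t1.a, t2.a, t2.b, t2.a, t2.b, t1.a
}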
- Data(std::vector && tables_) + Data(const std::vector && tables_) : tables(tables_) , public_names(false) {} @@ -101,7 +205,7 @@ struct ColumnAliasesMatcher visit(*t, ast, data); if (ast->as() || ast->as()) - throw Exception("Multiple JOIN do not support asterisks yet", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Multiple JOIN do not support asterisks for complex queries yet", ErrorCodes::NOT_IMPLEMENTED); } static void visit(ASTIdentifier & node, ASTPtr &, Data & data) @@ -190,7 +294,7 @@ struct RewriteTablesVisitorData } }; -bool needRewrite(ASTSelectQuery & select) +bool needRewrite(ASTSelectQuery & select, std::vector & table_expressions) { if (!select.tables) return false; @@ -203,9 +307,16 @@ bool needRewrite(ASTSelectQuery & select) if (num_tables <= 2) return false; - for (size_t i = 1; i < tables->children.size(); ++i) + table_expressions.reserve(num_tables); + for (size_t i = 0; i < num_tables; ++i) { const auto * table = tables->children[i]->as(); + if (table && table->table_expression) + if (const auto * expression = table->table_expression->as()) + table_expressions.push_back(expression); + if (!i) + continue; + if (!table || !table->table_join) throw Exception("Multiple JOIN expects joined tables", ErrorCodes::LOGICAL_ERROR); @@ -223,6 +334,7 @@ bool needRewrite(ASTSelectQuery & select) using RewriteMatcher = OneTypeMatcher; using RewriteVisitor = InDepthNodeVisitor; +using ExtractAsterisksVisitor = ExtractAsterisksMatcher::Visitor; using ColumnAliasesVisitor = InDepthNodeVisitor; using AppendSemanticMatcher = OneTypeMatcher; using AppendSemanticVisitor = InDepthNodeVisitor; @@ -236,13 +348,17 @@ void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data) visit(*t, ast, data); } -void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data) +void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data) { using RevertedAliases = AsteriskSemantic::RevertedAliases; - if (!needRewrite(select)) + std::vector table_expressions; + if (!needRewrite(select, table_expressions)) return; + ExtractAsterisksVisitor::Data asterisks_data(data.context, table_expressions); + ExtractAsterisksVisitor(asterisks_data).visit(ast); + ColumnAliasesVisitor::Data aliases_data(getDatabaseAndTables(select, "")); if (select.select_expression_list) { diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.h b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.h index f030131497c..7b325a7cff2 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.h +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.h @@ -6,6 +6,7 @@ namespace DB { class ASTSelectQuery; +class Context; /// AST transformer. It replaces multiple joins to (subselect + join) track. /// 'select * from t1 join t2 on ... join t3 on ... join t4 on ...' 
would be rewriten with @@ -15,6 +16,7 @@ class JoinToSubqueryTransformMatcher public: struct Data { + const Context & context; bool done = false; }; diff --git a/dbms/tests/queries/0_stateless/00854_multiple_join_asterisks.reference b/dbms/tests/queries/0_stateless/00854_multiple_join_asterisks.reference new file mode 100644 index 00000000000..9a2733a6d15 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00854_multiple_join_asterisks.reference @@ -0,0 +1,5 @@ +0 0 0 +0 0 0 +0 +0 0 +0 0 0 diff --git a/dbms/tests/queries/0_stateless/00854_multiple_join_asterisks.sql b/dbms/tests/queries/0_stateless/00854_multiple_join_asterisks.sql new file mode 100644 index 00000000000..3697a957c8a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00854_multiple_join_asterisks.sql @@ -0,0 +1,9 @@ +select t1.dummy, t2.dummy, t3.dummy from system.one t1 join system.one t2 on t1.dummy = t2.dummy join system.one t3 ON t1.dummy = t3.dummy; +select * from system.one t1 join system.one t2 on t1.dummy = t2.dummy join system.one t3 ON t1.dummy = t3.dummy; +select t1.* from system.one t1 join system.one t2 on t1.dummy = t2.dummy join system.one t3 ON t1.dummy = t3.dummy; +select t2.*, t3.* from system.one t1 join system.one t2 on t1.dummy = t2.dummy join system.one t3 ON t1.dummy = t3.dummy; +select t1.dummy, t2.*, t3.dummy from system.one t1 join system.one t2 on t1.dummy = t2.dummy join system.one t3 ON t1.dummy = t3.dummy; + +select t1.dummy, t2.*, t3.dummy from (select * from system.one) t1 +join system.one t2 on t1.dummy = t2.dummy +join system.one t3 ON t1.dummy = t3.dummy; -- { serverError 48 } From 5295b89fd97b3944391fe0865b266895abed54e1 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 3 Apr 2019 20:25:58 +0300 Subject: [PATCH 074/102] fix test expectation --- .../queries/0_stateless/00820_multiple_joins.reference | 8 ++++++++ dbms/tests/queries/0_stateless/00820_multiple_joins.sql | 6 ++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00820_multiple_joins.reference b/dbms/tests/queries/0_stateless/00820_multiple_joins.reference index 2e7d8660562..335c17933ec 100644 --- a/dbms/tests/queries/0_stateless/00820_multiple_joins.reference +++ b/dbms/tests/queries/0_stateless/00820_multiple_joins.reference @@ -32,6 +32,14 @@ 6 6 60 60 12 12 120 120 18 18 180 180 +0 0 0 0 0 +6 6 60 60 600 +12 12 120 120 1200 +18 18 180 180 1800 +0 0 0 0 0 +60 600 6 60 6 +120 1200 12 120 12 +180 1800 18 180 18 0 0 0 0 0 0 0 6 6 60 60 66 66 120 12 12 120 120 132 132 240 diff --git a/dbms/tests/queries/0_stateless/00820_multiple_joins.sql b/dbms/tests/queries/0_stateless/00820_multiple_joins.sql index c19f4467934..628a3d9fd8c 100644 --- a/dbms/tests/queries/0_stateless/00820_multiple_joins.sql +++ b/dbms/tests/queries/0_stateless/00820_multiple_joins.sql @@ -57,17 +57,15 @@ from table1 as t1 join table2 as t2 on table1.a = table2.a join table3 as t3 on table2.b = table3.b; --- TODO select t1.*, t2.*, t3.* from table1 as t1 join table2 as t2 on table1.a = table2.a -join table3 as t3 on table2.b = table3.b; -- { serverError 48 } +join table3 as t3 on table2.b = table3.b; --- TODO select * from table1 as t1 join table2 as t2 on t1.a = t2.a -join table3 as t3 on t2.b = t3.b; -- { serverError 48 } +join table3 as t3 on t2.b = t3.b; select t1.a as t1_a, t2.a as t2_a, t2.b as t2_b, t3.b as t3_b, (t1.a + table2.b) as t1_t2_x, (table1.a + table3.b) as t1_t3_x, (t2.b + t3.b) as t2_t3_x From ea53a0a85df3a2f78be7cc9b24c8672828c1961b Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 4 Apr 
2019 09:23:12 +0300 Subject: [PATCH 075/102] Clang build is too annoying --- dbms/src/Common/IFactoryWithAliases.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/IFactoryWithAliases.h b/dbms/src/Common/IFactoryWithAliases.h index 476c0251be9..64703e51082 100644 --- a/dbms/src/Common/IFactoryWithAliases.h +++ b/dbms/src/Common/IFactoryWithAliases.h @@ -106,7 +106,7 @@ public: return aliases.count(name) || case_insensitive_aliases.count(name); } - virtual ~IFactoryWithAliases() {} + virtual ~IFactoryWithAliases() override {} private: using InnerMap = std::unordered_map; // name -> creator From 236f2a4354587ad122fdfce67dc5c53bb8bc78f7 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 4 Apr 2019 12:22:54 +0300 Subject: [PATCH 076/102] clickhouse-copier - fix another segfault (#4900) --- dbms/programs/copier/ClusterCopier.cpp | 9 +++-- .../test_cluster_copier/task_no_arg.xml | 39 +++++++++++++++++++ .../integration/test_cluster_copier/test.py | 26 +++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/integration/test_cluster_copier/task_no_arg.xml diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 90cace9aa32..e478c591d16 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -500,9 +500,6 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast) ErrorCodes::BAD_ARGUMENTS); } - ASTPtr arguments_ast = engine.arguments->clone(); - ASTs & arguments = arguments_ast->children; - if (isExtendedDefinitionStorage(storage_ast)) { if (storage.partition_by) @@ -516,6 +513,12 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast) bool is_replicated = startsWith(engine.name, "Replicated"); size_t min_args = is_replicated ? 
3 : 1; + if (!engine.arguments) + throw Exception("Expected arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS); + + ASTPtr arguments_ast = engine.arguments->clone(); + ASTs & arguments = arguments_ast->children; + if (arguments.size() < min_args) throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS); diff --git a/dbms/tests/integration/test_cluster_copier/task_no_arg.xml b/dbms/tests/integration/test_cluster_copier/task_no_arg.xml new file mode 100644 index 00000000000..d9d49011f3f --- /dev/null +++ b/dbms/tests/integration/test_cluster_copier/task_no_arg.xml @@ -0,0 +1,39 @@ + + + + + 1 + + s0_0_0 + 9000 + + + + + + + 1 + + s1_1_0 + 9000 + + + + + + 1 + + + + source_cluster + default + copier_test1 + + default_cluster + default + copier_test1_1 + ENGINE = MergeTree PARTITION BY date ORDER BY date + rand() + + + diff --git a/dbms/tests/integration/test_cluster_copier/test.py b/dbms/tests/integration/test_cluster_copier/test.py index 31804c184f8..c223a73f59e 100644 --- a/dbms/tests/integration/test_cluster_copier/test.py +++ b/dbms/tests/integration/test_cluster_copier/test.py @@ -167,6 +167,7 @@ class Task_test_block_size: ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2) ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1") + class Task_no_index: def __init__(self, cluster): @@ -190,6 +191,29 @@ class Task_no_index: instance.query("DROP TABLE ontime22") +class Task_no_arg: + + def __init__(self, cluster): + self.cluster = cluster + self.zk_task_path="/clickhouse-copier/task_no_arg" + self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task_no_arg.xml'), 'r').read() + self.rows = 1000000 + + + def start(self): + instance = cluster.instances['s0_0_0'] + instance.query("create table copier_test1 (date Date, id UInt32) engine = MergeTree PARTITION BY date ORDER BY date SETTINGS index_granularity = 8192") + instance.query("insert into copier_test1 values ('2016-01-01', 10);") + + + def check(self): + assert TSV(self.cluster.instances['s1_1_0'].query("SELECT date FROM copier_test1_1")) == TSV("2016-01-01\n") + instance = cluster.instances['s0_0_0'] + instance.query("DROP TABLE copier_test1") + instance = cluster.instances['s1_1_0'] + instance.query("DROP TABLE copier_test1_1") + + def execute_task(task, cmd_options): task.start() @@ -254,6 +278,8 @@ def test_block_size(started_cluster): def test_no_index(started_cluster): execute_task(Task_no_index(started_cluster), []) +def test_no_arg(started_cluster): + execute_task(Task_no_arg(started_cluster), []) if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: From 6e2b6fc27b4c779a4c1c303c338b18a9076d4c69 Mon Sep 17 00:00:00 2001 From: Chen Yufei Date: Thu, 4 Apr 2019 18:23:15 +0800 Subject: [PATCH 077/102] Add logger config for clickhouse-copier doc. 
 (#4908)

---
 docs/en/operations/utils/clickhouse-copier.md | 6 ++++++
 docs/ru/operations/utils/clickhouse-copier.md | 6 ++++++
 docs/zh/operations/utils/clickhouse-copier.md | 6 ++++++
 3 files changed, 18 insertions(+)

diff --git a/docs/en/operations/utils/clickhouse-copier.md b/docs/en/operations/utils/clickhouse-copier.md
index bb2666a05cc..fac374b4790 100644
--- a/docs/en/operations/utils/clickhouse-copier.md
+++ b/docs/en/operations/utils/clickhouse-copier.md
@@ -38,6 +38,12 @@ Parameters:
 
 ```xml
 <yandex>
+    <logger>
+        <level>trace</level>
+        <size>100M</size>
+        <count>3</count>
+    </logger>
+
     <zookeeper>
         <node index="1">
             <host>127.0.0.1</host>
diff --git a/docs/ru/operations/utils/clickhouse-copier.md b/docs/ru/operations/utils/clickhouse-copier.md
index 1959a768def..0c852450457 100644
--- a/docs/ru/operations/utils/clickhouse-copier.md
+++ b/docs/ru/operations/utils/clickhouse-copier.md
@@ -37,6 +37,12 @@ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path
 
 ```xml
 <yandex>
+    <logger>
+        <level>trace</level>
+        <size>100M</size>
+        <count>3</count>
+    </logger>
+
     <zookeeper>
         <node index="1">
             <host>127.0.0.1</host>
diff --git a/docs/zh/operations/utils/clickhouse-copier.md b/docs/zh/operations/utils/clickhouse-copier.md
index bb2666a05cc..fac374b4790 100644
--- a/docs/zh/operations/utils/clickhouse-copier.md
+++ b/docs/zh/operations/utils/clickhouse-copier.md
@@ -38,6 +38,12 @@ Parameters:
 
 ```xml
 <yandex>
+    <logger>
+        <level>trace</level>
+        <size>100M</size>
+        <count>3</count>
+    </logger>
+
     <zookeeper>
         <node index="1">
             <host>127.0.0.1</host>

From 30610f8a6e0b74305b6e2fe6cc4e671463bd1d20 Mon Sep 17 00:00:00 2001
From: Stefan Thies
Date: Thu, 4 Apr 2019 12:44:20 +0200
Subject: [PATCH 078/102] add Sematext clickhouse integrations (#4887)

---
 docs/en/interfaces/third-party/integrations.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md
index 76833a869f6..c9a20c58e88 100644
--- a/docs/en/interfaces/third-party/integrations.md
+++ b/docs/en/interfaces/third-party/integrations.md
@@ -35,11 +35,16 @@
         - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/))
     - [Nagios](https://www.nagios.org/)
         - [check_clickhouse](https://github.com/exogroup/check_clickhouse/)
+    - [Sematext](https://sematext.com/)
+        - [Monitoring ClickHouse with Sematext](https://sematext.com/blog/clickhouse-monitoring-sematext/)
+        - [clickhouse integration](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse)
 - Logging
     - [rsyslog](https://www.rsyslog.com/)
         - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html)
     - [fluentd](https://www.fluentd.org)
     - [loghouse](https://github.com/flant/loghouse) (for [Kubernetes](https://kubernetes.io))
+    - [logagent](https://www.sematext.com/logagent)
+        - [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/)
 
 ## Programming Language Ecosystems

From 7374083c5e95e1e5248e4255a08380c25eb87ff9 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 4 Apr 2019 13:49:01 +0300
Subject: [PATCH 079/102] docs fixes after #4887

---
 docs/en/interfaces/third-party/integrations.md | 3 +--
 docs/fa/interfaces/third-party/integrations.md | 4 ++++
 docs/ru/interfaces/third-party/integrations.md | 4 ++++
 docs/zh/interfaces/third-party/integrations.md | 4 ++++
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md
index c9a20c58e88..511c61116d2 100644
--- a/docs/en/interfaces/third-party/integrations.md
+++ b/docs/en/interfaces/third-party/integrations.md
@@ -36,8 +36,7 @@
     - [Nagios](https://www.nagios.org/)
        - 
[check_clickhouse](https://github.com/exogroup/check_clickhouse/) - [Sematext](https://sematext.com/) - - [Monitoring ClickHouse with Sematext](https://sematext.com/blog/clickhouse-monitoring-sematext/) - - [clickhouse integration](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) + - [clickhouse integration](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) - Logging - [rsyslog](https://www.rsyslog.com/) - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index d0b2e041799..f3e3b9aa20a 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -36,11 +36,15 @@ - [PromHouse](https://github.com/Percona-Lab/PromHouse) - [Nagios](https://www.nagios.org/) - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) + - [Sematext](https://sematext.com/) + - [clickhouse ادغام](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) - ثبت نام - [rsyslog](https://www.rsyslog.com/) - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (برای [Kubernetes](https://kubernetes.io)) + - [logagent](https://www.sematext.com/logagent) + - [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/) ## اکوسیستم زبان برنامه نویسی diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index ee9864a16b7..77db016ba0e 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -34,11 +34,15 @@ - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (использует [Go client](https://github.com/kshvakov/clickhouse/)) - [Nagios](https://www.nagios.org/) - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) + - [Sematext](https://sematext.com/) + - [clickhouse интеграция](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) - Логирование - [rsyslog](https://www.rsyslog.com/) - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (для [Kubernetes](https://kubernetes.io)) + - [logagent](https://www.sematext.com/logagent) + - [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/) ## Экосистемы вокруг языков программирования diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 6c77f6bb1e7..569c2d4824b 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -33,11 +33,15 @@ - [PromHouse](https://github.com/Percona-Lab/PromHouse) - [Nagios](https://www.nagios.org/) - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) + - [Sematext](https://sematext.com/) + - [clickhouse积分](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) - 记录 - [rsyslog](https://www.rsyslog.com/) - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (对于 [Kubernetes](https://kubernetes.io)) + - 
[logagent](https://www.sematext.com/logagent) + - [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/) ## 编程语言生态系统 From 70dbeaa88e59f7f6403ff252f7d58d100d5aa110 Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Thu, 4 Apr 2019 15:07:10 +0300 Subject: [PATCH 080/102] Doc fix: Editing README.md (#4892) --- docs/README.md | 60 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/docs/README.md b/docs/README.md index 60f452cc488..5432b3e1824 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,56 +1,56 @@ -# How to contribute to ClickHouse documentation? +# How to Contribute to ClickHouse Documentation -Basically ClickHouse uses "documentation as code" approach, so you can edit Markdown files in this folder from GitHub web interface or fork ClickHouse repository, edit, commit, push and open pull request. +ClickHouse uses the "documentation as code" approach, so you can edit Markdown files in this folder from the GitHub web interface. Alternatively, fork the ClickHouse repository, edit, commit, push, and open a pull request. -At the moment documentation is bilingual in English and Russian, so it's better to try keeping languages in sync if you can, but it's not strictly required as there are people watching over this. If you add new article, you should also add it to `toc_{en,ru,zh,fa}.yaml` files with pages index. +At the moment documentation is bilingual in English and Russian. Try to keep all languages in sync if you can, but this is not strictly required. There are people who are responsible for monitoring language versions and syncing them. If you add a new article, you should also add it to `toc_{en,ru,zh,fa}.yaml` files with the pages index. -Master branch is then asynchronously published to ClickHouse official website: +The master branch is then asynchronously published to the ClickHouse official website: * In English: https://clickhouse.yandex/docs/en/ * In Russian: https://clickhouse.yandex/docs/ru/ * In Chinese: https://clickhouse.yandex/docs/zh/ * In Farsi: https://clickhouse.yandex/docs/fa/ -Infrastructure to build Markdown to documentation website resides in [tools](tools) folder, it has it's own [README.md](tools/README.md) with more details. +The infrastructure to build Markdown for publishing on the documentation website resides in the [tools](tools) folder. It has its own [README.md](tools/README.md) file with more details. -# How to write content for ClickHouse documentation? +# How to Write Content for ClickHouse Documentation -## Target audience +## Target Audience -When you write pretty much any text, first thing you should think about: who exactly will read it and in which terms it is better to "talk" with them. +When you write pretty much any text, the first thing you should think about is who will read it and which terms you should use for communicating with them. -ClickHouse can be directly used by all sorts of either analysts and engineers, so you should only basic technical background of reader when writing content for generic parts of documentation, like query language, tutorials or overviews. Though it is ok for articles describing ClickHouse internals, guides for operating ClickHouse clusters, contributing to C++ code and other similar topics. +ClickHouse can be directly used by all sorts of analysts and engineers. 
For generic parts of documentation (like the query language, tutorials or overviews), assume that the reader only has a basic technical background. For more technical sections (like articles that describe ClickHouse internals, guides for operating ClickHouse clusters, or rules for contributing to C++ code), you can use technical language and concepts. -## Specific recommendations +## Specific Recommendations -* Documentation should make sense when you read it roughly from start to end. So when choosing a place for new content try to minimize referring to stuff that will be described later on. -* If documentation section consists of many similar items, like functions or operators, try to order them from more generic (usable by wider audience) to more specific (to some usecases or application types). If several items are intended to be mostly used together, keep them together in documentation too. -* Try to avoid slang, use the most common and specific terms for everythings. If some terms are used as synonyms, state this explicitly. -* All functionality descriptions should be accompanied by examples. At least very basic ones, but real world examples are welcome too. -* Debatable topics like politics, religion, racial and so on are strictly prohibited in either documentation, examples, comments and code. -* People tend to get temporary stuck with some specific words or phrases, usually auxiliary, for a short period of time. So they get repeated over and over in small part of content, which looks weird when reading. It is easy to fix this by reading your text again before publishing, also you can use this opportunity to fix mistypes and lost punctuation. -* Try to avoid naming the reader in text, it is not strictly prohibited though. +* Documentation should make sense when you read it through from beginning to end. If you add new content, try to place it where the necessary concepts have already been explained. +* If a documentation section consists of many similar items, like functions or operators, try to order them from more generic (usable by a wide audience) to more specific (for specific use cases or application types). If several items are intended to be mostly used together, group them together in the documentation. +* Try to avoid slang. Use the most common and specific terms possible for everything. If some terms are used as synonyms, state this explicitly. +* All descriptions of functionality should be accompanied by examples. Basic examples are acceptable, but real world examples are welcome, too. +* Sensitive topics like politics, religion, race, and so on are strictly prohibited in documentation, examples, comments, and code. +* Proofread your text before publishing. Look for typos, missing punctuation, or repetitions that could be avoided. +* Try to avoid addressing the reader directly, although this is not strictly prohibited. -# How to start translation to new language +# How to Add a New Language -1. Create new docs subfolder named with [ISO-639-1 language code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) -2. Add Markdown files with some translation, mirroring the folder structure of other languages -3. Commit and open pull request with new content +1. Create a new docs subfolder named using the [ISO-639-1 language code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). +2. Add Markdown files with the translation, mirroring the folder structure of other languages. +3. Commit and open a pull request with the new content. 
-Some additional configuration has to be done to actually make new language live on official website, but it's not automated/documented yet, so we'll do it on our own after pull request with content is merged. +Some additional configuration has to be done to actually make a new language live on the official website, but it's not automated or documented yet, so we'll do it on our own after the pull request with the content is merged. -# Quick cheatsheet on used Markdown dialect +# Markdown Dialect Cheatsheet -* Headers on separate line starting with `# `, `## ` or `### `. +* Headings are on a separate line starting with `# `, `## ` or `### `. * Bold is in `**asterisks**` or `__underlines__`. * Links `[anchor](http://...)`, images `![with exclamation sign](http://...jpeg)`. -* Lists are on lines starting with `* unordered` or `1. ordered`, but there should be empty line before first list item. Sub-lists must be indented with 4 spaces. -* Inline piece of code is `in backticks`. -* Multiline code block are ```in triple backtick quotes ```. -* Brightly highlighted block of text starts with `!!! info "Header"`, on next line 4 spaces and content. Instead of `info` can be `warning`. -* Hide block to be opened by click: `
<details> <summary>Header</summary> hidden content</details>
`. +* Lists are on lines starting with `* unordered` or `1. ordered`, but there should be an empty line before the first list item. Sub-lists must be indented with 4 spaces. +* Inline code fragments are `in backticks`. +* Multiline code blocks are ```in triple backtick quotes ```. +* Brightly highlighted text starts with `!!! info "Header"`, followed by 4 spaces on the next line and content. For a warning, replace `info` with `warning`. +* Hidden block that opens on click: `
<details> <summary>Header</summary> hidden content</details>
`. * Colored text: `text`. -* Additional anchor to be linked to: ``, for headers fully in English they are created automatically like `"FoO Bar" -> "foo-bar"`. +* Heading anchor to be linked to: `Title {#anchor-name}`. * Table: ``` | Header 1 | Header 2 | Header 3 | From e578020bd3c9e829709245c612c00075d8fa29f2 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 4 Apr 2019 15:14:10 +0300 Subject: [PATCH 081/102] safe tables order in select * with multiple joins --- .../JoinToSubqueryTransformVisitor.cpp | 33 ++++++++++++------- .../00820_multiple_joins.reference | 20 ++++++----- .../0_stateless/00820_multiple_joins.sql | 6 ++-- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 22deeabfcc1..43680d7aa61 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -33,30 +33,46 @@ namespace { /// Replace asterisks in select_expression_list with column identifiers -struct ExtractAsterisksMatcher +class ExtractAsterisksMatcher { +public: using Visitor = InDepthNodeVisitor; struct Data { std::unordered_map table_columns; + std::vector tables_order; std::shared_ptr new_select_expression_list; Data(const Context & context, const std::vector & table_expressions) { + tables_order.reserve(table_expressions.size()); for (const auto & expr : table_expressions) { if (expr->subquery) { table_columns.clear(); + tables_order.clear(); break; } String table_name = DatabaseAndTableWithAlias(*expr, context.getCurrentDatabase()).getQualifiedNamePrefix(false); NamesAndTypesList columns = getNamesAndTypeListFromTableExpression(*expr, context); + tables_order.push_back(table_name); table_columns.emplace(std::move(table_name), std::move(columns)); } } + + void addTableColumns(const String & table_name) + { + auto it = table_columns.find(table_name); + if (it == table_columns.end()) + throw Exception("Unknown qualified identifier: " + table_name, ErrorCodes::UNKNOWN_IDENTIFIER); + + for (const auto & column : it->second) + new_select_expression_list->children.push_back( + std::make_shared(std::vector{it->first, column.name})); + } }; static bool needChildVisit(ASTPtr &, const ASTPtr &) { return false; } @@ -69,6 +85,7 @@ struct ExtractAsterisksMatcher visit(*t, ast, data); } +private: static void visit(ASTSelectQuery & node, ASTPtr &, Data & data) { if (data.table_columns.empty()) @@ -101,10 +118,8 @@ struct ExtractAsterisksMatcher { has_asterisks = true; - for (auto & pr : data.table_columns) - for (const auto & column : pr.second) - data.new_select_expression_list->children.push_back( - std::make_shared(std::vector{pr.first, column.name})); + for (auto & table_name : data.tables_order) + data.addTableColumns(table_name); } else if (child->as()) { @@ -114,13 +129,7 @@ struct ExtractAsterisksMatcher throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); ASTIdentifier & identifier = child->children[0]->as(); - auto it = data.table_columns.find(identifier.name); - if (it == data.table_columns.end()) - throw Exception("Unknown qualified identifier: " + identifier.name, ErrorCodes::UNKNOWN_IDENTIFIER); - - for (const auto & column : it->second) - data.new_select_expression_list->children.push_back( - std::make_shared(std::vector{it->first, column.name})); + data.addTableColumns(identifier.name); } else data.new_select_expression_list->children.push_back(child); 
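For readers following the hunk above: `ExtractAsterisksMatcher` now records `tables_order` and expands both unqualified `*` and qualified asterisks such as `t1.*` in the order the tables appear in the query, instead of relying on `std::unordered_map` iteration order. A minimal sketch of the affected queries, mirroring the `00820_multiple_joins` test updated below (the `table1`/`table2`/`table3` schemas come from earlier in that test file and are not repeated here):

```sql
-- After this change, both queries expand their asterisks in table order,
-- so the result header is always: t1.a, t2.a, t2.b, t3.b, t3.c.
select t1.*, t2.*, t3.*
from table1 as t1
join table2 as t2 on table1.a = table2.a
join table3 as t3 on table2.b = table3.b;

select *
from table1 as t1
join table2 as t2 on t1.a = t2.a
join table3 as t3 on t2.b = t3.b;
```

Pinning the expansion to `tables_order` is what lets the updated reference file assert a fixed `PrettyCompactNoEscapes` header for these queries.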
diff --git a/dbms/tests/queries/0_stateless/00820_multiple_joins.reference b/dbms/tests/queries/0_stateless/00820_multiple_joins.reference index 335c17933ec..6d317230813 100644 --- a/dbms/tests/queries/0_stateless/00820_multiple_joins.reference +++ b/dbms/tests/queries/0_stateless/00820_multiple_joins.reference @@ -32,14 +32,18 @@ 6 6 60 60 12 12 120 120 18 18 180 180 -0 0 0 0 0 -6 6 60 60 600 -12 12 120 120 1200 -18 18 180 180 1800 -0 0 0 0 0 -60 600 6 60 6 -120 1200 12 120 12 -180 1800 18 180 18 +┌─t1.a─┬─t2.a─┬─t2.b─┬─t3.b─┬─t3.c─┐ +│ 0 │ 0 │ 0 │ 0 │ 0 │ +│ 6 │ 6 │ 60 │ 60 │ 600 │ +│ 12 │ 12 │ 120 │ 120 │ 1200 │ +│ 18 │ 18 │ 180 │ 180 │ 1800 │ +└──────┴──────┴──────┴──────┴──────┘ +┌─t1.a─┬─t2.a─┬─t2.b─┬─t3.b─┬─t3.c─┐ +│ 0 │ 0 │ 0 │ 0 │ 0 │ +│ 6 │ 6 │ 60 │ 60 │ 600 │ +│ 12 │ 12 │ 120 │ 120 │ 1200 │ +│ 18 │ 18 │ 180 │ 180 │ 1800 │ +└──────┴──────┴──────┴──────┴──────┘ 0 0 0 0 0 0 0 6 6 60 60 66 66 120 12 12 120 120 132 132 240 diff --git a/dbms/tests/queries/0_stateless/00820_multiple_joins.sql b/dbms/tests/queries/0_stateless/00820_multiple_joins.sql index 628a3d9fd8c..8588edc5641 100644 --- a/dbms/tests/queries/0_stateless/00820_multiple_joins.sql +++ b/dbms/tests/queries/0_stateless/00820_multiple_joins.sql @@ -60,12 +60,14 @@ join table3 as t3 on table2.b = table3.b; select t1.*, t2.*, t3.* from table1 as t1 join table2 as t2 on table1.a = table2.a -join table3 as t3 on table2.b = table3.b; +join table3 as t3 on table2.b = table3.b +FORMAT PrettyCompactNoEscapes; select * from table1 as t1 join table2 as t2 on t1.a = t2.a -join table3 as t3 on t2.b = t3.b; +join table3 as t3 on t2.b = t3.b +FORMAT PrettyCompactNoEscapes; select t1.a as t1_a, t2.a as t2_a, t2.b as t2_b, t3.b as t3_b, (t1.a + table2.b) as t1_t2_x, (table1.a + table3.b) as t1_t3_x, (t2.b + t3.b) as t2_t3_x From 60bc13e619952005b4afa3929366aa01476458e0 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 4 Apr 2019 15:34:49 +0300 Subject: [PATCH 082/102] Version to end of exception --- dbms/src/Common/Exception.cpp | 11 ++++++----- dbms/tests/queries/0_stateless/00834_kill_mutation.sh | 5 ++--- .../00834_kill_mutation_replicated_zookeeper.sh | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 4bb34f99f54..e76031df993 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -77,14 +77,15 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded } catch (const Exception & e) { - stream << "(version " << VERSION_STRING << VERSION_OFFICIAL << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace); + stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace) << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")"; } catch (const Poco::Exception & e) { try { - stream << "(version " << VERSION_STRING << VERSION_OFFICIAL << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() - << ", e.displayText() = " << e.displayText(); + stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() + << ", e.displayText() = " << e.displayText() + << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")"; } catch (...) {} } @@ -98,7 +99,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "(version " << VERSION_STRING << VERSION_OFFICIAL << ") " << "std::exception. 
Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); + stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what() << ", version = " << VERSION_STRING << VERSION_OFFICIAL; } catch (...) {} } @@ -112,7 +113,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "(version " << VERSION_STRING << VERSION_OFFICIAL << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; + stream << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")"; } catch (...) {} } diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation.sh b/dbms/tests/queries/0_stateless/00834_kill_mutation.sh index cb48140a368..d70963db8e2 100755 --- a/dbms/tests/queries/0_stateless/00834_kill_mutation.sh +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation.sh @@ -17,8 +17,7 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill a single invalid mutat ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE toUInt32(s) = 1" sleep 0.1 - -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(replaceRegexpOne(latest_fail_reason, '.version [0-9.]+. ', ''), 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation'" +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation'" ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation'" @@ -30,7 +29,7 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill invalid mutation that ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE toUInt32(s) = 1" ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE x = 1" -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(replaceRegexpOne(latest_fail_reason, '.version [0-9.]+. 
', ''), 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" sleep 0.1 ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh index 5fbc3f061d3..dfaa85f2f2b 100755 --- a/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh @@ -20,7 +20,7 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill a single invalid mutat ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE toUInt32(s) = 1" sleep 1 -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0', '20010101_0_0_0'), latest_fail_time != 0, substr(replaceRegexpOne(latest_fail_reason, '.version [0-9.]+. ', ''), 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1'" +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0', '20010101_0_0_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1'" ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation_r1'" @@ -34,7 +34,7 @@ ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE toU ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE x = 1" sleep 1 -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0_1', '20010101_0_0_0_1'), latest_fail_time != 0, substr(replaceRegexpOne(latest_fail_reason, '.version [0-9.]+. 
', ''), 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0_1', '20010101_0_0_0_1'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" From ee4925216028995202dddd5a173a18fbbb3b7088 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Apr 2019 16:01:48 +0300 Subject: [PATCH 083/102] Add util for adjust block numbers in zookeper --- .../CMakeLists.txt | 2 + .../main.cpp | 135 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt create mode 100644 utils/zookeeper-adjust-block-numbers-to-parts/main.cpp diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt new file mode 100644 index 00000000000..d2357ec755d --- /dev/null +++ b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable (zookeeper-adjust-block-numbers-to-parts main.cpp ${SRCS}) +target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE dbms clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp new file mode 100644 index 00000000000..6ac06b07337 --- /dev/null +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include + +#include +#include + +size_t getMaxBlockSizeForPartition(zkutil::ZooKeeper & zk, + const std::string & replica_path, + const std::string & partition_name, + const DB::MergeTreeDataFormatVersion & format_version) +{ + auto replicas_path = replica_path + "/replicas"; + auto replica_hosts = zk.getChildren(replicas_path); + size_t max_block_num = 0; + for (const auto & replica_host : replica_hosts) + { + auto parts = zk.getChildren(replicas_path + "/" + replica_host + "/parts"); + for (const auto & part : parts) + { + try { + auto info = DB::MergeTreePartInfo::fromPartName(part, format_version); + if (info.partition_id == partition_name) + max_block_num = std::max(info.max_block, max_block_num); + } + catch (const DB::Exception & ex) + { + std::cerr << "Ex on:" << ex.displayText() << std::endl; + } + } + } + return max_block_num; +} + +std::unordered_map getAllTablesBlockPaths(zkutil::ZooKeeper & zk, const std::string & root) +{ + std::unordered_map result; + auto shards = zk.getChildren(root); + for (const auto & shard : shards) + { + std::string shard_path = root + "/" + shard; + auto tables = zk.getChildren(shard_path); + for (auto table : tables) + { + std::cerr << "Searching for nodes in:" << table << std::endl; + std::string table_path = shard_path + "/" + table; + auto format_version = DB::ReplicatedMergeTreeTableMetadata::parse(zk.get(table_path + "/metadata")).data_format_version; + std::string blocks_path = table_path + "/block_numbers"; + auto partitions = zk.getChildren(blocks_path); + if (!partitions.empty()) + { + for (auto partition : partitions) + { + std::string part_path = blocks_path + "/" + partition; + size_t partition_max_block = getMaxBlockSizeForPartition(zk, table_path, partition, format_version); 
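+                    // partition_max_block is the largest max_block among the parts of all replicas
+                    // for this partition; rotateNodes() later pads the partition's block_numbers
+                    // sequence node up to this value.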
+ std::cerr << "\tFound max block number:" << partition_max_block << " for part: " << partition << std::endl; + result.emplace(part_path, partition_max_block); + } + } + } + } + return result; +} + + +void rotateNodes(zkutil::ZooKeeper & zk, const std::string & path, size_t max_block_num) +{ + Coordination::Requests requests; + std::string block_prefix = path + "/block-"; + std::string current = zk.create(block_prefix, "", zkutil::CreateMode::EphemeralSequential); + size_t current_block_num = DB::parse(current.c_str() + block_prefix.size(), current.size() - block_prefix.size()); + if (current_block_num >= max_block_num) + { + std::cerr << "Nothing to rotate, current block num:" << current_block_num << " max_block_num:" << max_block_num << std::endl; + return; + } + + size_t need_to_rotate = max_block_num - current_block_num; + std::cerr << "Will rotate:" << need_to_rotate << " block numbers from " << current_block_num << " to " << max_block_num << std::endl; + + for (size_t i = 0; i < need_to_rotate; ++i) + { + if (requests.size() == 50) + { + std::cerr << "Rotating: " << i << " block numbers" << std::endl; + zk.multi(requests); + requests.clear(); + } + requests.emplace_back(zkutil::makeCreateRequest(path + "/block-", "", zkutil::CreateMode::EphemeralSequential)); + } + if (!requests.empty()) + { + zk.multi(requests); + } +} + +int main(int argc, char ** argv) +try +{ + boost::program_options::options_description desc("Allowed options"); + desc.add_options() + ("help,h", "produce help message") + ("address,a", boost::program_options::value()->required(), "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + ("path,p", boost::program_options::value()->required(), "path of replica queue to insert node (without trailing slash)"); + + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Util for /block_numbers node adjust with max block number in partition" << std::endl; + std::cout << "Usage: " << argv[0] << " [options]" << std::endl; + std::cout << desc << std::endl; + return 1; + } + + std::string global_path = options.at("path").as(); + + zkutil::ZooKeeper zookeeper(options.at("address").as()); + + auto all_path = getAllTablesBlockPaths(zookeeper, global_path); + for (const auto & [path, max_block_num] : all_path) + { + std::cerr << "Rotating on:" << path << std::endl; + rotateNodes(zookeeper, path, max_block_num); + } + return 0; +} +catch (const Poco::Exception & e) +{ + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; + throw; +} From 43ed0a8e7f3e975383fcd9f933b9cb2244ad342c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Apr 2019 16:03:20 +0300 Subject: [PATCH 084/102] Style fix --- utils/zookeeper-adjust-block-numbers-to-parts/main.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp index 6ac06b07337..c91f45671a5 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -20,7 +20,8 @@ size_t getMaxBlockSizeForPartition(zkutil::ZooKeeper & zk, auto parts = zk.getChildren(replicas_path + "/" + replica_host + "/parts"); for (const auto & part : parts) { - try { + try + { auto info = DB::MergeTreePartInfo::fromPartName(part, format_version); if (info.partition_id == partition_name) 
max_block_num = std::max(info.max_block, max_block_num); From 1291ecbed7bb2a269fe9f6edb534878349e746a2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Apr 2019 22:44:04 +0300 Subject: [PATCH 085/102] Better logging in adjust block util --- .../zookeeper-adjust-block-numbers-to-parts/main.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp index c91f45671a5..dda1677f3a4 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -28,7 +28,7 @@ size_t getMaxBlockSizeForPartition(zkutil::ZooKeeper & zk, } catch (const DB::Exception & ex) { - std::cerr << "Ex on:" << ex.displayText() << std::endl; + std::cerr << "Exception on: " << ex.displayText() << " will skip part: " << part << std::endl; } } } @@ -45,7 +45,7 @@ std::unordered_map getAllTablesBlockPaths(zkutil::ZooKeeper auto tables = zk.getChildren(shard_path); for (auto table : tables) { - std::cerr << "Searching for nodes in:" << table << std::endl; + std::cerr << "Searching for nodes in: " << table << std::endl; std::string table_path = shard_path + "/" + table; auto format_version = DB::ReplicatedMergeTreeTableMetadata::parse(zk.get(table_path + "/metadata")).data_format_version; std::string blocks_path = table_path + "/block_numbers"; @@ -56,7 +56,7 @@ std::unordered_map getAllTablesBlockPaths(zkutil::ZooKeeper { std::string part_path = blocks_path + "/" + partition; size_t partition_max_block = getMaxBlockSizeForPartition(zk, table_path, partition, format_version); - std::cerr << "\tFound max block number:" << partition_max_block << " for part: " << partition << std::endl; + std::cerr << "\tFound max block number: " << partition_max_block << " for part: " << partition << std::endl; result.emplace(part_path, partition_max_block); } } @@ -74,12 +74,12 @@ void rotateNodes(zkutil::ZooKeeper & zk, const std::string & path, size_t max_bl size_t current_block_num = DB::parse(current.c_str() + block_prefix.size(), current.size() - block_prefix.size()); if (current_block_num >= max_block_num) { - std::cerr << "Nothing to rotate, current block num:" << current_block_num << " max_block_num:" << max_block_num << std::endl; + std::cerr << "Nothing to rotate, current block num: " << current_block_num << " max_block_num:" << max_block_num << std::endl; return; } size_t need_to_rotate = max_block_num - current_block_num; - std::cerr << "Will rotate:" << need_to_rotate << " block numbers from " << current_block_num << " to " << max_block_num << std::endl; + std::cerr << "Will rotate: " << need_to_rotate << " block numbers from " << current_block_num << " to " << max_block_num << std::endl; for (size_t i = 0; i < need_to_rotate; ++i) { @@ -124,7 +124,7 @@ try auto all_path = getAllTablesBlockPaths(zookeeper, global_path); for (const auto & [path, max_block_num] : all_path) { - std::cerr << "Rotating on:" << path << std::endl; + std::cerr << "Rotating on: " << path << std::endl; rotateNodes(zookeeper, path, max_block_num); } return 0; From f50a0778fb043079d064a2b1eda57a11fe5974d8 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 5 Apr 2019 15:50:14 +0300 Subject: [PATCH 086/102] fix missing column error message --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 58 +++++++++++++------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp 
b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 8e69d5ecfee..bd661f3631c 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -974,19 +974,11 @@ void ExpressionAnalyzer::collectUsedColumns() RequiredSourceColumnsVisitor::Data columns_context; RequiredSourceColumnsVisitor(columns_context).visit(query); - NameSet required = columns_context.requiredColumns(); + NameSet source_column_names; + for (const auto & column : source_columns) + source_column_names.insert(column.name); -#if 0 - std::cerr << "Query: " << query << std::endl; - std::cerr << "CTX: " << columns_context << std::endl; - std::cerr << "source_columns: "; - for (const auto & name : source_columns) - std::cerr << "'" << name.name << "' "; - std::cerr << "required: "; - for (const auto & pr : required) - std::cerr << "'" << pr.first << "' "; - std::cerr << std::endl; -#endif + NameSet required = columns_context.requiredColumns(); if (columns_context.has_table_join) { @@ -1013,10 +1005,10 @@ void ExpressionAnalyzer::collectUsedColumns() } } + NameSet array_join_sources; if (columns_context.has_array_join) { /// Insert the columns required for the ARRAY JOIN calculation into the required columns list. - NameSet array_join_sources; for (const auto & result_source : syntax->array_join_result_to_source) array_join_sources.insert(result_source.second); @@ -1063,15 +1055,39 @@ void ExpressionAnalyzer::collectUsedColumns() if (!unknown_required_source_columns.empty()) { std::stringstream ss; - ss << "query: '" << query << "' "; - ss << columns_context; - ss << "source_columns: "; - for (const auto & name : source_columns) - ss << "'" << name.name << "' "; + ss << "Missing columns:"; + for (const auto & name : unknown_required_source_columns) + ss << " '" << name << "'"; + ss << " while procesing query: '" << query << "'"; - throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin() - + (select_query && !select_query->tables ? ". 
Note that there are no tables (FROM clause) in your query" : "") - + ", context: " + ss.str(), ErrorCodes::UNKNOWN_IDENTIFIER); + ss << ", required columns:"; + for (const auto & name : columns_context.requiredColumns()) + ss << " '" << name << "'"; + + if (!source_column_names.empty()) + { + ss << ", source columns:"; + for (const auto & name : source_column_names) + ss << " '" << name << "'"; + } + else + ss << ", no source columns"; + + if (columns_context.has_table_join) + { + ss << ", joined columns:"; + for (const auto & column : analyzedJoin().available_joined_columns) + ss << " '" << column.name_and_type.name << "'"; + } + + if (!array_join_sources.empty()) + { + ss << ", arrayJoin columns:"; + for (const auto & name : array_join_sources) + ss << " '" << name << "'"; + } + + throw Exception(ss.str(), ErrorCodes::UNKNOWN_IDENTIFIER); } } From aae1e56214a07ad17858e83055be29318506b289 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 5 Apr 2019 15:57:08 +0300 Subject: [PATCH 087/102] fix typo --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index bd661f3631c..200327f00ca 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1058,7 +1058,7 @@ void ExpressionAnalyzer::collectUsedColumns() ss << "Missing columns:"; for (const auto & name : unknown_required_source_columns) ss << " '" << name << "'"; - ss << " while procesing query: '" << query << "'"; + ss << " while processing query: '" << query << "'"; ss << ", required columns:"; for (const auto & name : columns_context.requiredColumns()) From bb9958b0d7a827244799f76488087ac4c6efb50b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Apr 2019 16:32:25 +0300 Subject: [PATCH 088/102] Minor fixes for leastSqr. --- .../AggregateFunctionLeastSqr.h | 22 +++++++++---------- .../0_stateless/00917_least_sqr.reference | 1 + .../queries/0_stateless/00917_least_sqr.sql | 2 ++ 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h index e1a57961af0..9bab0671987 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -74,6 +75,8 @@ struct AggregateFunctionLeastSqrData final } }; +/// Calculates simple linear regression parameters. +/// Result is a tuple (k, b) for y = k * x + b equation, solved by least squares approximation. 
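+/// The closed-form estimates computed by getK() and getB() in the data struct:
+///     k = (count * sum_xy - sum_x * sum_y) / (count * sum_xx - sum_x * sum_x)
+///     b = (sum_y - k * sum_x) / count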
template class AggregateFunctionLeastSqr final : public IAggregateFunctionDataHelper< AggregateFunctionLeastSqrData, @@ -150,12 +153,8 @@ public: DataTypePtr getReturnType() const override { DataTypes types { - std::make_shared( - std::make_shared() - ), - std::make_shared( - std::make_shared() - ), + std::make_shared>(), + std::make_shared>(), }; Strings names { @@ -177,13 +176,12 @@ public: Ret k = this->data(place).getK(); Ret b = this->data(place).getB(k); - Tuple result; - result.toUnderType().reserve(2); + auto & col_tuple = static_cast(to); + auto & col_k = static_cast &>(col_tuple.getColumn(0)); + auto & col_b = static_cast &>(col_tuple.getColumn(1)); - result.toUnderType().emplace_back(k); - result.toUnderType().emplace_back(b); - - to.insert(std::move(result)); + col_k.getData().push_back(k); + col_b.getData().push_back(b); } }; diff --git a/dbms/tests/queries/0_stateless/00917_least_sqr.reference b/dbms/tests/queries/0_stateless/00917_least_sqr.reference index 89d168b03bb..8abd62892db 100644 --- a/dbms/tests/queries/0_stateless/00917_least_sqr.reference +++ b/dbms/tests/queries/0_stateless/00917_least_sqr.reference @@ -5,3 +5,4 @@ (nan,nan) (0,3) (nan,nan) +(nan,nan) diff --git a/dbms/tests/queries/0_stateless/00917_least_sqr.sql b/dbms/tests/queries/0_stateless/00917_least_sqr.sql index 80f28a6abd9..729d140ca30 100644 --- a/dbms/tests/queries/0_stateless/00917_least_sqr.sql +++ b/dbms/tests/queries/0_stateless/00917_least_sqr.sql @@ -5,3 +5,5 @@ select arrayReduce('leastSqr', [5, 5.1], [6, 6.1]); select arrayReduce('leastSqr', [0], [0]); select arrayReduce('leastSqr', [3, 4], [3, 3]); select arrayReduce('leastSqr', [3, 3], [3, 4]); +select arrayReduce('leastSqr', emptyArrayUInt8(), emptyArrayUInt8()); + From c35f97d8165f1062c1918321a6e0b99b1826fa95 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 5 Apr 2019 17:29:15 +0300 Subject: [PATCH 089/102] test improvement --- .../0_stateless/00818_alias_bug_4110.reference | 6 ++++++ .../queries/0_stateless/00818_alias_bug_4110.sql | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference index 5186cb8eeff..e6013d269c2 100644 --- a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference +++ b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference @@ -3,6 +3,12 @@ 11 11 11 12 12 11 +10 10 +10 11 11 +12 11 +10 12 +11 12 +11 12 0 1 123 456 diff --git a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql index 7480f137a65..6cab0f1995c 100644 --- a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql +++ b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql @@ -4,6 +4,17 @@ select s.a + 1 as a, s.a + 1 as b from (select 10 as a) s; select s.a + 1 as b, s.a + 2 as a from (select 10 as a) s; select s.a + 2 as b, s.a + 1 as a from (select 10 as a) s; +select a, a as a from (select 10 as a); +select s.a, a, a + 1 as a from (select 10 as a) as s; +select s.a + 2 as b, b - 1 as a from (select 10 as a) s; +select s.a as a, s.a + 2 as b from (select 10 as a) s; +select s.a + 1 as a, s.a + 2 as b from (select 10 as a) s; +select a + 1 as a, a + 1 as b from (select 10 as a); +select a + 1 as b, b + 1 as a from (select 10 as a); -- { serverError 174 } +select 10 as a, a + 1 as a; -- { serverError 179 } +with 10 as a select a as a; -- { serverError 179 } +with 10 as a select a + 1 as a; -- { serverError 179 } + SELECT 0 as t FROM (SELECT 1 as 
t) as inn WHERE inn.t = 1; SELECT sum(value) as value FROM (SELECT 1 as value) as data WHERE data.value > 0; From 085c758dbd72b5fc7a3c4e380e7995581bcdce14 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Apr 2019 17:52:27 +0300 Subject: [PATCH 090/102] Avoid divizion by zero in leastSqr function. --- .../AggregateFunctions/AggregateFunctionLeastSqr.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h index 9bab0671987..fd0b65c051f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h @@ -8,7 +8,7 @@ #include #include #include - +#include namespace DB { @@ -65,12 +65,19 @@ struct AggregateFunctionLeastSqrData final Ret getK() const { - return (sum_xy * count - sum_x * sum_y) - / (sum_xx * count - sum_x * sum_x); + Ret divisor = sum_xx * count - sum_x * sum_x; + + if (divisor == 0) + return std::numeric_limits::quiet_NaN(); + + return (sum_xy * count - sum_x * sum_y) / divisor; } Ret getB(Ret k) const { + if (count == 0) + return std::numeric_limits::quiet_NaN(); + return (sum_y - k * sum_x) / count; } }; From eaffca28ddf070cc9f886acdd70d506ea7d7c21b Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 5 Apr 2019 19:10:52 +0300 Subject: [PATCH 091/102] Copier: auto upload task configuration from --task-file option (#4876) * Copier: auto upload task configuration from --task-file option * task-upload-force * doc * Update ClusterCopier.cpp * Requested changes --- dbms/programs/copier/ClusterCopier.cpp | 36 ++++++++++++++++--- docs/en/operations/utils/clickhouse-copier.md | 2 ++ docs/ru/operations/utils/clickhouse-copier.md | 2 ++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index e478c591d16..67351d9696d 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -1,7 +1,6 @@ #include "ClusterCopier.h" #include - #include #include #include @@ -13,14 +12,11 @@ #include #include #include - #include #include - #include #include #include - #include #include #include @@ -61,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -897,6 +894,28 @@ public: } } + void uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force) + { + auto local_task_description_path = task_path + "/description"; + + String task_config_str; + { + ReadBufferFromFile in(task_file); + readStringUntilEOF(task_config_str, in); + } + if (task_config_str.empty()) + return; + + auto zookeeper = context.getZooKeeper(); + + zookeeper->createAncestors(local_task_description_path); + auto code = zookeeper->tryCreate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); + if (code && force) + zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); + + LOG_DEBUG(log, "Task description " << ((code && !force) ? 
"not " : "") << "uploaded to " << local_task_description_path << " with result " << code << " ("<< zookeeper->error2string(code) << ")"); + } + void reloadTaskDescription() { auto zookeeper = context.getZooKeeper(); @@ -2107,6 +2126,10 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options) options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper") .argument("task-path").binding("task-path")); + options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path") + .argument("task-file").binding("task-file")); + options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists") + .argument("task-upload-force").binding("task-upload-force")); options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors") .binding("safe-mode")); options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)") @@ -2157,6 +2180,11 @@ void ClusterCopierApp::mainImpl() auto copier = std::make_unique(task_path, host_id, default_database, *context); copier->setSafeMode(is_safe_mode); copier->setCopyFaultProbability(copy_fault_probability); + + auto task_file = config().getString("task-file", ""); + if (!task_file.empty()) + copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false)); + copier->init(); copier->process(); } diff --git a/docs/en/operations/utils/clickhouse-copier.md b/docs/en/operations/utils/clickhouse-copier.md index fac374b4790..57358d49f90 100644 --- a/docs/en/operations/utils/clickhouse-copier.md +++ b/docs/en/operations/utils/clickhouse-copier.md @@ -32,6 +32,8 @@ Parameters: - `daemon` — Starts `clickhouse-copier` in daemon mode. - `config` — The path to the `zookeeper.xml` file with the parameters for the connection to ZooKeeper. - `task-path` — The path to the ZooKeeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. +- `task-file` — Optional path to file with task configuration for initial upload to ZooKeeper. +- `task-upload-force` — Force upload `task-file` even if node already exists. - `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. ## Format of zookeeper.xml diff --git a/docs/ru/operations/utils/clickhouse-copier.md b/docs/ru/operations/utils/clickhouse-copier.md index 0c852450457..b38e25f6c16 100644 --- a/docs/ru/operations/utils/clickhouse-copier.md +++ b/docs/ru/operations/utils/clickhouse-copier.md @@ -31,6 +31,8 @@ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path - `daemon` - запускает `clickhouse-copier` в режиме демона. - `config` - путь к файлу `zookeeper.xml` с параметрами соединения с ZooKeeper. - `task-path` - путь к ноде ZooKeeper. Нода используется для синхронизации между процессами `clickhouse-copier` и для хранения заданий. Задания хранятся в `$task-path/description`. +- `task-file` - необязательный путь к файлу с описанием конфигурация заданий для загрузки в ZooKeeper. +- `task-upload-force` - Загрузить `task-file` в ZooKeeper даже если уже было загружено. - `base-dir` - путь к логам и вспомогательным файлам. 
При запуске `clickhouse-copier` создает в `$base-dir` подкаталоги `clickhouse-copier_YYYYMMHHSS_`. Если параметр не указан, то каталоги будут создаваться в каталоге, где `clickhouse-copier` был запущен. ## Формат zookeeper.xml From 34cf495db8888bb31defec46caeb5af27cd7470f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 5 Apr 2019 19:21:46 +0300 Subject: [PATCH 092/102] Fix BlockIO::operator= --- dbms/src/DataStreams/BlockIO.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/DataStreams/BlockIO.h b/dbms/src/DataStreams/BlockIO.h index 5ebfa45d179..4618b183d48 100644 --- a/dbms/src/DataStreams/BlockIO.h +++ b/dbms/src/DataStreams/BlockIO.h @@ -43,6 +43,9 @@ struct BlockIO BlockIO & operator= (const BlockIO & rhs) { + if (this == &rhs) + return *this; + out.reset(); in.reset(); process_list_entry.reset(); From eff767b6aa106983ae3d8063739703f2ffada255 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Apr 2019 19:31:53 +0300 Subject: [PATCH 093/102] Fix PVS studio warning. --- dbms/src/DataTypes/DataTypeLowCardinality.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index 98b662d8fe8..504a451741d 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -690,10 +690,9 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams( }; if (!settings.continuous_reading) + { low_cardinality_state->num_pending_rows = 0; - if (!settings.continuous_reading) - { /// Remember in state that some granules were skipped and we need to update dictionary. low_cardinality_state->need_update_dictionary = true; } From 523c8e5ec1274f17432b393f30b9699d71875d94 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Apr 2019 19:45:41 +0300 Subject: [PATCH 094/102] Remove LowCardinality from aggregate function argument types. #4919 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 200327f00ca..caf53100179 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -406,7 +407,7 @@ void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr getRootActions(arguments[i], true, actions); const std::string & name = arguments[i]->getColumnName(); - types[i] = actions->getSampleBlock().getByName(name).type; + types[i] = recursiveRemoveLowCardinality(actions->getSampleBlock().getByName(name).type); aggregate.argument_names[i] = name; } From 50510ab7efc9ddbb3a38663a1486e2c8b060273c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Apr 2019 19:51:27 +0300 Subject: [PATCH 095/102] Added test. 
#4919 --- ...cardinality_nullable_aggregate_function_type.reference | 1 + ...1_low_cardinality_nullable_aggregate_function_type.sql | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00931_low_cardinality_nullable_aggregate_function_type.reference create mode 100644 dbms/tests/queries/0_stateless/00931_low_cardinality_nullable_aggregate_function_type.sql diff --git a/dbms/tests/queries/0_stateless/00931_low_cardinality_nullable_aggregate_function_type.reference b/dbms/tests/queries/0_stateless/00931_low_cardinality_nullable_aggregate_function_type.reference new file mode 100644 index 00000000000..866b45bd9ea --- /dev/null +++ b/dbms/tests/queries/0_stateless/00931_low_cardinality_nullable_aggregate_function_type.reference @@ -0,0 +1 @@ +2019-01-01 \N diff --git a/dbms/tests/queries/0_stateless/00931_low_cardinality_nullable_aggregate_function_type.sql b/dbms/tests/queries/0_stateless/00931_low_cardinality_nullable_aggregate_function_type.sql new file mode 100644 index 00000000000..04089aef377 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00931_low_cardinality_nullable_aggregate_function_type.sql @@ -0,0 +1,8 @@ +drop table if exists test.lc; + +CREATE TABLE test.lc (`date` Date, `name` LowCardinality(Nullable(String)), `clicks` Nullable(Int32)) ENGINE = MergeTree() ORDER BY date SETTINGS index_granularity = 8192; +INSERT INTO test.lc SELECT '2019-01-01', null, 0 FROM numbers(1000000); +SELECT date, argMax(name, clicks) FROM test.lc GROUP BY date; + +drop table if exists test.lc; + From f48927a1b9c6a898a81c55d6c11103e79916aca5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 5 Apr 2019 20:07:58 +0300 Subject: [PATCH 096/102] Added a test from aadant #4921 --- .../queries/0_stateless/00933_reserved_word.reference | 0 dbms/tests/queries/0_stateless/00933_reserved_word.sql | 7 +++++++ 2 files changed, 7 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00933_reserved_word.reference create mode 100644 dbms/tests/queries/0_stateless/00933_reserved_word.sql diff --git a/dbms/tests/queries/0_stateless/00933_reserved_word.reference b/dbms/tests/queries/0_stateless/00933_reserved_word.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00933_reserved_word.sql b/dbms/tests/queries/0_stateless/00933_reserved_word.sql new file mode 100644 index 00000000000..8e463fbffb9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00933_reserved_word.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS test.reserved_word_table; +CREATE TABLE test.reserved_word_table (`index` UInt8) ENGINE = MergeTree ORDER BY `index`; + +DETACH TABLE test.reserved_word_table; +ATTACH TABLE test.reserved_word_table; + +DROP TABLE test.reserved_word_table; From 3bf422b1b0c31f2fd8e6be6b0a0f2c382e1b2892 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 5 Apr 2019 21:27:20 +0300 Subject: [PATCH 097/102] Update trim_whitespace.xml --- dbms/tests/performance/trim/trim_whitespace.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/performance/trim/trim_whitespace.xml b/dbms/tests/performance/trim/trim_whitespace.xml index 9ef5cf92611..cf4a5dac896 100644 --- a/dbms/tests/performance/trim/trim_whitespace.xml +++ b/dbms/tests/performance/trim/trim_whitespace.xml @@ -3,7 +3,7 @@ loop CREATE TABLE IF NOT EXISTS whitespaces(value String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple() - INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, 
concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678)
+    INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678)

From 38f9c489ff708b5b62eee5bd019826ec6a1d8ef4 Mon Sep 17 00:00:00 2001
From: ogorbacheva
Date: Fri, 5 Apr 2019 21:46:51 +0300
Subject: [PATCH 098/102] Doc fix: Edit info about SAMPLE (en, ru) (#4907)

---
 docs/en/query_language/select.md |  52 ++++++++++-----
 docs/ru/query_language/select.md | 109 ++++++++++++++++++++++++++-----
 2 files changed, 129 insertions(+), 32 deletions(-)

diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md
index 7c4aa82e379..458fa732f81 100644
--- a/docs/en/query_language/select.md
+++ b/docs/en/query_language/select.md
@@ -46,22 +46,38 @@ The FINAL modifier can be used only for a SELECT from a CollapsingMergeTree tabl

### SAMPLE Clause {#select-sample-clause}

-The `SAMPLE` clause allows for approximated query processing. Approximated query processing is only supported by the tables in the `MergeTree` family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../operations/table_engines/mergetree.md)).
+The `SAMPLE` clause allows for approximated query processing.
+
+When data sampling is enabled, the query is not performed on all the data, but only on a certain fraction of data (sample). For example, if you need to calculate statistics for all the visits, it is enough to execute the query on the 1/10 fraction of all the visits and then multiply the result by 10.
+
+Approximated query processing can be useful in the following cases:
+
+- When you have strict timing requirements (like <100ms) but you can't justify the cost of additional hardware resources to meet them.
+- When your raw data is not accurate anyway, so the approximation doesn't noticeably degrade the quality of the results.
+- When business requirements target approximate results (for cost-effectiveness, or in order to offer exact results only to premium users).
+
+!!! note
+    You can only use sampling with the tables in the [MergeTree](../operations/table_engines/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table)).

The features of data sampling are listed below:

- Data sampling is a deterministic mechanism. The result of the same `SELECT .. SAMPLE` query is always the same.
- Sampling works consistently for different tables. For tables with a single sampling key, a sample with the same coefficient always selects the same subset of possible data. For example, a sample of user IDs takes rows with the same subset of all the possible user IDs from different tables. This means that you can use the sample in subqueries in the [IN](#select-in-operators) clause. Also, you can join samples using the [JOIN](#select-join) clause.
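For example, here is a minimal sketch of this property. The table names, the `UserID` column, and the shared sampling expression are illustrative assumptions, not part of this patch:

``` sql
-- Assumes two tables that were both created with the same sampling key, e.g.:
--   ... ENGINE = MergeTree() ORDER BY (CounterID, intHash32(UserID)) SAMPLE BY intHash32(UserID)
-- Because both samples select the same subset of user IDs, the IN filter
-- stays meaningful even though each side reads only 1/10 of its table.
SELECT count()
FROM visits SAMPLE 1/10
WHERE UserID IN
(
    SELECT UserID
    FROM user_actions SAMPLE 1/10
)
```

The same pairing works with a `JOIN` instead of `IN`, under the same assumption of a shared sampling key.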
- Sampling allows reading less data from a disk. Note that you must specify the sampling key correctly. For more information, see [Creating a MergeTree Table](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table).

For the `SAMPLE` clause the following syntax is supported:

-- `SAMPLE k`, where `k` is a decimal number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k)
-- `SAMPLE n`, where `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n)
-- `SAMPLE k OFFSET m` where `k` and `m` are numbers from 0 to 1. The query is executed on a sample of `k` percent of the data. The data used for the sample is offset by `m` percent. [Read more](#select-sample-offset)
+For the `SAMPLE` clause the following syntax is supported:
+
+| SAMPLE Clause Syntax | Description |
+| ---------------- | --------- |
+| `SAMPLE k` | Here `k` is a number from 0 to 1.
The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k)| +| `SAMPLE n` | Here `n` is a sufficiently large integer.
The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) |
+| `SAMPLE k OFFSET m` | Here `k` and `m` are numbers from 0 to 1.
The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) |
+

#### SAMPLE k {#select-sample-k}

+Here `k` is a number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`.
+
In a `SAMPLE k` clause, the sample is taken from the `k` fraction of data. The example is shown below:

``` sql
@@ -76,25 +92,29 @@ GROUP BY Title
ORDER BY PageViews DESC LIMIT 1000
```

-In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value 'count()' is manually multiplied by 10.
+In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10.

#### SAMPLE n {#select-sample-n}

-In this case, the query is executed on a sample of at least `n` rows, where `n` is a sufficiently large integer. For example, `SAMPLE 10000000`.
+Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`.
+
+In this case, the query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows.

Since the minimum unit for data reading is one granule (its size is set by the `index_granularity` setting), it makes sense to set a sample that is much larger than the size of the granule.

-When using the `SAMPLE n` clause, the relative coefficient is calculated dynamically. Since you do not know which relative percent of data was processed, you do not know the coefficient the aggregate functions should be multiplied by (for example, you do not know if `SAMPLE 1000000` was taken from a set of 10,000,000 rows or from a set of 1,000,000,000 rows). In this case, use the `_sample_factor` virtual column to get the approximate result.
+When using the `SAMPLE n` clause, you don't know which relative fraction of the data was processed, so you don't know the coefficient the aggregate functions should be multiplied by. Use the `_sample_factor` virtual column to get the approximate result.

-The `_sample_factor` column is where ClickHouse stores relative coefficients. This column is created automatically when you create a table with the specified sampling key. The usage example is shown below:
+The `_sample_factor` column contains relative coefficients that are calculated dynamically. This column is created automatically when you [create](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table) a table with the specified sampling key. The usage examples of the `_sample_factor` column are shown below.
+
+Let's consider the table `visits`, which contains statistics about site visits. The first example shows how to calculate the number of page views:

``` sql
-SELECT sum(Duration * _sample_factor)
+SELECT sum(PageViews * _sample_factor)
FROM visits
SAMPLE 10000000
```

-If you need to get the approximate count of rows in a `SELECT .. SAMPLE n` query, get the sum() of the `_sample_factor` column instead of counting the `count(*) * _sample_factor` value. For example:
+The next example shows how to calculate the total number of visits:

``` sql
SELECT sum(_sample_factor)
@@ -102,7 +122,7 @@ FROM visits
SAMPLE 10000000
```

-Note that to calculate the average in a `SELECT ..
SAMPLE n` query, you do not need to use the `_sample_factor` column: +The example below shows how to calculate the average session duration. Note that you don't need to use the relative coefficient to calculate the average values. ``` sql SELECT avg(Duration) @@ -112,9 +132,9 @@ SAMPLE 10000000 #### SAMPLE k OFFSET m {#select-sample-offset} -You can specify the `SAMPLE k OFFSET m` clause, where `k` and `m` are numbers from 0 to 1. Examples are shown below. +Here `k` and `m` are numbers from 0 to 1. Examples are shown below. -Example 1. +**Example 1** ``` sql SAMPLE 1/10 @@ -124,13 +144,13 @@ In this example, the sample is 1/10th of all data: `[++------------------]` -Example 2. +**Example 2** ``` sql SAMPLE 1/10 OFFSET 1/2 ``` -Here, a sample of 10% is taken from the second half of data. +Here, a sample of 10% is taken from the second half of the data. `[----------++--------]` diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index 153e20bd8df..427b9d18286 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -45,19 +45,42 @@ SELECT [DISTINCT] expr_list Модификатор FINAL может быть использован только при SELECT-е из таблицы типа CollapsingMergeTree. При указании FINAL, данные будут выбираться полностью "сколлапсированными". Стоит учитывать, что использование FINAL приводит к выбору кроме указанных в SELECT-е столбцов также столбцов, относящихся к первичному ключу. Также, запрос будет выполняться в один поток, и при выполнении запроса будет выполняться слияние данных. Это приводит к тому, что при использовании FINAL, запрос выполняется медленнее. В большинстве случаев, следует избегать использования FINAL. Подробнее смотрите раздел "Движок CollapsingMergeTree". +### Секция SAMPLE {#select-sample-clause} -### Секция SAMPLE +Секция `SAMPLE` позволяет выполнять запросы приближённо. Например, чтобы посчитать статистику по всем визитам, можно обработать 1/10 всех визитов и результат домножить на 10. -Секция SAMPLE позволяет выполнить запрос приближённо. Приближённое выполнение запроса поддерживается только таблицами типа MergeTree\* и только если при создании таблицы было указано выражение, по которому производится выборка (смотрите раздел "Движок MergeTree"). +Сэмплирование имеет смысл, когда: -`SAMPLE` имеет вид `SAMPLE k`, где `k` - дробное число в интервале от 0 до 1, или `SAMPLE n`, где n - достаточно большое целое число. +1. Точность результата не важна, например, для оценочных расчетов. +2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть <100 мс. При этом точность расчета имеет более низкий приоритет. +3. Точность результата участвует в бизнес-модели сервиса. Например, пользователи с бесплатной подпиской на сервис могут получать отчеты с меньшей точностью, чем пользователи с премиум подпиской. -В первом случае, запрос будет выполнен по k-доле данных. Например, если указано `SAMPLE 0.1`, то запрос будет выполнен по 10% данных. -Во втором случае, запрос будет выполнен по выборке из не более n строк. Например, если указано `SAMPLE 10000000`, то запрос будет выполнен по не более чем 10 000 000 строкам. +!!! note "Внимание" + Не стоит использовать сэмплирование в тех задачах, где важна точность расчетов. Например, при работе с финансовыми отчетами. -Пример: +Свойства сэмплирования: -``` sql +- Сэмплирование работает детерминированно. При многократном выполнении одного и того же запроса `SELECT .. SAMPLE`, результат всегда будет одинаковым. 
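For example, a minimal sketch of this determinism (in English; the table `visits`, the `CounterID` column, and its sampling key are illustrative assumptions):

```sql
-- Repeated runs of the same sampled query return exactly the same result,
-- provided the underlying data has not changed between the runs.
SELECT count()
FROM visits SAMPLE 1/10
WHERE CounterID = 34;
```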
+- Сэмплирование поддерживает консистентность для разных таблиц. Имеется в виду, что для таблиц с одним и тем же ключом сэмплирования, подмножество данных в выборках будет одинаковым (выборки при этом должны быть сформированы для одинаковой доли данных). Например, выборка по идентификаторам посетителей выберет из разных таблиц строки с одинаковым подмножеством всех возможных идентификаторов. Это свойство позволяет использовать выборки в подзапросах в секции [IN](#select-in-operators), а также объединять выборки с помощью [JOIN](#select-join). +- Сэмплирование позволяет читать меньше данных с диска. Обратите внимание, для этого необходимо корректно указать ключ сэмплирования. Подробнее см. в разделе [Создание таблицы MergeTree](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table). + +Сэмплирование поддерживается только таблицами семейства [MergeTree](../operations/table_engines/mergetree.md) и только в том случае, если для таблиц был указан ключ сэмплирования (выражение, на основе которого должна производиться выборка). Подробнее см. в разделе [Создание таблиц MergeTree](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table). + +Выражение `SAMPLE` в запросе можно задать следующими способами: + +| Способ задания SAMPLE| Описание | +| ---------------- | --------- | +| `SAMPLE k` | Здесь `k` – это дробное число в интервале от 0 до 1.
Запрос будет выполнен по `k` доле данных. Например, если указано `SAMPLE 1/10`, то запрос будет выполнен для выборки из 1/10 данных. [Подробнее](#select-sample-k)| +| `SAMPLE n` | Здесь `n` – это достаточно большое целое число.
Запрос будет выполнен для выборки, состоящей из не менее чем `n` строк. Например, если указано `SAMPLE 10000000`, то запрос будет выполнен для не менее чем 10,000,000 строк. [Подробнее](#select-sample-n) | +| `SAMPLE k OFFSET m` | Здесь `k` и `m` – числа от 0 до 1.
Запрос будет выполнен по `k` доле данных. При этом выборка будет сформирована со смещением на `m` долю. [Подробнее](#select-sample-offset) | + +#### SAMPLE k {#select-sample-k} + +Здесь `k` – число в интервале от 0 до 1. Поддерживается как дробная, так и десятичная форма записи. Например, `SAMPLE 1/2` или `SAMPLE 0.5`. + +Если задано выражение `SAMPLE k`, запрос будет выполнен для `k` доли данных. Рассмотрим пример: + +```sql SELECT Title, count() * 10 AS PageViews @@ -65,22 +88,76 @@ FROM hits_distributed SAMPLE 0.1 WHERE CounterID = 34 - AND toDate(EventDate) >= toDate('2013-01-29') - AND toDate(EventDate) <= toDate('2013-02-04') - AND NOT DontCountHits - AND NOT Refresh - AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 1000 ``` -В этом примере, запрос выполняется по выборке из 0.1 (10%) данных. Значения агрегатных функций не корректируются автоматически, поэтому для получения приближённого результата, значение count() вручную домножается на 10. +В этом примере запрос выполняется по выборке из 0.1 (10%) данных. Значения агрегатных функций не корректируются автоматически, поэтому чтобы получить приближённый результат, значение `count()` нужно вручную умножить на 10. -При использовании варианта вида `SAMPLE 10000000`, нет информации, какая относительная доля данных была обработана, и на что следует домножить агрегатные функции, поэтому такой способ записи подходит не для всех случаев. +Выборка с указанием относительного коэффициента является "согласованной": для таблиц с одним и тем же ключом сэмплирования, выборка с одинаковой относительной долей всегда будет составлять одно и то же подмножество данных. То есть выборка из разных таблиц, на разных серверах, в разное время, формируется одинаковым образом. -Выборка с указанием относительного коэффициента является "согласованной": если рассмотреть все возможные данные, которые могли бы быть в таблице, то выборка (при использовании одного выражения сэмплирования, указанного при создании таблицы), с одинаковым коэффициентом, выбирает всегда одно и то же подмножество этих всевозможных данных. То есть, выборка из разных таблиц, на разных серверах, в разное время, делается одинаковым образом. +#### SAMPLE n {#select-sample-n} -Например, выборка по идентификаторам посетителей, выберет из разных таблиц строки с одинаковым подмножеством всех возможных идентификаторов посетителей. Это позволяет использовать выборку в подзапросах в секции IN, а также при ручном сопоставлении результатов разных запросов с выборками. +Здесь `n` – это достаточно большое целое число. Например, `SAMPLE 10000000`. + +Если задано выражение `SAMPLE n`, запрос будет выполнен для выборки из не менее `n` строк (но не значительно больше этого значения). Например, если задать `SAMPLE 10000000`, в выборку попадут не менее 10,000,000 строк. + +!!! note "Примечание" + Следует иметь в виду, что `n` должно быть достаточно большим числом. Так как минимальной единицей данных для чтения является одна гранула (её размер задаётся настройкой `index_granularity` для таблицы), имеет смысл создавать выборки, размер которых существенно превосходит размер гранулы. + +При выполнении `SAMPLE n` коэффициент сэмплирования заранее неизвестен (то есть нет информации о том, относительно какого количества данных будет сформирована выборка). Чтобы узнать коэффициент сэмплирования, используйте столбец `_sample_factor`. + +Виртуальный столбец `_sample_factor` автоматически создается в тех таблицах, для которых задано выражение `SAMPLE BY` (подробнее см. 
в разделе [Создание таблицы MergeTree](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table)). В столбце содержится коэффициент сэмплирования для таблицы – он рассчитывается динамически по мере добавления данных в таблицу. Ниже приведены примеры использования столбца `_sample_factor`. + +Предположим, у нас есть таблица, в которой ведется статистика посещений сайта. Пример ниже показывает, как рассчитать суммарное число просмотров: + +```sql +SELECT sum(PageViews * _sample_factor) +FROM visits +SAMPLE 10000000 +``` + +Следующий пример показывает, как посчитать общее число визитов: + +```sql +SELECT sum(_sample_factor) +FROM visits +SAMPLE 10000000 +``` + +В примере ниже рассчитывается среднее время на сайте. Обратите внимание, при расчете средних значений, умножать результат на коэффициент сэмплирования не нужно. + +```sql +SELECT avg(Duration) +FROM visits +SAMPLE 10000000 +``` + +#### SAMPLE k OFFSET m {#select-sample-offset} + +Здесь `k` и `m` – числа в интервале от 0 до 1. Например, `SAMPLE 0.1 OFFSET 0.5`. Поддерживается как дробная, так и десятичная форма записи. + +При задании `SAMPLE k OFFSET m`, выборка будет сформирована из `k` доли данных со смещением на долю `m`. Примеры приведены ниже. + +**Пример 1** + +```sql +SAMPLE 1/10 +``` + +В этом примере выборка будет сформирована по 1/10 доле всех данных: + +`[++------------------]` + +**Пример 2** + +```sql +SAMPLE 1/10 OFFSET 1/2 +``` + +Здесь выборка, которая состоит из 1/10 доли данных, взята из второй половины данных. + +`[----------++--------]` ### Секция ARRAY JOIN {#select-array-join-clause} From 571bc7065a2dc13f15b75a8001c04db3a595f53e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Apr 2019 01:35:56 +0300 Subject: [PATCH 099/102] Removed unused code related to MemoryTracker --- dbms/src/AggregateFunctions/AggregateFunctionUniq.h | 1 - dbms/src/Common/AlignedBuffer.h | 2 +- dbms/src/DataStreams/AsynchronousBlockInputStream.h | 2 -- .../MergingAggregatedMemoryEfficientBlockInputStream.cpp | 1 - .../MergingAggregatedMemoryEfficientBlockInputStream.h | 2 -- dbms/src/DataStreams/ParallelInputsProcessor.h | 1 - dbms/src/Interpreters/ExpressionJIT.cpp | 1 - dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp | 1 - dbms/src/Storages/MergeTree/MergeTreeReader.cpp | 1 - dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp | 1 - 10 files changed, 1 insertion(+), 12 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h index 62eb1db8115..bb292091788 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/Common/AlignedBuffer.h b/dbms/src/Common/AlignedBuffer.h index 6534d7dc0ef..0d9ecb61f2b 100644 --- a/dbms/src/Common/AlignedBuffer.h +++ b/dbms/src/Common/AlignedBuffer.h @@ -10,7 +10,7 @@ namespace DB /** Aligned piece of memory. * It can only be allocated and destroyed. - * MemoryTracker is not used. It is intended for small pieces of memory. + * MemoryTracker is not used. AlignedBuffer is intended for small pieces of memory. 
*/ class AlignedBuffer : private boost::noncopyable { diff --git a/dbms/src/DataStreams/AsynchronousBlockInputStream.h b/dbms/src/DataStreams/AsynchronousBlockInputStream.h index 53e265fdddd..6cfa247ab44 100644 --- a/dbms/src/DataStreams/AsynchronousBlockInputStream.h +++ b/dbms/src/DataStreams/AsynchronousBlockInputStream.h @@ -5,8 +5,6 @@ #include #include #include -#include -#include namespace CurrentMetrics diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index bde030d8afa..96ea9112e1d 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h index 44de41b2802..9fe322c3f43 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h @@ -8,8 +8,6 @@ #include -class MemoryTracker; - namespace DB { diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index b7402a45793..9c7a1fc6928 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 41a8e4e318b..8cb9f2003e1 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index afbc7855c8f..7f47a76a068 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index b226d55978e..0717bdac58c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 89f5aaeafd5..4107663f11b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -1,4 +1,3 @@ -#include #include #include From 460b58379a6955989ac25054e4d56169bc909b7a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Apr 2019 03:36:04 +0300 Subject: [PATCH 100/102] Fixed bad whitespaces --- dbms/src/Storages/MergeTree/BackgroundProcessingPool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index ac7d231d966..748ba19032b 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -16,7 +16,6 @@ 
#include - namespace DB { @@ -29,6 +28,8 @@ enum class BackgroundProcessingPoolTaskResult ERROR, NOTHING_TO_DO, }; + + /** Using a fixed number of threads, perform an arbitrary number of tasks in an infinite loop. * In this case, one task can run simultaneously from different threads. * Designed for tasks that perform continuous background work (for example, merge). @@ -45,7 +46,6 @@ public: using TaskHandle = std::shared_ptr; - BackgroundProcessingPool(int size_); size_t getNumberOfThreads() const From 07334ebad4e97d4e36fa1bb7215e70a6f8a38c34 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Apr 2019 04:09:15 +0300 Subject: [PATCH 101/102] Fixed race condition in DNSCacheUpdater --- dbms/src/Core/BackgroundSchedulePool.cpp | 25 ++-- dbms/src/Core/BackgroundSchedulePool.h | 157 ++++++++++++---------- dbms/src/Interpreters/DNSCacheUpdater.cpp | 37 ++--- dbms/src/Interpreters/DNSCacheUpdater.h | 16 +-- 4 files changed, 111 insertions(+), 124 deletions(-) diff --git a/dbms/src/Core/BackgroundSchedulePool.cpp b/dbms/src/Core/BackgroundSchedulePool.cpp index ce67c895234..ee63fdbadff 100644 --- a/dbms/src/Core/BackgroundSchedulePool.cpp +++ b/dbms/src/Core/BackgroundSchedulePool.cpp @@ -23,20 +23,21 @@ namespace DB class TaskNotification final : public Poco::Notification { public: - explicit TaskNotification(const BackgroundSchedulePool::TaskInfoPtr & task) : task(task) {} + explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task) : task(task) {} void execute() { task->execute(); } private: - BackgroundSchedulePool::TaskInfoPtr task; + BackgroundSchedulePoolTaskInfoPtr task; }; -BackgroundSchedulePool::TaskInfo::TaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const TaskFunc & function_) - : pool(pool_) , log_name(log_name_) , function(function_) +BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo( + BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_) + : pool(pool_), log_name(log_name_), function(function_) { } -bool BackgroundSchedulePool::TaskInfo::schedule() +bool BackgroundSchedulePoolTaskInfo::schedule() { std::lock_guard lock(schedule_mutex); @@ -47,7 +48,7 @@ bool BackgroundSchedulePool::TaskInfo::schedule() return true; } -bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms) +bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms) { std::lock_guard lock(schedule_mutex); @@ -58,7 +59,7 @@ bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms) return true; } -void BackgroundSchedulePool::TaskInfo::deactivate() +void BackgroundSchedulePoolTaskInfo::deactivate() { std::lock_guard lock_exec(exec_mutex); std::lock_guard lock_schedule(schedule_mutex); @@ -73,13 +74,13 @@ void BackgroundSchedulePool::TaskInfo::deactivate() pool.cancelDelayedTask(shared_from_this(), lock_schedule); } -void BackgroundSchedulePool::TaskInfo::activate() +void BackgroundSchedulePoolTaskInfo::activate() { std::lock_guard lock(schedule_mutex); deactivated = false; } -bool BackgroundSchedulePool::TaskInfo::activateAndSchedule() +bool BackgroundSchedulePoolTaskInfo::activateAndSchedule() { std::lock_guard lock(schedule_mutex); @@ -91,7 +92,7 @@ bool BackgroundSchedulePool::TaskInfo::activateAndSchedule() return true; } -void BackgroundSchedulePool::TaskInfo::execute() +void BackgroundSchedulePoolTaskInfo::execute() { Stopwatch watch; CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundSchedulePoolTask}; @@ -131,7 +132,7 @@ void 
BackgroundSchedulePool::TaskInfo::execute() } } -void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard & schedule_mutex_lock) +void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard & schedule_mutex_lock) { scheduled = true; @@ -145,7 +146,7 @@ void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); } -Coordination::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback() +Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback() { return [t = shared_from_this()](const Coordination::WatchResponse &) { diff --git a/dbms/src/Core/BackgroundSchedulePool.h b/dbms/src/Core/BackgroundSchedulePool.h index 11f2c5195e6..f2627366da7 100644 --- a/dbms/src/Core/BackgroundSchedulePool.h +++ b/dbms/src/Core/BackgroundSchedulePool.h @@ -20,6 +20,8 @@ namespace DB { class TaskNotification; +class BackgroundSchedulePoolTaskInfo; +class BackgroundSchedulePoolTaskHolder; /** Executes functions scheduled at a specific point in time. @@ -35,84 +37,14 @@ class TaskNotification; class BackgroundSchedulePool { public: - class TaskInfo; + friend class BackgroundSchedulePoolTaskInfo; + + using TaskInfo = BackgroundSchedulePoolTaskInfo; using TaskInfoPtr = std::shared_ptr; using TaskFunc = std::function; + using TaskHolder = BackgroundSchedulePoolTaskHolder; using DelayedTasks = std::multimap; - class TaskInfo : public std::enable_shared_from_this, private boost::noncopyable - { - public: - TaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const TaskFunc & function_); - - /// Schedule for execution as soon as possible (if not already scheduled). - /// If the task was already scheduled with delay, the delay will be ignored. - bool schedule(); - - /// Schedule for execution after specified delay. - bool scheduleAfter(size_t ms); - - /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task. - void deactivate(); - - void activate(); - - /// Atomically activate task and schedule it for execution. - bool activateAndSchedule(); - - /// get Coordination::WatchCallback needed for notifications from ZooKeeper watches. - Coordination::WatchCallback getWatchCallback(); - - private: - friend class TaskNotification; - friend class BackgroundSchedulePool; - - void execute(); - - void scheduleImpl(std::lock_guard & schedule_mutex_lock); - - BackgroundSchedulePool & pool; - std::string log_name; - TaskFunc function; - - std::mutex exec_mutex; - std::mutex schedule_mutex; - - /// Invariants: - /// * If deactivated is true then scheduled, delayed and executing are all false. - /// * scheduled and delayed cannot be true at the same time. - bool deactivated = false; - bool scheduled = false; - bool delayed = false; - bool executing = false; - - /// If the task is scheduled with delay, points to element of delayed_tasks. 
- DelayedTasks::iterator iterator; - }; - - class TaskHolder - { - public: - TaskHolder() = default; - explicit TaskHolder(const TaskInfoPtr & task_info_) : task_info(task_info_) {} - TaskHolder(const TaskHolder & other) = delete; - TaskHolder(TaskHolder && other) noexcept = default; - TaskHolder & operator=(const TaskHolder & other) noexcept = delete; - TaskHolder & operator=(TaskHolder && other) noexcept = default; - - ~TaskHolder() - { - if (task_info) - task_info->deactivate(); - } - - TaskInfo * operator->() { return task_info.get(); } - const TaskInfo * operator->() const { return task_info.get(); } - - private: - TaskInfoPtr task_info; - }; - TaskHolder createTask(const std::string & log_name, const TaskFunc & function); size_t getNumberOfThreads() const { return size; } @@ -153,4 +85,81 @@ private: void attachToThreadGroup(); }; + +class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this, private boost::noncopyable +{ +public: + BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_); + + /// Schedule for execution as soon as possible (if not already scheduled). + /// If the task was already scheduled with delay, the delay will be ignored. + bool schedule(); + + /// Schedule for execution after specified delay. + bool scheduleAfter(size_t ms); + + /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task. + void deactivate(); + + void activate(); + + /// Atomically activate task and schedule it for execution. + bool activateAndSchedule(); + + /// get Coordination::WatchCallback needed for notifications from ZooKeeper watches. + Coordination::WatchCallback getWatchCallback(); + +private: + friend class TaskNotification; + friend class BackgroundSchedulePool; + + void execute(); + + void scheduleImpl(std::lock_guard & schedule_mutex_lock); + + BackgroundSchedulePool & pool; + std::string log_name; + BackgroundSchedulePool::TaskFunc function; + + std::mutex exec_mutex; + std::mutex schedule_mutex; + + /// Invariants: + /// * If deactivated is true then scheduled, delayed and executing are all false. + /// * scheduled and delayed cannot be true at the same time. + bool deactivated = false; + bool scheduled = false; + bool delayed = false; + bool executing = false; + + /// If the task is scheduled with delay, points to element of delayed_tasks. 
+ BackgroundSchedulePool::DelayedTasks::iterator iterator; +}; + +using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr; + + +class BackgroundSchedulePoolTaskHolder +{ +public: + BackgroundSchedulePoolTaskHolder() = default; + explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {} + BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder & other) = delete; + BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder && other) noexcept = default; + BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder & other) noexcept = delete; + BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder && other) noexcept = default; + + ~BackgroundSchedulePoolTaskHolder() + { + if (task_info) + task_info->deactivate(); + } + + BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); } + const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); } + +private: + BackgroundSchedulePoolTaskInfoPtr task_info; +}; + } diff --git a/dbms/src/Interpreters/DNSCacheUpdater.cpp b/dbms/src/Interpreters/DNSCacheUpdater.cpp index 2a2d772ffb3..80ea1258f48 100644 --- a/dbms/src/Interpreters/DNSCacheUpdater.cpp +++ b/dbms/src/Interpreters/DNSCacheUpdater.cpp @@ -1,7 +1,7 @@ #include "DNSCacheUpdater.h" #include #include -#include +#include #include #include #include @@ -16,8 +16,6 @@ namespace ProfileEvents namespace DB { -using BackgroundProcessingPoolTaskInfo = BackgroundProcessingPool::TaskInfo; - namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; @@ -56,18 +54,15 @@ static bool isNetworkError() DNSCacheUpdater::DNSCacheUpdater(Context & context_) - : context(context_), pool(context_.getBackgroundPool()) + : context(context_), pool(context_.getSchedulePool()) { - task_handle = pool.addTask([this] () { return run(); }); + task_handle = pool.createTask("DNSCacheUpdater", [this]{ run(); }); } -BackgroundProcessingPoolTaskResult DNSCacheUpdater::run() +void DNSCacheUpdater::run() { - /// TODO: Ensusre that we get global counter (not thread local) auto num_current_network_exceptions = ProfileEvents::global_counters[ProfileEvents::NetworkErrors].load(std::memory_order_relaxed); - - if (num_current_network_exceptions >= last_num_network_erros + min_errors_to_update_cache - && time(nullptr) > last_update_time + min_update_period_seconds) + if (num_current_network_exceptions >= last_num_network_erros + min_errors_to_update_cache) { try { @@ -77,32 +72,18 @@ BackgroundProcessingPoolTaskResult DNSCacheUpdater::run() context.reloadClusterConfig(); last_num_network_erros = num_current_network_exceptions; - last_update_time = time(nullptr); - - return BackgroundProcessingPoolTaskResult::SUCCESS; + task_handle->scheduleAfter(min_update_period_seconds * 1000); + return; } catch (...) { - /// Do not increment ProfileEvents::NetworkErrors twice - if (isNetworkError()) - return BackgroundProcessingPoolTaskResult::ERROR; - - throw; + tryLogCurrentException(__PRETTY_FUNCTION__); } } - /// According to BackgroundProcessingPool logic, if task has done work, it could be executed again immediately. 
- return BackgroundProcessingPoolTaskResult::NOTHING_TO_DO; + task_handle->scheduleAfter(10 * 1000); } -DNSCacheUpdater::~DNSCacheUpdater() -{ - if (task_handle) - pool.removeTask(task_handle); - task_handle.reset(); -} - - bool DNSCacheUpdater::incrementNetworkErrorEventsIfNeeded() { if (isNetworkError()) diff --git a/dbms/src/Interpreters/DNSCacheUpdater.h b/dbms/src/Interpreters/DNSCacheUpdater.h index 885bcc143e3..6d34697c401 100644 --- a/dbms/src/Interpreters/DNSCacheUpdater.h +++ b/dbms/src/Interpreters/DNSCacheUpdater.h @@ -4,35 +4,31 @@ #include #include +#include + namespace DB { class Context; -class BackgroundProcessingPool; -class BackgroundProcessingPoolTaskInfo; -enum class BackgroundProcessingPoolTaskResult; - /// Add a task to BackgroundProcessingPool that watch for ProfileEvents::NetworkErrors and updates DNS cache if it has increased class DNSCacheUpdater { public: - explicit DNSCacheUpdater(Context & context); - ~DNSCacheUpdater(); /// Checks if it is a network error and increments ProfileEvents::NetworkErrors static bool incrementNetworkErrorEventsIfNeeded(); private: - BackgroundProcessingPoolTaskResult run(); + void run(); Context & context; - BackgroundProcessingPool & pool; - std::shared_ptr task_handle; + BackgroundSchedulePool & pool; + BackgroundSchedulePoolTaskHolder task_handle; + size_t last_num_network_erros = 0; - time_t last_update_time = 0; static constexpr size_t min_errors_to_update_cache = 3; static constexpr time_t min_update_period_seconds = 45; From 31daaa20956dfb16b72a201058fb80fc8c6d5854 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 6 Apr 2019 18:43:16 +0300 Subject: [PATCH 102/102] Add redis to runner image --- dbms/tests/integration/image/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index d36f9ef0e7b..4db05f74b93 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -25,7 +25,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf +RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce