From c12cd35099213cb44fa78911b12a4405bbb28e01 Mon Sep 17 00:00:00 2001 From: Alexey Ilyukhov Date: Sat, 2 May 2020 09:51:10 +0300 Subject: [PATCH 001/120] Add point in polygon for non-const polygons --- src/Functions/pointInPolygon.cpp | 177 ++++++++++++++---- ..._point_in_polygon_non_const_poly.reference | 68 +++++++ .../00500_point_in_polygon_non_const_poly.sql | 86 +++++++++ 3 files changed, 297 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/00500_point_in_polygon_non_const_poly.reference create mode 100644 tests/queries/0_stateless/00500_point_in_polygon_non_const_poly.sql diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index 460c60d6e4c..42e6e0ffeeb 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -44,7 +44,7 @@ namespace { template -ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, Polygon & polygon) +UInt8 callPointInPolygonImplWithPool(Float64 x, Float64 y, Polygon & polygon) { using Pool = ObjectPoolMap; /// C++11 has thread-safe function-local statics on most modern compilers. @@ -63,14 +63,14 @@ ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, P std::string serialized_polygon = serialize(polygon); auto impl = known_polygons.get(serialized_polygon, factory); - return pointInPolygon(x, y, *impl); + return impl->contains(x, y); } template -ColumnPtr callPointInPolygonImpl(const IColumn & x, const IColumn & y, Polygon & polygon) +UInt8 callPointInPolygonImpl(Float64 x, Float64 y, Polygon & polygon) { PointInPolygonImpl impl(polygon); - return pointInPolygon(x, y, impl); + return impl.contains(x, y); } } @@ -116,74 +116,176 @@ public: throw Exception("Too few arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION); } - auto get_message_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); }; - - for (size_t i = 1; i < arguments.size(); ++i) - { - const auto * array = checkAndGetDataType(arguments[i].get()); - if (array == nullptr && i != 1) - throw Exception(get_message_prefix(i) + " must be array of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto * tuple = checkAndGetDataType(array ? 
array->getNestedType().get() : arguments[i].get()); + auto validateTuple = [this](size_t i, const DataTypeTuple * tuple) { if (tuple == nullptr) - throw Exception(get_message_prefix(i) + " must contains tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(getMessagePrefix(i) + " must contain a tuple", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const DataTypes & elements = tuple->getElements(); if (elements.size() != 2) - throw Exception(get_message_prefix(i) + " must have exactly two elements.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(getMessagePrefix(i) + " must have exactly two elements", ErrorCodes::BAD_ARGUMENTS); for (auto j : ext::range(0, elements.size())) { if (!isNativeNumber(elements[j])) { - throw Exception(get_message_prefix(i) + " must contains numeric tuple at position " + toString(j + 1), + throw Exception(getMessagePrefix(i) + " must contain numeric tuple at position " + toString(j + 1), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } + }; + + validateTuple(0, checkAndGetDataType(arguments[0].get())); + + if (arguments.size() == 2) { + auto * array = checkAndGetDataType(arguments[1].get()); + if (array == nullptr) + throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + auto * nested_array = checkAndGetDataType(array->getNestedType().get()); + if (nested_array != nullptr) { + array = nested_array; + } + + validateTuple(1, checkAndGetDataType(array->getNestedType().get())); + } else { + for (size_t i = 1; i < arguments.size(); i++) { + auto * array = checkAndGetDataType(arguments[1].get()); + if (array == nullptr) + throw Exception(getMessagePrefix(i) + " must contain an array of tuples", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + validateTuple(i, checkAndGetDataType(array->getNestedType().get())); + } } return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const IColumn * point_col = block.getByPosition(arguments[0]).column.get(); const auto * const_tuple_col = checkAndGetColumn(point_col); if (const_tuple_col) point_col = &const_tuple_col->getDataColumn(); - const auto * tuple_col = checkAndGetColumn(point_col); + const auto * tuple_col = checkAndGetColumn(point_col); if (!tuple_col) throw Exception("First argument for function " + getName() + " must be constant array of tuples.", ErrorCodes::ILLEGAL_COLUMN); - auto & result_column = block.safeGetByPosition(result).column; - const auto & tuple_columns = tuple_col->getColumns(); - result_column = executeForType(*tuple_columns[0], *tuple_columns[1], block, arguments); - if (const_tuple_col) + const IColumn * poly_col = block.getByPosition(arguments[1]).column.get(); + const auto * const_poly_col = checkAndGetColumn(poly_col); + + bool point_is_const = const_tuple_col != nullptr; + bool poly_is_const = const_poly_col != nullptr; + + auto call_impl = use_object_pool + ? callPointInPolygonImplWithPool + : callPointInPolygonImpl; + + size_t size = point_is_const && poly_is_const ? 1 : input_rows_count; + auto execution_result = ColumnVector::create(size); + auto & data = execution_result->getData(); + + Polygon polygon; + for (auto i : ext::range(0, size)) { + if (!poly_is_const || i == 0) { + polygon = parsePolygon(block, arguments, i); + } + + size_t point_index = point_is_const ? 
0 : i; + data[i] = call_impl(tuple_columns[0]->getFloat64(point_index), tuple_columns[1]->getFloat64(point_index), polygon); + } + + auto & result_column = block.safeGetByPosition(result).column; + result_column = std::move(execution_result); + if (point_is_const && poly_is_const) result_column = ColumnConst::create(result_column, const_tuple_col->size()); } + private: bool validate; - ColumnPtr executeForType(const IColumn & x, const IColumn & y, Block & block, const ColumnNumbers & arguments) + std::string getMessagePrefix(size_t i) const + { + return "Argument " + toString(i + 1) + " for function " + getName(); + } + + Polygon parsePolygonFromSingleColumn(Block & block, const ColumnNumbers & arguments, size_t i) const + { + const auto & poly = block.getByPosition(arguments[1]).column.get(); + const auto * column_const = checkAndGetColumn(poly); + const auto * array_col = + column_const ? checkAndGetColumn(column_const->getDataColumn()) : checkAndGetColumn(poly); + + if (!array_col) + throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples", + ErrorCodes::ILLEGAL_COLUMN); + + const auto * nested_array_col = checkAndGetColumn(array_col->getData()); + const auto & tuple_data = nested_array_col ? nested_array_col->getData() : array_col->getData(); + const auto & tuple_col = checkAndGetColumn(tuple_data); + if (!tuple_col) + throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples", + ErrorCodes::ILLEGAL_COLUMN); + + const auto & tuple_columns = tuple_col->getColumns(); + const auto & x_column = tuple_columns[0]; + const auto & y_column = tuple_columns[1]; + + auto parse_polygon_part = [&x_column, &y_column](auto & container, size_t l, size_t r) { + for (auto j : ext::range(l, r)) + { + CoordinateType x_coord = x_column->getFloat64(j); + CoordinateType y_coord = y_column->getFloat64(j); + + container.push_back(Point(x_coord, y_coord)); + } + }; + + Polygon polygon; + if (nested_array_col) { + for (auto j : ext::range(array_col->getOffsets()[i - 1], array_col->getOffsets()[i])) { + size_t l = nested_array_col->getOffsets()[j - 1]; + size_t r = nested_array_col->getOffsets()[j]; + if (polygon.outer().empty()) { + parse_polygon_part(polygon.outer(), l, r); + } else { + polygon.inners().emplace_back(); + parse_polygon_part(polygon.inners().back(), l, r); + } + } + } else { + size_t l = array_col->getOffsets()[i - 1]; + size_t r = array_col->getOffsets()[i]; + + parse_polygon_part(polygon.outer(), l, r); + } + + return polygon; + } + + Polygon parsePolygonFromMultipleColumns(Block & block, const ColumnNumbers & arguments, size_t) const { Polygon polygon; - auto get_message_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); }; - for (size_t i = 1; i < arguments.size(); ++i) { const auto * const_col = checkAndGetColumn(block.getByPosition(arguments[i]).column.get()); - const auto * array_col = const_col ? checkAndGetColumn(&const_col->getDataColumn()) : nullptr; + if (!const_col) + throw Exception("Multi-argument version of function " + getName() + " works only with const polygon", + ErrorCodes::BAD_ARGUMENTS); + + const auto * array_col = checkAndGetColumn(&const_col->getDataColumn()); const auto * tuple_col = array_col ? 
checkAndGetColumn(&array_col->getData()) : nullptr; if (!tuple_col) - throw Exception(get_message_prefix(i) + " must be constant array of tuples.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(getMessagePrefix(i) + " must be constant array of tuples", ErrorCodes::ILLEGAL_COLUMN); const auto & tuple_columns = tuple_col->getColumns(); const auto & column_x = tuple_columns[0]; @@ -197,7 +299,7 @@ private: auto size = column_x->size(); if (size == 0) - throw Exception(get_message_prefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(getMessagePrefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN); for (auto j : ext::range(0, size)) { @@ -207,6 +309,18 @@ private: } } + return polygon; + } + + Polygon parsePolygon(Block & block, const ColumnNumbers & arguments, size_t i) const + { + Polygon polygon; + if (arguments.size() == 2) { + polygon = parsePolygonFromSingleColumn(block, arguments, i); + } else { + polygon = parsePolygonFromMultipleColumns(block, arguments, i); + } + boost::geometry::correct(polygon); #if !defined(__clang_analyzer__) /// It does not like boost. @@ -218,12 +332,7 @@ private: throw Exception("Polygon is not valid: " + failure_message, ErrorCodes::BAD_ARGUMENTS); } #endif - - auto call_impl = use_object_pool - ? callPointInPolygonImplWithPool - : callPointInPolygonImpl; - - return call_impl(x, y, polygon); + return polygon; } }; diff --git a/tests/queries/0_stateless/00500_point_in_polygon_non_const_poly.reference b/tests/queries/0_stateless/00500_point_in_polygon_non_const_poly.reference new file mode 100644 index 00000000000..083a7ac7236 --- /dev/null +++ b/tests/queries/0_stateless/00500_point_in_polygon_non_const_poly.reference @@ -0,0 +1,68 @@ +Const point; No holes +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +0 +1 +0 +1 +0 +Non-const point; No holes +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +0 +1 +0 +1 +0 +Const point; With holes +0 +0 +0 +0 +0 +1 +1 +0 +0 +1 +1 +0 +1 +0 +1 +0 +Non-const point; With holes +0 +0 +0 +0 +0 +1 +1 +0 +0 +1 +1 +0 +1 +0 +1 +0 diff --git a/tests/queries/0_stateless/00500_point_in_polygon_non_const_poly.sql b/tests/queries/0_stateless/00500_point_in_polygon_non_const_poly.sql new file mode 100644 index 00000000000..f38066debbf --- /dev/null +++ b/tests/queries/0_stateless/00500_point_in_polygon_non_const_poly.sql @@ -0,0 +1,86 @@ +DROP TABLE IF EXISTS polygons; + +SELECT 'Const point; No holes'; +create table polygons ( id Int32, poly Array(Tuple(Int32, Int32))) engine = Log(); + +INSERT INTO polygons VALUES (1, [(0, 0), (10, 0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (2, [(-5, -5), (5, -5), (5, 5), (-5, 5)]); + +SELECT pointInPolygon((-10, 0), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((0, -10), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((-5, -5), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((0, 0), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((5, 5), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((10, 10), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((10, 5), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((5, 10), poly) FROM polygons ORDER BY id; + +DROP TABLE polygons; + +SELECT 'Non-const point; No holes'; + +create table polygons ( id Int32, pt Tuple(Int32, Int32), poly Array(Tuple(Int32, Int32))) engine = Log(); + +INSERT INTO polygons VALUES (1, (-10, 0), [(0, 0), (10, 0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (2, (-10, 0), [(-5, -5), (5, -5), (5, 5), (-5, 5)]); +INSERT INTO polygons VALUES (3, (0, -10), [(0, 0), (10, 
0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (4, (0, -10), [(-5, -5), (5, -5), (5, 5), (-5, 5)]); +INSERT INTO polygons VALUES (5, (-5, -5), [(0, 0), (10, 0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (6, (-5, -5), [(-5, -5), (5, -5), (5, 5), (-5, 5)]); +INSERT INTO polygons VALUES (7, (0, 0), [(0, 0), (10, 0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (8, (0, 0), [(-5, -5), (5, -5), (5, 5), (-5, 5)]); +INSERT INTO polygons VALUES (9, (5, 5), [(0, 0), (10, 0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (10, (5, 5), [(-5, -5), (5, -5), (5, 5), (-5, 5)]); +INSERT INTO polygons VALUES (11, (10, 10), [(0, 0), (10, 0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (12, (10, 10), [(-5, -5), (5, -5), (5, 5), (-5, 5)]); +INSERT INTO polygons VALUES (13, (10, 5), [(0, 0), (10, 0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (14, (10, 5), [(-5, -5), (5, -5), (5, 5), (-5, 5)]); +INSERT INTO polygons VALUES (15, (5, 10), [(0, 0), (10, 0), (10, 10), (0, 10)]); +INSERT INTO polygons VALUES (16, (5, 10), [(-5, -5), (5, -5), (5, 5), (-5, 5)]); + +SELECT pointInPolygon(pt, poly) FROM polygons ORDER BY id; + +DROP TABLE polygons; + +SELECT 'Const point; With holes'; + +create table polygons ( id Int32, poly Array(Array(Tuple(Int32, Int32)))) engine = Log(); + +INSERT INTO polygons VALUES (1, [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT INTO polygons VALUES (2, [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); + +SELECT pointInPolygon((-10, 0), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((0, -10), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((-5, -5), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((0, 0), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((5, 5), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((10, 10), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((10, 5), poly) FROM polygons ORDER BY id; +SELECT pointInPolygon((5, 10), poly) FROM polygons ORDER BY id; + +DROP TABLE polygons; + +SELECT 'Non-const point; With holes'; + +create table polygons ( id Int32, pt Tuple(Int32, Int32), poly Array(Array(Tuple(Int32, Int32)))) engine = Log(); + +INSERT INTO polygons VALUES (1, (-10, 0), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT INTO polygons VALUES (2, (-10, 0), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); +INSERT INTO polygons VALUES (3, (0, -10), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT INTO polygons VALUES (4, (0, -10), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); +INSERT INTO polygons VALUES (5, (-5, -5), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT INTO polygons VALUES (6, (-5, -5), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); +INSERT INTO polygons VALUES (7, (0, 0), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT INTO polygons VALUES (8, (0, 0), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); +INSERT INTO polygons VALUES (9, (5, 5), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT INTO polygons VALUES (10, (5, 5), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); +INSERT INTO polygons VALUES (11, (10, 10), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT 
INTO polygons VALUES (12, (10, 10), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); +INSERT INTO polygons VALUES (13, (10, 5), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT INTO polygons VALUES (14, (10, 5), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); +INSERT INTO polygons VALUES (15, (5, 10), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]); +INSERT INTO polygons VALUES (16, (5, 10), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]); + +SELECT pointInPolygon(pt, poly) FROM polygons ORDER BY id; + +DROP TABLE polygons; \ No newline at end of file From a03da4eb34fb089aaece2cfe0f10d928ab6a59dd Mon Sep 17 00:00:00 2001 From: Alexey Ilyukhov Date: Sat, 2 May 2020 19:48:36 +0300 Subject: [PATCH 002/120] Fix style and array index --- src/Functions/pointInPolygon.cpp | 57 +++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index 42e6e0ffeeb..f1761c49485 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -116,7 +116,8 @@ public: throw Exception("Too few arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION); } - auto validateTuple = [this](size_t i, const DataTypeTuple * tuple) { + auto validate_tuple = [this](size_t i, const DataTypeTuple * tuple) + { if (tuple == nullptr) throw Exception(getMessagePrefix(i) + " must contain a tuple", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -135,28 +136,33 @@ public: } }; - validateTuple(0, checkAndGetDataType(arguments[0].get())); + validate_tuple(0, checkAndGetDataType(arguments[0].get())); - if (arguments.size() == 2) { + if (arguments.size() == 2) + { auto * array = checkAndGetDataType(arguments[1].get()); if (array == nullptr) throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto * nested_array = checkAndGetDataType(array->getNestedType().get()); - if (nested_array != nullptr) { + if (nested_array != nullptr) + { array = nested_array; } - validateTuple(1, checkAndGetDataType(array->getNestedType().get())); - } else { - for (size_t i = 1; i < arguments.size(); i++) { - auto * array = checkAndGetDataType(arguments[1].get()); + validate_tuple(1, checkAndGetDataType(array->getNestedType().get())); + } + else + { + for (size_t i = 1; i < arguments.size(); i++) + { + auto * array = checkAndGetDataType(arguments[i].get()); if (array == nullptr) throw Exception(getMessagePrefix(i) + " must contain an array of tuples", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - validateTuple(i, checkAndGetDataType(array->getNestedType().get())); + validate_tuple(i, checkAndGetDataType(array->getNestedType().get())); } } @@ -192,8 +198,10 @@ public: auto & data = execution_result->getData(); Polygon polygon; - for (auto i : ext::range(0, size)) { - if (!poly_is_const || i == 0) { + for (auto i : ext::range(0, size)) + { + if (!poly_is_const || i == 0) + { polygon = parsePolygon(block, arguments, i); } @@ -238,7 +246,8 @@ private: const auto & x_column = tuple_columns[0]; const auto & y_column = tuple_columns[1]; - auto parse_polygon_part = [&x_column, &y_column](auto & container, size_t l, size_t r) { + auto parse_polygon_part = [&x_column, &y_column](auto & container, size_t l, size_t r) + { for (auto j : ext::range(l, r)) { CoordinateType x_coord = x_column->getFloat64(j); @@ -249,18 
+258,25 @@ private: }; Polygon polygon; - if (nested_array_col) { - for (auto j : ext::range(array_col->getOffsets()[i - 1], array_col->getOffsets()[i])) { + if (nested_array_col) + { + for (auto j : ext::range(array_col->getOffsets()[i - 1], array_col->getOffsets()[i])) + { size_t l = nested_array_col->getOffsets()[j - 1]; size_t r = nested_array_col->getOffsets()[j]; - if (polygon.outer().empty()) { + if (polygon.outer().empty()) + { parse_polygon_part(polygon.outer(), l, r); - } else { + } + else + { polygon.inners().emplace_back(); parse_polygon_part(polygon.inners().back(), l, r); } } - } else { + } + else + { size_t l = array_col->getOffsets()[i - 1]; size_t r = array_col->getOffsets()[i]; @@ -315,9 +331,12 @@ private: Polygon parsePolygon(Block & block, const ColumnNumbers & arguments, size_t i) const { Polygon polygon; - if (arguments.size() == 2) { + if (arguments.size() == 2) + { polygon = parsePolygonFromSingleColumn(block, arguments, i); - } else { + } + else + { polygon = parsePolygonFromMultipleColumns(block, arguments, i); } From 6a9355c95dddecf5f996e61e76f624f21095e88b Mon Sep 17 00:00:00 2001 From: Alexey Ilyukhov Date: Tue, 5 May 2020 13:34:58 +0300 Subject: [PATCH 003/120] Do not use grid for non const --- src/Functions/pointInPolygon.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index f1761c49485..b46b385f738 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -75,7 +75,7 @@ UInt8 callPointInPolygonImpl(Float64 x, Float64 y, Polygon & polygon) } -template +template class FunctionPointInPolygon : public IFunction { public: @@ -91,7 +91,8 @@ public: static FunctionPtr create(const Context & context) { - return std::make_shared>(context.getSettingsRef().validate_polygons); + return std::make_shared>( + context.getSettingsRef().validate_polygons); } String getName() const override @@ -189,9 +190,9 @@ public: bool point_is_const = const_tuple_col != nullptr; bool poly_is_const = const_poly_col != nullptr; - auto call_impl = use_object_pool - ? callPointInPolygonImplWithPool - : callPointInPolygonImpl; + auto call_impl = poly_is_const + ? callPointInPolygonImplWithPool + : callPointInPolygonImpl; size_t size = point_is_const && poly_is_const ? 1 : input_rows_count; auto execution_result = ColumnVector::create(size); @@ -358,7 +359,7 @@ private: void registerFunctionPointInPolygon(FunctionFactory & factory) { - factory.registerFunction, true>>(); + factory.registerFunction, PointInPolygonTrivial>>(); } } From 26424ec3884685b244654ae83327828c0f8504af Mon Sep 17 00:00:00 2001 From: DoomzD Date: Tue, 5 May 2020 17:46:23 +0300 Subject: [PATCH 004/120] Add perftest for pointInPolygon --- tests/performance/point_in_polygon.xml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/performance/point_in_polygon.xml diff --git a/tests/performance/point_in_polygon.xml b/tests/performance/point_in_polygon.xml new file mode 100644 index 00000000000..d854fb6952b --- /dev/null +++ b/tests/performance/point_in_polygon.xml @@ -0,0 +1,6 @@ + + CREATE TABLE point_in_polygon(`polygon` Array(Array(Float64, Float64))) ENGINE = Log() + insert into point_in_polygon SELECT arrayJoin(arrayMap(y -> [arrayMap(x -> (cos(x / 90. * pi()) * y, sin(x / 90. 
* pi()) * y), range(180))], arraySlice(range(35000), 2, 35000))) + SELECT pointInPolygon((100, 100), `polygon`) from point_in_polygon + DROP TABLE IF EXISTS point_in_polygon + From ad2630613180db23f3b361491f7d45d6200ba78a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 10 May 2020 03:09:51 +0300 Subject: [PATCH 005/120] Fix clang-tidy. --- src/Functions/pointInPolygon.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index b46b385f738..dcd94af03ee 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -141,12 +141,12 @@ public: if (arguments.size() == 2) { - auto * array = checkAndGetDataType(arguments[1].get()); + const auto * array = checkAndGetDataType(arguments[1].get()); if (array == nullptr) throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - auto * nested_array = checkAndGetDataType(array->getNestedType().get()); + const auto * nested_array = checkAndGetDataType(array->getNestedType().get()); if (nested_array != nullptr) { array = nested_array; @@ -158,7 +158,7 @@ public: { for (size_t i = 1; i < arguments.size(); i++) { - auto * array = checkAndGetDataType(arguments[i].get()); + const auto * array = checkAndGetDataType(arguments[i].get()); if (array == nullptr) throw Exception(getMessagePrefix(i) + " must contain an array of tuples", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); From 99d50063f388038b42416d11dec1616f66fc9923 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 14 May 2020 14:36:19 +0300 Subject: [PATCH 006/120] Support MongoDB URI --- src/Dictionaries/MongoDBDictionarySource.cpp | 76 ++++++++++--------- src/Dictionaries/MongoDBDictionarySource.h | 10 +-- .../external_sources.py | 15 ++++ .../test.py | 3 +- 4 files changed, 60 insertions(+), 44 deletions(-) diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 2e423b7d511..07f0b20e908 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -8,15 +8,27 @@ namespace DB void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) { - auto create_table_source = [=](const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Block & sample_block, - const Context & /* context */, - bool /* check_config */) -> DictionarySourcePtr { - return std::make_unique(dict_struct, config, config_prefix + ".mongodb", sample_block); + auto createMongoDBDictionary = []( + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context &, + bool /* check_config */) + { + return std::make_unique(dict_struct, + config.getString(config_prefix + ".uri"), + config.getString(config_prefix + ".host"), + config.getUInt(config_prefix + ".port"), + config.getString(config_prefix + ".user", ""), + config.getString(config_prefix + ".password", ""), + config.getString(config_prefix + ".method", ""), + config.getString(config_prefix + ".db", ""), + config.getString(config_prefix + ".collection"), + sample_block); }; - factory.registerSource("mongodb", create_table_source); + + factory.registerSource("mongodb", createMongoDBDictionary); } } @@ -155,6 +167,7 @@ authenticate(Poco::MongoDB::Connection & connection, const 
std::string & databas MongoDBDictionarySource::MongoDBDictionarySource( const DictionaryStructure & dict_struct_, + const std::string & uri_, const std::string & host_, UInt16 port_, const std::string & user_, @@ -164,6 +177,7 @@ MongoDBDictionarySource::MongoDBDictionarySource( const std::string & collection_, const Block & sample_block_) : dict_struct{dict_struct_} + , uri{uri_} , host{host_} , port{port_} , user{user_} @@ -174,41 +188,31 @@ MongoDBDictionarySource::MongoDBDictionarySource( , sample_block{sample_block_} , connection{std::make_shared(host, port)} { - if (!user.empty()) + if (!uri.empty()) { -#if POCO_VERSION >= 0x01070800 - Poco::MongoDB::Database poco_db(db); - if (!poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method)) - throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); -#else - authenticate(*connection, db, user, password); -#endif + Poco::MongoDB::Connection::SocketFactory socket_factory; + connection->connect(uri, socket_factory); + } + else + { + connection->connect(host, port); + if (!user.empty()) + { +#if POCO_VERSION >= 0x01070800 + Poco::MongoDB::Database poco_db(db); + if (!poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method)) + throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); +#else + authenticate(*connection, db, user, password); +#endif + } } -} - - -MongoDBDictionarySource::MongoDBDictionarySource( - const DictionaryStructure & dict_struct_, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Block & sample_block_) - : MongoDBDictionarySource( - dict_struct_, - config.getString(config_prefix + ".host"), - config.getUInt(config_prefix + ".port"), - config.getString(config_prefix + ".user", ""), - config.getString(config_prefix + ".password", ""), - config.getString(config_prefix + ".method", ""), - config.getString(config_prefix + ".db", ""), - config.getString(config_prefix + ".collection"), - sample_block_) -{ } MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource & other) : MongoDBDictionarySource{ - other.dict_struct, other.host, other.port, other.user, other.password, other.method, other.db, other.collection, other.sample_block} + other.dict_struct, other.uri, other.host, other.port, other.user, other.password, other.method, other.db, other.collection, other.sample_block} { } diff --git a/src/Dictionaries/MongoDBDictionarySource.h b/src/Dictionaries/MongoDBDictionarySource.h index 23562c75500..d90f28e1e74 100644 --- a/src/Dictionaries/MongoDBDictionarySource.h +++ b/src/Dictionaries/MongoDBDictionarySource.h @@ -29,8 +29,10 @@ namespace ErrorCodes /// Allows loading dictionaries from a MongoDB collection class MongoDBDictionarySource final : public IDictionarySource { +public: MongoDBDictionarySource( const DictionaryStructure & dict_struct_, + const std::string & uri_, const std::string & host_, UInt16 port_, const std::string & user_, @@ -40,13 +42,6 @@ class MongoDBDictionarySource final : public IDictionarySource const std::string & collection_, const Block & sample_block_); -public: - MongoDBDictionarySource( - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Block & sample_block); - MongoDBDictionarySource(const MongoDBDictionarySource 
& other); ~MongoDBDictionarySource() override; @@ -76,6 +71,7 @@ public: private: const DictionaryStructure dict_struct; + const std::string uri; const std::string host; const UInt16 port; const std::string user; diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index d4879232172..7d1ded04bdc 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -178,6 +178,21 @@ class SourceMongo(ExternalSource): result = tbl.insert_many(to_insert) +class SourceMongoURI(SourceMongo): + def get_source_str(self, table_name): + return ''' + + mongodb://{user}:{password}@{host}:{port}/test + {tbl} + + '''.format( + host=self.docker_hostname, + port=self.docker_port, + user=self.user, + password=self.password, + tbl=table_name, + ) + class SourceClickHouse(ExternalSource): def get_source_str(self, table_name): diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index cc899ffd0cc..36034bab357 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -4,7 +4,7 @@ import os from helpers.cluster import ClickHouseCluster from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed -from external_sources import SourceMongo, SourceHTTP, SourceHTTPS, SourceRedis +from external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis import math SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -117,6 +117,7 @@ LAYOUTS = [ SOURCES = [ SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), + SourceMongoURI("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"), SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", ""), SourceClickHouse("LocalClickHouse", "localhost", "9000", "node", "9000", "default", ""), From 8e532a5d1f9504fbf215c72d4da584ef149b45c2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 May 2020 21:17:15 +0300 Subject: [PATCH 007/120] Update MongoDBDictionarySource.cpp --- src/Dictionaries/MongoDBDictionarySource.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 07f0b20e908..0828c9ff51b 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -17,9 +17,9 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) bool /* check_config */) { return std::make_unique(dict_struct, - config.getString(config_prefix + ".uri"), - config.getString(config_prefix + ".host"), - config.getUInt(config_prefix + ".port"), + config.getString(config_prefix + ".uri", ""), + config.getString(config_prefix + ".host", ""), + config.getUInt(config_prefix + ".port", 0), config.getString(config_prefix + ".user", ""), config.getString(config_prefix + ".password", ""), config.getString(config_prefix + ".method", 
""), From a11c9a3a228a8efb60a2f1c35f1ec3f5d73fdd8d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 May 2020 21:18:37 +0300 Subject: [PATCH 008/120] Update MongoDBDictionarySource.cpp --- src/Dictionaries/MongoDBDictionarySource.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 0828c9ff51b..4baa66728c9 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -8,7 +8,7 @@ namespace DB void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) { - auto createMongoDBDictionary = []( + auto create_mongo_db_dictionary = []( const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, @@ -28,7 +28,7 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) sample_block); }; - factory.registerSource("mongodb", createMongoDBDictionary); + factory.registerSource("mongodb", create_mongo_db_dictionary); } } From 500b9bac03b15533a561ea6375b50f513c30449e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 15 May 2020 13:48:55 +0300 Subject: [PATCH 009/120] fix config prefix --- src/Dictionaries/MongoDBDictionarySource.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 4baa66728c9..d9601f29a03 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -11,11 +11,12 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) auto create_mongo_db_dictionary = []( const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, + const std::string & root_config_prefix, Block & sample_block, const Context &, bool /* check_config */) { + const auto config_prefix = root_config_prefix + ".mongodb"; return std::make_unique(dict_struct, config.getString(config_prefix + ".uri", ""), config.getString(config_prefix + ".host", ""), From c35c89f61aeace947f256657db74cc61976f66c5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 12 May 2020 21:05:11 +0300 Subject: [PATCH 010/120] Add a test for min_insert_block_size_rows_for_materialized_views --- ...size_rows_for_materialized_views.reference | 4 + ..._block_size_rows_for_materialized_views.sh | 92 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.reference create mode 100755 tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh diff --git a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.reference b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.reference new file mode 100644 index 00000000000..ed22b7e1e35 --- /dev/null +++ b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.reference @@ -0,0 +1,4 @@ +0 +0 +100000 +200000 diff --git a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh new file mode 100755 index 00000000000..7e08c930f67 --- /dev/null +++ 
b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +# just in case +set -o pipefail + +function execute() +{ + ${CLICKHOUSE_CLIENT} -n "$@" +} + +# +# TEST SETTINGS +# +TEST_01278_PARTS=9 +TEST_01278_MEMORY=$((100<<20)) + +function cleanup() +{ + for i in $(seq 1 $TEST_01278_PARTS); do + echo "drop table if exists part_01278_$i;" + echo "drop table if exists mv_01278_$i;" + done | execute + echo 'drop table if exists data_01278;' | execute + echo 'drop table if exists out_01278;' | execute + echo 'drop table if exists null_01278;' | execute +} + +cleanup +trap cleanup EXIT + +# +# CREATE +# +{ +cat < Date: Tue, 12 May 2020 21:05:12 +0300 Subject: [PATCH 011/120] Introduce min_insert_block_size_{rows,bytes}_for_materialized_views With tons of MATERIALIZED VIEW attached to one table pushing to this views can be pretty memory consuming due to blocks squashing, add ability to control this separatelly for MATERIALIZED VIEWs. Plus squashing is useless if the underlying engine is Buffer(). --- src/Core/Settings.h | 2 ++ .../PushingToViewsBlockOutputStream.cpp | 27 ++++++++++++++----- .../PushingToViewsBlockOutputStream.h | 3 ++- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 34d05900f77..d0e86df45c4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -52,6 +52,8 @@ struct Settings : public SettingsCollection M(SettingUInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(SettingUInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(SettingUInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ + M(SettingUInt64, min_insert_block_size_rows_for_materialized_views, 0, "Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows)", 0) \ + M(SettingUInt64, min_insert_block_size_bytes_for_materialized_views, 0, "Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes)", 0) \ M(SettingUInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \ M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \ M(SettingMaxThreads, max_threads, 0, "The maximum number of threads to execute the request. 
By default, it is determined automatically.", 0) \ diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index ce0922bf282..9a65d948b30 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -40,10 +40,23 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( /// We need special context for materialized views insertions if (!dependencies.empty()) { - views_context = std::make_unique(context); + select_context = std::make_unique(context); + insert_context = std::make_unique(context); + + const auto & insert_settings = insert_context->getSettingsRef(); + // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) - views_context->setSetting("insert_deduplicate", false); + insert_context->setSetting("insert_deduplicate", false); + + // Separate min_insert_block_size_rows/min_insert_block_size_bytes for children + if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed || insert_settings.min_insert_block_size_bytes_for_materialized_views.changed) + { + if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed) + insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value); + if (insert_settings.min_insert_block_size_bytes_for_materialized_views.changed) + insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value); + } } for (const auto & database_table : dependencies) @@ -67,7 +80,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( insert->table_id = inner_table_id; /// Get list of columns we get from select query. - auto header = InterpreterSelectQuery(query, *views_context, SelectQueryOptions().analyze()) + auto header = InterpreterSelectQuery(query, *select_context, SelectQueryOptions().analyze()) .getSampleBlock(); /// Insert only columns returned by select. @@ -81,14 +94,14 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( insert->columns = std::move(list); ASTPtr insert_query_ptr(insert.release()); - InterpreterInsertQuery interpreter(insert_query_ptr, *views_context); + InterpreterInsertQuery interpreter(insert_query_ptr, *insert_context); BlockIO io = interpreter.execute(); out = io.out; } else if (dynamic_cast(dependent_table.get())) - out = std::make_shared(dependent_table, *views_context, ASTPtr(), true); + out = std::make_shared(dependent_table, *insert_context, ASTPtr(), true); else - out = std::make_shared(dependent_table, *views_context, ASTPtr()); + out = std::make_shared(dependent_table, *insert_context, ASTPtr()); views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr}); } @@ -258,7 +271,7 @@ void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_n /// but it will contain single block (that is INSERT-ed into main table). /// InterpreterSelectQuery will do processing of alias columns. 
- Context local_context = *views_context; + Context local_context = *select_context; local_context.addViewSource( StorageValues::create( storage->getStorageID(), storage->getColumns(), block, storage->getVirtuals())); diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.h b/src/DataStreams/PushingToViewsBlockOutputStream.h index a2a1ca5caf5..c5fef413a23 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -44,7 +44,8 @@ private: }; std::vector views; - std::unique_ptr views_context; + std::unique_ptr select_context; + std::unique_ptr insert_context; void process(const Block & block, size_t view_num); }; From fdc4823065e9ee5772a5cbf22537778abcd45760 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 17 May 2020 13:41:54 +0300 Subject: [PATCH 012/120] Simplify min_insert_block_size_{rows,bytes}_for_materialized_views changed detection --- src/DataStreams/PushingToViewsBlockOutputStream.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 9a65d948b30..7f8eccda549 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -50,13 +50,10 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( insert_context->setSetting("insert_deduplicate", false); // Separate min_insert_block_size_rows/min_insert_block_size_bytes for children - if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed || insert_settings.min_insert_block_size_bytes_for_materialized_views.changed) - { - if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed) - insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value); - if (insert_settings.min_insert_block_size_bytes_for_materialized_views.changed) - insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value); - } + if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed) + insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value); + if (insert_settings.min_insert_block_size_bytes_for_materialized_views.changed) + insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value); } for (const auto & database_table : dependencies) From b0a5ce7743b4f0288bdb06a725033d1045a280ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 May 2020 04:19:50 +0300 Subject: [PATCH 013/120] Enable clang-tidy for programs and utils --- .clang-tidy | 14 +++++++------- programs/CMakeLists.txt | 4 ++++ programs/odbc-bridge/ODBCBlockOutputStream.cpp | 4 ++-- src/Common/tests/cow_columns.cpp | 8 ++++---- src/Common/tests/cow_compositions.cpp | 8 ++++---- utils/CMakeLists.txt | 4 ++++ utils/convert-month-partitioned-parts/main.cpp | 2 +- utils/iotest/iotest.cpp | 8 ++++---- utils/iotest/iotest_aio.cpp | 18 +++++++----------- utils/iotest/iotest_nonblock.cpp | 6 +++--- utils/test-data-generator/CMakeLists.txt | 3 +++ .../main.cpp | 4 ++-- utils/zookeeper-cli/zookeeper-cli.cpp | 8 +++----- 13 files changed, 48 insertions(+), 43 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 3c222fbf8da..b0971418e0e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -9,7 
+9,7 @@ Checks: '-*, misc-unused-alias-decls, misc-unused-parameters, misc-unused-using-decls, - + modernize-avoid-bind, modernize-loop-convert, modernize-make-shared, @@ -33,7 +33,7 @@ Checks: '-*, performance-no-automatic-move, performance-trivially-destructible, performance-unnecessary-copy-initialization, - + readability-avoid-const-params-in-decls, readability-const-return-type, readability-container-size-empty, @@ -58,7 +58,7 @@ Checks: '-*, readability-simplify-boolean-expr, readability-inconsistent-declaration-parameter-name, readability-identifier-naming, - + bugprone-undelegated-constructor, bugprone-argument-comment, bugprone-bad-signal-to-kill-thread, @@ -102,7 +102,7 @@ Checks: '-*, bugprone-unused-return-value, bugprone-use-after-move, bugprone-virtual-near-miss, - + cert-dcl21-cpp, cert-dcl50-cpp, cert-env33-c, @@ -112,7 +112,7 @@ Checks: '-*, cert-mem57-cpp, cert-msc50-cpp, cert-oop58-cpp, - + google-build-explicit-make-pair, google-build-namespaces, google-default-arguments, @@ -121,9 +121,9 @@ Checks: '-*, google-readability-avoid-underscore-in-googletest-name, google-runtime-int, google-runtime-operator, - + hicpp-exception-baseclass, - + clang-analyzer-core.CallAndMessage, clang-analyzer-core.DivideZero, clang-analyzer-core.NonNullParamChecker, diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 7cbe2e7a2a6..a3d3188653b 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -1,3 +1,7 @@ +if (USE_CLANG_TIDY) + set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") +endif () + # 'clickhouse' binary is a multi purpose tool, # that contain multiple execution modes (client, server, etc.) # each of them is built and linked as a separate library, defined below. diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index c2597805230..ab24c008e40 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -24,8 +24,8 @@ namespace query.table_id.table_name = table_name; query.columns = std::make_shared(','); query.children.push_back(query.columns); - for (size_t i = 0; i < columns.size(); ++i) - query.columns->children.emplace_back(std::make_shared(columns[i].name)); + for (const auto & column : columns) + query.columns->children.emplace_back(std::make_shared(column.name)); std::stringstream ss; IAST::FormatSettings settings(ss, true); diff --git a/src/Common/tests/cow_columns.cpp b/src/Common/tests/cow_columns.cpp index fa84fc9ebc2..404b478f5a0 100644 --- a/src/Common/tests/cow_columns.cpp +++ b/src/Common/tests/cow_columns.cpp @@ -56,8 +56,8 @@ int main(int, char **) MutableColumnPtr mut = IColumn::mutate(std::move(y)); mut->set(2); - std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; - std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n"; + std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << mut.get() << "\n"; y = std::move(mut); } @@ -75,8 +75,8 @@ int main(int, char **) MutableColumnPtr mut = IColumn::mutate(std::move(y)); mut->set(3); - std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; - std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n"; + std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << mut.get() 
<< "\n"; y = std::move(mut); } diff --git a/src/Common/tests/cow_compositions.cpp b/src/Common/tests/cow_compositions.cpp index be33f392497..74369e86300 100644 --- a/src/Common/tests/cow_compositions.cpp +++ b/src/Common/tests/cow_compositions.cpp @@ -75,8 +75,8 @@ int main(int, char **) MutableColumnPtr mut = IColumn::mutate(std::move(y)); mut->set(2); - std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; - std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n"; + std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << mut.get() << "\n"; y = std::move(mut); } @@ -94,8 +94,8 @@ int main(int, char **) MutableColumnPtr mut = IColumn::mutate(std::move(y)); mut->set(3); - std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; - std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n"; + std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << ", " << mut.get() << "\n"; y = std::move(mut); } diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index c8531bd63a0..94042ea4090 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -1,3 +1,7 @@ +if (USE_CLANG_TIDY) + set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") +endif () + if(MAKE_STATIC_LIBRARIES) set(MAX_LINKER_MEMORY 3500) else() diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp index 51ea87d35b9..af8e221a10b 100644 --- a/utils/convert-month-partitioned-parts/main.cpp +++ b/utils/convert-month-partitioned-parts/main.cpp @@ -30,7 +30,7 @@ void run(String part_path, String date_column, String dest_path) { std::shared_ptr disk = std::make_shared("local", "/", 0); auto old_part_path = Poco::Path::forDirectory(part_path); - String old_part_name = old_part_path.directory(old_part_path.depth() - 1); + const String & old_part_name = old_part_path.directory(old_part_path.depth() - 1); String old_part_path_str = old_part_path.toString(); auto part_info = MergeTreePartInfo::fromPartName(old_part_name, MergeTreeDataFormatVersion(0)); diff --git a/utils/iotest/iotest.cpp b/utils/iotest/iotest.cpp index ed846e3d46f..e578a539bcd 100644 --- a/utils/iotest/iotest.cpp +++ b/utils/iotest/iotest.cpp @@ -59,9 +59,9 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block for (size_t i = 0; i < count; ++i) { - long rand_result1 = rng(); - long rand_result2 = rng(); - long rand_result3 = rng(); + uint64_t rand_result1 = rng(); + uint64_t rand_result2 = rng(); + uint64_t rand_result3 = rng(); size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); size_t offset; @@ -152,7 +152,7 @@ int mainImpl(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < threads; ++i) - pool.scheduleOrThrowOnError(std::bind(thread, fd, mode, min_offset, max_offset, block_size, count)); + pool.scheduleOrThrowOnError([=]{ thread(fd, mode, min_offset, max_offset, block_size, count); }); pool.wait(); fsync(fd); diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp index c0945fbe1e1..24508c1dd9f 100644 --- a/utils/iotest/iotest_aio.cpp +++ b/utils/iotest/iotest_aio.cpp @@ -13,6 +13,8 @@ int main(int, char **) { return 0; } #include #include #include +#include +#include #include #include #include @@ -52,10 +54,7 @@ void thread(int 
fd, int mode, size_t min_offset, size_t max_offset, size_t block for (size_t i = 0; i < buffers_count; ++i) buffers[i] = Memory<>(block_size, sysconf(_SC_PAGESIZE)); - drand48_data rand_data; - timespec times; - clock_gettime(CLOCK_THREAD_CPUTIME_ID, ×); - srand48_r(times.tv_nsec, &rand_data); + pcg64_fast rng(randomSeed()); size_t in_progress = 0; size_t blocks_sent = 0; @@ -82,12 +81,9 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block char * buf = buffers[i].data(); - long rand_result1 = 0; - long rand_result2 = 0; - long rand_result3 = 0; - lrand48_r(&rand_data, &rand_result1); - lrand48_r(&rand_data, &rand_result2); - lrand48_r(&rand_data, &rand_result3); + uint64_t rand_result1 = rng(); + uint64_t rand_result2 = rng(); + uint64_t rand_result3 = rng(); size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); size_t offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size; @@ -172,7 +168,7 @@ int mainImpl(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < threads_count; ++i) - pool.scheduleOrThrowOnError(std::bind(thread, fd, mode, min_offset, max_offset, block_size, buffers_count, count)); + pool.scheduleOrThrowOnError([=]{ thread(fd, mode, min_offset, max_offset, block_size, buffers_count, count); }); pool.wait(); watch.stop(); diff --git a/utils/iotest/iotest_nonblock.cpp b/utils/iotest/iotest_nonblock.cpp index 9317e7ed47f..524d6298da5 100644 --- a/utils/iotest/iotest_nonblock.cpp +++ b/utils/iotest/iotest_nonblock.cpp @@ -113,9 +113,9 @@ int mainImpl(int argc, char ** argv) polls[i].revents = 0; ++ops; - long rand_result1 = rng(); - long rand_result2 = rng(); - long rand_result3 = rng(); + uint64_t rand_result1 = rng(); + uint64_t rand_result2 = rng(); + uint64_t rand_result3 = rng(); size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); size_t offset; diff --git a/utils/test-data-generator/CMakeLists.txt b/utils/test-data-generator/CMakeLists.txt index d8a2111cf07..3a94358e86d 100644 --- a/utils/test-data-generator/CMakeLists.txt +++ b/utils/test-data-generator/CMakeLists.txt @@ -1,3 +1,6 @@ +# Disable clang-tidy for protobuf generated files +set (CMAKE_CXX_CLANG_TIDY "") + add_compile_options(-Wno-zero-as-null-pointer-constant -Wno-array-bounds) # Protobuf generated files if (USE_PROTOBUF) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp index 91431c01648..a896129f915 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -102,7 +102,7 @@ std::unordered_map getPartitionsNeedAdjustingBlockNumbers( std::cout << "Shard: " << shard << std::endl; std::vector use_tables = tables.empty() ? 
getAllTables(zk, root, shard) : removeNotExistingTables(zk, root, shard, tables); - for (auto table : use_tables) + for (const auto & table : use_tables) { std::cout << "\tTable: " << table << std::endl; std::string table_path = root + "/" + shard + "/" + table; @@ -121,7 +121,7 @@ std::unordered_map getPartitionsNeedAdjustingBlockNumbers( continue; } - for (auto partition : partitions) + for (const auto & partition : partitions) { try { diff --git a/utils/zookeeper-cli/zookeeper-cli.cpp b/utils/zookeeper-cli/zookeeper-cli.cpp index 40755fc0160..0a503e77250 100644 --- a/utils/zookeeper-cli/zookeeper-cli.cpp +++ b/utils/zookeeper-cli/zookeeper-cli.cpp @@ -97,10 +97,8 @@ int main(int argc, char ** argv) bool watch = w == "w"; zkutil::EventPtr event = watch ? std::make_shared() : nullptr; std::vector v = zk.getChildren(path, nullptr, event); - for (size_t i = 0; i < v.size(); ++i) - { - std::cout << v[i] << std::endl; - } + for (const auto & child : v) + std::cout << child << std::endl; if (watch) waitForWatch(event); } @@ -193,7 +191,7 @@ int main(int argc, char ** argv) zk.set(path, data, version, &stat); printStat(stat); } - else if (cmd != "") + else if (!cmd.empty()) { std::cout << "commands:\n"; std::cout << " q\n"; From 6f0c78dfdda139aecdec1f1d1e647ae9190894bb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 May 2020 06:50:53 +0300 Subject: [PATCH 014/120] Vectorize "sum" function --- src/AggregateFunctions/AggregateFunctionSum.h | 94 +++++++++++++++++-- 1 file changed, 85 insertions(+), 9 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index e9a6e50d9ef..b500fe91653 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -25,6 +25,36 @@ struct AggregateFunctionSumData sum += value; } + /// Vectorized version + template + void addMany(const Value * __restrict ptr, size_t count) + { + /// Compiler cannot unroll this loop, do it manually. + + /// Something around the number of SSE registers * the number of elements fit in register. 
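+ /// For example, with T = UInt64: unroll_count = 128 / 8 = 16 partial sums, i.e. 128 bytes
+ /// of accumulators, which is 8 SSE registers of 2 elements or 4 AVX2 registers of 4 elements.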
+ constexpr size_t unroll_count = 128 / sizeof(T); + T partial_sums[unroll_count]{}; + + const auto * end = ptr + count; + const auto * unrolled_end = ptr + (count / unroll_count * unroll_count); + + while (ptr < unrolled_end) + { + for (size_t i = 0; i < unroll_count; ++i) + partial_sums[i] += ptr[i]; + ptr += unroll_count; + } + + for (size_t i = 0; i < unroll_count; ++i) + sum += partial_sums[i]; + + while (ptr < end) + { + sum += *ptr; + ++ptr; + } + } + void merge(const AggregateFunctionSumData & rhs) { sum += rhs.sum; @@ -55,21 +85,60 @@ struct AggregateFunctionSumKahanData T sum{}; T compensation{}; + template + ALWAYS_INLINE void addImpl(Value value, T & out_sum, T & out_compensation) + { + auto compensated_value = value - out_compensation; + auto new_sum = out_sum + compensated_value; + out_compensation = (new_sum - out_sum) - compensated_value; + out_sum = new_sum; + } + void add(T value) { - auto compensated_value = value - compensation; - auto new_sum = sum + compensated_value; - compensation = (new_sum - sum) - compensated_value; - sum = new_sum; + addImpl(value, sum, compensation); + } + + /// Vectorized version + template + void addMany(const Value * __restrict ptr, size_t count) + { + constexpr size_t unroll_count = 4; // 128 / sizeof(T); + T partial_sums[unroll_count]{}; + T partial_compensations[unroll_count]{}; + + const auto * end = ptr + count; + const auto * unrolled_end = ptr + (count / unroll_count * unroll_count); + + while (ptr < unrolled_end) + { + for (size_t i = 0; i < unroll_count; ++i) + addImpl(ptr[i], partial_sums[i], partial_compensations[i]); + ptr += unroll_count; + } + + for (size_t i = 0; i < unroll_count; ++i) + mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]); + + while (ptr < end) + { + addImpl(*ptr, sum, compensation); + ++ptr; + } + } + + void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation) + { + auto raw_sum = to_sum + from_sum; + auto rhs_compensated = raw_sum - to_sum; + auto compensations = ((from_sum - rhs_compensated) + (to_sum - (raw_sum - rhs_compensated))) + compensation + from_compensation; + to_sum = raw_sum + compensations; + to_compensation = compensations - (to_sum - raw_sum); } void merge(const AggregateFunctionSumKahanData & rhs) { - auto raw_sum = sum + rhs.sum; - auto rhs_compensated = raw_sum - sum; - auto compensations = ((rhs.sum - rhs_compensated) + (sum - (raw_sum - rhs_compensated))) + compensation + rhs.compensation; - sum = raw_sum + compensations; - compensation = compensations - (sum - raw_sum); + mergeImpl(sum, compensation, rhs.sum, rhs.compensation); } void write(WriteBuffer & buf) const @@ -141,6 +210,13 @@ public: this->data(place).add(column.getData()[row_num]); } + /// Vectorized version when there is no GROUP BY keys. 
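+ /// The whole batch is accumulated into a single aggregation state, so the per-row
+ /// virtual add() call is replaced by one call to the unrolled addMany() above.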
+ void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *) const override + { + const auto & column = static_cast(*columns[0]); + this->data(place).addMany(column.getData().data(), batch_size); + } + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); From 719a3b5a2c61101cd5e6207e61ab1d1c90c04fb9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 May 2020 07:49:51 +0300 Subject: [PATCH 015/120] Performance improvement for "sum" over nullable --- .../AggregateFunctionNull.h | 11 ++- src/AggregateFunctions/AggregateFunctionSum.h | 81 +++++++++++++++++-- src/AggregateFunctions/IAggregateFunction.h | 13 +++ 3 files changed, 98 insertions(+), 7 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index e5309e1300a..10a9f207e93 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -194,13 +194,22 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { const ColumnNullable * column = assert_cast(columns[0]); + const IColumn * nested_column = &column->getNestedColumn(); if (!column->isNullAt(row_num)) { this->setFlag(place); - const IColumn * nested_column = &column->getNestedColumn(); this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena); } } + + void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override + { + const ColumnNullable * column = assert_cast(columns[0]); + const IColumn * nested_column = &column->getNestedColumn(); + const UInt8 * null_map = column->getNullMapData().data(); + + this->nested_function->addBatchSinglePlaceNotNull(batch_size, this->nestedPlace(place), &nested_column, null_map, arena); + } }; diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index b500fe91653..52431060aea 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -20,16 +20,17 @@ struct AggregateFunctionSumData { T sum{}; - void add(T value) + void ALWAYS_INLINE add(T value) { sum += value; } /// Vectorized version template - void addMany(const Value * __restrict ptr, size_t count) + void NO_INLINE addMany(const Value * __restrict ptr, size_t count) { /// Compiler cannot unroll this loop, do it manually. + /// (at least for floats, most likely due to the lack of -fassociative-math) /// Something around the number of SSE registers * the number of elements fit in register. 
constexpr size_t unroll_count = 128 / sizeof(T); @@ -55,6 +56,36 @@ struct AggregateFunctionSumData } } + template + void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count) + { + constexpr size_t unroll_count = 128 / sizeof(T); + T partial_sums[unroll_count]{}; + + const auto * end = ptr + count; + const auto * unrolled_end = ptr + (count / unroll_count * unroll_count); + + while (ptr < unrolled_end) + { + for (size_t i = 0; i < unroll_count; ++i) + if (!null_map[i]) + partial_sums[i] += ptr[i]; + ptr += unroll_count; + null_map += unroll_count; + } + + for (size_t i = 0; i < unroll_count; ++i) + sum += partial_sums[i]; + + while (ptr < end) + { + if (!*null_map) + sum += *ptr; + ++ptr; + ++null_map; + } + } + void merge(const AggregateFunctionSumData & rhs) { sum += rhs.sum; @@ -86,7 +117,7 @@ struct AggregateFunctionSumKahanData T compensation{}; template - ALWAYS_INLINE void addImpl(Value value, T & out_sum, T & out_compensation) + void ALWAYS_INLINE addImpl(Value value, T & out_sum, T & out_compensation) { auto compensated_value = value - out_compensation; auto new_sum = out_sum + compensated_value; @@ -94,16 +125,16 @@ struct AggregateFunctionSumKahanData out_sum = new_sum; } - void add(T value) + void ALWAYS_INLINE add(T value) { addImpl(value, sum, compensation); } /// Vectorized version template - void addMany(const Value * __restrict ptr, size_t count) + void NO_INLINE addMany(const Value * __restrict ptr, size_t count) { - constexpr size_t unroll_count = 4; // 128 / sizeof(T); + constexpr size_t unroll_count = 4; T partial_sums[unroll_count]{}; T partial_compensations[unroll_count]{}; @@ -127,6 +158,37 @@ struct AggregateFunctionSumKahanData } } + template + void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count) + { + constexpr size_t unroll_count = 4; + T partial_sums[unroll_count]{}; + T partial_compensations[unroll_count]{}; + + const auto * end = ptr + count; + const auto * unrolled_end = ptr + (count / unroll_count * unroll_count); + + while (ptr < unrolled_end) + { + for (size_t i = 0; i < unroll_count; ++i) + if (!null_map[i]) + addImpl(ptr[i], partial_sums[i], partial_compensations[i]); + ptr += unroll_count; + null_map += unroll_count; + } + + for (size_t i = 0; i < unroll_count; ++i) + mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]); + + while (ptr < end) + { + if (!*null_map) + addImpl(*ptr, sum, compensation); + ++ptr; + ++null_map; + } + } + void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation) { auto raw_sum = to_sum + from_sum; @@ -217,6 +279,13 @@ public: this->data(place).addMany(column.getData().data(), batch_size); } + void addBatchSinglePlaceNotNull( + size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *) const override + { + const auto & column = static_cast(*columns[0]); + this->data(place).addManyNotNull(column.getData().data(), null_map, batch_size); + } + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index ad074feffc5..1870eee07b8 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -145,6 +145,11 @@ public: */ virtual void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn 
** columns, Arena * arena) const = 0; + /** The same for single place when need to aggregate only filtered data. + */ + virtual void addBatchSinglePlaceNotNull( + size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const = 0; + /** In addition to addBatch, this method collects multiple rows of arguments into array "places" * as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and * -Array combinator. It might also be used generally to break data dependency when array @@ -201,6 +206,14 @@ public: static_cast(this)->add(place, columns, i, arena); } + void addBatchSinglePlaceNotNull( + size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const override + { + for (size_t i = 0; i < batch_size; ++i) + if (!null_map[i]) + static_cast(this)->add(place, columns, i, arena); + } + void addBatchArray( size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena) const override From db422434ff3324a825f2507e41ce52448c1f0489 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 May 2020 07:55:06 +0300 Subject: [PATCH 016/120] Add performance test --- tests/performance/sum.xml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/performance/sum.xml diff --git a/tests/performance/sum.xml b/tests/performance/sum.xml new file mode 100644 index 00000000000..9bee2a580c3 --- /dev/null +++ b/tests/performance/sum.xml @@ -0,0 +1,19 @@ + + SELECT sum(number) FROM numbers(100000000) + SELECT sum(toUInt32(number)) FROM numbers(100000000) + SELECT sum(toUInt16(number)) FROM numbers(100000000) + SELECT sum(toUInt8(number)) FROM numbers(100000000) + SELECT sum(toFloat32(number)) FROM numbers(100000000) + SELECT sum(toFloat64(number)) FROM numbers(100000000) + SELECT sumKahan(toFloat32(number)) FROM numbers(100000000) + SELECT sumKahan(toFloat64(number)) FROM numbers(100000000) + + SELECT sum(toNullable(number)) FROM numbers(100000000) + SELECT sum(toNullable(toUInt32(number))) FROM numbers(100000000) + SELECT sum(toNullable(toUInt16(number))) FROM numbers(100000000) + SELECT sum(toNullable(toUInt8(number))) FROM numbers(100000000) + SELECT sum(toNullable(toFloat32(number))) FROM numbers(100000000) + SELECT sum(toNullable(toFloat64(number))) FROM numbers(100000000) + SELECT sumKahan(toNullable(toFloat32(number))) FROM numbers(100000000) + SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(100000000) + From f5072aab18b692d44cad29af71f347e6bfd06fc1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 May 2020 08:01:55 +0300 Subject: [PATCH 017/120] Fix error --- src/AggregateFunctions/AggregateFunctionNull.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index 10a9f207e93..55d610207f1 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -53,13 +54,13 @@ protected: static void initFlag(AggregateDataPtr place) noexcept { - if (result_is_nullable) + if constexpr (result_is_nullable) place[0] = 0; } static void setFlag(AggregateDataPtr place) noexcept { - if (result_is_nullable) + if constexpr (result_is_nullable) place[0] = 1; } @@ -72,7 +73,7 @@ public: 
AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) : IAggregateFunctionHelper(arguments, params), nested_function{nested_function_} { - if (result_is_nullable) + if constexpr (result_is_nullable) prefix_size = nested_function->alignOfData(); else prefix_size = 0; @@ -128,7 +129,7 @@ public: void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { bool flag = getFlag(place); - if (result_is_nullable) + if constexpr (result_is_nullable) writeBinary(flag, buf); if (flag) nested_function->serialize(nestedPlace(place), buf); @@ -137,7 +138,7 @@ public: void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { bool flag = 1; - if (result_is_nullable) + if constexpr (result_is_nullable) readBinary(flag, buf); if (flag) { @@ -148,7 +149,7 @@ public: void insertResultInto(AggregateDataPtr place, IColumn & to) const override { - if (result_is_nullable) + if constexpr (result_is_nullable) { ColumnNullable & to_concrete = assert_cast(to); if (getFlag(place)) @@ -209,6 +210,10 @@ public: const UInt8 * null_map = column->getNullMapData().data(); this->nested_function->addBatchSinglePlaceNotNull(batch_size, this->nestedPlace(place), &nested_column, null_map, arena); + + if constexpr (result_is_nullable) + if (!memoryIsByte(null_map, batch_size, 1)) + this->setFlag(place); } }; From 65e387f1a3496e0fd3715574ce25a5bf3e8b1820 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 May 2020 08:03:58 +0300 Subject: [PATCH 018/120] Added a test --- tests/queries/0_stateless/01281_sum_nullable.reference | 6 ++++++ tests/queries/0_stateless/01281_sum_nullable.sql | 6 ++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/01281_sum_nullable.reference create mode 100644 tests/queries/0_stateless/01281_sum_nullable.sql diff --git a/tests/queries/0_stateless/01281_sum_nullable.reference b/tests/queries/0_stateless/01281_sum_nullable.reference new file mode 100644 index 00000000000..be8b67fd296 --- /dev/null +++ b/tests/queries/0_stateless/01281_sum_nullable.reference @@ -0,0 +1,6 @@ +45 +45 +45 +1 +45 +\N diff --git a/tests/queries/0_stateless/01281_sum_nullable.sql b/tests/queries/0_stateless/01281_sum_nullable.sql new file mode 100644 index 00000000000..35d593da75d --- /dev/null +++ b/tests/queries/0_stateless/01281_sum_nullable.sql @@ -0,0 +1,6 @@ +SELECT sumKahan(toFloat64(number)) FROM numbers(10); +SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(10); +SELECT sum(toNullable(number)) FROM numbers(10); +SELECT sum(x) FROM (SELECT 1 AS x UNION ALL SELECT NULL); +SELECT sum(number) FROM numbers(10); +SELECT sum(number < 1000 ? 
NULL : number) FROM numbers(10); From f54435e7fda3d377c21c2cbecd49345044dc14ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 May 2020 11:08:55 +0300 Subject: [PATCH 019/120] Fix clang-tidy --- programs/benchmark/Benchmark.cpp | 2 +- programs/client/Client.cpp | 4 +- programs/client/ConnectionParameters.cpp | 2 +- programs/copier/ClusterCopier.cpp | 4 +- programs/copier/ClusterCopierApp.cpp | 2 +- programs/copier/Internals.cpp | 4 +- programs/obfuscator/Obfuscator.cpp | 4 +- programs/server/HTTPHandler.cpp | 4 +- programs/server/HTTPHandlerFactory.cpp | 133 ++++++++++-------- programs/server/ReplicasStatusHandler.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 12 +- .../main.cpp | 2 +- 12 files changed, 98 insertions(+), 77 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index ce59f5cac7f..91c43160e0f 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -289,7 +289,7 @@ private: connection_entries.emplace_back(std::make_shared( connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)))); - pool.scheduleOrThrowOnError(std::bind(&Benchmark::thread, this, connection_entries)); + pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); }); } } catch (...) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 53947283faf..d6cac7a7b02 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -485,7 +485,7 @@ private: history_file = config().getString("history_file"); else { - auto history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE"); + auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE"); if (history_file_from_env) history_file = history_file_from_env; else if (!home_path.empty()) @@ -1480,7 +1480,7 @@ private: "\033[1m↗\033[0m", }; - auto indicator = indicators[increment % 8]; + const char * indicator = indicators[increment % 8]; if (!send_logs && written_progress_chars) message << '\r'; diff --git a/programs/client/ConnectionParameters.cpp b/programs/client/ConnectionParameters.cpp index 50cac3b7800..f0ef3ae5694 100644 --- a/programs/client/ConnectionParameters.cpp +++ b/programs/client/ConnectionParameters.cpp @@ -51,7 +51,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati { std::string prompt{"Password for user (" + user + "): "}; char buf[1000] = {}; - if (auto result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) + if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) password = result; } diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 8df55b63407..45cfb4963a3 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -442,7 +442,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons /// Collect all shards that contain partition piece number piece_number. 
Strings piece_status_paths; - for (auto & shard : shards_with_partition) + for (const auto & shard : shards_with_partition) { ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second; ShardPartitionPiece & shard_partition_piece = task_shard_partition.pieces[piece_number]; @@ -702,7 +702,7 @@ ASTPtr ClusterCopier::removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, new_columns); - if (auto indices = query_ast->as()->columns_list->indices) + if (const auto * indices = query_ast->as()->columns_list->indices) new_columns_list->set(new_columns_list->indices, indices->clone()); new_query.replace(new_query.columns_list, new_columns_list); diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index 52a37c75c72..5ab6064f0f3 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -94,7 +94,7 @@ void ClusterCopierApp::mainImpl() StatusFile status_file(process_path + "/status"); ThreadStatus thread_status; - auto log = &logger(); + auto * log = &logger(); LOG_INFO(log, "Starting clickhouse-copier (" << "id " << process_id << ", " << "host_id " << host_id << ", " diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 545df2e779c..0613381a763 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -260,7 +260,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std return res; res.is_remote = 1; - for (auto & replica : replicas) + for (const auto & replica : replicas) { if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name))) { @@ -270,7 +270,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std } res.hostname_difference = std::numeric_limits::max(); - for (auto & replica : replicas) + for (const auto & replica : replicas) { size_t difference = getHostNameDifference(local_hostname, replica.host_name); res.hostname_difference = std::min(difference, res.hostname_difference); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 8b5a8c73ca4..f3ac0549573 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -937,10 +937,10 @@ public: if (typeid_cast(&data_type)) return std::make_unique(seed); - if (auto type = typeid_cast(&data_type)) + if (const auto * type = typeid_cast(&data_type)) return std::make_unique(get(*type->getNestedType(), seed, markov_model_params)); - if (auto type = typeid_cast(&data_type)) + if (const auto * type = typeid_cast(&data_type)) return std::make_unique(get(*type->getNestedType(), seed, markov_model_params)); throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED); diff --git a/programs/server/HTTPHandler.cpp b/programs/server/HTTPHandler.cpp index bceeec306cf..701b5f7d735 100644 --- a/programs/server/HTTPHandler.cpp +++ b/programs/server/HTTPHandler.cpp @@ -195,7 +195,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output) std::vector read_buffers; std::vector read_buffers_raw_ptr; - auto cascade_buffer = typeid_cast(used_output.out_maybe_delayed_and_compressed.get()); + auto * cascade_buffer = typeid_cast(used_output.out_maybe_delayed_and_compressed.get()); if (!cascade_buffer) throw Exception("Expected CascadeWriteBuffer", ErrorCodes::LOGICAL_ERROR); @@ -383,7 +383,7 @@ void HTTPHandler::processQuery( { auto push_memory_buffer_and_continue = 
[next_buffer = used_output.out_maybe_compressed] (const WriteBufferPtr & prev_buf) { - auto prev_memory_buffer = typeid_cast(prev_buf.get()); + auto * prev_memory_buffer = typeid_cast(prev_buf.get()); if (!prev_memory_buffer) throw Exception("Expected MemoryWriteBuffer", ErrorCodes::LOGICAL_ERROR); diff --git a/programs/server/HTTPHandlerFactory.cpp b/programs/server/HTTPHandlerFactory.cpp index 91cf9ddf25b..4caea1e92e8 100644 --- a/programs/server/HTTPHandlerFactory.cpp +++ b/programs/server/HTTPHandlerFactory.cpp @@ -28,7 +28,7 @@ HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string & { } -Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request) // override +Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request) { LOG_TRACE(log, "HTTP Request for " << name << ". " << "Method: " << request.getMethod() @@ -40,7 +40,7 @@ Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHand for (auto & handler_factory : child_factories) { - auto handler = handler_factory->createRequestHandler(request); + auto * handler = handler_factory->createRequestHandler(request); if (handler != nullptr) return handler; } @@ -72,80 +72,96 @@ HTTPRequestHandlerFactoryMain::TThis * HTTPRequestHandlerFactoryMain::addHandler static inline auto createHandlersFactoryFromConfig(IServer & server, const std::string & name, const String & prefix) { - auto main_handler_factory = new HTTPRequestHandlerFactoryMain(name); + auto main_handler_factory = std::make_unique(name); - try + Poco::Util::AbstractConfiguration::Keys keys; + server.config().keys(prefix, keys); + + for (const auto & key : keys) { - Poco::Util::AbstractConfiguration::Keys keys; - server.config().keys(prefix, keys); + if (!startsWith(key, "rule")) + throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - for (const auto & key : keys) - { - if (!startsWith(key, "rule")) - throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", ""); - const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", ""); - - if (handler_type == "static") - main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key)); - else if (handler_type == "dynamic_query_handler") - main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key)); - else if (handler_type == "predefined_query_handler") - main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key)); - else if (handler_type.empty()) - throw Exception("Handler type in config is not specified here: " + - prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER); - else - throw Exception("Unknown handler type '" + handler_type +"' in config here: " + - prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER); - } - - return main_handler_factory; - } - catch (...) - { - delete main_handler_factory; - throw; + if (handler_type == "static") + main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." 
+ key)); + else if (handler_type == "dynamic_query_handler") + main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key)); + else if (handler_type == "predefined_query_handler") + main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key)); + else if (handler_type.empty()) + throw Exception("Handler type in config is not specified here: " + + prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER); + else + throw Exception("Unknown handler type '" + handler_type +"' in config here: " + + prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER); } + + return main_handler_factory.release(); } static const auto ping_response_expression = "Ok.\n"; static const auto root_response_expression = "config://http_server_default_response"; -static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics) +static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory( + IServer & server, const std::string & name, AsynchronousMetrics & async_metrics) { if (server.config().has("http_handlers")) return createHandlersFactoryFromConfig(server, name, "http_handlers"); else { - auto factory = (new HTTPRequestHandlerFactoryMain(name)) - ->addHandler((new HandlingRuleHTTPHandlerFactory(server, root_response_expression)) - ->attachStrictPath("/")->allowGetAndHeadRequest()) - ->addHandler((new HandlingRuleHTTPHandlerFactory(server, ping_response_expression)) - ->attachStrictPath("/ping")->allowGetAndHeadRequest()) - ->addHandler((new HandlingRuleHTTPHandlerFactory(server)) - ->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest()) - ->addHandler((new HandlingRuleHTTPHandlerFactory(server, "query"))->allowPostAndGetParamsRequest()); + auto factory = std::make_unique(name); + + auto root_handler = std::make_unique>(server, root_response_expression); + root_handler->attachStrictPath("/")->allowGetAndHeadRequest(); + factory->addHandler(root_handler.release()); + + auto ping_handler = std::make_unique>(server, ping_response_expression); + ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest(); + factory->addHandler(ping_handler.release()); + + auto replicas_status_handler = std::make_unique>(server); + replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); + factory->addHandler(replicas_status_handler.release()); + + auto query_handler = std::make_unique>(server, "query"); + query_handler->allowPostAndGetParamsRequest(); + factory->addHandler(query_handler.release()); if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0) - factory->addHandler((new HandlingRuleHTTPHandlerFactory( - server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics))) - ->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest()); + { + auto prometheus_handler = std::make_unique>( + server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)); + prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest(); + factory->addHandler(prometheus_handler.release()); + } - return factory; + return factory.release(); } } static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name) { - return (new HTTPRequestHandlerFactoryMain(name)) - 
->addHandler((new HandlingRuleHTTPHandlerFactory(server, root_response_expression)) - ->attachStrictPath("/")->allowGetAndHeadRequest()) - ->addHandler((new HandlingRuleHTTPHandlerFactory(server, ping_response_expression)) - ->attachStrictPath("/ping")->allowGetAndHeadRequest()) - ->addHandler((new HandlingRuleHTTPHandlerFactory(server)) - ->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest()) - ->addHandler((new HandlingRuleHTTPHandlerFactory(server))->allowPostAndGetParamsRequest()); + auto factory = std::make_unique(name); + + auto root_handler = std::make_unique>(server, root_response_expression); + root_handler->attachStrictPath("/")->allowGetAndHeadRequest(); + factory->addHandler(root_handler.release()); + + auto ping_handler = std::make_unique>(server, ping_response_expression); + ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest(); + factory->addHandler(ping_handler.release()); + + auto replicas_status_handler = std::make_unique>(server); + replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); + factory->addHandler(replicas_status_handler.release()); + + auto main_handler = std::make_unique>(server); + main_handler->allowPostAndGetParamsRequest(); + factory->addHandler(main_handler.release()); + + return factory.release(); } Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name) @@ -155,9 +171,14 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory") return createInterserverHTTPHandlerFactory(server, name); else if (name == "PrometheusHandler-factory") - return (new HTTPRequestHandlerFactoryMain(name))->addHandler((new HandlingRuleHTTPHandlerFactory( - server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics))) - ->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest()); + { + auto factory = std::make_unique(name); + auto handler = std::make_unique>( + server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)); + handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest(); + factory->addHandler(handler.release()); + return factory.release(); + } throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR); } diff --git a/programs/server/ReplicasStatusHandler.cpp b/programs/server/ReplicasStatusHandler.cpp index 2f2aa5953b6..f2d1ffe2ee5 100644 --- a/programs/server/ReplicasStatusHandler.cpp +++ b/programs/server/ReplicasStatusHandler.cpp @@ -46,7 +46,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next()) { - auto & table = iterator->table(); + const auto & table = iterator->table(); StorageReplicatedMergeTree * table_replicated = dynamic_cast(table.get()); if (!table_replicated) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 80e7e033525..2e71bc902e9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -331,18 +331,13 @@ void ReplicatedMergeTreeQueue::updateTimesInZooKeeper( void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeeper, 
LogEntryPtr & entry) { - auto code = zookeeper->tryRemove(replica_path + "/queue/" + entry->znode_name); - - if (code) - LOG_ERROR(log, "Couldn't remove " << replica_path << "/queue/" << entry->znode_name << ": " - << zkutil::ZooKeeper::error2string(code) << ". This shouldn't happen often."); - std::optional min_unprocessed_insert_time_changed; std::optional max_processed_insert_time_changed; bool found = false; size_t queue_size = 0; + /// First remove from memory then from ZooKeeper { std::unique_lock lock(state_mutex); @@ -372,6 +367,11 @@ void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeep notifySubscribers(queue_size); + auto code = zookeeper->tryRemove(replica_path + "/queue/" + entry->znode_name); + if (code) + LOG_ERROR(log, "Couldn't remove " << replica_path << "/queue/" << entry->znode_name << ": " + << zkutil::ZooKeeper::error2string(code) << ". This shouldn't happen often."); + updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, max_processed_insert_time_changed); } diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp index a896129f915..8550675cb9e 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -199,7 +199,7 @@ void setCurrentBlockNumber(zkutil::ZooKeeper & zk, const std::string & path, Int create_ephemeral_nodes(1); /// Firstly try to create just a single node. /// Create other nodes in batches of 50 nodes. - while (current_block_number + 50 <= new_current_block_number) + while (current_block_number + 50 <= new_current_block_number) // NOLINT: clang-tidy thinks that the loop is infinite create_ephemeral_nodes(50); create_ephemeral_nodes(new_current_block_number - current_block_number); From 6af9c7847667bd0db95af6597de23d59972a3d84 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 18 May 2020 16:51:01 +0300 Subject: [PATCH 020/120] remove data on DROP DATABASE --- src/Databases/DatabaseMemory.cpp | 7 +++++++ src/Databases/DatabaseMemory.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 417761f81e7..84fec6bcc22 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -84,4 +85,10 @@ UUID DatabaseMemory::tryGetTableUUID(const String & table_name) const return UUIDHelpers::Nil; } +void DatabaseMemory::drop(const Context & context) +{ + /// Remove data on explicit DROP DATABASE + std::filesystem::remove_all(context.getPath() + data_path); +} + } diff --git a/src/Databases/DatabaseMemory.h b/src/Databases/DatabaseMemory.h index 29a9abc9d75..ad34c4d9097 100644 --- a/src/Databases/DatabaseMemory.h +++ b/src/Databases/DatabaseMemory.h @@ -46,6 +46,8 @@ public: UUID tryGetTableUUID(const String & table_name) const override; + void drop(const Context & context) override; + private: String data_path; using NameToASTCreate = std::unordered_map; From 0e48cb1f802995933b5217d94be40f036a883c2f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 May 2020 16:55:07 +0300 Subject: [PATCH 021/120] Removed allow_processors flag from executeQuery(). 
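Callers that need a BlockInputStream now obtain it through the new BlockIO::getInputStream(), which returns the plain stream when it is set and otherwise wraps the query pipeline into a PipelineExecutingBlockInputStream. A rough sketch of the resulting call-site change (names taken from the dictionary source changed below):

    /// before: processors had to be disabled explicitly to get a stream
    auto stream = executeQuery(load_all_query, context, true, QueryProcessingStage::Complete, false, false).in;
    /// after: the pipeline is converted to a stream on demand
    auto stream = executeQuery(load_all_query, context, true).getInputStream();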
--- src/DataStreams/BlockIO.cpp | 22 ++++ src/DataStreams/BlockIO.h | 3 + .../ClickHouseDictionarySource.cpp | 23 ++-- .../InterpreterKillQueryQuery.cpp | 5 +- src/Interpreters/executeQuery.cpp | 12 +- src/Interpreters/executeQuery.h | 3 +- .../PipelineExecutingBlockInputStream.cpp | 106 ++++++++++++++++++ .../PipelineExecutingBlockInputStream.h | 39 +++++++ .../Executors/PullingPipelineExecutor.cpp | 5 + .../Executors/PullingPipelineExecutor.h | 3 + 10 files changed, 198 insertions(+), 23 deletions(-) create mode 100644 src/Processors/Executors/PipelineExecutingBlockInputStream.cpp create mode 100644 src/Processors/Executors/PipelineExecutingBlockInputStream.h diff --git a/src/DataStreams/BlockIO.cpp b/src/DataStreams/BlockIO.cpp index 60a0b415237..150995962bf 100644 --- a/src/DataStreams/BlockIO.cpp +++ b/src/DataStreams/BlockIO.cpp @@ -1,9 +1,31 @@ #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +BlockInputStreamPtr BlockIO::getInputStream() +{ + if (out) + throw Exception("Cannot get input stream from BlockIO because output stream is not empty", + ErrorCodes::LOGICAL_ERROR); + + if (in) + return in; + + if (pipeline.initialized()) + return std::make_shared(std::move(pipeline)); + + throw Exception("Cannot get input stream from BlockIO because query pipeline was not initialized", + ErrorCodes::LOGICAL_ERROR); +} + void BlockIO::reset() { /** process_list_entry should be destroyed after in, after out and after pipeline, diff --git a/src/DataStreams/BlockIO.h b/src/DataStreams/BlockIO.h index 08a5f819fd6..d4733e6aebe 100644 --- a/src/DataStreams/BlockIO.h +++ b/src/DataStreams/BlockIO.h @@ -50,6 +50,9 @@ struct BlockIO exception_callback(); } + /// Returns in or converts pipeline to stream. Throws if out is not empty. 
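+ /// Returns 'in' as is when it is set; otherwise wraps the pipeline into a
+ /// PipelineExecutingBlockInputStream. Throws LOGICAL_ERROR if neither is initialized.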
+ BlockInputStreamPtr getInputStream(); + private: void reset(); }; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 7bf147dd539..aefde10a873 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -131,10 +131,10 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadAll() */ if (is_local) { - BlockIO res = executeQuery(load_all_query, context, true, QueryProcessingStage::Complete, false, false); + auto stream = executeQuery(load_all_query, context, true).getInputStream(); /// FIXME res.in may implicitly use some objects owned be res, but them will be destructed after return - res.in = std::make_shared(res.in, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position); - return res.in; + stream = std::make_shared(stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position); + return stream; } return std::make_shared(pool, load_all_query, sample_block, context); } @@ -144,9 +144,9 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadUpdatedAll() std::string load_update_query = getUpdateFieldAndDate(); if (is_local) { - auto res = executeQuery(load_update_query, context, true, QueryProcessingStage::Complete, false, false); - res.in = std::make_shared(res.in, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position); - return res.in; + auto stream = executeQuery(load_update_query, context, true).getInputStream(); + stream = std::make_shared(stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position); + return stream; } return std::make_shared(pool, load_update_query, sample_block, context); } @@ -191,10 +191,10 @@ BlockInputStreamPtr ClickHouseDictionarySource::createStreamForSelectiveLoad(con { if (is_local) { - auto res = executeQuery(query, context, true, QueryProcessingStage::Complete, false, false); - res.in = std::make_shared( - res.in, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position); - return res.in; + auto res = executeQuery(query, context, true).getInputStream(); + res = std::make_shared( + res, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position); + return res; } return std::make_shared(pool, query, sample_block, context); @@ -206,8 +206,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re if (is_local) { Context query_context = context; - auto input_block = executeQuery(request, query_context, true, - QueryProcessingStage::Complete, false, false).in; + auto input_block = executeQuery(request, query_context, true).getInputStream(); return readInvalidateQuery(*input_block); } else diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 23f39ab3fc5..39e432195fd 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -302,8 +302,9 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S if (where_expression) select_query += " WHERE " + queryToString(where_expression); - BlockIO block_io = executeQuery(select_query, context.getGlobalContext(), true, QueryProcessingStage::Complete, false, false); - Block res = block_io.in->read(); + BlockIO block_io = executeQuery(select_query, context.getGlobalContext(), true); + auto stream = block_io.getInputStream(); + Block res = stream->read(); if (res && block_io.in->read()) throw Exception("Expected one block from input stream", 
ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 4d609395c3a..8c6e5cea73c 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -196,8 +196,7 @@ static std::tuple executeQueryImpl( bool internal, QueryProcessingStage::Enum stage, bool has_query_tail, - ReadBuffer * istr, - bool allow_processors) + ReadBuffer * istr) { time_t current_time = time(nullptr); @@ -317,7 +316,7 @@ static std::tuple executeQueryImpl( context.resetInputCallbacks(); auto interpreter = InterpreterFactory::get(ast, context, stage); - bool use_processors = allow_processors && interpreter->canExecuteWithProcessors(); + bool use_processors = interpreter->canExecuteWithProcessors(); std::shared_ptr quota; if (!interpreter->ignoreQuota()) @@ -580,13 +579,12 @@ BlockIO executeQuery( Context & context, bool internal, QueryProcessingStage::Enum stage, - bool may_have_embedded_data, - bool allow_processors) + bool may_have_embedded_data) { ASTPtr ast; BlockIO streams; std::tie(ast, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context, - internal, stage, !may_have_embedded_data, nullptr, allow_processors); + internal, stage, !may_have_embedded_data, nullptr); if (const auto * ast_query_with_output = dynamic_cast(ast.get())) { @@ -647,7 +645,7 @@ void executeQuery( ASTPtr ast; BlockIO streams; - std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, may_have_tail, &istr, true); + std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, may_have_tail, &istr); auto & pipeline = streams.pipeline; diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index b6cb56b9e24..22b353488ad 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -42,8 +42,7 @@ BlockIO executeQuery( Context & context, /// DB, tables, data types, storage engines, functions, aggregate functions... bool internal = false, /// If true, this query is caused by another query and thus needn't be registered in the ProcessList. QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, /// To which stage the query must be executed. - bool may_have_embedded_data = false, /// If insert query may have embedded data - bool allow_processors = true /// If can use processors pipeline + bool may_have_embedded_data = false /// If insert query may have embedded data ); diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp new file mode 100644 index 00000000000..e7846f31bd5 --- /dev/null +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp @@ -0,0 +1,106 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +PipelineExecutingBlockInputStream::PipelineExecutingBlockInputStream(QueryPipeline pipeline_) + : pipeline(std::make_unique(std::move(pipeline_))) +{ +} + +PipelineExecutingBlockInputStream::~PipelineExecutingBlockInputStream() = default; + +Block PipelineExecutingBlockInputStream::getHeader() const +{ + return executor ? 
executor->getHeader() + : pipeline->getHeader(); +} + +void PipelineExecutingBlockInputStream::readPrefixImpl() +{ + executor = std::make_unique(*pipeline); +} + +Block PipelineExecutingBlockInputStream::readImpl() +{ + Block block; + while (executor->pull(block)) + { + if (block) + return block; + } + + return {}; +} + +inline static void throwIfExecutionStarted(bool is_execution_started, const char * method) +{ + if (is_execution_started) + throw Exception(String("Cannot call ") + method + + " for PipelineExecutingBlockInputStream because execution was started", + ErrorCodes::LOGICAL_ERROR); +} + +inline static void throwIfExecutionNotStarted(bool is_execution_started, const char * method) +{ + if (!is_execution_started) + throw Exception(String("Cannot call ") + method + + " for PipelineExecutingBlockInputStream because execution was not started", + ErrorCodes::LOGICAL_ERROR); +} + +void PipelineExecutingBlockInputStream::cancel(bool kill) +{ + throwIfExecutionNotStarted(executor != nullptr, "cancel"); + IBlockInputStream::cancel(kill); + executor->cancel(); +} + + +void PipelineExecutingBlockInputStream::setProgressCallback(const ProgressCallback & callback) +{ + throwIfExecutionStarted(executor != nullptr, "setProgressCallback"); + pipeline->setProgressCallback(callback); +} + +void PipelineExecutingBlockInputStream::setProcessListElement(QueryStatus * elem) +{ + throwIfExecutionStarted(executor != nullptr, "setProcessListElement"); + IBlockInputStream::setProcessListElement(elem); + pipeline->setProcessListElement(elem); +} + +void PipelineExecutingBlockInputStream::setLimits(const IBlockInputStream::LocalLimits & limits_) +{ + throwIfExecutionStarted(executor != nullptr, "setLimits"); + + if (limits_.mode == LimitsMode::LIMITS_TOTAL) + throw Exception("Total limits are not supported by PipelineExecutingBlockInputStream", + ErrorCodes::LOGICAL_ERROR); + + /// Local limits may be checked by IBlockInputStream itself. + IBlockInputStream::setLimits(limits_); +} + +void PipelineExecutingBlockInputStream::setQuota(const std::shared_ptr &) +{ + throw Exception("Quota is not supported by PipelineExecutingBlockInputStream", + ErrorCodes::LOGICAL_ERROR); +} + +void PipelineExecutingBlockInputStream::addTotalRowsApprox(size_t) +{ + throw Exception("Progress is not supported by PipelineExecutingBlockInputStream", + ErrorCodes::LOGICAL_ERROR); +} + + +} diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.h b/src/Processors/Executors/PipelineExecutingBlockInputStream.h new file mode 100644 index 00000000000..773332f69c8 --- /dev/null +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.h @@ -0,0 +1,39 @@ +#pragma once +#include + +namespace DB +{ + +class QueryPipeline; +class PullingPipelineExecutor; + +/// Implement IBlockInputStream from QueryPipeline. +/// It's a temporary wrapper. +class PipelineExecutingBlockInputStream : public IBlockInputStream +{ +public: + explicit PipelineExecutingBlockInputStream(QueryPipeline pipeline_); + ~PipelineExecutingBlockInputStream(); + + String getName() const override { return "PipelineExecuting"; } + Block getHeader() const override; + + void cancel(bool kill) override; + + /// Implement IBlockInputStream methods via QueryPipeline. 
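+ /// Progress callback, process list element and limits may only be set before execution
+ /// starts, otherwise a LOGICAL_ERROR is thrown; quota and total-rows estimation are not supported.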
+ void setProgressCallback(const ProgressCallback & callback) final; + void setProcessListElement(QueryStatus * elem) final; + void setLimits(const LocalLimits & limits_) final; + void setQuota(const std::shared_ptr & quota_) final; + void addTotalRowsApprox(size_t value) final; + +protected: + void readPrefixImpl() override; + Block readImpl() override; + +private: + std::unique_ptr executor; + std::unique_ptr pipeline; +}; + +} diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index c34195a0793..223c22e59db 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -52,6 +52,11 @@ PullingPipelineExecutor::~PullingPipelineExecutor() } } +const Block & PullingPipelineExecutor::getHeader() const +{ + return lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader(); +} + static void threadFunction(PullingPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) { if (thread_group) diff --git a/src/Processors/Executors/PullingPipelineExecutor.h b/src/Processors/Executors/PullingPipelineExecutor.h index f3b06fc618a..7a093fe0022 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.h +++ b/src/Processors/Executors/PullingPipelineExecutor.h @@ -22,6 +22,9 @@ public: explicit PullingPipelineExecutor(QueryPipeline & pipeline_); ~PullingPipelineExecutor(); + /// Get structure of returned block or chunk. + const Block & getHeader() const; + /// Methods return false if query is finished. /// If milliseconds > 0, returns empty object and `true` after timeout exceeded. /// You can use any pull method. From deeae6645502cab4fef80250ed3c9ca6d586ddcc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 May 2020 18:52:00 +0300 Subject: [PATCH 022/120] Fix clang build. --- src/Processors/Executors/PipelineExecutingBlockInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.h b/src/Processors/Executors/PipelineExecutingBlockInputStream.h index 773332f69c8..781396159e6 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.h +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.h @@ -13,7 +13,7 @@ class PipelineExecutingBlockInputStream : public IBlockInputStream { public: explicit PipelineExecutingBlockInputStream(QueryPipeline pipeline_); - ~PipelineExecutingBlockInputStream(); + ~PipelineExecutingBlockInputStream() override; String getName() const override { return "PipelineExecuting"; } Block getHeader() const override; From 0ce9afd1e3ee53539267d9ce6893710e22ed9b46 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 May 2020 19:19:43 +0300 Subject: [PATCH 023/120] Try fix tests. 
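Create the PullingPipelineExecutor lazily in readImpl() as well, so that reading still works when a caller pulls blocks without calling readPrefix() first.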
--- src/Processors/Executors/PipelineExecutingBlockInputStream.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp index e7846f31bd5..75324cb25b8 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp @@ -31,6 +31,9 @@ void PipelineExecutingBlockInputStream::readPrefixImpl() Block PipelineExecutingBlockInputStream::readImpl() { + if (!executor) + executor = std::make_unique(*pipeline); + Block block; while (executor->pull(block)) { From bdbfa08e024aac9db21c8399e28c18003138b724 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 18 May 2020 21:18:37 +0300 Subject: [PATCH 024/120] Truncate system.*_log by default in tests Enable it by default due to: - it is a sane default - avoids modifying docker images --- tests/clickhouse-test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 1a2735296e8..76a61e1d558 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -234,6 +234,11 @@ def run_tests_array(all_tests_with_params): clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE) clickhouse_proc.communicate("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite)) + if not args.no_system_log_cleanup: + for table in ['query_log', 'query_thread_log', 'trace_log', 'metric_log']: + clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + clickhouse_proc.communicate("TRUNCATE TABLE IF EXISTS system.{}".format(table)) + reference_file = os.path.join(suite_dir, name) + '.reference' stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout' stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr' @@ -564,6 +569,7 @@ if __name__ == '__main__': parser.add_argument('--stop', action='store_true', default=None, dest='stop', help='Stop on network errors') parser.add_argument('--order', default='desc', choices=['asc', 'desc', 'random'], help='Run order') parser.add_argument('--testname', action='store_true', default=None, dest='testname', help='Make query with test name before test run') + parser.add_argument('--no-system-log-cleanup', action='store_true', default=None, help='Do not cleanup system.*_log tables') parser.add_argument('--hung-check', action='store_true', default=False) parser.add_argument('--force-color', action='store_true', default=False) parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)') From 32071ba08096c12545104be46fe8595073e86006 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 18 May 2020 21:18:37 +0300 Subject: [PATCH 025/120] Do not add event_date/event_time condition for system.*_log Also some of them can be even more simplified, since the system.*_log are empty most of the time there should not be any conditions at all (i.e. `query` LIKE and so on). 
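Together with the previous change that truncates the system.*_log tables before each test, predicates such as "event_date >= yesterday()" and "event_time > now() - INTERVAL 5 MINUTE" no longer change the result and are redundant, so they are dropped from the test queries.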
--- .../00634_performance_introspection_and_logging.sh | 6 +++--- .../00933_test_fix_extra_seek_on_compressed_cache.sh | 2 +- .../0_stateless/00956_sensitive_data_masking.sh | 6 +++--- tests/queries/0_stateless/00974_query_profiler.sql | 4 ++-- .../0_stateless/00974_text_log_table_not_empty.sh | 2 +- .../01070_exception_code_in_query_log_table.sql | 2 +- tests/queries/0_stateless/01091_num_threads.sql | 12 ++++++------ tests/queries/0_stateless/01092_memory_profiler.sql | 2 +- tests/queries/0_stateless/01198_client_quota_key.sh | 2 +- .../0_stateless/01231_log_queries_min_type.sql | 6 +++--- 10 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh index 5173b5f5772..684287da761 100755 --- a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh +++ b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh @@ -47,7 +47,7 @@ SELECT threads_realtime >= threads_time_user_system_io, any(length(thread_ids)) >= 1 FROM - (SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE event_date >= today()-1 AND type=2 ORDER BY event_time DESC LIMIT 1) + (SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE type=2 ORDER BY event_time DESC LIMIT 1) ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV" # Check per-thread and per-query ProfileEvents consistency @@ -58,7 +58,7 @@ SELECT PN, PVq, PVt FROM SELECT PN, sum(PV) AS PVt FROM system.query_thread_log ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV - WHERE event_date >= today()-1 AND query_id='$query_id' + WHERE query_id='$query_id' GROUP BY PN ) js1 ANY INNER JOIN @@ -66,7 +66,7 @@ ANY INNER JOIN SELECT PN, PV AS PVq FROM system.query_log ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV - WHERE event_date >= today()-1 AND query_id='$query_id' + WHERE query_id='$query_id' ) js2 USING PN WHERE diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh index 1f7571a2404..b0fd9a70bd4 100755 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query_id="test-query-uncompresse sleep 1 $CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 2) AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1" +$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 2) ORDER BY event_time DESC LIMIT 1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS small_table" diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index 0f76c34eaff..c492fd35b89 
100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -95,7 +95,7 @@ echo 7 # and finally querylog $CLICKHOUSE_CLIENT \ --server_logs_file=/dev/null \ - --query="select * from system.query_log where event_time>now() - 10 and query like '%TOPSECRET%';" + --query="select * from system.query_log where query like '%TOPSECRET%';" rm -f $tmp_file >/dev/null 2>&1 @@ -117,8 +117,8 @@ sleep 0.1; echo 9 $CLICKHOUSE_CLIENT \ --server_logs_file=/dev/null \ - --query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE event_time>now() - 60 and message like '%find_me%'; - select * from system.text_log where event_time>now() - 60 and message like '%TOPSECRET=TOPSECRET%';" --ignore-error --multiquery + --query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE message like '%find_me%'; + select * from system.text_log where message like '%TOPSECRET=TOPSECRET%';" --ignore-error --multiquery echo 'finish' rm -f $tmp_file >/dev/null 2>&1 diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index 4d0b46dd51f..9e2723c67d8 100644 --- a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -5,7 +5,7 @@ SET log_queries = 1; SELECT sleep(0.5), ignore('test real time query profiler'); SET log_queries = 0; SYSTEM FLUSH LOGS; -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND query_id = (SELECT query_id FROM system.query_log WHERE event_date >= yesterday() AND query LIKE '%test real time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%FunctionSleep%'; +WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test real time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%FunctionSleep%'; SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; @@ -13,4 +13,4 @@ SET log_queries = 1; SELECT count(), ignore('test cpu time query profiler') FROM numbers(1000000000); SET log_queries = 0; SYSTEM FLUSH LOGS; -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND query_id = (SELECT query_id FROM system.query_log WHERE event_date >= yesterday() AND query LIKE '%test cpu time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%Source%'; +WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test cpu time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%Source%'; diff --git a/tests/queries/0_stateless/00974_text_log_table_not_empty.sh b/tests/queries/0_stateless/00974_text_log_table_not_empty.sh index c3cde4c08bb..149f0668bd1 100755 --- a/tests/queries/0_stateless/00974_text_log_table_not_empty.sh +++ b/tests/queries/0_stateless/00974_text_log_table_not_empty.sh @@ -10,7 +10,7 @@ do ${CLICKHOUSE_CLIENT} --query="SYSTEM FLUSH LOGS" sleep 0.1; -if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE 
position(system.text_log.message, 'SELECT 6103') > 0 AND event_date >= yesterday()") == 1 ]]; then echo 1; exit; fi; +if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0") == 1 ]]; then echo 1; exit; fi; done; diff --git a/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql b/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql index 2c99ba54112..e1e81614ab7 100644 --- a/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql +++ b/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql @@ -3,5 +3,5 @@ SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; -- { serve CREATE TABLE test_table_for_01070_exception_code_in_query_log_table (value UInt64) ENGINE=Memory(); SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; SYSTEM FLUSH LOGS; -SELECT exception_code FROM system.query_log WHERE query = 'SELECT * FROM test_table_for_01070_exception_code_in_query_log_table' AND event_date >= yesterday() AND event_time > now() - INTERVAL 5 MINUTE ORDER BY exception_code; +SELECT exception_code FROM system.query_log WHERE query = 'SELECT * FROM test_table_for_01070_exception_code_in_query_log_table' ORDER BY exception_code; DROP TABLE IF EXISTS test_table_for_01070_exception_code_in_query_log_table; diff --git a/tests/queries/0_stateless/01091_num_threads.sql b/tests/queries/0_stateless/01091_num_threads.sql index 876a2d15d1a..a93568fcee5 100644 --- a/tests/queries/0_stateless/01091_num_threads.sql +++ b/tests/queries/0_stateless/01091_num_threads.sql @@ -8,13 +8,13 @@ WITH ( SELECT query_id FROM system.query_log - WHERE (query = 'SELECT 1') AND (event_date >= (today() - 1)) + WHERE (query = 'SELECT 1') ORDER BY event_time DESC LIMIT 1 ) AS id SELECT uniqExact(thread_id) FROM system.query_thread_log -WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id); +WHERE (query_id = id) AND (thread_id != master_thread_id); select sum(number) from numbers(1000000); SYSTEM FLUSH LOGS; @@ -23,13 +23,13 @@ WITH ( SELECT query_id FROM system.query_log - WHERE (query = 'SELECT sum(number) FROM numbers(1000000)') AND (event_date >= (today() - 1)) + WHERE (query = 'SELECT sum(number) FROM numbers(1000000)') ORDER BY event_time DESC LIMIT 1 ) AS id SELECT uniqExact(thread_id) FROM system.query_thread_log -WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id); +WHERE (query_id = id) AND (thread_id != master_thread_id); select sum(number) from numbers_mt(1000000); SYSTEM FLUSH LOGS; @@ -38,10 +38,10 @@ WITH ( SELECT query_id FROM system.query_log - WHERE (query = 'SELECT sum(number) FROM numbers_mt(1000000)') AND (event_date >= (today() - 1)) + WHERE (query = 'SELECT sum(number) FROM numbers_mt(1000000)') ORDER BY event_time DESC LIMIT 1 ) AS id SELECT uniqExact(thread_id) > 2 FROM system.query_thread_log -WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id); +WHERE (query_id = id) AND (thread_id != master_thread_id); diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql index c20b5c79cdb..980f7f73d5d 100644 --- a/tests/queries/0_stateless/01092_memory_profiler.sql +++ b/tests/queries/0_stateless/01092_memory_profiler.sql @@ -3,4 +3,4 @@ SET allow_introspection_functions = 1; SET memory_profiler_step = 1000000; SELECT ignore(groupArray(number), 'test memory 
profiler') FROM numbers(10000000); SYSTEM FLUSH LOGS; -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1); +WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1); diff --git a/tests/queries/0_stateless/01198_client_quota_key.sh b/tests/queries/0_stateless/01198_client_quota_key.sh index f4b66aea6ac..b3bc845cd06 100755 --- a/tests/queries/0_stateless/01198_client_quota_key.sh +++ b/tests/queries/0_stateless/01198_client_quota_key.sh @@ -3,4 +3,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -$CLICKHOUSE_CLIENT --quota_key Hello --query_id test_quota_key --log_queries 1 --multiquery --query "SELECT 1; SYSTEM FLUSH LOGS; SELECT DISTINCT quota_key FROM system.query_log WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query_id = 'test_quota_key'" +$CLICKHOUSE_CLIENT --quota_key Hello --query_id test_quota_key --log_queries 1 --multiquery --query "SELECT 1; SYSTEM FLUSH LOGS; SELECT DISTINCT quota_key FROM system.query_log WHERE query_id = 'test_quota_key'" diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.sql b/tests/queries/0_stateless/01231_log_queries_min_type.sql index f2229c94a8a..565a5880b84 100644 --- a/tests/queries/0_stateless/01231_log_queries_min_type.sql +++ b/tests/queries/0_stateless/01231_log_queries_min_type.sql @@ -2,14 +2,14 @@ set log_queries=1; select '01231_log_queries_min_type/QUERY_START'; system flush logs; -select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%'; set log_queries_min_type='EXCEPTION_BEFORE_START'; select '01231_log_queries_min_type/EXCEPTION_BEFORE_START'; system flush logs; -select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%'; set log_queries_min_type='EXCEPTION_WHILE_PROCESSING'; select '01231_log_queries_min_type/', max(number) from system.numbers limit 1e6 settings max_rows_to_read='100K'; -- { serverError 158; } system flush logs; -select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%'; From bd86f15680c51f7b48d35e73e490882814c54006 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 18 May 2020 21:18:37 +0300 Subject: [PATCH 026/120] Use QueryFinish over magic const in tests Find all usages with: $ git grep 'type[ ]*=[ ]*2' tests/queries/0_stateless/ Sure this is not all... 
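For reference, the numeric constants replaced here come from the query_log event type enum: 1 is QueryStart, 2 is QueryFinish, and 3/4 are the exception events, so filtering on the symbolic name selects the same rows while being self-documenting. A minimal sketch of the kind of lookup these tests perform, assuming the stock system.query_log schema (the query_id literal is only a placeholder set by a test):

    SELECT query_id, query_duration_ms
    FROM system.query_log
    WHERE type = 'QueryFinish'           -- same rows as the old "type = 2"
      AND query_id = 'some-test-query'   -- placeholder query id
    ORDER BY event_time DESC
    LIMIT 1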
--- .../0_stateless/00634_performance_introspection_and_logging.sh | 2 +- .../00933_test_fix_extra_seek_on_compressed_cache.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh index 684287da761..efb4b3569fb 100755 --- a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh +++ b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh @@ -47,7 +47,7 @@ SELECT threads_realtime >= threads_time_user_system_io, any(length(thread_ids)) >= 1 FROM - (SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE type=2 ORDER BY event_time DESC LIMIT 1) + (SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE type='QueryFinish' ORDER BY event_time DESC LIMIT 1) ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV" # Check per-thread and per-query ProfileEvents consistency diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh index b0fd9a70bd4..9e32c30ce20 100755 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query_id="test-query-uncompresse sleep 1 $CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 2) ORDER BY event_time DESC LIMIT 1" +$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 'QueryFinish') ORDER BY event_time DESC LIMIT 1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS small_table" From 4ce0ef6055872b3070aad66e69b90808f3254035 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 May 2020 21:40:01 +0300 Subject: [PATCH 027/120] Fix tests. 
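The one-line change below swaps the declaration order of the pipeline and executor members of PipelineExecutingBlockInputStream. C++ destroys data members in reverse order of declaration, so with the executor declared after the pipeline it is destroyed first; presumably the failing tests were hitting a teardown order in which the pipeline was released while the executor still referenced it.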
--- src/Processors/Executors/PipelineExecutingBlockInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.h b/src/Processors/Executors/PipelineExecutingBlockInputStream.h index 781396159e6..7555e1dec97 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.h +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.h @@ -32,8 +32,8 @@ protected: Block readImpl() override; private: - std::unique_ptr executor; std::unique_ptr pipeline; + std::unique_ptr executor; }; } From 08ff5b59663ed0e82d033bc5f5cf72afcb4ccab1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 May 2020 22:13:13 +0300 Subject: [PATCH 028/120] Add NCHAR and NVARCHAR synonims --- src/DataTypes/DataTypeString.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index a7bfb2b635d..4a9a6e9ab78 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -376,8 +376,10 @@ void registerDataTypeString(DataTypeFactory & factory) /// These synonyms are added for compatibility. factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NCHAR", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NVARCHAR", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARCHAR2", "String", DataTypeFactory::CaseInsensitive); /// Oracle factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive); From 551fbf8519485e2d827290a439a924e44e651b09 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 18 May 2020 22:46:32 +0300 Subject: [PATCH 029/120] Update HTTPHandlerFactory.cpp --- programs/server/HTTPHandlerFactory.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/programs/server/HTTPHandlerFactory.cpp b/programs/server/HTTPHandlerFactory.cpp index 4caea1e92e8..955ff6b8834 100644 --- a/programs/server/HTTPHandlerFactory.cpp +++ b/programs/server/HTTPHandlerFactory.cpp @@ -129,6 +129,8 @@ static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory( query_handler->allowPostAndGetParamsRequest(); factory->addHandler(query_handler.release()); + /// We check that prometheus handler will be served on current (default) port. + /// Otherwise it will be created separately, see below. 
if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0) { auto prometheus_handler = std::make_unique>( From 0ec321a811b3e7c5776e9ce9b9d9cc5681f99f5c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 May 2020 00:01:33 +0300 Subject: [PATCH 030/120] Fix ya.make --- src/Processors/ya.make | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 09fb9d64ad1..f5a3c0ea30a 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -10,6 +10,7 @@ SRCS( Chunk.cpp ConcatProcessor.cpp DelayedPortsProcessor.cpp + Executors/PipelineExecutingBlockInputStream.h Executors/PipelineExecutor.cpp Executors/PullingPipelineExecutor.cpp Executors/TreeExecutorBlockInputStream.cpp From 075ccceee971935505b42064fed526ec602a1aa1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 May 2020 00:38:28 +0300 Subject: [PATCH 031/120] Fix tests. --- src/Interpreters/InterpreterKillQueryQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 39e432195fd..42afd0ef477 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -306,7 +306,7 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S auto stream = block_io.getInputStream(); Block res = stream->read(); - if (res && block_io.in->read()) + if (res && stream->read()) throw Exception("Expected one block from input stream", ErrorCodes::LOGICAL_ERROR); return res; From e9dc2cbcf70968996a1afbae3c409c5177f77f74 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 19 May 2020 01:12:51 +0300 Subject: [PATCH 032/120] Add missing SYSTEM FLUSH LOGS before TRUNCATE TABLE in clickhouse-test Suggested-by: @filimonov --- tests/clickhouse-test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 76a61e1d558..324fd13aac2 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -235,6 +235,9 @@ def run_tests_array(all_tests_with_params): clickhouse_proc.communicate("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite)) if not args.no_system_log_cleanup: + clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + clickhouse_proc.communicate("SYSTEM FLUSH LOGS") + for table in ['query_log', 'query_thread_log', 'trace_log', 'metric_log']: clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE) clickhouse_proc.communicate("TRUNCATE TABLE IF EXISTS system.{}".format(table)) From 58ed04dc24e335fd3edc50aec3df95e9d700cc8a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 19 May 2020 04:53:01 +0300 Subject: [PATCH 033/120] materialize TTL after its modifying --- src/Core/Settings.h | 1 + src/Storages/AlterCommands.cpp | 45 ++++++++++- src/Storages/AlterCommands.h | 5 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- .../01070_materialize_ttl.reference | 8 ++ .../0_stateless/01070_materialize_ttl.sql | 8 ++ .../0_stateless/01070_modify_ttl.reference | 32 ++++++++ .../queries/0_stateless/01070_modify_ttl.sql | 74 +++++++++++++++++++ 9 files changed, 173 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/01070_modify_ttl.reference create mode 100644 tests/queries/0_stateless/01070_modify_ttl.sql diff --git a/src/Core/Settings.h 
b/src/Core/Settings.h index ff151e24a99..f33a8a4da35 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -422,6 +422,7 @@ struct Settings : public SettingsCollection M(SettingBool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \ M(SettingBool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \ M(SettingSeconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \ + M(SettingBool, materialize_ttl_after_modify, true, "Apply TTL for old data, after ALTER MODIFY TTL query", 0) \ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 67bd88d10a8..e34a39d8f46 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -594,6 +594,27 @@ bool AlterCommand::isCommentAlter() const return false; } +bool AlterCommand::isTTLAlter(const StorageInMemoryMetadata & metadata) const +{ + if (type == MODIFY_TTL) + return true; + + if (!ttl || type != MODIFY_COLUMN) + return false; + + bool ttl_changed = true; + for (const auto & [name, ttl_ast] : metadata.columns.getColumnTTLs()) + { + if (name == column_name && queryToString(*ttl) == queryToString(*ttl_ast)) + { + ttl_changed = false; + break; + } + } + + return ttl_changed; +} + std::optional AlterCommand::tryConvertToMutationCommand(const StorageInMemoryMetadata & metadata) const { if (!isRequireMutationStage(metadata)) @@ -922,13 +943,35 @@ bool AlterCommands::isCommentAlter() const return std::all_of(begin(), end(), [](const AlterCommand & c) { return c.isCommentAlter(); }); } +static MutationCommand createMaterializeTTLCommand() +{ + MutationCommand command; + auto ast = std::make_shared(); + ast->type = ASTAlterCommand::MATERIALIZE_TTL; + command.type = MutationCommand::MATERIALIZE_TTL; + command.ast = std::move(ast); + return command; +} -MutationCommands AlterCommands::getMutationCommands(const StorageInMemoryMetadata & metadata) const +MutationCommands AlterCommands::getMutationCommands(const StorageInMemoryMetadata & metadata, bool materialize_ttl) const { MutationCommands result; for (const auto & alter_cmd : *this) if (auto mutation_cmd = alter_cmd.tryConvertToMutationCommand(metadata); mutation_cmd) result.push_back(*mutation_cmd); + + if (materialize_ttl) + { + for (const auto & alter_cmd : *this) + { + if (alter_cmd.isTTLAlter(metadata)) + { + result.push_back(createMaterializeTTLCommand()); + break; + } + } + } + return result; } diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index c1c913dad73..d52a4ff2ed5 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -118,6 +118,9 @@ struct AlterCommand /// Checks that only comment changed by alter bool isCommentAlter() const; + /// Checks that any TTL changed by alter + bool isTTLAlter(const StorageInMemoryMetadata & metadata) const; + /// If possible, convert alter command to mutation command. In other case /// return empty optional. Some storages may execute mutations after /// metadata changes. @@ -162,7 +165,7 @@ public: /// Return mutation commands which some storages may execute as part of /// alter. If alter can be performed is pure metadata update, than result is /// empty. 
- MutationCommands getMutationCommands(const StorageInMemoryMetadata & metadata) const; + MutationCommands getMutationCommands(const StorageInMemoryMetadata & metadata, bool materialize_ttl) const; }; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 68d468233a8..e78438210ec 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -220,7 +220,7 @@ void StorageMergeTree::alter( auto table_id = getStorageID(); StorageInMemoryMetadata metadata = getInMemoryMetadata(); - auto maybe_mutation_commands = commands.getMutationCommands(metadata); + auto maybe_mutation_commands = commands.getMutationCommands(metadata, context.getSettingsRef().materialize_ttl_after_modify); commands.apply(metadata); /// This alter can be performed at metadata level only diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1317b05d9fe..add21251acc 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3388,7 +3388,7 @@ void StorageReplicatedMergeTree::alter( alter_entry->alter_version = new_metadata_version; alter_entry->create_time = time(nullptr); - auto maybe_mutation_commands = params.getMutationCommands(current_metadata); + auto maybe_mutation_commands = params.getMutationCommands(current_metadata, query_context.getSettingsRef().materialize_ttl_after_modify); alter_entry->have_mutation = !maybe_mutation_commands.empty(); ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", alter_entry->toString(), zkutil::CreateMode::PersistentSequential)); diff --git a/tests/queries/0_stateless/01070_materialize_ttl.reference b/tests/queries/0_stateless/01070_materialize_ttl.reference index b4a9947a521..af1b3a4459b 100644 --- a/tests/queries/0_stateless/01070_materialize_ttl.reference +++ b/tests/queries/0_stateless/01070_materialize_ttl.reference @@ -1,8 +1,16 @@ +2000-10-10 1 +2000-10-10 2 +2100-10-10 3 +2100-10-10 4 2100-10-10 3 2100-10-10 4 1 a 3 c 1 a +2 b +3 c +4 d +1 a 2 3 c 4 diff --git a/tests/queries/0_stateless/01070_materialize_ttl.sql b/tests/queries/0_stateless/01070_materialize_ttl.sql index 6696fbc980a..2521ae35edf 100755 --- a/tests/queries/0_stateless/01070_materialize_ttl.sql +++ b/tests/queries/0_stateless/01070_materialize_ttl.sql @@ -6,9 +6,14 @@ insert into ttl values (toDateTime('2000-10-10 00:00:00'), 2); insert into ttl values (toDateTime('2100-10-10 00:00:00'), 3); insert into ttl values (toDateTime('2100-10-10 00:00:00'), 4); +set materialize_ttl_after_modify = 0; + alter table ttl materialize ttl; -- { serverError 80 } alter table ttl modify ttl d + interval 1 day; +-- TTL should not be applied +select * from ttl order by a; + alter table ttl materialize ttl settings mutations_sync=2; select * from ttl order by a; @@ -31,6 +36,9 @@ create table ttl (i Int, s String) engine = MergeTree order by i; insert into ttl values (1, 'a') (2, 'b') (3, 'c') (4, 'd'); alter table ttl modify column s String ttl i % 2 = 0 ? 
today() - 10 : toDate('2100-01-01'); +-- TTL should not be applied +select * from ttl order by i; + alter table ttl materialize ttl settings mutations_sync=2; select * from ttl order by i; diff --git a/tests/queries/0_stateless/01070_modify_ttl.reference b/tests/queries/0_stateless/01070_modify_ttl.reference new file mode 100644 index 00000000000..d64c1a4edc2 --- /dev/null +++ b/tests/queries/0_stateless/01070_modify_ttl.reference @@ -0,0 +1,32 @@ +2100-10-10 3 +2100-10-10 4 +============= +1 a +3 c +============= +============= +1 a +2 +3 c +4 +============= +1 +2 +3 +4 +============= +1 a +2 b +4 d +============= +1 +2 +4 d +============= +1 a +2 b bb +3 cc +4 d +1 +============= +0 diff --git a/tests/queries/0_stateless/01070_modify_ttl.sql b/tests/queries/0_stateless/01070_modify_ttl.sql new file mode 100644 index 00000000000..4e842948afe --- /dev/null +++ b/tests/queries/0_stateless/01070_modify_ttl.sql @@ -0,0 +1,74 @@ +drop table if exists ttl; + +create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d); +insert into ttl values (toDateTime('2000-10-10 00:00:00'), 1); +insert into ttl values (toDateTime('2000-10-10 00:00:00'), 2); +insert into ttl values (toDateTime('2100-10-10 00:00:00'), 3); +insert into ttl values (toDateTime('2100-10-10 00:00:00'), 4); + +set mutations_sync = 2; + +alter table ttl modify ttl d + interval 1 day; +select * from ttl order by a; +select '============='; + +drop table if exists ttl; + +create table ttl (i Int, s String) engine = MergeTree order by i; +insert into ttl values (1, 'a') (2, 'b') (3, 'c') (4, 'd'); + +alter table ttl modify ttl i % 2 = 0 ? today() - 10 : toDate('2100-01-01'); +select * from ttl order by i; +select '============='; + +alter table ttl modify ttl toDate('2000-01-01'); +select * from ttl order by i; +select '============='; + +drop table if exists ttl; + +create table ttl (i Int, s String) engine = MergeTree order by i; +insert into ttl values (1, 'a') (2, 'b') (3, 'c') (4, 'd'); + +alter table ttl modify column s String ttl i % 2 = 0 ? today() - 10 : toDate('2100-01-01'); +select * from ttl order by i; +select '============='; + +alter table ttl modify column s String ttl toDate('2000-01-01'); +select * from ttl order by i; +select '============='; + +drop table if exists ttl; + +create table ttl (d Date, i Int, s String) engine = MergeTree order by i; +insert into ttl values (toDate('2000-01-02'), 1, 'a') (toDate('2000-01-03'), 2, 'b') (toDate('2080-01-01'), 3, 'c') (toDate('2080-01-03'), 4, 'd'); + +alter table ttl modify ttl i % 3 = 0 ? today() - 10 : toDate('2100-01-01'); +select i, s from ttl order by i; +select '============='; + +alter table ttl modify column s String ttl d + interval 1 month; +select i, s from ttl order by i; +select '============='; + +drop table if exists ttl; + +create table ttl (i Int, s String, t String) engine = MergeTree order by i; +insert into ttl values (1, 'a', 'aa') (2, 'b', 'bb') (3, 'c', 'cc') (4, 'd', 'dd'); + +alter table ttl modify column s String ttl i % 3 = 0 ? today() - 10 : toDate('2100-01-01'), + modify column t String ttl i % 3 = 1 ? 
today() - 10 : toDate('2100-01-01'); + +select i, s, t from ttl order by i; +-- MATERIALIZE TTL ran only once +select count() from system.mutations where table = 'ttl' and is_done; +select '============='; + +drop table if exists ttl; + +-- Nothing changed, don't run mutation +create table ttl (i Int, s String ttl toDate('2000-01-02')) engine = MergeTree order by i; +alter table ttl modify column s String ttl toDate('2000-01-02'); +select count() from system.mutations where table = 'ttl' and is_done; + +drop table if exists ttl; From 1086cf769184c4d6f83a748c16d670d4fe453061 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 6 Apr 2020 11:37:16 +0300 Subject: [PATCH 034/120] wip --- docker/packager/deb/build.sh | 7 +++++++ docker/packager/packager | 11 +++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index 1efed3628a0..586fe3bfcd9 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -10,5 +10,12 @@ mv *.changes /output mv *.buildinfo /output mv /*.rpm /output ||: # if exists mv /*.tgz /output ||: # if exists + +if [ "binary" == "$BINARY_OUTPUT" ] +then + mkdir /output/binary + mv ./programs/clickhouse* /output/binary + mv ./dbms/unit_tests_dbms /output/binary +fi ccache --show-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: diff --git a/docker/packager/packager b/docker/packager/packager index 025ca3bf398..eff9eabf388 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -54,7 +54,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache subprocess.check_call(cmd, shell=True) -def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage): +def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries): CLANG_PREFIX = "clang" DARWIN_SUFFIX = "-darwin" ARM_SUFFIX = "-aarch64" @@ -131,6 +131,9 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if alien_pkgs: result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'") + if with_binaries: + result.append('BINARY_OUTPUT=binary') + if unbundled: # TODO: fix build with ENABLE_RDKAFKA cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0') @@ -179,6 +182,7 @@ if __name__ == "__main__": parser.add_argument("--official", action="store_true") parser.add_argument("--alien-pkgs", nargs='+', default=[]) parser.add_argument("--with-coverage", action="store_true") + parser.add_argument("--with-binaries", action="store_true") args = parser.parse_args() if not os.path.isabs(args.output_dir): @@ -195,6 +199,9 @@ if __name__ == "__main__": if args.alien_pkgs and not image_type == "deb": raise Exception("Can add alien packages only in deb build") + if args.with_binaries and not image_type == "deb": + raise Exception("Can add additional binaries only in deb build") + dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image: if not pull_image(image_name) or args.force_build_image: @@ -202,6 +209,6 @@ if __name__ == 
"__main__": env_prepared = parse_env_variables( args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy, - args.version, args.author, args.official, args.alien_pkgs, args.with_coverage) + args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries) run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) logging.info("Output placed into {}".format(args.output_dir)) From 60f9d2088d89e2536704513420faff0c5b462f96 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 7 Apr 2020 14:33:50 +0300 Subject: [PATCH 035/120] wip --- docker/packager/deb/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index 586fe3bfcd9..65499b87df2 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -14,8 +14,8 @@ mv /*.tgz /output ||: # if exists if [ "binary" == "$BINARY_OUTPUT" ] then mkdir /output/binary - mv ./programs/clickhouse* /output/binary - mv ./dbms/unit_tests_dbms /output/binary + mv /build/obj-x86_64-linux-gnu/programs/clickhouse* /output/binary + mv /build/obj-x86_64-linux-gnu/dbms/unit_tests_dbms /output/binary fi ccache --show-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: From b780adb241202dd3f9a7e33ce90c278c534c8f82 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 7 Apr 2020 18:15:22 +0300 Subject: [PATCH 036/120] wip --- docker/packager/deb/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index 65499b87df2..04233fbed45 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -15,7 +15,7 @@ if [ "binary" == "$BINARY_OUTPUT" ] then mkdir /output/binary mv /build/obj-x86_64-linux-gnu/programs/clickhouse* /output/binary - mv /build/obj-x86_64-linux-gnu/dbms/unit_tests_dbms /output/binary + mv /build/obj-x86_64-linux-gnu/src/unit_tests_dbms /output/binary fi ccache --show-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: From 8d190f316deba8e0e9c87c59f7ce13d234f2b02a Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 9 Apr 2020 12:04:30 +0300 Subject: [PATCH 037/120] wip --- debian/rules | 2 +- docker/packager/deb/build.sh | 10 +++++++--- docker/packager/packager | 14 ++++++++++---- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/debian/rules b/debian/rules index dabebb516cd..d2ea9857ff6 100755 --- a/debian/rules +++ b/debian/rules @@ -92,7 +92,7 @@ override_dh_auto_test: override_dh_clean: rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs - dh_clean -X contrib + dh_clean # -X contrib override_dh_strip: dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index 04233fbed45..cfdca7abc45 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -11,11 +11,15 @@ mv *.buildinfo /output mv /*.rpm /output ||: # if exists mv /*.tgz /output ||: # if exists -if [ "binary" == "$BINARY_OUTPUT" ] +if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;} then - mkdir /output/binary + 
echo Place $BINARY_OUTPUT to output + mkdir /output/binary ||: # if exists mv /build/obj-x86_64-linux-gnu/programs/clickhouse* /output/binary - mv /build/obj-x86_64-linux-gnu/src/unit_tests_dbms /output/binary + if [ "$BINARY_OUTPUT" = "tests" ] + then + mv /build/obj-x86_64-linux-gnu/src/unit_tests_dbms /output/binary + fi fi ccache --show-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: diff --git a/docker/packager/packager b/docker/packager/packager index eff9eabf388..cee012a4699 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -131,8 +131,11 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if alien_pkgs: result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'") - if with_binaries: - result.append('BINARY_OUTPUT=binary') + if with_binaries == "programs": + result.append('BINARY_OUTPUT=programs') + elif with_binaries == "tests": + result.append('BINARY_OUTPUT=tests') + cmake_flags.append('-DENABLE_TESTS=1') if unbundled: # TODO: fix build with ENABLE_RDKAFKA @@ -182,7 +185,7 @@ if __name__ == "__main__": parser.add_argument("--official", action="store_true") parser.add_argument("--alien-pkgs", nargs='+', default=[]) parser.add_argument("--with-coverage", action="store_true") - parser.add_argument("--with-binaries", action="store_true") + parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="") args = parser.parse_args() if not os.path.isabs(args.output_dir): @@ -199,9 +202,12 @@ if __name__ == "__main__": if args.alien_pkgs and not image_type == "deb": raise Exception("Can add alien packages only in deb build") - if args.with_binaries and not image_type == "deb": + if args.with_binaries != "" and not image_type == "deb": raise Exception("Can add additional binaries only in deb build") + if args.with_binaries != "" and image_type == "deb": + logging.info("Should place {} to output".format(args.with_binaries)) + dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image: if not pull_image(image_name) or args.force_build_image: From d961707512cad27c9fbf1d07e0ed26da1d2a4208 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 13 May 2020 17:47:25 +0300 Subject: [PATCH 038/120] add targets dependecies to tests --- debian/rules | 1 + programs/CMakeLists.txt | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/debian/rules b/debian/rules index d2ea9857ff6..e032dbad000 100755 --- a/debian/rules +++ b/debian/rules @@ -21,6 +21,7 @@ ifeq ($(THREADS_COUNT),) THREADS_COUNT=$(shell nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4) endif DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT) +DEB_BUILD_OPTIONS+=nocheck ifndef ENABLE_TESTS CMAKE_FLAGS += -DENABLE_TESTS=0 diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 7cbe2e7a2a6..7bc31452aa4 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -201,3 +201,9 @@ endif () if (TARGET clickhouse-server AND TARGET copy-headers) add_dependencies(clickhouse-server copy-headers) endif () + +if (ENABLE_TESTS AND USE_GTEST) + set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor expression_analyzer) + add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_ALL_TESTS_TARGETS}) + 
add_dependencies(clickhouse-bundle clickhouse-tests) +endif() From 9c551b8b4d2e88da2669cf51d6d134649864cd62 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 14 May 2020 10:44:15 +0300 Subject: [PATCH 039/120] set USE_GTEST for tests build --- docker/packager/packager | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/packager/packager b/docker/packager/packager index cee012a4699..ff385e168b2 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -136,6 +136,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ elif with_binaries == "tests": result.append('BINARY_OUTPUT=tests') cmake_flags.append('-DENABLE_TESTS=1') + cmake_flags.append('-DUSE_GTEST=1') if unbundled: # TODO: fix build with ENABLE_RDKAFKA From c66d1a03eb6e182fec9b60ac21ff6a5707dd5dca Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 18 May 2020 10:09:43 +0300 Subject: [PATCH 040/120] set nostrip for build with binaries export --- debian/rules | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/debian/rules b/debian/rules index e032dbad000..194350ec26b 100755 --- a/debian/rules +++ b/debian/rules @@ -21,10 +21,13 @@ ifeq ($(THREADS_COUNT),) THREADS_COUNT=$(shell nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4) endif DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT) -DEB_BUILD_OPTIONS+=nocheck ifndef ENABLE_TESTS CMAKE_FLAGS += -DENABLE_TESTS=0 +else +# To export binaries and from deb build we do not strip them. No need to run tests in deb build as we run them in CI + DEB_BUILD_OPTIONS+=nocheck + DEB_BUILD_OPTIONS+=nostrip endif ifndef MAKE_TARGET From e3c692bc48759637d0d27cfb3a9f0d9d8afe1167 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 18 May 2020 15:32:06 +0300 Subject: [PATCH 041/120] fix issues --- debian/rules | 3 +-- docker/packager/deb/build.sh | 4 ++-- docker/packager/packager | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/debian/rules b/debian/rules index 194350ec26b..467b86e5d79 100755 --- a/debian/rules +++ b/debian/rules @@ -26,8 +26,7 @@ ifndef ENABLE_TESTS CMAKE_FLAGS += -DENABLE_TESTS=0 else # To export binaries and from deb build we do not strip them. 
No need to run tests in deb build as we run them in CI - DEB_BUILD_OPTIONS+=nocheck - DEB_BUILD_OPTIONS+=nostrip + DEB_BUILD_OPTIONS+=" nocheck nostrip" endif ifndef MAKE_TARGET diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index cfdca7abc45..fbaa0151c6b 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -15,10 +15,10 @@ if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_ then echo Place $BINARY_OUTPUT to output mkdir /output/binary ||: # if exists - mv /build/obj-x86_64-linux-gnu/programs/clickhouse* /output/binary + mv /build/obj-*/programs/clickhouse* /output/binary if [ "$BINARY_OUTPUT" = "tests" ] then - mv /build/obj-x86_64-linux-gnu/src/unit_tests_dbms /output/binary + mv /build/obj-*/src/unit_tests_dbms /output/binary fi fi ccache --show-stats ||: diff --git a/docker/packager/packager b/docker/packager/packager index ff385e168b2..8a5bdda60e8 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -134,6 +134,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if with_binaries == "programs": result.append('BINARY_OUTPUT=programs') elif with_binaries == "tests": + result.append('ENABLE_TESTS=1') result.append('BINARY_OUTPUT=tests') cmake_flags.append('-DENABLE_TESTS=1') cmake_flags.append('-DUSE_GTEST=1') From daa519a654ee102f0f2212d7dd31064cc79822d1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 May 2020 12:04:56 +0300 Subject: [PATCH 042/120] Fix ya.make --- src/Processors/ya.make | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/ya.make b/src/Processors/ya.make index f5a3c0ea30a..03a9b939d42 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -10,7 +10,7 @@ SRCS( Chunk.cpp ConcatProcessor.cpp DelayedPortsProcessor.cpp - Executors/PipelineExecutingBlockInputStream.h + Executors/PipelineExecutingBlockInputStream.cpp Executors/PipelineExecutor.cpp Executors/PullingPipelineExecutor.cpp Executors/TreeExecutorBlockInputStream.cpp From d7d70b788d6927066158ef7c07a0201a9b147c24 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 19 May 2020 15:07:16 +0300 Subject: [PATCH 043/120] finally fix rules --- debian/rules | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/debian/rules b/debian/rules index 467b86e5d79..ad7b73380f4 100755 --- a/debian/rules +++ b/debian/rules @@ -26,7 +26,8 @@ ifndef ENABLE_TESTS CMAKE_FLAGS += -DENABLE_TESTS=0 else # To export binaries and from deb build we do not strip them. 
No need to run tests in deb build as we run them in CI - DEB_BUILD_OPTIONS+=" nocheck nostrip" + DEB_BUILD_OPTIONS+= nocheck + DEB_BUILD_OPTIONS+= nostrip endif ifndef MAKE_TARGET @@ -91,14 +92,18 @@ override_dh_auto_build: $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET) override_dh_auto_test: +ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server +endif override_dh_clean: rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs dh_clean # -X contrib override_dh_strip: +ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS))) dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg +endif override_dh_install: # Making docs From 35b916433a12706398b28a710c2c0066895ecf1a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 19 May 2020 16:00:32 +0300 Subject: [PATCH 044/120] fix TTL with defaults after modify --- src/DataStreams/TTLBlockInputStream.cpp | 21 +++++---- tests/integration/test_ttl_replicated/test.py | 44 +++++++++++++++++++ 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 692d2c0d5cf..c9a40768166 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -34,22 +34,25 @@ TTLBlockInputStream::TTLBlockInputStream( const auto & storage_columns = storage.getColumns(); const auto & column_defaults = storage_columns.getDefaults(); + ASTPtr default_expr_list = std::make_shared(); + for (const auto & [name, _] : storage.column_ttl_entries_by_name) + { + auto it = column_defaults.find(name); + if (it != column_defaults.end()) + { + auto column = storage_columns.get(name); + auto expression = it->second.expression->clone(); + default_expr_list->children.emplace_back(setAlias(addTypeConversionToAST(std::move(expression), column.type->getName()), it->first)); + } + } + for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl) { if (force || isTTLExpired(ttl_info.min)) { new_ttl_infos.columns_ttl.emplace(name, IMergeTreeDataPart::TTLInfo{}); empty_columns.emplace(name); - - auto it = column_defaults.find(name); - - if (it != column_defaults.end()) - { - auto column = storage_columns.get(name); - auto expression = it->second.expression->clone(); - default_expr_list->children.emplace_back(setAlias(addTypeConversionToAST(std::move(expression), column.type->getName()), it->first)); - } } else new_ttl_infos.columns_ttl.emplace(name, ttl_info); diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 78ff703f0ec..29169ad3c0e 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -109,6 +109,50 @@ def test_ttl_table(started_cluster, delete_suffix): assert TSV(node1.query("SELECT * FROM test_ttl")) == TSV("") assert TSV(node2.query("SELECT * FROM test_ttl")) == TSV("") +def test_modify_ttl(started_cluster): + drop_table([node1, node2], "test_ttl") + for node in [node1, node2]: + node.query( + ''' + CREATE TABLE test_ttl(d DateTime, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}') + ORDER BY id + '''.format(replica=node.name)) + + node1.query("INSERT INTO test_ttl VALUES (now() - INTERVAL 5 HOUR, 1), (now() - INTERVAL 3 HOUR, 2), (now() - INTERVAL 1 HOUR, 3)") + node2.query("SYSTEM SYNC REPLICA test_ttl", timeout=20) + + node1.query("ALTER TABLE test_ttl MODIFY TTL d + INTERVAL 4 HOUR SETTINGS 
mutations_sync = 2") + assert node2.query("SELECT id FROM test_ttl") == "2\n3\n" + + node2.query("ALTER TABLE test_ttl MODIFY TTL d + INTERVAL 2 HOUR SETTINGS mutations_sync = 2") + assert node1.query("SELECT id FROM test_ttl") == "3\n" + + node1.query("ALTER TABLE test_ttl MODIFY TTL d + INTERVAL 30 MINUTE SETTINGS mutations_sync = 2") + assert node2.query("SELECT id FROM test_ttl") == "" + +def test_modify_column_ttl(started_cluster): + drop_table([node1, node2], "test_ttl") + for node in [node1, node2]: + node.query( + ''' + CREATE TABLE test_ttl(d DateTime, id UInt32 DEFAULT 42) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}') + ORDER BY d + '''.format(replica=node.name)) + + node1.query("INSERT INTO test_ttl VALUES (now() - INTERVAL 5 HOUR, 1), (now() - INTERVAL 3 HOUR, 2), (now() - INTERVAL 1 HOUR, 3)") + node2.query("SYSTEM SYNC REPLICA test_ttl", timeout=20) + + node1.query("ALTER TABLE test_ttl MODIFY COLUMN id UInt32 TTL d + INTERVAL 4 HOUR SETTINGS mutations_sync = 2") + assert node2.query("SELECT id FROM test_ttl") == "42\n2\n3\n" + + node1.query("ALTER TABLE test_ttl MODIFY COLUMN id UInt32 TTL d + INTERVAL 2 HOUR SETTINGS mutations_sync = 2") + assert node1.query("SELECT id FROM test_ttl") == "42\n42\n3\n" + + node1.query("ALTER TABLE test_ttl MODIFY COLUMN id UInt32 TTL d + INTERVAL 30 MINUTE SETTINGS mutations_sync = 2") + assert node2.query("SELECT id FROM test_ttl") == "42\n42\n42\n" + def test_ttl_double_delete_rule_returns_error(started_cluster): drop_table([node1, node2], "test_ttl") try: From b0d9552e1eb7b2e7f50978f52d1a3e730eff9ad7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 May 2020 17:06:33 +0300 Subject: [PATCH 045/120] Try fix compatibility. --- src/Interpreters/executeQuery.cpp | 16 ++++++++++++++++ src/Interpreters/executeQuery.h | 15 ++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 8c6e5cea73c..468ce36daf9 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -599,6 +599,22 @@ BlockIO executeQuery( return streams; } +BlockIO executeQuery( + const String & query, + Context & context, + bool internal, + QueryProcessingStage::Enum stage, + bool may_have_embedded_data, + bool allow_processors) +{ + BlockIO res = executeQuery(query, context, internal, stage, may_have_embedded_data, allow_processors); + + if (!allow_processors && res.pipeline.initialized()) + res.in = res.getInputStream(); + + return res; +} + void executeQuery( ReadBuffer & istr, diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 22b353488ad..2850bb3baf4 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -45,13 +45,14 @@ BlockIO executeQuery( bool may_have_embedded_data = false /// If insert query may have embedded data ); - -QueryPipeline executeQueryWithProcessors( - const String & query, /// Query text without INSERT data. The latter must be written to BlockIO::out. - Context & context, /// DB, tables, data types, storage engines, functions, aggregate functions... - bool internal = false, /// If true, this query is caused by another query and thus needn't be registered in the ProcessList. - QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, /// To which stage the query must be executed. - bool may_have_embedded_data = false /// If insert query may have embedded data +/// Old interface with allow_processors flag. 
For compatibility. +BlockIO executeQuery( + const String & query, + Context & context, + bool internal, + QueryProcessingStage::Enum stage, + bool may_have_embedded_data, + bool allow_processors /// If can use processors pipeline ); } From 6109e87aeddd0b1799e3ff7fd41efc44f177e4b5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 May 2020 18:40:15 +0300 Subject: [PATCH 046/120] Try fix compatibility. --- src/Interpreters/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 468ce36daf9..e0377615dd6 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -607,7 +607,7 @@ BlockIO executeQuery( bool may_have_embedded_data, bool allow_processors) { - BlockIO res = executeQuery(query, context, internal, stage, may_have_embedded_data, allow_processors); + BlockIO res = executeQuery(query, context, internal, stage, may_have_embedded_data); if (!allow_processors && res.pipeline.initialized()) res.in = res.getInputStream(); From b50480cbde937e0e67c6c95b7cf956ff5342d99b Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 19 May 2020 18:55:37 +0200 Subject: [PATCH 047/120] Fixes the potential missed data during termination of Kafka engine table --- src/Storages/Kafka/KafkaBlockInputStream.cpp | 8 +- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 9 +- .../Kafka/ReadBufferFromKafkaConsumer.h | 3 +- src/Storages/Kafka/StorageKafka.cpp | 5 +- tests/integration/test_storage_kafka/test.py | 92 +++++++++++++++++++ 5 files changed, 110 insertions(+), 7 deletions(-) diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 55ff8610941..9f19bd464ff 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -134,7 +134,11 @@ Block KafkaBlockInputStream::readImpl() auto new_rows = read_kafka_message(); - buffer->storeLastReadMessageOffset(); + // we can't store the offser after rebalance, when consumer is stalled, or if it's terminating + if (!buffer->storeLastReadMessageOffset()) { + total_rows = 0; + break; + } auto topic = buffer->currentTopic(); auto key = buffer->currentKey(); @@ -172,7 +176,7 @@ Block KafkaBlockInputStream::readImpl() } } - if (buffer->rebalanceHappened() || total_rows == 0) + if (total_rows == 0) return Block(); /// MATERIALIZED columns can be added here, but I think diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index eff4161ffb6..aeda60e90a7 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -336,12 +336,17 @@ bool ReadBufferFromKafkaConsumer::nextImpl() return true; } -void ReadBufferFromKafkaConsumer::storeLastReadMessageOffset() +bool ReadBufferFromKafkaConsumer::storeLastReadMessageOffset() { - if (!stalled && !rebalance_happened) + if (!stalled && !rebalance_happened && !stopped) { consumer->store_offset(*(current - 1)); ++offsets_stored; + return true; + } + else + { + return false; } } diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index c5b72ed6d7c..435b5f2a7c8 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -37,9 +37,8 @@ public: auto pollTimeout() const { return poll_timeout; } bool hasMorePolledMessages() const; - auto rebalanceHappened() const 
{ return rebalance_happened; } - void storeLastReadMessageOffset(); + bool storeLastReadMessageOffset(); void resetToLastCommitted(const char * msg); // Return values for the message that's being read. diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 5c4657403b7..793a9a29676 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -454,7 +454,10 @@ bool StorageKafka::streamToViews() else in = streams[0]; - copyData(*in, *block_io.out, &stream_cancelled); + // We can't cancel during copyData, as it's not aware of commits and other kafka-related stuff. + // It will be cancelled on underlying layer (kafka buffer) + std::atomic stub = {false}; + copyData(*in, *block_io.out, &stub); for (auto & stream : streams) stream->as()->commit(); diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index cbe96df3c29..9154ad67c05 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -1241,6 +1241,98 @@ def test_exception_from_destructor(kafka_cluster): assert TSV(instance.query('SELECT 1')) == TSV('1') +@pytest.mark.timeout(120) +def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): + messages = [json.dumps({'key': j+1, 'value': j+1}) for j in range(1)] + kafka_produce('commits_of_unprocessed_messages_on_drop', messages) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination ( + key UInt64, + value UInt64, + _topic String, + _key String, + _offset UInt64, + _partition UInt64, + _timestamp Nullable(DateTime), + _consumed_by LowCardinality(String) + ) + ENGINE = MergeTree() + ORDER BY key; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'commits_of_unprocessed_messages_on_drop', + kafka_group_name = 'commits_of_unprocessed_messages_on_drop_test_group', + kafka_format = 'JSONEachRow', + kafka_max_block_size = 1000; + + CREATE MATERIALIZED VIEW test.kafka_consumer TO test.destination AS + SELECT + key, + value, + _topic, + _key, + _offset, + _partition, + _timestamp + FROM test.kafka; + ''') + + while int(instance.query("SELECT count() FROM test.destination")) == 0: + print("Waiting for test.kafka_consumer to start consume") + time.sleep(1) + + cancel = threading.Event() + + i = [2] + def produce(): + while not cancel.is_set(): + messages = [] + for _ in range(113): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + kafka_produce('commits_of_unprocessed_messages_on_drop', messages) + time.sleep(1) + + kafka_thread = threading.Thread(target=produce) + kafka_thread.start() + time.sleep(12) + + instance.query(''' + DROP TABLE test.kafka; + ''') + + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'commits_of_unprocessed_messages_on_drop', + kafka_group_name = 'commits_of_unprocessed_messages_on_drop_test_group', + kafka_format = 'JSONEachRow', + kafka_max_block_size = 10000; + ''') + + cancel.set() + time.sleep(15) + + #kafka_cluster.open_bash_shell('instance') + # SELECT key, _timestamp, _offset FROM test.destination where runningDifference(key) <> 1 ORDER BY key; + + result = instance.query('SELECT count(), uniqExact(key), max(key) FROM test.destination') + print(result) + + instance.query(''' + DROP TABLE test.kafka_consumer; + DROP TABLE 
test.destination; + ''') + + kafka_thread.join() + assert TSV(result) == TSV('{0}\t{0}\t{0}'.format(i[0]-1)), 'Missing data!' + + @pytest.mark.timeout(1200) def test_kafka_duplicates_when_commit_failed(kafka_cluster): messages = [json.dumps({'key': j+1, 'value': 'x' * 300}) for j in range(22)] From 34198336eddb7bd07ae7831652fb1fe9d5e02318 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 19 May 2020 19:02:37 +0200 Subject: [PATCH 048/120] Style --- src/Storages/Kafka/KafkaBlockInputStream.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 9f19bd464ff..4da5de69085 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -135,7 +135,8 @@ Block KafkaBlockInputStream::readImpl() auto new_rows = read_kafka_message(); // we can't store the offser after rebalance, when consumer is stalled, or if it's terminating - if (!buffer->storeLastReadMessageOffset()) { + if (!buffer->storeLastReadMessageOffset()) + { total_rows = 0; break; } From bc34e0ff94fb30414ce8b0a77dc294c04c33ef6a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 19 May 2020 23:12:10 +0300 Subject: [PATCH 049/120] fixup --- .../compose/docker_compose_mongo.yml | 1 + src/Dictionaries/DictionaryFactory.cpp | 5 ++++ src/Dictionaries/MongoDBDictionarySource.cpp | 27 ++++++++++++++++++- src/Dictionaries/MongoDBDictionarySource.h | 8 +++--- .../external_sources.py | 5 ++++ .../test.py | 4 +-- 6 files changed, 43 insertions(+), 7 deletions(-) diff --git a/docker/test/integration/compose/docker_compose_mongo.yml b/docker/test/integration/compose/docker_compose_mongo.yml index a593c3e123b..9137f0aebd1 100644 --- a/docker/test/integration/compose/docker_compose_mongo.yml +++ b/docker/test/integration/compose/docker_compose_mongo.yml @@ -8,3 +8,4 @@ services: MONGO_INITDB_ROOT_PASSWORD: clickhouse ports: - 27018:27017 + command: --profile=2 --verbose diff --git a/src/Dictionaries/DictionaryFactory.cpp b/src/Dictionaries/DictionaryFactory.cpp index 1e985e81969..326e0f08d89 100644 --- a/src/Dictionaries/DictionaryFactory.cpp +++ b/src/Dictionaries/DictionaryFactory.cpp @@ -5,6 +5,8 @@ #include "DictionaryStructure.h" #include "getDictionaryConfigurationFromAST.h" +#include + namespace DB { namespace ErrorCodes @@ -41,6 +43,9 @@ DictionaryPtr DictionaryFactory::create( const DictionaryStructure dict_struct{config, config_prefix + ".structure"}; DictionarySourcePtr source_ptr = DictionarySourceFactory::instance().create(name, config, config_prefix + ".source", dict_struct, context, check_source_config); + LOG_TRACE(&Poco::Logger::get("DictionaryFactory"), + "Created dictionary source '" << source_ptr->toString() + << "' for dictionary '" << name << "'"); const auto & layout_type = keys.front(); diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index d9601f29a03..a4bd22cd848 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -35,11 +35,13 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) } +#include #include #include #include #include #include +#include #include #include @@ -187,15 +189,38 @@ MongoDBDictionarySource::MongoDBDictionarySource( , db{db_} , collection{collection_} , sample_block{sample_block_} - , connection{std::make_shared(host, port)} + , connection{std::make_shared()} { if (!uri.empty()) { + Poco::URI 
poco_uri(uri); + + // Parse database from URI. This is required for correctness -- the + // cursor is created using database name and colleciton name, so we have + // to specify them properly. + db = poco_uri.getPath(); + // getPath() may return a leading slash, remove it. + if (!db.empty() && db[0] == '/') + { + db.erase(0, 1); + } + + // Parse some other parts from URI, for logging and display purposes. + host = poco_uri.getHost(); + port = poco_uri.getPort(); + user = poco_uri.getUserInfo(); + if (size_t separator = user.find(':'); separator != std::string::npos) + { + user.resize(separator); + } + + // Connect with URI. Poco::MongoDB::Connection::SocketFactory socket_factory; connection->connect(uri, socket_factory); } else { + // Connect with host/port/user/etc. connection->connect(host, port); if (!user.empty()) { diff --git a/src/Dictionaries/MongoDBDictionarySource.h b/src/Dictionaries/MongoDBDictionarySource.h index d90f28e1e74..36c9e82474c 100644 --- a/src/Dictionaries/MongoDBDictionarySource.h +++ b/src/Dictionaries/MongoDBDictionarySource.h @@ -72,12 +72,12 @@ public: private: const DictionaryStructure dict_struct; const std::string uri; - const std::string host; - const UInt16 port; - const std::string user; + std::string host; + UInt16 port; + std::string user; const std::string password; const std::string method; - const std::string db; + std::string db; const std::string collection; Block sample_block; diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 7d1ded04bdc..7f8a480704c 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -179,6 +179,11 @@ class SourceMongo(ExternalSource): result = tbl.insert_many(to_insert) class SourceMongoURI(SourceMongo): + def compatible_with_layout(self, layout): + # It is enough to test one layout for this dictionary, since we're + # only testing that the connection with URI works. + return layout.name == 'flat' + def get_source_str(self, table_name): return ''' diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 36034bab357..9cf5806b9e8 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -106,9 +106,9 @@ VALUES = { LAYOUTS = [ + Layout("flat"), Layout("hashed"), Layout("cache"), - Layout("flat"), Layout("complex_key_hashed"), Layout("complex_key_cache"), Layout("range_hashed"), @@ -117,7 +117,7 @@ LAYOUTS = [ SOURCES = [ SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), - SourceMongoURI("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), + SourceMongoURI("MongoDB_URI", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"), SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", ""), SourceClickHouse("LocalClickHouse", "localhost", "9000", "node", "9000", "default", ""), From 7ba5419190ff8fe38f9d841882d45e27978a5308 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 May 2020 23:24:06 +0300 Subject: [PATCH 050/120] Update PipelineExecutor. 
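This reworks the executor so a pipeline can be driven one step at a time from a single
thread: executeStep() runs processors until either the pipeline finishes (it then returns
false) or the caller-supplied yield flag becomes true. Initialization and finalization are
split into initExecution()/finalizeExecution(), and the per-thread timing counters move
into ExecutorContext so they survive between steps.

A minimal sketch of how such a step loop could be driven (the sink that raises the flag and
the consumeChunkFromSink() helper are hypothetical, not part of this patch):

    std::atomic_bool chunk_ready = false;        /// raised by whatever sink the pipeline writes into
    auto executor = pipeline.execute();          /// QueryPipeline::execute() returns a PipelineExecutorPtr

    while (executor->executeStep(&chunk_ready))  /// returns false once the whole pipeline has finished
    {
        chunk_ready = false;                     /// re-arm the flag before running the next step
        consumeChunkFromSink();                  /// hypothetical: hand the produced data to the caller
    }
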
--- src/Processors/Executors/PipelineExecutor.cpp | 163 +++++++++++------- src/Processors/Executors/PipelineExecutor.h | 26 ++- 2 files changed, 127 insertions(+), 62 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 687736eef61..59f37115333 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -491,6 +491,34 @@ void PipelineExecutor::execute(size_t num_threads) throw; } + finalizeExecution(); +} + +bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) +{ + if (finished) + return false; + + if (!is_execution_initialized) + initExecution(1); + + executeStep(yield_flag); + + if (!finished) + return true; + + /// Execution can be stopped because of exception. Check and rethrow if any. + for (auto & node : graph) + if (node.execution_state->exception) + std::rethrow_exception(node.execution_state->exception); + + finalizeExecution(); + + return false; +} + +void PipelineExecutor::finalizeExecution() +{ if (process_list_element && process_list_element->isKilled()) throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); @@ -506,33 +534,39 @@ void PipelineExecutor::execute(size_t num_threads) throw Exception("Pipeline stuck. Current state:\n" + dumpPipeline(), ErrorCodes::LOGICAL_ERROR); } +void PipelineExecutor::wakeUpExecutor(size_t thread_num) +{ + std::lock_guard guard(executor_contexts[thread_num]->mutex); + executor_contexts[thread_num]->wake_flag = true; + executor_contexts[thread_num]->condvar.notify_one(); +} + void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads) { -#ifndef NDEBUG - UInt64 total_time_ns = 0; - UInt64 execution_time_ns = 0; - UInt64 processing_time_ns = 0; - UInt64 wait_time_ns = 0; + executeStepImpl(thread_num, num_threads); +#ifndef NDEBUG + auto & context = executor_contexts[thread_num]; + LOG_TRACE(log, std::fixed << std::setprecision(3) + << "Thread finished." + << " Total time: " << (context.total_time_ns / 1e9) << " sec." + << " Execution time: " << (context.execution_time_ns / 1e9) << " sec." + << " Processing time: " << (context.processing_time_ns / 1e9) << " sec." + << " Wait time: " << (context.wait_time_ns / 1e9) << " sec."); +#endif +} + +void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, std::atomic_bool * yield_flag) +{ +#ifndef NDEBUG Stopwatch total_time_watch; #endif - ExecutionState * state = nullptr; + auto & context = executor_contexts[thread_num]; + auto & state = context->state; + bool yield = false; - auto prepare_processor = [&](UInt64 pid, Queue & queue) - { - if (!prepareProcessor(pid, thread_num, queue, std::unique_lock(*graph[pid].status_mutex))) - finish(); - }; - - auto wake_up_executor = [&](size_t executor) - { - std::lock_guard guard(executor_contexts[executor]->mutex); - executor_contexts[executor]->wake_flag = true; - executor_contexts[executor]->condvar.notify_one(); - }; - - while (!finished) + while (!finished && !yield) { /// First, find any processor to execute. /// Just travers graph and prepare any processor. 
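The wake-up path extracted into wakeUpExecutor() above is a flag-plus-condition-variable
handshake: the flag is written under the same mutex the sleeping thread holds while
evaluating its wait predicate, so a notification cannot be lost between the predicate check
and the wait. A stand-alone illustration of that idiom (not part of the patch):

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    struct WakeupChannel
    {
        std::mutex mutex;
        std::condition_variable condvar;
        bool wake_flag = false;

        void wakeUp()
        {
            std::lock_guard<std::mutex> guard(mutex);  /// set the flag under the waiter's mutex
            wake_flag = true;
            condvar.notify_one();
        }

        void waitForWakeup(const std::atomic_bool & finished)
        {
            std::unique_lock<std::mutex> lock(mutex);
            condvar.wait(lock, [&] { return finished || wake_flag; });
            wake_flag = false;                         /// consume the wake-up
        }
    };
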
@@ -555,7 +589,7 @@ void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads thread_to_wake = threads_queue.pop_any(); lock.unlock(); - wake_up_executor(thread_to_wake); + wakeUpExecutor(thread_to_wake); } break; @@ -572,21 +606,21 @@ void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads } { - std::unique_lock lock(executor_contexts[thread_num]->mutex); + std::unique_lock lock(context->mutex); - executor_contexts[thread_num]->condvar.wait(lock, [&] + context->condvar.wait(lock, [&] { - return finished || executor_contexts[thread_num]->wake_flag; + return finished || context->wake_flag; }); - executor_contexts[thread_num]->wake_flag = false; + context->wake_flag = false; } } if (finished) break; - while (state) + while (state && !yield) { if (finished) break; @@ -601,7 +635,7 @@ void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads state->job(); #ifndef NDEBUG - execution_time_ns += execution_time_watch.elapsed(); + context->execution_time_ns += execution_time_watch.elapsed(); #endif } @@ -623,8 +657,13 @@ void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads while (auto * task = expand_pipeline_task.load()) doExpandPipeline(task, true); - /// Execute again if can. - prepare_processor(state->processors_id, queue); + /// Prepare processor after execution. + { + auto lock = std::unique_lock(*graph[state->processors_id].status_mutex); + if (!prepareProcessor(state->processors_id, thread_num, queue, std::move(lock))) + finish(); + } + state = nullptr; /// Take local task from queue if has one. @@ -656,7 +695,7 @@ void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads lock.unlock(); - wake_up_executor(thread_to_wake); + wakeUpExecutor(thread_to_wake); } } @@ -666,27 +705,24 @@ void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads } #ifndef NDEBUG - processing_time_ns += processing_time_watch.elapsed(); + context->processing_time_ns += processing_time_watch.elapsed(); #endif + + /// We have executed single processor. Check if we need to yield execution. + if (yield_flag && *yield_flag) + yield = true; } } #ifndef NDEBUG - total_time_ns = total_time_watch.elapsed(); - wait_time_ns = total_time_ns - execution_time_ns - processing_time_ns; - - LOG_TRACE(log, std::fixed << std::setprecision(3) - << "Thread finished." - << " Total time: " << (total_time_ns / 1e9) << " sec." - << " Execution time: " << (execution_time_ns / 1e9) << " sec." - << " Processing time: " << (processing_time_ns / 1e9) << " sec." 
- << " Wait time: " << (wait_time_ns / 1e9) << " sec."); + context->total_time_ns += total_time_watch.elapsed(); + context->wait_time_ns = total_time_ns - execution_time_ns - processing_time_ns; #endif } -void PipelineExecutor::executeImpl(size_t num_threads) +void PipelineExecutor::initExecution(size_t num_threads) { - Stack stack; + is_execution_initialized = true; threads_queue.init(num_threads); task_queue.init(num_threads); @@ -699,25 +735,7 @@ void PipelineExecutor::executeImpl(size_t num_threads) executor_contexts.emplace_back(std::make_unique()); } - auto thread_group = CurrentThread::getGroup(); - - using ThreadsData = std::vector; - ThreadsData threads; - threads.reserve(num_threads); - - bool finished_flag = false; - - SCOPE_EXIT( - if (!finished_flag) - { - finish(); - - for (auto & thread : threads) - if (thread.joinable()) - thread.join(); - } - ); - + Stack stack; addChildlessProcessorsToStack(stack); { @@ -744,9 +762,32 @@ void PipelineExecutor::executeImpl(size_t num_threads) } } } +} + +void PipelineExecutor::executeImpl(size_t num_threads) +{ + initExecution(num_threads); + + using ThreadsData = std::vector; + ThreadsData threads; + threads.reserve(num_threads); + + bool finished_flag = false; + + SCOPE_EXIT( + if (!finished_flag) + { + finish(); + + for (auto & thread : threads) + if (thread.joinable()) + thread.join(); + } + ); if (num_threads > 1) { + auto thread_group = CurrentThread::getGroup(); for (size_t i = 0; i < num_threads; ++i) { diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index c0ce9053e5a..0a15f5cc974 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -35,6 +35,11 @@ public: /// In case of exception during execution throws any occurred. void execute(size_t num_threads); + /// Execute single step. Step will be stopped when yield_flag is true. + /// Execution is happened in single thread. + /// Return true if execution should be continued. + bool executeStep(std::atomic_bool * yield_flag = nullptr); + String getName() const { return "PipelineExecutor"; } const Processors & getProcessors() const { return processors; } @@ -203,6 +208,8 @@ private: ThreadsQueue threads_queue; std::mutex task_queue_mutex; + /// Flag that checks that initExecution was called. + bool is_execution_initialized = false; std::atomic_bool cancelled; std::atomic_bool finished; @@ -235,7 +242,17 @@ private: std::mutex mutex; bool wake_flag = false; - /// std::queue pinned_tasks; + /// Currently processing state. + ExecutionState * state = nullptr; + +#ifndef NDEBUG + /// Time for different processing stages. + UInt64 total_time_ns = 0; + UInt64 execution_time_ns = 0; + UInt64 processing_time_ns = 0; + UInt64 wait_time_ns = 0; +#endif + }; std::vector> executor_contexts; @@ -267,7 +284,14 @@ private: bool prepareProcessor(UInt64 pid, size_t thread_number, Queue & queue, std::unique_lock node_lock); bool doExpandPipeline(ExpandPipelineTask * task, bool processing); + /// Continue executor (in case there are tasks in queue). + void wakeUpExecutor(size_t thread_num); + + void initExecution(size_t num_threads); /// Initialize executor contexts and task_queue. + void finalizeExecution(); /// Check all processors are finished. 
+ void executeImpl(size_t num_threads); + void executeStepImpl(size_t thread_num, size_t num_threads, std::atomic_bool * yield_flag = nullptr); void executeSingleThread(size_t thread_num, size_t num_threads); void finish(); From bd3e7307d068cb6b826be9ba962aef66e6b864c8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 20 May 2020 01:43:16 +0300 Subject: [PATCH 051/120] fix integration test --- tests/integration/test_ttl_move/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index ab348ea0cb1..243268260d0 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -626,7 +626,7 @@ def test_materialize_ttl_in_partition(started_cluster, name, engine): node1.query(""" ALTER TABLE {name} MODIFY TTL - d1 TO DISK 'external' + d1 TO DISK 'external' SETTINGS materialize_ttl_after_modify = 0 """.format(name=name)) time.sleep(0.5) From e8a13842afc9e40c3a693d983d1edb1830ac2dcc Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 20 May 2020 11:02:02 +0200 Subject: [PATCH 052/120] Better fix, previous fix was wrong (was leaving the cycle by eof condition) --- src/Storages/Kafka/KafkaBlockInputStream.cpp | 9 ++---- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 31 +++++++++++++------ .../Kafka/ReadBufferFromKafkaConsumer.h | 5 ++- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 4da5de69085..a2403e66c50 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -134,12 +134,7 @@ Block KafkaBlockInputStream::readImpl() auto new_rows = read_kafka_message(); - // we can't store the offser after rebalance, when consumer is stalled, or if it's terminating - if (!buffer->storeLastReadMessageOffset()) - { - total_rows = 0; - break; - } + buffer->storeLastReadMessageOffset(); auto topic = buffer->currentTopic(); auto key = buffer->currentKey(); @@ -177,7 +172,7 @@ Block KafkaBlockInputStream::readImpl() } } - if (total_rows == 0) + if (buffer->polledDataUnusable() || total_rows == 0) return Block(); /// MATERIALIZED columns can be added here, but I think diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index aeda60e90a7..ad9d660a989 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -250,11 +250,23 @@ void ReadBufferFromKafkaConsumer::resetToLastCommitted(const char * msg) /// Do commit messages implicitly after we processed the previous batch. bool ReadBufferFromKafkaConsumer::nextImpl() { + /// NOTE: ReadBuffer was implemented with an immutable underlying contents in mind. /// If we failed to poll any message once - don't try again. /// Otherwise, the |poll_timeout| expectations get flawn. - if (stalled || stopped || !allowed || rebalance_happened) + + // we can react on stop only during fetching data + // after block is formed (i.e. during copying data to MV / commiting) we ignore stop attempts + if (stopped) + { + was_stopped = true; + offsets_stored = 0; return false; + } + + if (stalled || was_stopped || !allowed || rebalance_happened) + return false; + if (current == messages.end()) { @@ -267,7 +279,13 @@ bool ReadBufferFromKafkaConsumer::nextImpl() /// Don't drop old messages immediately, since we may need them for virtual columns. 
auto new_messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(poll_timeout)); - if (rebalance_happened) + if (stopped) + { + was_stopped = true; + offsets_stored = 0; + return false; + } + else if (rebalance_happened) { if (!new_messages.empty()) { @@ -336,17 +354,12 @@ bool ReadBufferFromKafkaConsumer::nextImpl() return true; } -bool ReadBufferFromKafkaConsumer::storeLastReadMessageOffset() +void ReadBufferFromKafkaConsumer::storeLastReadMessageOffset() { - if (!stalled && !rebalance_happened && !stopped) + if (!stalled && !was_stopped && !rebalance_happened) { consumer->store_offset(*(current - 1)); ++offsets_stored; - return true; - } - else - { - return false; } } diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index 435b5f2a7c8..46dace827d0 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -37,8 +37,9 @@ public: auto pollTimeout() const { return poll_timeout; } bool hasMorePolledMessages() const; + bool polledDataUnusable() const { return (was_stopped || rebalance_happened); } - bool storeLastReadMessageOffset(); + void storeLastReadMessageOffset(); void resetToLastCommitted(const char * msg); // Return values for the message that's being read. @@ -68,6 +69,8 @@ private: bool rebalance_happened = false; + bool was_stopped = false; + // order is important, need to be destructed before consumer cppkafka::TopicPartitionList assignment; const Names topics; From 3ed5d9434a9f1ca8c1a9a82b59a9fec9f75c8070 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 30 Apr 2020 10:21:19 +0300 Subject: [PATCH 053/120] Added `move_ttl_info` to `system.parts`. --- src/Storages/System/StorageSystemParts.cpp | 93 ++++++++++++++-------- 1 file changed, 60 insertions(+), 33 deletions(-) diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 2418594899e..39246498849 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -17,41 +19,51 @@ namespace DB StorageSystemParts::StorageSystemParts(const std::string & name_) : StorageSystemPartsBase(name_, { - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, - {"is_frozen", std::make_shared()}, + {"partition", std::make_shared()}, + {"name", std::make_shared()}, + {"part_type", std::make_shared()}, + {"active", std::make_shared()}, + {"marks", std::make_shared()}, + {"rows", std::make_shared()}, + {"bytes_on_disk", 
std::make_shared()}, + {"data_compressed_bytes", std::make_shared()}, + {"data_uncompressed_bytes", std::make_shared()}, + {"marks_bytes", std::make_shared()}, + {"modification_time", std::make_shared()}, + {"remove_time", std::make_shared()}, + {"refcount", std::make_shared()}, + {"min_date", std::make_shared()}, + {"max_date", std::make_shared()}, + {"min_time", std::make_shared()}, + {"max_time", std::make_shared()}, + {"partition_id", std::make_shared()}, + {"min_block_number", std::make_shared()}, + {"max_block_number", std::make_shared()}, + {"level", std::make_shared()}, + {"data_version", std::make_shared()}, + {"primary_key_bytes_in_memory", std::make_shared()}, + {"primary_key_bytes_in_memory_allocated", std::make_shared()}, + {"is_frozen", std::make_shared()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"engine", std::make_shared()}, + {"disk_name", std::make_shared()}, + {"path", std::make_shared()}, - {"hash_of_all_files", std::make_shared()}, - {"hash_of_uncompressed_files", std::make_shared()}, - {"uncompressed_hash_of_compressed_files", std::make_shared()} + {"hash_of_all_files", std::make_shared()}, + {"hash_of_uncompressed_files", std::make_shared()}, + {"uncompressed_hash_of_compressed_files", std::make_shared()}, + + {"move_ttl_info", std::make_shared( + std::make_shared( + DataTypes({ + std::make_shared(), + std::make_shared(), + std::make_shared() + }), + Strings({"expression", "min", "max"}) + ))}, } ) { @@ -128,6 +140,21 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto checksum = helper.uncompressed_hash_of_compressed_files; columns_[i++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)); + + /// move_ttl_info + { + Array move_ttl_info_array; + move_ttl_info_array.reserve(part->ttl_infos.moves_ttl.size()); + for (const auto & [expression, move_ttl_info] : part->ttl_infos.moves_ttl) + { + Tuple move_ttl_info_tuple; + move_ttl_info_tuple.push_back(expression); + move_ttl_info_tuple.push_back(static_cast(move_ttl_info.min)); + move_ttl_info_tuple.push_back(static_cast(move_ttl_info.max)); + move_ttl_info_array.emplace_back(std::move(move_ttl_info_tuple)); + } + columns_[i++]->insert(move_ttl_info_array); + } } } From 6f68979662cad8d6005d500e8cbcd865aa5c0ec3 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 11 May 2020 10:31:26 +0300 Subject: [PATCH 054/120] Switched `system.parts.move_ttl_info` to separate arrays. 
--- src/Storages/System/StorageSystemParts.cpp | 31 +++++++++------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 39246498849..8c1fdd667c4 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -55,15 +54,9 @@ StorageSystemParts::StorageSystemParts(const std::string & name_) {"hash_of_uncompressed_files", std::make_shared()}, {"uncompressed_hash_of_compressed_files", std::make_shared()}, - {"move_ttl_info", std::make_shared( - std::make_shared( - DataTypes({ - std::make_shared(), - std::make_shared(), - std::make_shared() - }), - Strings({"expression", "min", "max"}) - ))}, + {"move_ttl_info.expression", std::make_shared(std::make_shared())}, + {"move_ttl_info.min", std::make_shared(std::make_shared())}, + {"move_ttl_info.max", std::make_shared(std::make_shared())}, } ) { @@ -143,17 +136,19 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto /// move_ttl_info { - Array move_ttl_info_array; - move_ttl_info_array.reserve(part->ttl_infos.moves_ttl.size()); + Array expression_array, min_array, max_array; + expression_array.reserve(part->ttl_infos.moves_ttl.size()); + min_array.reserve(part->ttl_infos.moves_ttl.size()); + max_array.reserve(part->ttl_infos.moves_ttl.size()); for (const auto & [expression, move_ttl_info] : part->ttl_infos.moves_ttl) { - Tuple move_ttl_info_tuple; - move_ttl_info_tuple.push_back(expression); - move_ttl_info_tuple.push_back(static_cast(move_ttl_info.min)); - move_ttl_info_tuple.push_back(static_cast(move_ttl_info.max)); - move_ttl_info_array.emplace_back(std::move(move_ttl_info_tuple)); + expression_array.emplace_back(expression); + min_array.push_back(static_cast(move_ttl_info.min)); + max_array.push_back(static_cast(move_ttl_info.max)); } - columns_[i++]->insert(move_ttl_info_array); + columns_[i++]->insert(expression_array); + columns_[i++]->insert(min_array); + columns_[i++]->insert(max_array); } } } From 02b52df86dac2967a3118e41536d4f047c54d84e Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 11 May 2020 10:35:13 +0300 Subject: [PATCH 055/120] Added `delete_ttl_info_min` and `delete_ttl_info_max` to `system.parts`. 
--- src/Storages/System/StorageSystemParts.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 8c1fdd667c4..a5d5da5bd22 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -54,6 +54,9 @@ StorageSystemParts::StorageSystemParts(const std::string & name_) {"hash_of_uncompressed_files", std::make_shared()}, {"uncompressed_hash_of_compressed_files", std::make_shared()}, + {"delete_ttl_info_min", std::make_shared(std::make_shared())}, + {"delete_ttl_info_max", std::make_shared(std::make_shared())}, + {"move_ttl_info.expression", std::make_shared(std::make_shared())}, {"move_ttl_info.min", std::make_shared(std::make_shared())}, {"move_ttl_info.max", std::make_shared(std::make_shared())}, @@ -134,6 +137,12 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto checksum = helper.uncompressed_hash_of_compressed_files; columns_[i++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)); + /// delete_ttl_info + { + columns_[i++]->insert(static_cast(part->ttl_infos.table_ttl.min)); + columns_[i++]->insert(static_cast(part->ttl_infos.table_ttl.max)); + } + /// move_ttl_info { Array expression_array, min_array, max_array; From da33858d392c6b08f608b12df1b30a4be236efe7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 18 May 2020 19:53:14 +0300 Subject: [PATCH 056/120] Update StorageSystemParts.cpp --- src/Storages/System/StorageSystemParts.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index a5d5da5bd22..745d2938d86 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -54,8 +54,8 @@ StorageSystemParts::StorageSystemParts(const std::string & name_) {"hash_of_uncompressed_files", std::make_shared()}, {"uncompressed_hash_of_compressed_files", std::make_shared()}, - {"delete_ttl_info_min", std::make_shared(std::make_shared())}, - {"delete_ttl_info_max", std::make_shared(std::make_shared())}, + {"delete_ttl_info_min", std::make_shared()}, + {"delete_ttl_info_max", std::make_shared()}, {"move_ttl_info.expression", std::make_shared(std::make_shared())}, {"move_ttl_info.min", std::make_shared(std::make_shared())}, From fcf58fa4a15ea6b8b1d4cb21ac4785f12f19ad40 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 May 2020 17:04:24 +0300 Subject: [PATCH 057/120] Fix clang build. --- src/Processors/Executors/PipelineExecutor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 59f37115333..5825a8c90d6 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -549,10 +549,10 @@ void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads auto & context = executor_contexts[thread_num]; LOG_TRACE(log, std::fixed << std::setprecision(3) << "Thread finished." - << " Total time: " << (context.total_time_ns / 1e9) << " sec." - << " Execution time: " << (context.execution_time_ns / 1e9) << " sec." - << " Processing time: " << (context.processing_time_ns / 1e9) << " sec." - << " Wait time: " << (context.wait_time_ns / 1e9) << " sec."); + << " Total time: " << (context->total_time_ns / 1e9) << " sec." 
+ << " Execution time: " << (context->execution_time_ns / 1e9) << " sec." + << " Processing time: " << (context->processing_time_ns / 1e9) << " sec." + << " Wait time: " << (context->wait_time_ns / 1e9) << " sec."); #endif } From 555087512ae4164cac6f93aa08a2f472cd356f0a Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Wed, 20 May 2020 18:28:44 +0300 Subject: [PATCH 058/120] Update debian/rules Co-authored-by: alesapin --- debian/rules | 1 + 1 file changed, 1 insertion(+) diff --git a/debian/rules b/debian/rules index ad7b73380f4..7218e196baa 100755 --- a/debian/rules +++ b/debian/rules @@ -101,6 +101,7 @@ override_dh_clean: dh_clean # -X contrib override_dh_strip: +#https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS))) dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg endif From 138154ba6aceecf621f7095aab5a80c719abd182 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Wed, 20 May 2020 18:42:45 +0300 Subject: [PATCH 059/120] Fix memory-leak in registerDiskS3 during config->createView invocations. --- src/Disks/S3/registerDiskS3.cpp | 38 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 2b72f872dd2..81879800b7f 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -37,13 +37,14 @@ namespace void checkRemoveAccess(IDisk & disk) { disk.remove("test_acl"); } - std::shared_ptr getProxyResolverConfiguration(const Poco::Util::AbstractConfiguration * proxy_resolver_config) + std::shared_ptr getProxyResolverConfiguration( + const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config) { - auto endpoint = Poco::URI(proxy_resolver_config->getString("endpoint")); - auto proxy_scheme = proxy_resolver_config->getString("proxy_scheme"); + auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint")); + auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme"); if (proxy_scheme != "http" && proxy_scheme != "https") throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS); - auto proxy_port = proxy_resolver_config->getUInt("proxy_port"); + auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port"); LOG_DEBUG( &Logger::get("DiskS3"), "Configured proxy resolver: " << endpoint.toString() << ", Scheme: " << proxy_scheme << ", Port: " << proxy_port); @@ -51,16 +52,17 @@ namespace return std::make_shared(endpoint, proxy_scheme, proxy_port); } - std::shared_ptr getProxyListConfiguration(const Poco::Util::AbstractConfiguration * proxy_config) + std::shared_ptr getProxyListConfiguration( + const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config) { std::vector keys; - proxy_config->keys(keys); + proxy_config.keys(prefix, keys); std::vector proxies; for (const auto & key : keys) if (startsWith(key, "uri")) { - Poco::URI proxy_uri(proxy_config->getString(key)); + Poco::URI proxy_uri(proxy_config.getString(prefix + "." 
+ key)); if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https") throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); @@ -78,25 +80,23 @@ namespace return nullptr; } - std::shared_ptr getProxyConfiguration(const Poco::Util::AbstractConfiguration * config) + std::shared_ptr getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config) { - if (!config->has("proxy")) + if (!config.has(prefix + ".proxy")) return nullptr; - const auto * proxy_config = config->createView("proxy"); - std::vector config_keys; - proxy_config->keys(config_keys); + config.keys(prefix + ".proxy", config_keys); if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver")) { if (resolver_configs > 1) throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS); - return getProxyResolverConfiguration(proxy_config->createView("resolver")); + return getProxyResolverConfiguration(prefix + ".proxy.resolver", config); } - return getProxyListConfiguration(proxy_config); + return getProxyListConfiguration(prefix + ".proxy", config); } } @@ -107,27 +107,25 @@ void registerDiskS3(DiskFactory & factory) const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context) -> DiskPtr { - const auto * disk_config = config.createView(config_prefix); - Poco::File disk{context.getPath() + "disks/" + name}; disk.createDirectories(); Aws::Client::ClientConfiguration cfg; - S3::URI uri(Poco::URI(disk_config->getString("endpoint"))); + S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); cfg.endpointOverride = uri.endpoint; - auto proxy_config = getProxyConfiguration(disk_config); + auto proxy_config = getProxyConfiguration(config_prefix, config); if (proxy_config) cfg.perRequestConfiguration = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); }; auto client = S3::ClientFactory::instance().create( cfg, - disk_config->getString("access_key_id", ""), - disk_config->getString("secret_access_key", "")); + config.getString(config_prefix + ".access_key_id", ""), + config.getString(config_prefix + ".secret_access_key", "")); String metadata_path = context.getPath() + "disks/" + name + "/"; From d98c1589edf16281f687099d9e0772a1015ba174 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Wed, 20 May 2020 19:01:05 +0300 Subject: [PATCH 060/120] Better error handling in Proxy Resolver. 
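With this change a resolver reply other than 200 OK raises BAD_ARGUMENTS instead of its body
being silently used as a proxy host, and the request is an explicit GET for the configured
path (/hostname in the integration test) rather than a bare request line.

For reference, a successful round-trip with the test settings looks roughly like this
(the final string assembly is shown for illustration only; the Poco calls are from the code):

    /// GET http://resolver:8080/hostname  ->  200 OK, body "proxy1"
    String proxy_host;
    Poco::StreamCopier::copyToString(response_body_stream, proxy_host);

    /// The completed proxy URL combines the static scheme and port from the disk config
    /// with the resolved host, e.g. http://proxy1:8888
    String proxy_url = proxy_scheme + "://" + proxy_host + ":" + std::to_string(proxy_port);
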
--- src/Disks/S3/ProxyResolverConfiguration.cpp | 12 ++++++++++-- src/Disks/S3/ProxyResolverConfiguration.h | 2 +- .../configs/config.d/storage_conf.xml | 4 ++-- .../test_s3_with_proxy/proxy-resolver/resolver.py | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Disks/S3/ProxyResolverConfiguration.cpp b/src/Disks/S3/ProxyResolverConfiguration.cpp index a574809596f..c36432d933e 100644 --- a/src/Disks/S3/ProxyResolverConfiguration.cpp +++ b/src/Disks/S3/ProxyResolverConfiguration.cpp @@ -7,6 +7,11 @@ #include #include +namespace DB::ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + namespace DB::S3 { ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_) @@ -30,13 +35,16 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig Aws::Client::ClientConfigurationPerRequest cfg; try { - /// It should be just empty GET / request. - Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_1_1); + /// It should be just empty GET request. + Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); session->sendRequest(request); Poco::Net::HTTPResponse response; auto & response_body_stream = session->receiveResponse(response); + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) + throw Exception("Proxy resolver returned not OK status: " + response.getReason(), ErrorCodes::BAD_ARGUMENTS); + String proxy_host; /// Read proxy host as string from response body. Poco::StreamCopier::copyToString(response_body_stream, proxy_host); diff --git a/src/Disks/S3/ProxyResolverConfiguration.h b/src/Disks/S3/ProxyResolverConfiguration.h index 0b23ae77c4a..113ee6ea035 100644 --- a/src/Disks/S3/ProxyResolverConfiguration.h +++ b/src/Disks/S3/ProxyResolverConfiguration.h @@ -6,7 +6,7 @@ namespace DB::S3 { /** * Proxy configuration where proxy host is obtained each time from specified endpoint. - * For each request to S3 it makes GET request to specified endpoint and reads proxy host from a response body. + * For each request to S3 it makes GET request to specified endpoint URL and reads proxy host from a response body. * Specified scheme and port added to obtained proxy host to form completed proxy URL. */ class ProxyResolverConfiguration : public ProxyConfiguration diff --git a/tests/integration/test_s3_with_proxy/configs/config.d/storage_conf.xml b/tests/integration/test_s3_with_proxy/configs/config.d/storage_conf.xml index a83c875b134..ec543d64cdb 100644 --- a/tests/integration/test_s3_with_proxy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_s3_with_proxy/configs/config.d/storage_conf.xml @@ -18,12 +18,12 @@ minio123 - http://resolver:8080 + http://resolver:8080/hostname http 8888 diff --git a/tests/integration/test_s3_with_proxy/proxy-resolver/resolver.py b/tests/integration/test_s3_with_proxy/proxy-resolver/resolver.py index ecafe92cb83..520c018cbad 100644 --- a/tests/integration/test_s3_with_proxy/proxy-resolver/resolver.py +++ b/tests/integration/test_s3_with_proxy/proxy-resolver/resolver.py @@ -2,7 +2,7 @@ import bottle import random -@bottle.route('/') +@bottle.route('/hostname') def index(): if random.randrange(2) == 0: return 'proxy1' From 6f50700f3524b943cd66612ace6c55cdb2c02604 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 May 2020 21:53:18 +0300 Subject: [PATCH 061/120] Add PullingOutputFormat and PullingPipelineExecutor. 
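The two new executors give QueryPipeline a pull interface without going through
BlockInputStream: PullingPipelineExecutor runs everything in the calling thread, built on the
step-wise PipelineExecutor::executeStep() and the new PullingOutputFormat, while
PullingAsyncPipelineExecutor keeps the previous behaviour of running the pipeline in an extra
thread behind LazyOutputFormat. PipelineExecutingBlockInputStream now picks between them based
on pipeline.getNumThreads().

A sketch of the intended call pattern, mirroring the comments in the new headers (the
buildPipeline() and process() helpers are placeholders):

    QueryPipeline pipeline = buildPipeline();

    if (pipeline.getNumThreads() > 1)
    {
        PullingAsyncPipelineExecutor executor(pipeline);       /// executes in a background thread
        Block block;
        while (executor.pull(block, /* milliseconds = */ 100)) /// empty block means the timeout expired
            if (block)
                process(block);
    }
    else
    {
        PullingPipelineExecutor executor(pipeline);            /// single-threaded, no extra thread
        Block block;
        while (executor.pull(block))
            process(block);
    }
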
--- .../PipelineExecutingBlockInputStream.cpp | 47 +++- .../PipelineExecutingBlockInputStream.h | 5 + src/Processors/Executors/PipelineExecutor.cpp | 6 +- src/Processors/Executors/PipelineExecutor.h | 8 +- .../PullingAsyncPipelineExecutor.cpp | 203 ++++++++++++++++++ .../Executors/PullingAsyncPipelineExecutor.h | 58 +++++ .../Executors/PullingPipelineExecutor.cpp | 129 ++--------- .../Executors/PullingPipelineExecutor.h | 22 +- src/Processors/Formats/LazyOutputFormat.h | 2 +- .../Formats/PullingOutputFormat.cpp | 39 ++++ src/Processors/Formats/PullingOutputFormat.h | 45 ++++ src/Processors/ya.make | 1 + 12 files changed, 429 insertions(+), 136 deletions(-) create mode 100644 src/Processors/Executors/PullingAsyncPipelineExecutor.cpp create mode 100644 src/Processors/Executors/PullingAsyncPipelineExecutor.h create mode 100644 src/Processors/Formats/PullingOutputFormat.cpp create mode 100644 src/Processors/Formats/PullingOutputFormat.h diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp index 75324cb25b8..d39938b57c5 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp @@ -1,7 +1,7 @@ #include +#include #include #include -#include namespace DB { @@ -20,27 +20,54 @@ PipelineExecutingBlockInputStream::~PipelineExecutingBlockInputStream() = defaul Block PipelineExecutingBlockInputStream::getHeader() const { - return executor ? executor->getHeader() - : pipeline->getHeader(); + if (executor) + return executor->getHeader(); + + if (async_executor) + return async_executor->getHeader(); + + return pipeline->getHeader(); +} + +void PipelineExecutingBlockInputStream::createExecutor() +{ + if (pipeline->getNumThreads() > 1) + async_executor = std::make_unique(*pipeline); + else + executor = std::make_unique(*pipeline); + + is_execution_started = true; } void PipelineExecutingBlockInputStream::readPrefixImpl() { - executor = std::make_unique(*pipeline); + createExecutor(); } Block PipelineExecutingBlockInputStream::readImpl() { - if (!executor) - executor = std::make_unique(*pipeline); + if (!is_execution_started) + createExecutor(); Block block; - while (executor->pull(block)) + bool can_continue = true; + while (can_continue) { + if (executor) + can_continue = executor->pull(block); + else + can_continue = async_executor->pull(block); + if (block) return block; } + totals = executor ? executor->getTotalsBlock() + : async_executor->getTotalsBlock(); + + extremes = executor ? 
executor->getExtremesBlock() + : async_executor->getExtremesBlock(); + return {}; } @@ -70,20 +97,20 @@ void PipelineExecutingBlockInputStream::cancel(bool kill) void PipelineExecutingBlockInputStream::setProgressCallback(const ProgressCallback & callback) { - throwIfExecutionStarted(executor != nullptr, "setProgressCallback"); + throwIfExecutionStarted(is_execution_started, "setProgressCallback"); pipeline->setProgressCallback(callback); } void PipelineExecutingBlockInputStream::setProcessListElement(QueryStatus * elem) { - throwIfExecutionStarted(executor != nullptr, "setProcessListElement"); + throwIfExecutionStarted(is_execution_started, "setProcessListElement"); IBlockInputStream::setProcessListElement(elem); pipeline->setProcessListElement(elem); } void PipelineExecutingBlockInputStream::setLimits(const IBlockInputStream::LocalLimits & limits_) { - throwIfExecutionStarted(executor != nullptr, "setLimits"); + throwIfExecutionStarted(is_execution_started, "setLimits"); if (limits_.mode == LimitsMode::LIMITS_TOTAL) throw Exception("Total limits are not supported by PipelineExecutingBlockInputStream", diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.h b/src/Processors/Executors/PipelineExecutingBlockInputStream.h index 7555e1dec97..11ccb71f17a 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.h +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.h @@ -5,6 +5,7 @@ namespace DB { class QueryPipeline; +class PullingAsyncPipelineExecutor; class PullingPipelineExecutor; /// Implement IBlockInputStream from QueryPipeline. @@ -34,6 +35,10 @@ protected: private: std::unique_ptr pipeline; std::unique_ptr executor; + std::unique_ptr async_executor; + bool is_execution_started = false; + + void createExecutor(); }; } diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 5825a8c90d6..a5a43a9f546 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -500,7 +500,7 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) return false; if (!is_execution_initialized) - initExecution(1); + initializeExecution(1); executeStep(yield_flag); @@ -720,7 +720,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, st #endif } -void PipelineExecutor::initExecution(size_t num_threads) +void PipelineExecutor::initializeExecution(size_t num_threads) { is_execution_initialized = true; @@ -766,7 +766,7 @@ void PipelineExecutor::initExecution(size_t num_threads) void PipelineExecutor::executeImpl(size_t num_threads) { - initExecution(num_threads); + initializeExecution(num_threads); using ThreadsData = std::vector; ThreadsData threads; diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 0a15f5cc974..395f42a3316 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -36,12 +36,10 @@ public: void execute(size_t num_threads); /// Execute single step. Step will be stopped when yield_flag is true. - /// Execution is happened in single thread. + /// Execution is happened in a single thread. /// Return true if execution should be continued. bool executeStep(std::atomic_bool * yield_flag = nullptr); - String getName() const { return "PipelineExecutor"; } - const Processors & getProcessors() const { return processors; } /// Cancel execution. May be called from another thread. 
@@ -208,7 +206,7 @@ private: ThreadsQueue threads_queue; std::mutex task_queue_mutex; - /// Flag that checks that initExecution was called. + /// Flag that checks that initializeExecution was called. bool is_execution_initialized = false; std::atomic_bool cancelled; std::atomic_bool finished; @@ -287,7 +285,7 @@ private: /// Continue executor (in case there are tasks in queue). void wakeUpExecutor(size_t thread_num); - void initExecution(size_t num_threads); /// Initialize executor contexts and task_queue. + void initializeExecution(size_t num_threads); /// Initialize executor contexts and task_queue. void finalizeExecution(); /// Check all processors are finished. void executeImpl(size_t num_threads); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp new file mode 100644 index 00000000000..003508ab86f --- /dev/null +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +struct PullingAsyncPipelineExecutor::Data +{ + PipelineExecutorPtr executor; + std::exception_ptr exception; + std::atomic_bool is_executed = false; + std::atomic_bool has_exception = false; + ThreadFromGlobalPool thread; + + ~Data() + { + if (thread.joinable()) + thread.join(); + } + + void rethrowExceptionIfHas() + { + if (has_exception) + { + has_exception = false; + std::rethrow_exception(std::move(exception)); + } + } +}; + +PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) +{ + lazy_format = std::make_shared(pipeline.getHeader()); + pipeline.setOutput(lazy_format); +} + +PullingAsyncPipelineExecutor::~PullingAsyncPipelineExecutor() +{ + try + { + cancel(); + } + catch (...) + { + tryLogCurrentException("PullingAsyncPipelineExecutor"); + } +} + +const Block & PullingAsyncPipelineExecutor::getHeader() const +{ + return lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader(); +} + +static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) +{ + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT( + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + ); + + setThreadName("QueryPipelineEx"); + + try + { + data.executor->execute(num_threads); + } + catch (...) + { + data.exception = std::current_exception(); + data.has_exception = true; + } +} + + +bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) +{ + if (!data) + { + data = std::make_unique(); + data->executor = pipeline.execute(); + + auto func = [&, thread_group = CurrentThread::getGroup()]() + { + threadFunction(*data, thread_group, pipeline.getNumThreads()); + }; + + data->thread = ThreadFromGlobalPool(std::move(func)); + } + + if (data->has_exception) + { + /// Finish lazy format in case of exception. Otherwise thread.join() may hung. + lazy_format->finish(); + data->has_exception = false; + std::rethrow_exception(std::move(data->exception)); + } + + if (lazy_format->isFinished()) + { + data->is_executed = true; + /// Wait thread ant rethrow exception if any. + cancel(); + return false; + } + + chunk = lazy_format->getChunk(milliseconds); + return true; +} + +bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) +{ + Chunk chunk; + + if (!pull(chunk, milliseconds)) + return false; + + if (!chunk) + { + /// In case if timeout exceeded. 
+ block.clear(); + return true; + } + + block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); + + if (auto chunk_info = chunk.getChunkInfo()) + { + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } + } + + return true; +} + +void PullingAsyncPipelineExecutor::cancel() +{ + /// Cancel execution if it wasn't finished. + if (data && !data->is_executed && data->executor) + data->executor->cancel(); + + /// Finish lazy format. Otherwise thread.join() may hung. + if (!lazy_format->isFinished()) + lazy_format->finish(); + + /// Join thread here to wait for possible exception. + if (data && data->thread.joinable()) + data->thread.join(); + + /// Rethrow exception to not swallow it in destructor. + if (data) + data->rethrowExceptionIfHas(); +} + +Chunk PullingAsyncPipelineExecutor::getTotals() +{ + return lazy_format->getTotals(); +} + +Chunk PullingAsyncPipelineExecutor::getExtremes() +{ + return lazy_format->getExtremes(); +} + +Block PullingAsyncPipelineExecutor::getTotalsBlock() +{ + auto totals = getTotals(); + + if (totals.empty()) + return {}; + + const auto & header = lazy_format->getPort(IOutputFormat::PortKind::Totals).getHeader(); + return header.cloneWithColumns(totals.detachColumns()); +} + +Block PullingAsyncPipelineExecutor::getExtremesBlock() +{ + auto extremes = getExtremes(); + + if (extremes.empty()) + return {}; + + const auto & header = lazy_format->getPort(IOutputFormat::PortKind::Extremes).getHeader(); + return header.cloneWithColumns(extremes.detachColumns()); +} + +BlockStreamProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() +{ + return lazy_format->getProfileInfo(); +} + +} diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.h b/src/Processors/Executors/PullingAsyncPipelineExecutor.h new file mode 100644 index 00000000000..2ce75aecab7 --- /dev/null +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.h @@ -0,0 +1,58 @@ +#pragma once +#include + +namespace DB +{ + +class QueryPipeline; +class Block; +class Chunk; +class LazyOutputFormat; +struct BlockStreamProfileInfo; + +/// Asynchronous pulling executor for QueryPipeline. +/// Always creates extra thread. If query is executed in single thread, use PullingPipelineExecutor. +/// Typical usage is: +/// +/// PullingAsyncPipelineExecutor executor(query_pipeline); +/// while (executor.pull(chunk, timeout)) +/// ... process chunk ... +class PullingAsyncPipelineExecutor +{ +public: + explicit PullingAsyncPipelineExecutor(QueryPipeline & pipeline_); + ~PullingAsyncPipelineExecutor(); + + /// Get structure of returned block or chunk. + const Block & getHeader() const; + + /// Methods return false if query is finished. + /// If milliseconds > 0, returns empty object and `true` after timeout exceeded. Otherwise method is blocking. + /// You can use any pull method. + bool pull(Chunk & chunk, uint64_t milliseconds = 0); + bool pull(Block & block, uint64_t milliseconds = 0); + + /// Stop execution. It is not necessary, but helps to stop execution before executor is destroyed. + void cancel(); + + /// Get totals and extremes. Returns empty chunk if doesn't have any. + Chunk getTotals(); + Chunk getExtremes(); + + /// Get totals and extremes. Returns empty chunk if doesn't have any. + Block getTotalsBlock(); + Block getExtremesBlock(); + + /// Get query profile info. 
+ BlockStreamProfileInfo & getProfileInfo(); + + /// Internal executor data. + struct Data; + +private: + QueryPipeline & pipeline; + std::shared_ptr lazy_format; + std::unique_ptr data; +}; + +} diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 223c22e59db..375f6c9ed0e 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -1,43 +1,15 @@ #include -#include -#include -#include +#include #include - -#include -#include +#include namespace DB { -struct PullingPipelineExecutor::Data -{ - PipelineExecutorPtr executor; - std::exception_ptr exception; - std::atomic_bool is_executed = false; - std::atomic_bool has_exception = false; - ThreadFromGlobalPool thread; - - ~Data() - { - if (thread.joinable()) - thread.join(); - } - - void rethrowExceptionIfHas() - { - if (has_exception) - { - has_exception = false; - std::rethrow_exception(std::move(exception)); - } - } -}; - PullingPipelineExecutor::PullingPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) { - lazy_format = std::make_shared(pipeline.getHeader()); - pipeline.setOutput(lazy_format); + pulling_format = std::make_shared(pipeline.getHeader(), has_data_flag); + pipeline.setOutput(pulling_format); } PullingPipelineExecutor::~PullingPipelineExecutor() @@ -54,73 +26,26 @@ PullingPipelineExecutor::~PullingPipelineExecutor() const Block & PullingPipelineExecutor::getHeader() const { - return lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader(); + return pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader(); } -static void threadFunction(PullingPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) +bool PullingPipelineExecutor::pull(Chunk & chunk) { - if (thread_group) - CurrentThread::attachTo(thread_group); + if (!executor) + executor = pipeline.execute(); - SCOPE_EXIT( - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - ); - - setThreadName("QueryPipelineEx"); - - try - { - data.executor->execute(num_threads); - } - catch (...) - { - data.exception = std::current_exception(); - data.has_exception = true; - } -} - - -bool PullingPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) -{ - if (!data) - { - data = std::make_unique(); - data->executor = pipeline.execute(); - - auto func = [&, thread_group = CurrentThread::getGroup()]() - { - threadFunction(*data, thread_group, pipeline.getNumThreads()); - }; - - data->thread = ThreadFromGlobalPool(std::move(func)); - } - - if (data->has_exception) - { - /// Finish lazy format in case of exception. Otherwise thread.join() may hung. - lazy_format->finish(); - data->has_exception = false; - std::rethrow_exception(std::move(data->exception)); - } - - if (lazy_format->isFinished()) - { - data->is_executed = true; - /// Wait thread ant rethrow exception if any. 
- cancel(); + if (!executor->executeStep(&has_data_flag)) return false; - } - chunk = lazy_format->getChunk(milliseconds); + chunk = pulling_format->getChunk(); return true; } -bool PullingPipelineExecutor::pull(Block & block, uint64_t milliseconds) +bool PullingPipelineExecutor::pull(Block & block) { Chunk chunk; - if (!pull(chunk, milliseconds)) + if (!pull(chunk)) return false; if (!chunk) @@ -130,7 +55,7 @@ bool PullingPipelineExecutor::pull(Block & block, uint64_t milliseconds) return true; } - block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); + block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); if (auto chunk_info = chunk.getChunkInfo()) { @@ -147,30 +72,22 @@ bool PullingPipelineExecutor::pull(Block & block, uint64_t milliseconds) void PullingPipelineExecutor::cancel() { /// Cancel execution if it wasn't finished. - if (data && !data->is_executed && data->executor) - data->executor->cancel(); + if (executor) + executor->cancel(); - /// Finish lazy format. Otherwise thread.join() may hung. - if (!lazy_format->isFinished()) - lazy_format->finish(); - - /// Join thread here to wait for possible exception. - if (data && data->thread.joinable()) - data->thread.join(); - - /// Rethrow exception to not swallow it in destructor. - if (data) - data->rethrowExceptionIfHas(); + /// Read all data and finish execution. + Chunk chunk; + while (pull(chunk)); } Chunk PullingPipelineExecutor::getTotals() { - return lazy_format->getTotals(); + return pulling_format->getTotals(); } Chunk PullingPipelineExecutor::getExtremes() { - return lazy_format->getExtremes(); + return pulling_format->getExtremes(); } Block PullingPipelineExecutor::getTotalsBlock() @@ -180,7 +97,7 @@ Block PullingPipelineExecutor::getTotalsBlock() if (totals.empty()) return {}; - const auto & header = lazy_format->getPort(IOutputFormat::PortKind::Totals).getHeader(); + const auto & header = pulling_format->getPort(IOutputFormat::PortKind::Totals).getHeader(); return header.cloneWithColumns(totals.detachColumns()); } @@ -191,13 +108,13 @@ Block PullingPipelineExecutor::getExtremesBlock() if (extremes.empty()) return {}; - const auto & header = lazy_format->getPort(IOutputFormat::PortKind::Extremes).getHeader(); + const auto & header = pulling_format->getPort(IOutputFormat::PortKind::Extremes).getHeader(); return header.cloneWithColumns(extremes.detachColumns()); } BlockStreamProfileInfo & PullingPipelineExecutor::getProfileInfo() { - return lazy_format->getProfileInfo(); + return pulling_format->getProfileInfo(); } } diff --git a/src/Processors/Executors/PullingPipelineExecutor.h b/src/Processors/Executors/PullingPipelineExecutor.h index 7a093fe0022..67ef4f7cf71 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.h +++ b/src/Processors/Executors/PullingPipelineExecutor.h @@ -4,13 +4,16 @@ namespace DB { -class QueryPipeline; class Block; class Chunk; -class LazyOutputFormat; +class QueryPipeline; +class PipelineExecutor; +class PullingOutputFormat; struct BlockStreamProfileInfo; -/// Pulling executor for QueryPipeline. +using PipelineExecutorPtr = std::shared_ptr; + +/// Pulling executor for QueryPipeline. Always execute pipeline in single thread. /// Typical usage is: /// /// PullingPipelineExecutor executor(query_pipeline); @@ -26,10 +29,9 @@ public: const Block & getHeader() const; /// Methods return false if query is finished. 
- /// If milliseconds > 0, returns empty object and `true` after timeout exceeded. /// You can use any pull method. - bool pull(Chunk & chunk, uint64_t milliseconds = 0); - bool pull(Block & block, uint64_t milliseconds = 0); + bool pull(Chunk & chunk); + bool pull(Block & block); /// Stop execution. It is not necessary, but helps to stop execution before executor is destroyed. void cancel(); @@ -45,13 +47,11 @@ public: /// Get query profile info. BlockStreamProfileInfo & getProfileInfo(); - /// Internal executor data. - struct Data; - private: + std::atomic_bool has_data_flag = false; QueryPipeline & pipeline; - std::shared_ptr lazy_format; - std::unique_ptr data; + std::shared_ptr pulling_format; + PipelineExecutorPtr executor; }; } diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 14d7a7f47d7..06ec116f3dd 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -9,7 +9,7 @@ namespace DB /// LazyOutputFormat is used to retrieve ready data from executing pipeline. /// You can periodically call `getChunk` from separate thread. -/// Used in PullingPipelineExecutor. +/// Used in PullingAsyncPipelineExecutor. class LazyOutputFormat : public IOutputFormat { diff --git a/src/Processors/Formats/PullingOutputFormat.cpp b/src/Processors/Formats/PullingOutputFormat.cpp new file mode 100644 index 00000000000..e137fee3dd1 --- /dev/null +++ b/src/Processors/Formats/PullingOutputFormat.cpp @@ -0,0 +1,39 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void PullingOutputFormat::consume(Chunk chunk) +{ + if (data) + throw Exception("PullingOutputFormat cannot consume chunk because it already has data", + ErrorCodes::LOGICAL_ERROR); + + if (chunk) + info.update(chunk.getNumRows(), chunk.allocatedBytes()); + + data = std::move(chunk); + has_data_flag = true; +} + +Chunk PullingOutputFormat::getChunk() +{ + auto chunk = std::move(data); + has_data_flag = false; + return chunk; +} + +Chunk PullingOutputFormat::getTotals() { return std::move(totals); } +Chunk PullingOutputFormat::getExtremes() { return std::move(extremes); } + +void PullingOutputFormat::setRowsBeforeLimit(size_t rows_before_limit) +{ + info.setRowsBeforeLimit(rows_before_limit); +} + +} diff --git a/src/Processors/Formats/PullingOutputFormat.h b/src/Processors/Formats/PullingOutputFormat.h new file mode 100644 index 00000000000..0864b5a02ef --- /dev/null +++ b/src/Processors/Formats/PullingOutputFormat.h @@ -0,0 +1,45 @@ +#pragma once +#include +#include + +namespace DB +{ + +/// Output format which is used in PullingPipelineExecutor. +class PullingOutputFormat : public IOutputFormat +{ +public: + explicit PullingOutputFormat(const Block & header, std::atomic_bool & consume_data_flag_) + : IOutputFormat(header, out) + , has_data_flag(consume_data_flag_) + {} + + String getName() const override { return "PullingOutputFormat"; } + + Chunk getChunk(); + Chunk getTotals(); + Chunk getExtremes(); + + BlockStreamProfileInfo & getProfileInfo() { return info; } + + void setRowsBeforeLimit(size_t rows_before_limit) override; + +protected: + void consume(Chunk chunk) override; + void consumeTotals(Chunk chunk) override { totals = std::move(chunk); } + void consumeExtremes(Chunk chunk) override { extremes = std::move(chunk); } + +private: + Chunk data; + Chunk totals; + Chunk extremes; + + std::atomic_bool & has_data_flag; + + BlockStreamProfileInfo info; + + /// Is not used. 
+ static WriteBuffer out; +}; + +} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 03a9b939d42..4412aa4748c 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -12,6 +12,7 @@ SRCS( DelayedPortsProcessor.cpp Executors/PipelineExecutingBlockInputStream.cpp Executors/PipelineExecutor.cpp + Executors/PullingAsyncPipelineExecutor.cpp Executors/PullingPipelineExecutor.cpp Executors/TreeExecutorBlockInputStream.cpp ForkProcessor.cpp From 1183bad4d066f1c03ec2af2388fd551bdf37e1b2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 May 2020 22:01:36 +0300 Subject: [PATCH 062/120] Fix build. --- programs/server/TCPHandler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/server/TCPHandler.cpp b/programs/server/TCPHandler.cpp index a792af30cf2..37195b21a86 100644 --- a/programs/server/TCPHandler.cpp +++ b/programs/server/TCPHandler.cpp @@ -28,7 +28,7 @@ #include #include -#include +#include #include "TCPHandler.h" @@ -560,7 +560,7 @@ void TCPHandler::processOrdinaryQueryWithProcessors() } { - PullingPipelineExecutor executor(pipeline); + PullingAsyncPipelineExecutor executor(pipeline); CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread}; Block block; From bcd38ac207b3a07f745bbfef4ef4883a99cbb9ac Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 May 2020 22:08:20 +0300 Subject: [PATCH 063/120] Fix build. --- src/Processors/Formats/PullingOutputFormat.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Formats/PullingOutputFormat.cpp b/src/Processors/Formats/PullingOutputFormat.cpp index e137fee3dd1..af237037a72 100644 --- a/src/Processors/Formats/PullingOutputFormat.cpp +++ b/src/Processors/Formats/PullingOutputFormat.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -8,6 +9,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +WriteBuffer PullingOutputFormat::out(nullptr, 0); + void PullingOutputFormat::consume(Chunk chunk) { if (data) From ebef7b9a853f4e6d56a37ad022c896e5d9ed6d72 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 21 May 2020 00:01:18 +0300 Subject: [PATCH 064/120] [website] ld+json for the front page (#11077) * [website] ld+json for the front page * Update index.html * Update index.html --- website/index.html | 34 ++++++++++++++++++++++++++++++++++ website/templates/base.html | 1 + 2 files changed, 35 insertions(+) diff --git a/website/index.html b/website/index.html index e2ac6e31441..a1f85f0bbfb 100644 --- a/website/index.html +++ b/website/index.html @@ -4,6 +4,40 @@ {% extends "templates/base.html" %} +{% block extra_meta %} + +{% endblock %} + {% block content %} {% include "templates/index/nav.html" %} diff --git a/website/templates/base.html b/website/templates/base.html index 3a2977ac2b7..700f71d61ec 100644 --- a/website/templates/base.html +++ b/website/templates/base.html @@ -2,6 +2,7 @@ {% include "templates/common_meta.html" %} + {% block extra_meta %}{% endblock %} {% include "templates/common_css.html" %} From 7d0ac4e20fe2333a31936f696defa9d2c7f75f31 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 21 May 2020 00:08:04 +0300 Subject: [PATCH 065/120] trigger ci --- docs/en/sql-reference/syntax.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 9ff5a388ad3..70db90b38be 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -28,9 +28,10 @@ There may be any number of space symbols 
between syntactical constructions (incl ## Comments {#comments} -ClickHouse supports either SQL-style and C-style comments. -SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted. -C-style are from `/*` to `*/`and can be multiline, spaces are not required either. +ClickHouse supports both SQL-style and C-style comments: + +- SQL-style comments start with `--` and continue to the end of the line; the space after `--` can be omitted. +- C-style comments span from `/*` to `*/` and can be multiline; spaces are not required either. ## Keywords {#syntax-keywords} From 58ffa4c59ce5e0dfc9793a708d5067feb126f0ad Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 21 May 2020 01:01:28 +0300 Subject: [PATCH 066/120] Enable percpu_arena:percpu for jemalloc This will reduce memory fragmentation due to thread pool. --- .../jemalloc/internal/jemalloc_internal_defs.h | 2 +- .../jemalloc/internal/jemalloc_internal_defs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h index 9c46a3a9320..e989020d651 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h @@ -369,7 +369,7 @@ /* #undef JEMALLOC_EXPORT */ /* config.malloc_conf options string. */ -#define JEMALLOC_CONFIG_MALLOC_CONF "" +#define JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu" /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ #define JEMALLOC_IS_MALLOC 1 diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h index 43936e8eba0..596dffa8d26 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h @@ -360,7 +360,7 @@ /* #undef JEMALLOC_EXPORT */ /* config.malloc_conf options string. */ -#define JEMALLOC_CONFIG_MALLOC_CONF "" +#define JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu" /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ #define JEMALLOC_IS_MALLOC 1 From d93b9a57f65a8790d4c3a6e2affc0ffefd84feb8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 20 May 2020 23:16:32 +0300 Subject: [PATCH 067/120] Forward declaration for Context as much as possible. Now after changing Context.h 488 modules will be recompiled instead of 582.
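To make the change pattern concrete, here is a minimal standalone sketch (the class, member, and file names below are invented for illustration; they are not taken from this patch): a header that only passes Context around by reference or pointer can forward-declare it instead of including Context.h, so only the .cpp files that actually use the full definition need the include.

    // forward_declaration_sketch.h -- hypothetical example, not an actual ClickHouse header
    namespace DB
    {

    class Context;   // forward declaration instead of #include <Interpreters/Context.h>

    class SomeStorage
    {
    public:
        explicit SomeStorage(const Context & context_) : context(context_) {}

        /// Declared here, defined in the .cpp file, which is the only
        /// translation unit that needs the full definition of Context.
        void read(const Context & query_context);

    private:
        const Context & context;   // references and pointers to an incomplete type are fine
    };

    }

With this pattern, touching Context.h recompiles only the implementation files that include it directly, which is where the reduction from 582 to 488 recompiled modules comes from.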
--- programs/client/Suggest.h | 1 + programs/local/LocalServer.cpp | 1 - programs/local/LocalServer.h | 3 +- src/Client/Connection.h | 2 ++ .../AddingDefaultBlockOutputStream.h | 2 +- .../AddingDefaultsBlockInputStream.h | 3 +- .../CreatingSetsBlockInputStream.cpp | 1 + .../ParallelParsingBlockInputStream.h | 1 - .../PushingToViewsBlockOutputStream.cpp | 1 + src/DataStreams/RemoteBlockOutputStream.h | 1 + src/Databases/DatabaseAtomic.cpp | 1 + src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseMySQL.h | 4 ++- src/Databases/DatabaseOnDisk.h | 3 +- src/Databases/DatabaseWithDictionaries.h | 3 ++ src/Disks/DiskSelector.cpp | 1 + src/Disks/DiskSelector.h | 2 +- src/Functions/pointInPolygon.cpp | 1 + src/IO/ReadWriteBufferFromHTTP.h | 1 + src/Interpreters/ActionsVisitor.cpp | 1 + src/Interpreters/AsynchronousMetrics.cpp | 1 + .../ClusterProxy/SelectStreamFactory.h | 3 ++ src/Interpreters/DDLWorker.cpp | 1 + src/Interpreters/DDLWorker.h | 8 ++++- src/Interpreters/ExpressionActions.cpp | 28 ++++++++++++++++++ src/Interpreters/ExpressionActions.h | 29 ++++--------------- src/Interpreters/ExpressionAnalyzer.cpp | 1 + src/Interpreters/GlobalSubqueriesVisitor.h | 2 +- src/Interpreters/InterpreterExplainQuery.cpp | 1 + src/Interpreters/InterpreterExplainQuery.h | 3 +- src/Interpreters/InterpreterFactory.cpp | 1 + src/Interpreters/InterpreterInsertQuery.h | 3 +- src/Interpreters/InterpreterSelectQuery.h | 2 +- .../InterpreterSelectWithUnionQuery.cpp | 1 + .../InterpreterSelectWithUnionQuery.h | 3 +- src/Interpreters/InterpreterSetRoleQuery.h | 2 ++ src/Interpreters/InterpreterWatchQuery.cpp | 1 + src/Interpreters/InterpreterWatchQuery.h | 2 +- src/Interpreters/JoinedTables.cpp | 1 - src/Interpreters/JoinedTables.h | 3 +- src/Interpreters/MutationsInterpreter.h | 1 + src/Interpreters/PartLog.cpp | 1 + src/Interpreters/QueryLog.h | 1 + src/Interpreters/QueryThreadLog.h | 1 + src/Interpreters/Set.cpp | 1 + src/Interpreters/Set.h | 2 +- src/Interpreters/SystemLog.h | 7 ++++- src/Interpreters/executeQuery.cpp | 1 + src/Interpreters/getTableExpressions.cpp | 1 + src/Interpreters/interpretSubquery.cpp | 1 + .../Impl/ConstantExpressionTemplate.cpp | 3 +- src/Storages/Distributed/DirectoryMonitor.cpp | 16 +++++----- .../DistributedBlockOutputStream.cpp | 1 + .../DistributedBlockOutputStream.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + src/Storages/MergeTree/KeyCondition.h | 4 +-- src/Storages/MergeTree/MergeList.h | 1 - src/Storages/MergeTree/MergeTreeData.cpp | 1 + src/Storages/MergeTree/MergeTreeData.h | 3 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 1 + .../MergeTree/MergeTreeDataPartWriterWide.cpp | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 1 + .../MergeTree/MergeTreeDataWriter.cpp | 1 + src/Storages/MergeTree/MergeTreeDataWriter.h | 1 - .../MergeTreeReverseSelectProcessor.cpp | 1 + .../MergeTree/MergeTreeSelectProcessor.cpp | 1 + .../MergeTree/MergeTreeSequentialSource.cpp | 1 + ...rgeTreeThreadSelectBlockInputProcessor.cpp | 1 + .../MergeTree/MergedBlockOutputStream.cpp | 1 + .../MergedColumnOnlyOutputStream.cpp | 1 + src/Storages/MergeTree/RPNBuilder.h | 2 +- .../ReplicatedMergeTreeCleanupThread.cpp | 1 + .../ReplicatedMergeTreePartCheckThread.cpp | 1 + .../ReplicatedMergeTreeRestartingThread.cpp | 1 + .../MergeTree/registerStorageMergeTree.cpp | 2 ++ src/Storages/StorageBuffer.h | 3 +- src/Storages/StorageDistributed.cpp | 19 ++++++------ src/Storages/StorageDistributed.h | 2 +- src/Storages/StorageMergeTree.cpp | 5 ++-- src/Storages/StorageReplicatedMergeTree.cpp | 1 
+ src/Storages/StorageSet.cpp | 1 + src/Storages/StorageView.cpp | 1 + src/Storages/System/StorageSystemColumns.cpp | 1 + src/Storages/System/StorageSystemDisks.cpp | 1 + .../System/StorageSystemMutations.cpp | 1 + .../System/StorageSystemPartsBase.cpp | 1 + .../System/StorageSystemStoragePolicies.cpp | 1 + src/TableFunctions/ITableFunctionFileLike.h | 2 +- src/TableFunctions/TableFunctionFile.cpp | 1 + src/TableFunctions/TableFunctionFile.h | 2 +- src/TableFunctions/TableFunctionHDFS.h | 5 +++- src/TableFunctions/TableFunctionInput.cpp | 1 - src/TableFunctions/TableFunctionInput.h | 5 +++- src/TableFunctions/TableFunctionURL.h | 5 +++- 94 files changed, 175 insertions(+), 81 deletions(-) diff --git a/programs/client/Suggest.h b/programs/client/Suggest.h index 6c81a388ea7..b13289ac322 100644 --- a/programs/client/Suggest.h +++ b/programs/client/Suggest.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 06790038a5e..eae30fddfee 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index 77d0b9ff5dc..5733bbc1a7c 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -4,13 +4,12 @@ #include #include #include +#include namespace DB { -class Context; - /// Lightweight Application for clickhouse-local /// No networking, no extra configs and working directories, no pid and status files, no dictionaries, no logging. /// Quiet mode by default diff --git a/src/Client/Connection.h b/src/Client/Connection.h index de04e3f0ef4..e056a4323df 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -50,6 +50,8 @@ class Connection; using ConnectionPtr = std::shared_ptr; using Connections = std::vector; +using Scalars = std::map; + /// Packet that could be received from server. struct Packet diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.h b/src/DataStreams/AddingDefaultBlockOutputStream.h index e04fdc0faeb..89134920730 100644 --- a/src/DataStreams/AddingDefaultBlockOutputStream.h +++ b/src/DataStreams/AddingDefaultBlockOutputStream.h @@ -3,12 +3,12 @@ #include #include #include -#include namespace DB { +class Context; /** This stream adds three types of columns into block * 1. Columns, that are missed inside request, but present in table without defaults (missed columns) diff --git a/src/DataStreams/AddingDefaultsBlockInputStream.h b/src/DataStreams/AddingDefaultsBlockInputStream.h index cf16ec3fb75..10fa20f7ad1 100644 --- a/src/DataStreams/AddingDefaultsBlockInputStream.h +++ b/src/DataStreams/AddingDefaultsBlockInputStream.h @@ -2,12 +2,13 @@ #include #include -#include namespace DB { +class Context; + /// Adds defaults to columns using BlockDelayedDefaults bitmask attached to Block by child InputStream. 
class AddingDefaultsBlockInputStream : public IBlockInputStream { diff --git a/src/DataStreams/CreatingSetsBlockInputStream.cpp b/src/DataStreams/CreatingSetsBlockInputStream.cpp index 1a67031df5d..be89e0e87b0 100644 --- a/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/DataStreams/ParallelParsingBlockInputStream.h b/src/DataStreams/ParallelParsingBlockInputStream.h index 03f0d508227..a904c686e47 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.h +++ b/src/DataStreams/ParallelParsingBlockInputStream.h @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB { diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index ce0922bf282..159199b2828 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/DataStreams/RemoteBlockOutputStream.h b/src/DataStreams/RemoteBlockOutputStream.h index 40387180997..2c89a7358ad 100644 --- a/src/DataStreams/RemoteBlockOutputStream.h +++ b/src/DataStreams/RemoteBlockOutputStream.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 2894b65274f..27a2441cec6 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 77ad1a3ab20..4306e61b37b 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include @@ -10,6 +9,7 @@ namespace DB class DatabaseLazyIterator; +class Context; /** Lazy engine of databases. * Works like DatabaseOrdinary, but stores in memory only cache. diff --git a/src/Databases/DatabaseMySQL.h b/src/Databases/DatabaseMySQL.h index d729c62a8bb..a43da5d1762 100644 --- a/src/Databases/DatabaseMySQL.h +++ b/src/Databases/DatabaseMySQL.h @@ -5,14 +5,16 @@ #include #include -#include #include #include +#include namespace DB { +class Context; + /** Real-time access to table list and table structure from remote MySQL * It doesn't make any manipulations with filesystem. 
* All tables are created by calling code after real-time pull-out structure from remote MySQL diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index a07a143ea92..75609e231af 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -11,6 +10,8 @@ namespace DB { +class Context; + std::pair createTableFromAST( ASTCreateQuery ast_create_query, const String & database_name, diff --git a/src/Databases/DatabaseWithDictionaries.h b/src/Databases/DatabaseWithDictionaries.h index a02b68a56da..eb9e105e31d 100644 --- a/src/Databases/DatabaseWithDictionaries.h +++ b/src/Databases/DatabaseWithDictionaries.h @@ -5,6 +5,9 @@ namespace DB { +class Context; +class ExternalDictionariesLoader; + class DatabaseWithDictionaries : public DatabaseOnDisk { diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 0ae8763eef3..69549e4520d 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 85b67f55d0a..8ae8de3be61 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -10,6 +9,7 @@ namespace DB { +class Context; class DiskSelector; using DiskSelectorPtr = std::shared_ptr; diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index 460c60d6e4c..2dbfd6f6641 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 2d4e3f0472e..4b4e4453360 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 3fa53e3e694..7c2133e629f 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -32,6 +32,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 7ba3886afb3..59ff01bf972 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index daf4dd48b4d..da1d18b6dd9 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -2,11 +2,14 @@ #include #include +#include #include namespace DB { +using Scalars = std::map; + namespace ClusterProxy { diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index fd8ca66e85c..cb499577272 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index 62eba97032e..2f63d9dadee 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -1,5 +1,5 @@ #pragma once -#include + #include #include #include @@ -13,9 +13,15 @@ #include #include +namespace zkutil +{ + class 
ZooKeeper; +} + namespace DB { +class Context; class ASTAlterQuery; class AccessRightsElements; struct DDLLogEntry; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index cb65ec35b9e..363d4765019 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -509,6 +510,33 @@ std::string ExpressionAction::toString() const return ss.str(); } +ExpressionActions::ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_) + : input_columns(input_columns_), settings(context_.getSettingsRef()) +{ + for (const auto & input_elem : input_columns) + sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name)); + +#if USE_EMBEDDED_COMPILER +compilation_cache = context_.getCompiledExpressionCache(); +#endif +} + +/// For constant columns the columns themselves can be contained in `input_columns_`. +ExpressionActions::ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_) + : settings(context_.getSettingsRef()) +{ + for (const auto & input_elem : input_columns_) + { + input_columns.emplace_back(input_elem.name, input_elem.type); + sample_block.insert(input_elem); + } +#if USE_EMBEDDED_COMPILER + compilation_cache = context_.getCompiledExpressionCache(); +#endif +} + +ExpressionActions::~ExpressionActions() = default; + void ExpressionActions::checkLimits(Block & block) const { if (settings.max_temporary_columns && block.columns() > settings.max_temporary_columns) diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 5a29eaaab9e..080e8f8a10f 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -25,6 +24,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +class Context; class TableJoin; class IJoin; using JoinPtr = std::shared_ptr; @@ -42,6 +42,7 @@ class IDataType; using DataTypePtr = std::shared_ptr; class ExpressionActions; +class CompiledExpressionCache; /** Action on the block. */ @@ -155,30 +156,12 @@ class ExpressionActions public: using Actions = std::vector; - ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_) - : input_columns(input_columns_), settings(context_.getSettingsRef()) - { - for (const auto & input_elem : input_columns) - sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name)); - -#if USE_EMBEDDED_COMPILER - compilation_cache = context_.getCompiledExpressionCache(); -#endif - } + ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_); /// For constant columns the columns themselves can be contained in `input_columns_`. - ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_) - : settings(context_.getSettingsRef()) - { - for (const auto & input_elem : input_columns_) - { - input_columns.emplace_back(input_elem.name, input_elem.type); - sample_block.insert(input_elem); - } -#if USE_EMBEDDED_COMPILER - compilation_cache = context_.getCompiledExpressionCache(); -#endif - } + ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_); + + ~ExpressionActions(); /// Add the input column. /// The name of the column must not match the names of the intermediate columns that occur when evaluating the expression. 
diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 8bff2816df1..3341855b8c6 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 78d98805814..37a358c3d28 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 841635b8d01..1c1e21fc32c 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterExplainQuery.h b/src/Interpreters/InterpreterExplainQuery.h index 058c51737b0..fbc8a998f2c 100644 --- a/src/Interpreters/InterpreterExplainQuery.h +++ b/src/Interpreters/InterpreterExplainQuery.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -8,6 +7,8 @@ namespace DB { +class Context; + /// Returns single row with explain results class InterpreterExplainQuery : public IInterpreter { diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 60302848367..ccaa8fa4067 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -61,6 +61,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 476e86898d7..fef962d24a3 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -2,13 +2,14 @@ #include #include -#include #include #include namespace DB { +class Context; + /** Interprets the INSERT query. 
*/ diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 87a95c5b836..b97ff65e988 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -25,6 +24,7 @@ namespace DB struct SubqueryForSet; class InterpreterSelectWithUnionQuery; +class Context; struct SyntaxAnalyzerResult; using SyntaxAnalyzerResultPtr = std::shared_ptr; diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 9cdb19b1934..378cb943c04 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.h b/src/Interpreters/InterpreterSelectWithUnionQuery.h index ad78572ab77..c7a8e09578b 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -1,15 +1,16 @@ #pragma once #include -#include #include #include +#include #include namespace DB { +class Context; class InterpreterSelectQuery; diff --git a/src/Interpreters/InterpreterSetRoleQuery.h b/src/Interpreters/InterpreterSetRoleQuery.h index afb53014c87..91cf5fc1b2e 100644 --- a/src/Interpreters/InterpreterSetRoleQuery.h +++ b/src/Interpreters/InterpreterSetRoleQuery.h @@ -6,6 +6,8 @@ namespace DB { + +class Context; class ASTSetRoleQuery; struct ExtendedRoleSet; struct User; diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index 91e7bfac2a3..dc1ae6a7cad 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -13,6 +13,7 @@ limitations under the License. */ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterWatchQuery.h b/src/Interpreters/InterpreterWatchQuery.h index 7929b86b1c8..a0dacd08ea8 100644 --- a/src/Interpreters/InterpreterWatchQuery.h +++ b/src/Interpreters/InterpreterWatchQuery.h @@ -18,11 +18,11 @@ limitations under the License. 
*/ #include #include #include -#include namespace DB { +class Context; class IAST; using ASTPtr = std::shared_ptr; using StoragePtr = std::shared_ptr; diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index b2591d94310..67363737670 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index 399acdc0768..55244e1225c 100644 --- a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -3,13 +3,14 @@ #include #include #include +#include +#include #include namespace DB { class ASTSelectQuery; -class Context; class TableJoin; struct SelectQueryOptions; diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 9b12ce79e91..35c4f8ece0a 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 7e04c5f6abb..ab22e73a0ca 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index ec14f5e97fb..e503b417638 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include diff --git a/src/Interpreters/QueryThreadLog.h b/src/Interpreters/QueryThreadLog.h index c50daa7bc88..e2c082eb54c 100644 --- a/src/Interpreters/QueryThreadLog.h +++ b/src/Interpreters/QueryThreadLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace ProfileEvents diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index afc95e11fd8..6f3031d5e7d 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 0f84c2f5da8..848abc9aa8a 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -17,6 +16,7 @@ namespace DB struct Range; +class Context; class IFunctionBase; using FunctionBasePtr = std::shared_ptr; diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index b2a4eec7883..218555bebbc 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -65,6 +64,12 @@ namespace ErrorCodes class Context; +class QueryLog; +class QueryThreadLog; +class PartLog; +class TextLog; +class TraceLog; +class MetricLog; class ISystemLog diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 4d609395c3a..8b3dc709ab2 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 68f18c1397a..b5444f73b35 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index 0521ab7fd8b..e108db1af30 
100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -12,6 +12,7 @@ #include #include +#include namespace DB { diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 7254cf91245..24fff4203bb 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -17,7 +19,6 @@ #include #include #include -#include #include diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index d90a7974968..2947b47eefa 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -81,13 +81,13 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( : storage(storage_) , pool(std::move(pool_)) , path{path_ + '/'} - , should_batch_inserts(storage.global_context.getSettingsRef().distributed_directory_monitor_batch_inserts) - , min_batched_block_size_rows(storage.global_context.getSettingsRef().min_insert_block_size_rows) - , min_batched_block_size_bytes(storage.global_context.getSettingsRef().min_insert_block_size_bytes) + , should_batch_inserts(storage.global_context->getSettingsRef().distributed_directory_monitor_batch_inserts) + , min_batched_block_size_rows(storage.global_context->getSettingsRef().min_insert_block_size_rows) + , min_batched_block_size_bytes(storage.global_context->getSettingsRef().min_insert_block_size_bytes) , current_batch_file_path{path + "current_batch.txt"} - , default_sleep_time{storage.global_context.getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()} + , default_sleep_time{storage.global_context->getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()} , sleep_time{default_sleep_time} - , max_sleep_time{storage.global_context.getSettingsRef().distributed_directory_monitor_max_sleep_time_ms.totalMilliseconds()} + , max_sleep_time{storage.global_context->getSettingsRef().distributed_directory_monitor_max_sleep_time_ms.totalMilliseconds()} , log{&Logger::get(getLoggerName())} , monitor_blocker(monitor_blocker_) , bg_pool(bg_pool_) @@ -214,7 +214,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri auto pools = createPoolsForAddresses(name, pool_factory); - const auto settings = storage.global_context.getSettings(); + const auto settings = storage.global_context->getSettings(); return pools.size() == 1 ? 
pools.front() : std::make_shared(pools, settings.load_balancing, settings.distributed_replica_error_half_life.totalSeconds(), @@ -262,7 +262,7 @@ bool StorageDistributedDirectoryMonitor::processFiles() void StorageDistributedDirectoryMonitor::processFile(const std::string & file_path) { LOG_TRACE(log, "Started processing `" << file_path << '`'); - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(storage.global_context.getSettingsRef()); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(storage.global_context->getSettingsRef()); auto connection = pool->get(timeouts); try @@ -437,7 +437,7 @@ struct StorageDistributedDirectoryMonitor::Batch Poco::File{tmp_file}.renameTo(parent.current_batch_file_path); } - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(parent.storage.global_context.getSettingsRef()); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(parent.storage.global_context->getSettingsRef()); auto connection = parent.pool->get(timeouts); bool batch_broken = false; diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index c7b0436a9a3..e08c4b7fd34 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.h b/src/Storages/Distributed/DistributedBlockOutputStream.h index 319664e1723..17db955431c 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.h +++ b/src/Storages/Distributed/DistributedBlockOutputStream.h @@ -11,7 +11,6 @@ #include #include #include -#include namespace Poco @@ -22,6 +21,7 @@ namespace Poco namespace DB { +class Context; class StorageDistributed; /** If insert_sync_ is true, the write is synchronous. Uses insert_timeout_ if it is not zero. 
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index f0ae8b40c5b..bed57dfe68d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -36,6 +36,7 @@ using VolumePtr = std::shared_ptr; class IMergeTreeReader; class IMergeTreeDataPartWriter; +class MarkCache; namespace ErrorCodes { diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 3a3768f0e4c..f12acdbf7bf 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -15,10 +14,9 @@ namespace DB { - +class Context; class IFunction; using FunctionBasePtr = std::shared_ptr; - class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 107b03a018a..4ee8a75a868 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index de9d3f6e981..d9ca89d721b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 394260096a7..2868ecaf290 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -34,6 +34,7 @@ class MergeListEntry; class AlterCommands; class MergeTreePartsMover; class MutationCommands; +class Context; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 625dc9f367a..00b474a7792 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 4cdf57a4700..1ab10b55409 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 662fb067e48..92f4f6107b8 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -18,6 +18,7 @@ #include #include #include +#include /// Allow to use __uint128_t as a template parameter for boost::rational. 
// https://stackoverflow.com/questions/41198673/uint128-t-not-working-with-clang-and-libstdc diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index a78e2e5ae32..75d6ebe7cb7 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index c2878145a50..e199aa3b43a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -9,7 +9,6 @@ #include #include -#include #include diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 265dba0e6fe..cc1f713a57e 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index def01b192d5..9af72e60123 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index cfda7fdf562..755ff87eab5 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp index aa8c550839d..0b09fad91d1 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index e0760e87d00..bd1312f0c59 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 892b4eccfbc..1a99636534b 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index 2e457147cf4..ed25252642b 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -13,6 +12,7 @@ namespace DB { +class Context; /// Builds reverse polish notation template diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index b1164f6621c..c36879750a1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -1,6 +1,7 @@ #include #include #include 
+#include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 98b30498839..9d055545457 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace ProfileEvents diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index cd82a865827..75f15556edf 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 721d58539ef..40cc8edca74 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -17,6 +17,8 @@ #include #include +#include + namespace DB { diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index f8fd1d2eaaf..6d6c1f66569 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace Poco { class Logger; } @@ -16,8 +17,6 @@ namespace Poco { class Logger; } namespace DB { -class Context; - /** During insertion, buffers the data in the RAM until certain thresholds are exceeded. * When thresholds are exceeded, flushes the data to another table. diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index eb75f53ef9c..10870993dd8 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -276,8 +277,8 @@ StorageDistributed::StorageDistributed( : IStorage(id_) , remote_database(remote_database_) , remote_table(remote_table_) - , global_context(context_) - , cluster_name(global_context.getMacros()->expand(cluster_name_)) + , global_context(std::make_unique(context_)) + , cluster_name(global_context->getMacros()->expand(cluster_name_)) , has_sharding_key(sharding_key_) , storage_policy(storage_policy_) , relative_data_path(relative_data_path_) @@ -287,7 +288,7 @@ StorageDistributed::StorageDistributed( if (sharding_key_) { - sharding_key_expr = buildShardingKeyExpression(sharding_key_, global_context, getColumns().getAllPhysical(), false); + sharding_key_expr = buildShardingKeyExpression(sharding_key_, *global_context, getColumns().getAllPhysical(), false); sharding_key_column_name = sharding_key_->getColumnName(); } @@ -297,7 +298,7 @@ StorageDistributed::StorageDistributed( /// Sanity check. Skip check if the table is already created to allow the server to start. 
if (!attach_ && !cluster_name.empty()) { - size_t num_local_shards = global_context.getCluster(cluster_name)->getLocalShardCount(); + size_t num_local_shards = global_context->getCluster(cluster_name)->getLocalShardCount(); if (num_local_shards && remote_database == id_.database_name && remote_table == id_.table_name) throw Exception("Distributed table " + id_.table_name + " looks at itself", ErrorCodes::INFINITE_LOOP); } @@ -325,7 +326,7 @@ void StorageDistributed::createStorage() /// Create default policy with the relative_data_path_ if (storage_policy.empty()) { - std::string path(global_context.getPath()); + std::string path(global_context->getPath()); /// Disk must ends with '/' if (!path.ends_with('/')) path += '/'; @@ -334,7 +335,7 @@ void StorageDistributed::createStorage() } else { - auto policy = global_context.getStoragePolicySelector()->get(storage_policy); + auto policy = global_context->getStoragePolicySelector()->get(storage_policy); if (policy->getVolumes().size() != 1) throw Exception("Policy for Distributed table, should have exactly one volume", ErrorCodes::BAD_ARGUMENTS); volume = policy->getVolume(0); @@ -628,7 +629,7 @@ StoragePolicyPtr StorageDistributed::getStoragePolicy() const { if (storage_policy.empty()) return {}; - return global_context.getStoragePolicySelector()->get(storage_policy); + return global_context->getStoragePolicySelector()->get(storage_policy); } void StorageDistributed::createDirectoryMonitors(const std::string & disk) @@ -655,7 +656,7 @@ StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor( { node_data.conneciton_pool = StorageDistributedDirectoryMonitor::createPool(name, *this); node_data.directory_monitor = std::make_unique( - *this, path, node_data.conneciton_pool, monitors_blocker, global_context.getDistributedSchedulePool()); + *this, path, node_data.conneciton_pool, monitors_blocker, global_context->getDistributedSchedulePool()); } return *node_data.directory_monitor; } @@ -672,7 +673,7 @@ std::pair StorageDistributed::getPath( ClusterPtr StorageDistributed::getCluster() const { - return owned_cluster ? owned_cluster : global_context.getCluster(cluster_name); + return owned_cluster ? owned_cluster : global_context->getCluster(cluster_name); } ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, const ASTPtr & query_ptr) const diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 125e1dee1e6..c934b3870f4 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -127,7 +127,7 @@ public: String remote_table; ASTPtr remote_table_function_ptr; - Context global_context; + std::unique_ptr global_context; Logger * log = &Logger::get("StorageDistributed"); /// Used to implement TableFunctionRemote. 
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e22e81d5041..a25b4d3beba 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -21,9 +23,8 @@ #include #include #include -#include -#include #include +#include namespace DB diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e6fc32ecbf9..a553fbbe970 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -44,6 +44,7 @@ #include #include +#include #include #include diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 613f5b6a2ed..9a3a2b0ac94 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 287e93fc99a..636c7f9d64d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 283c7f2c4a5..ab824fc8bdc 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 5ddf7de9ec8..c7d134645dc 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index 968c67bc14a..d4a262860dc 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 0fce41d8398..6356e6d699e 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index 81b6ddd465a..fdfdc277fd2 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index a18ca8ea4c8..55df9a7484d 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -1,11 +1,11 @@ #pragma once #include -#include namespace DB { class ColumnsDescription; +class Context; /* * function(source, format, structure) - creates a temporary storage from formated source diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 0a68ed59aa2..4f67f4cfd10 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "registerTableFunctions.h" namespace 
DB diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index ead924f6828..e0d8c20ac61 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -1,11 +1,11 @@ #pragma once #include -#include namespace DB { +class Context; /* file(path, format, structure) - creates a temporary storage from file * * diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index 443ce0aa93b..5e8503b318e 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -5,11 +5,13 @@ #if USE_HDFS #include -#include namespace DB { + +class Context; + /* hdfs(name_node_ip:name_node_port, format, structure) - creates a temporary storage from hdfs file * */ @@ -27,6 +29,7 @@ private: const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const String & compression_method) const override; const char * getStorageTypeName() const override { return "HDFS"; } }; + } #endif diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index e8f3453da06..5b4a3d97ee4 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include "registerTableFunctions.h" diff --git a/src/TableFunctions/TableFunctionInput.h b/src/TableFunctions/TableFunctionInput.h index 92c2e3a6e54..a2791533c5d 100644 --- a/src/TableFunctions/TableFunctionInput.h +++ b/src/TableFunctions/TableFunctionInput.h @@ -1,11 +1,13 @@ #pragma once #include -#include namespace DB { + +class Context; + /* input(structure) - allows to make INSERT SELECT from incoming stream of data */ class TableFunctionInput : public ITableFunction @@ -18,4 +20,5 @@ private: StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const override; const char * getStorageTypeName() const override { return "Input"; } }; + } diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index 61dca561f0c..366d573bcf4 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -1,11 +1,13 @@ #pragma once #include -#include namespace DB { + +class Context; + /* url(source, format, structure) - creates a temporary storage from url */ class TableFunctionURL : public ITableFunctionFileLike @@ -22,4 +24,5 @@ private: const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const String & compression_method) const override; const char * getStorageTypeName() const override { return "URL"; } }; + } From 09474ebea51996c668e1a5ebcdd4a3f89e284de1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 18 May 2020 21:13:56 +0300 Subject: [PATCH 068/120] Fix memory tracking for two-level GROUP BY when not all rows read from Aggregator (TCP) Example of such cases: - SELECT GROUP BY LIMIT - SELECT GROUP BY with subsequent MEMORY_LIMIT_EXCEEDED error And it should be two-level aggregation, since otherwise there will be only one hashtable which will be cleared correctly, only if you have two-level GROUP BY some of hashtables will not be cleared since nobody consume rows. 
Before this patch: 09:39.015292 [ 3070801 ] {609a0610-e377-4132-9cf3-f49454cf3c96} executeQuery: Read 1000000 rows, 7.63 MiB in 0.707 sec., 1413826 rows/sec., 10.79 MiB/sec. 09:39.015348 [ 3070801 ] {609a0610-e377-4132-9cf3-f49454cf3c96} MemoryTracker: Peak memory usage (for query): 51.93 MiB. 09:39.015942 [ 3070801 ] {} Aggregator: Destroying aggregate states <-- **problem** 09:39.017057 [ 3070801 ] {} Aggregator: Destroying aggregate states <-- 09:39.017961 [ 3070801 ] {} MemoryTracker: Peak memory usage (for query): 51.93 MiB. 09:39.018029 [ 3070801 ] {} TCPHandler: Processed in 0.711 sec. After this patch: 16:24.544030 [ 3087333 ] {79da208a-b3c0-48d4-9943-c974a3cbb6ea} executeQuery: Read 1000000 rows, 7.63 MiB in 0.599 sec., 1670199 rows/sec., 12.74 MiB/sec. 16:24.544084 [ 3087333 ] {79da208a-b3c0-48d4-9943-c974a3cbb6ea} MemoryTracker: Peak memory usage (for query): 72.11 MiB. 16:24.544398 [ 3087333 ] {79da208a-b3c0-48d4-9943-c974a3cbb6ea} Aggregator: Destroying aggregate states 16:24.545485 [ 3087333 ] {79da208a-b3c0-48d4-9943-c974a3cbb6ea} Aggregator: Destroying aggregate states 16:24.547053 [ 3087333 ] {} MemoryTracker: Peak memory usage (for query): 72.11 MiB. 16:24.547093 [ 3087333 ] {} TCPHandler: Processed in 0.603 sec. --- programs/server/TCPHandler.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/programs/server/TCPHandler.cpp b/programs/server/TCPHandler.cpp index a792af30cf2..3ae87a57725 100644 --- a/programs/server/TCPHandler.cpp +++ b/programs/server/TCPHandler.cpp @@ -278,8 +278,11 @@ void TCPHandler::runImpl() sendLogs(); sendEndOfStream(); - query_scope.reset(); + /// QueryState should be cleared before QueryScope, since otherwise + /// the MemoryTracker will be wrong for possible deallocations. + /// (i.e. deallocations from the Aggregator with two-level aggregation) state.reset(); + query_scope.reset(); } catch (const Exception & e) { @@ -359,8 +362,11 @@ void TCPHandler::runImpl() try { - query_scope.reset(); + /// QueryState should be cleared before QueryScope, since otherwise + /// the MemoryTracker will be wrong for possible deallocations. + /// (i.e. deallocations from the Aggregator with two-level aggregation) state.reset(); + query_scope.reset(); } catch (...) 
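The reordering above matters because the query's memory tracker is kept alive by query_scope; if it is torn down first, the large deallocations performed while destroying state (for example, two-level aggregation hash tables) are no longer attributed to the query. A toy sketch of the effect, using made-up stand-in classes rather than the real QueryState / MemoryTracker:

#include <cstdio>
#include <memory>

static long long tracked_bytes = 0;    /// per-"query" accounting
static bool tracking_enabled = false;  /// attached while the guard below is alive

struct TrackerGuard                    /// plays the role of query_scope
{
    TrackerGuard() { tracking_enabled = true; }
    ~TrackerGuard() { tracking_enabled = false; }
};

struct QueryState                      /// plays the role of state (owns big allocations)
{
    QueryState() { if (tracking_enabled) tracked_bytes += 100; }
    ~QueryState() { if (tracking_enabled) tracked_bytes -= 100; }  /// counted only while attached
};

int main()
{
    {
        auto scope = std::make_unique<TrackerGuard>();
        auto state = std::make_unique<QueryState>();
        state.reset();   /// freed while the tracker is still attached -> accounted
        scope.reset();
    }
    std::printf("reset state first: leftover = %lld\n", tracked_bytes);   /// 0

    {
        auto scope = std::make_unique<TrackerGuard>();
        auto state = std::make_unique<QueryState>();
        scope.reset();   /// tracker detached too early
        state.reset();   /// this deallocation is no longer attributed to the query
    }
    std::printf("reset scope first: leftover = %lld\n", tracked_bytes);   /// 100, accounting is off

    return 0;
}

With the old order the second case applies: the hash tables are freed after the tracker has been detached, so the query-level accounting never sees the deallocation.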
{ From 77bdb5f4629bc72014daa1bad67ab3bd015cbd3f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 18 May 2020 21:13:56 +0300 Subject: [PATCH 069/120] Add a test for GROUP BY memory accounting - GROUP BY - GROUP BY LIMIT - GROUP BY MEMORY_LIMIT_EXCEEDED --- ...1_group_by_limit_memory_tracking.reference | 0 .../01281_group_by_limit_memory_tracking.sql | 84 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 tests/queries/0_stateless/01281_group_by_limit_memory_tracking.reference create mode 100644 tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.reference b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql new file mode 100644 index 00000000000..fc8f81975f0 --- /dev/null +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql @@ -0,0 +1,84 @@ +DROP TABLE IF EXISTS trace_log_01281; +DROP TABLE IF EXISTS trace_log_01281_mv; +DROP TABLE IF EXISTS trace_log_01281_assert; + +-- better alternative will be to TRUNCATE TABLE system.*_log +-- but this will be a separate issue +CREATE TABLE trace_log_01281 AS system.trace_log Engine=Memory(); +CREATE MATERIALIZED VIEW trace_log_01281_mv TO trace_log_01281 AS SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample'; +CREATE VIEW trace_log_01281_assert AS SELECT + *, + throwIf(cnt < 0, 'no memory profile captured'), + throwIf(queries != 1, 'too much queries'), + throwIf(alloc < 100e6, 'minimal allocation had not been done'), + throwIf((alloc+free)/alloc > 0.05, 'memory accounting leaked more than 5%') + FROM ( + SELECT + count() cnt, + uniq(query_id) queries, + sumIf(size, size > 0) alloc, + sumIf(size, size < 0) free + FROM trace_log_01281 + ); + +-- +-- Basic +-- NOTE: 0 (and even 1e6) is too small, will make SYSTEM FLUSH LOGS too slow +-- (in debug build at least) +-- +SET max_untracked_memory=4e6; + +TRUNCATE TABLE trace_log_01281; +-- single { +SET max_threads=1; +SET memory_profiler_sample_probability=1; +SELECT uniqExactState(number) FROM numbers(toUInt64(2e6)) GROUP BY number % 2e5 FORMAT Null; +SET memory_profiler_sample_probability=0; +SYSTEM FLUSH LOGS; +-- } +SELECT * FROM trace_log_01281_assert FORMAT Null; + +TRUNCATE TABLE trace_log_01281; +-- single limit { +SET max_threads=1; +SET memory_profiler_sample_probability=1; +SELECT uniqExactState(number) FROM numbers(toUInt64(2e6)) GROUP BY number % 2e5 LIMIT 10 FORMAT Null; +SET memory_profiler_sample_probability=0; +SYSTEM FLUSH LOGS; +-- } +SELECT * FROM trace_log_01281_assert FORMAT Null; + +TRUNCATE TABLE trace_log_01281; +-- two-level { +-- need to have multiple threads for two-level aggregation +SET max_threads=2; +SET memory_profiler_sample_probability=1; +SELECT uniqExactState(number) FROM numbers_mt(toUInt64(2e6)) GROUP BY number % 2e5 FORMAT Null; +SET memory_profiler_sample_probability=0; +SYSTEM FLUSH LOGS; +-- } +SELECT * FROM trace_log_01281_assert FORMAT Null; + +TRUNCATE TABLE trace_log_01281; +-- two-level limit { +-- need to have multiple threads for two-level aggregation +SET max_threads=2; +SET memory_profiler_sample_probability=1; +SELECT uniqExactState(number) FROM numbers_mt(toUInt64(2e6)) GROUP BY number % 2e5 LIMIT 10 FORMAT Null; +SET memory_profiler_sample_probability=0; +SYSTEM FLUSH LOGS; +-- } 
+SELECT * FROM trace_log_01281_assert FORMAT Null; + +TRUNCATE TABLE trace_log_01281; +-- two-level MEMORY_LIMIT_EXCEEDED { +-- need to have multiple threads for two-level aggregation +SET max_threads=2; +SET memory_profiler_sample_probability=1; +SET max_memory_usage='150M'; +SELECT uniqExactState(number) FROM numbers_mt(toUInt64(10e6)) GROUP BY number % 1e6 FORMAT Null; -- { serverError 241; } +SET memory_profiler_sample_probability=0; +SET max_memory_usage=0; +SYSTEM FLUSH LOGS; +-- } +SELECT * FROM trace_log_01281_assert FORMAT Null; From 34508b8f641e8872ff99e449705d50813f3eb87b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 20 May 2020 05:19:19 +0300 Subject: [PATCH 070/120] [wip] Add memory stats to performance test --- docker/test/performance-comparison/compare.sh | 353 +++++++++++------- .../test/performance-comparison/entrypoint.sh | 2 +- docker/test/performance-comparison/eqmed.sql | 41 +- docker/test/performance-comparison/perf.py | 23 +- docker/test/performance-comparison/report.py | 49 +-- 5 files changed, 286 insertions(+), 182 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index af88a3954d7..0c0289d811c 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -133,7 +133,7 @@ function run_tests fi # Delete old report files. - for x in {test-times,skipped-tests,wall-clock-times,report-thresholds,client-times}.tsv + for x in {test-times,wall-clock-times}.tsv do rm -v "$x" ||: touch "$x" @@ -220,66 +220,127 @@ function get_profiles # Build and analyze randomization distribution for all queries. function analyze_queries { -rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv client-times.tsv report-thresholds.tsv ||: +rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||: +rm -rfv analyze ||: +mkdir analyze ||: + +# FIXME This loop builds column definitons from TSVWithNamesAndTypes in an +# absolutely atrocious way. This should be done by the file() function itself. +for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv +do + paste -d' ' \ + <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \ + <(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \ + | tr '\n' ', ' | sed 's/,$//' > "$x.columns" +done # Split the raw test output into files suitable for analysis. IFS=$'\n' for test_file in $(find . 
-maxdepth 1 -name "*-raw.tsv" -print) do test_name=$(basename "$test_file" "-raw.tsv") - sed -n "s/^query\t//p" < "$test_file" > "$test_name-queries.tsv" - sed -n "s/^client-time/$test_name/p" < "$test_file" >> "client-times.tsv" - sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "report-thresholds.tsv" - sed -n "s/^skipped/$test_name/p" < "$test_file" >> "skipped-tests.tsv" + sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv" + sed -n "s/^client-time/$test_name/p" < "$test_file" >> "analyze/client-times.tsv" + sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "analyze/report-thresholds.tsv" + sed -n "s/^skipped/$test_name/p" < "$test_file" >> "analyze/skipped-tests.tsv" + sed -n "s/^display-name/$test_name/p" < "$test_file" >> "analyze/query-display-names.tsv" done unset IFS +# for each query run, prepare array of metrics from query log +clickhouse-local --query " +create view query_runs as select * from file('analyze/query-runs.tsv', TSV, + 'test text, query_index int, query_id text, version UInt8, time float'); + +create view left_query_log as select * + from file('left-query-log.tsv', TSVWithNamesAndTypes, + '$(cat "left-query-log.tsv.columns")'); + +create view right_query_log as select * + from file('right-query-log.tsv', TSVWithNamesAndTypes, + '$(cat "right-query-log.tsv.columns")'); + +create table query_metrics engine File(TSV, -- do not add header -- will parse with grep + 'analyze/query-run-metrics.tsv') + as select + test, query_index, 0 run, version, + [ + -- server-reported time + query_duration_ms / toFloat64(1000) + , toFloat64(memory_usage) + -- client-reported time + , query_runs.time + ] metrics + from ( + select *, 0 version from left_query_log + union all + select *, 1 version from right_query_log + ) query_logs + right join query_runs + using (query_id, version) + ; +" + # This is a lateral join in bash... please forgive me. -# We don't have arrayPermute(), so I have to make random permutations with +# We don't have arrayPermute(), so I have to make random permutations with # `order by rand`, and it becomes really slow if I do it for more than one # query. We also don't have lateral joins. So I just put all runs of each # query into a separate file, and then compute randomization distribution # for each file. I do this in parallel using GNU parallel. +query_index=1 IFS=$'\n' -for test_file in $(find . 
-maxdepth 1 -name "*-queries.tsv" -print) +for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq) do - test_name=$(basename "$test_file" "-queries.tsv") - query_index=1 - for query in $(cut -d' ' -f1 "$test_file" | sort | uniq) - do - query_prefix="$test_name.q$query_index" - query_index=$((query_index + 1)) - grep -F "$query " "$test_file" > "$query_prefix.tmp" - printf "%s\0\n" \ - "clickhouse-local \ - --file \"$query_prefix.tmp\" \ - --structure 'query text, run int, version UInt32, time float' \ - --query \"$(cat "$script_dir/eqmed.sql")\" \ - >> \"$test_name-report.tsv\"" \ - 2>> analyze-errors.log \ - >> analyze-commands.txt - done + file="analyze/q$query_index.tmp" + grep -F "$prefix " "analyze/query-run-metrics.tsv" > "$file" & + printf "%s\0\n" \ + "clickhouse-local \ + --file \"$file\" \ + --structure 'test text, query text, run int, version UInt8, metrics Array(float)' \ + --query \"$(cat "$script_dir/eqmed.sql")\" \ + >> \"analyze/query-reports.tsv\"" \ + 2>> analyze/errors.log \ + >> analyze/commands.txt + + query_index=$((query_index + 1)) done wait unset IFS -parallel --null < analyze-commands.txt +parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt } # Analyze results function report { - rm -r report ||: mkdir report ||: - rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||: -cat analyze-errors.log >> report/errors.log ||: +cat analyze/errors.log >> report/errors.log ||: cat profile-errors.log >> report/errors.log ||: clickhouse-local --query " +create view query_display_names as select * from + file('analyze/query-display-names.tsv', TSV, + 'test text, query_index int, query_display_name text') + ; + +create table query_metric_stats engine File(TSVWithNamesAndTypes, + 'report/query-metric-stats.tsv') as + select *, metric_name + from file ('analyze/query-reports.tsv', TSV, 'left Array(float), + right Array(float), diff Array(float), stat_threshold Array(float), + test text, query_index int') reports + left array join ['server_time', 'memory', 'client_time'] as metric_name, + left, right, diff, stat_threshold + left join query_display_names + on reports.test = query_display_names.test + and reports.query_index = query_display_names.query_index + ; + +-- Main statistics for queries -- query time as reported in query log. 
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') as select -- FIXME Comparison mode doesn't make sense for queries that complete @@ -296,53 +357,54 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') left, right, diff, stat_threshold, if(report_threshold > 0, report_threshold, 0.10) as report_threshold, - reports.test, - query - from - ( - select *, - replaceAll(_file, '-report.tsv', '') test - from file('*-report.tsv', TSV, 'left float, right float, diff float, stat_threshold float, query text') - ) reports - left join file('report-thresholds.tsv', TSV, 'test text, report_threshold float') thresholds - using test - ; + test, query_index, query_display_name + from query_metric_stats + left join file('analyze/report-thresholds.tsv', TSV, + 'test text, report_threshold float') thresholds + on query_metric_stats.test = thresholds.test + where metric_name = 'server_time' + order by test, query_index, metric_name + ; -- keep the table in old format so that we can analyze new and old data together create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep') - as select short, changed_fail, unstable_fail, left, right, diff, stat_threshold, test, query + as select short, changed_fail, unstable_fail, left, right, diff, + stat_threshold, test, query_display_name query from queries ; -- save all test runs as JSON for the new comparison page -create table all_query_funs_json engine File(JSON, 'report/all-query-runs.json') as - select test, query, versions_runs[1] runs_left, versions_runs[2] runs_right +create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as + select test, query_display_name query, + versions_runs[1] runs_left, versions_runs[2] runs_right from ( select - test, query, + test, query_index, groupArrayInsertAt(runs, version) versions_runs from ( select - replaceAll(_file, '-queries.tsv', '') test, - query, version, - groupArray(time) runs - from file('*-queries.tsv', TSV, 'query text, run int, version UInt32, time float') - group by test, query, version + test, query_index, version, + groupArray(metrics[1]) runs + from file('analyze/query-run-metrics.tsv', TSV, + 'test text, query_index int, run int, version UInt8, metrics Array(float)') + group by test, query_index, version ) - group by test, query - ) + group by test, query_index + ) runs + left join query_display_names using (test, query_index) ; create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as - select left, right, diff, stat_threshold, changed_fail, test, query from queries where changed_show - order by abs(diff) desc; + select left, right, diff, stat_threshold, changed_fail, test, query_display_name + from queries where changed_show order by abs(diff) desc; create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as - select left, right, diff, stat_threshold, unstable_fail, test, query from queries where unstable_show - order by stat_threshold desc; + select left, right, diff, stat_threshold, unstable_fail, test, query_display_name + from queries where unstable_show order by stat_threshold desc; -create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, 'report/queries-for-flamegraph.tsv') as - select query, test from queries where unstable_show or changed_show +create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, + 'report/queries-for-flamegraph.tsv') as + select test, query_index from queries where unstable_show or changed_show ; create table 
unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as @@ -350,23 +412,23 @@ create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as group by test having s > 0 order by s desc; create table query_time engine Memory as select * - from file('client-times.tsv', TSV, 'test text, query text, client float, server float'); + from file('analyze/client-times.tsv', TSV, + 'test text, query_index int, client float, server float'); create table wall_clock engine Memory as select * from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float'); create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as - select client, server, floor(client/server, 3) p, query - from query_time where p > 1.02 order by p desc; + select client, server, floor(client/server, 3) p, query_display_name + from query_time left join query_display_names using (test, query_index) + where p > 1.02 order by p desc; create table test_time engine Memory as select test, sum(client) total_client_time, maxIf(client, not short) query_max, minIf(client, not short) query_min, - count(*) queries, - sum(short) short_queries - from query_time full join queries - using test, query + count(*) queries, sum(short) short_queries + from query_time full join queries using (test, query_index) group by test; create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as @@ -378,40 +440,89 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as floor(real / queries, 3) avg_real_per_query, floor(query_min, 3) from test_time - -- wall clock times are also measured for skipped tests, so don't - -- do full join - left join wall_clock using test + -- wall clock times are also measured for skipped tests, so don't + -- do full join + left join wall_clock using test order by avg_real_per_query desc; +-- report for all queries page, only main metric create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as select changed_fail, unstable_fail, left, right, diff, floor(left > right ? left / right : right / left, 3), - stat_threshold, test, query - from queries order by test, query; + stat_threshold, test, query_display_name + from queries order by test, query_display_name; + +-- new report for all queries with all metrics (no page yet) +create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as + select metric_name, left, right, diff, + floor(left > right ? left / right : right / left, 3), + stat_threshold, test, query_index, query_display_name + from query_metric_stats + order by test, query_index; " 2> >(tee -a report/errors.log 1>&2) -for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv -do - # FIXME This loop builds column definitons from TSVWithNamesAndTypes in an - # absolutely atrocious way. This should be done by the file() function itself. - paste -d' ' \ - <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \ - <(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \ - | tr '\n' ', ' | sed 's/,$//' > "$x.columns" -done - +# Prepare source data for metrics and flamegraphs for unstable queries. 
for version in {right,left} -do -clickhouse-local --query " + do + rm -rf data + clickhouse-local --query " create view queries_for_flamegraph as select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes, - 'query text, test text'); + 'test text, query_index int'); + +create view query_runs as + with 0 as left, 1 as right + select * from file('analyze/query-runs.tsv', TSV, + 'test text, query_index int, query_id text, version UInt8, time float') + where version = $version + ; + +create view query_display_names as select * from + file('analyze/query-display-names.tsv', TSV, + 'test text, query_index int, query_display_name text') + ; + +create table unstable_query_runs engine File(TSVWithNamesAndTypes, + 'unstable-query-runs.$version.rep') as + select test, query_index, query_display_name, query_id + from query_runs + join queries_for_flamegraph on + query_runs.test = queries_for_flamegraph.test + and query_runs.query_index = queries_for_flamegraph.query_index + left join query_display_names on + query_runs.test = query_display_names.test + and query_runs.query_index = query_display_names.query_index + ; create view query_log as select * from file('$version-query-log.tsv', TSVWithNamesAndTypes, '$(cat "$version-query-log.tsv.columns")'); +create table unstable_run_metrics engine File(TSVWithNamesAndTypes, + 'unstable-run-metrics.$version.rep') as + select + test, query_index, query_id, + ProfileEvents.Values value, ProfileEvents.Names metric + from query_log array join ProfileEvents + join unstable_query_runs using (query_id) + ; + +create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes, + 'unstable-run-metrics-2.$version.rep') as + select + test, query_index, query_id, + v, n + from ( + select + test, query_index, query_id, + ['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n, + [memory_usage, read_bytes, written_bytes, query_duration_ms] v + from query_log + join unstable_query_runs using (query_id) + ) + array join v, n; + create view trace_log as select * from file('$version-trace-log.tsv', TSVWithNamesAndTypes, '$(cat "$version-trace-log.tsv.columns")'); @@ -423,88 +534,64 @@ create view addresses_src as select * create table addresses_join_$version engine Join(any, left, address) as select addr address, name from addresses_src; -create table unstable_query_runs engine File(TSVWithNamesAndTypes, - 'unstable-query-runs.$version.rep') as - select query, query_id from query_log - where query in (select query from queries_for_flamegraph) - and query_id not like 'prewarm %' - ; - -create table unstable_query_log engine File(Vertical, - 'unstable-query-log.$version.rep') as - select * from query_log - where query_id in (select query_id from unstable_query_runs); - -create table unstable_run_metrics engine File(TSVWithNamesAndTypes, - 'unstable-run-metrics.$version.rep') as - select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query - from query_log array join ProfileEvents - where query_id in (select query_id from unstable_query_runs) - ; - -create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes, - 'unstable-run-metrics-2.$version.rep') as - select v, n, query_id, query - from - (select - ['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n, - [memory_usage, read_bytes, written_bytes, query_duration_ms] v, - query, - query_id - from query_log - where query_id in (select query_id from unstable_query_runs)) - array join n, v; - create table unstable_run_traces engine 
File(TSVWithNamesAndTypes, 'unstable-run-traces.$version.rep') as select + test, query_index, query_id, count() value, - joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric, - unstable_query_runs.query_id, - any(unstable_query_runs.query) query - from unstable_query_runs - join trace_log on trace_log.query_id = unstable_query_runs.query_id - group by unstable_query_runs.query_id, metric + joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric + from trace_log + join unstable_query_runs using query_id + group by test, query_index, query_id, metric order by count() desc ; create table metric_devation engine File(TSVWithNamesAndTypes, 'metric-deviation.$version.rep') as - select query, floor((q[3] - q[1])/q[2], 3) d, - quantilesExact(0, 0.5, 1)(value) q, metric - from (select * from unstable_run_metrics - union all select * from unstable_run_traces - union all select * from unstable_run_metrics_2) mm - join queries_for_flamegraph using query - group by query, metric - having d > 0.5 - order by query desc, d desc + -- first goes the key used to split the file with grep + select test, query_index, query_display_name, + d, q, metric + from ( + select + test, query_index, + floor((q[3] - q[1])/q[2], 3) d, + quantilesExact(0, 0.5, 1)(value) q, metric + from (select * from unstable_run_metrics + union all select * from unstable_run_traces + union all select * from unstable_run_metrics_2) mm + group by test, query_index, metric + having d > 0.5 + ) metrics + left join unstable_query_runs using (test, query_index) + order by test, query_index, d desc ; create table stacks engine File(TSV, 'stacks.$version.rep') as select - query, + -- first goes the key used to split the file with grep + test, query_index, any(query_display_name), arrayStringConcat( arrayMap(x -> joinGet(addresses_join_$version, 'name', x), arrayReverse(trace) ), ';' ) readable_trace, - count() + count() c from trace_log join unstable_query_runs using query_id - group by query, trace + group by test, query_index, trace ; " 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors. done wait +# Create per-query flamegraphs and files with metrics IFS=$'\n' for version in {right,left} do - for query in $(cut -d' ' -f1 "stacks.$version.rep" | sort | uniq) + for query in $(cut -d' ' -f1,2,3 "stacks.$version.rep" | sort | uniq) do - query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g') + query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g') # Build separate .svg flamegraph for each query. 
grep -F "$query " "stacks.$version.rep" \ diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 500dd872c8d..3b8a24ad5da 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -119,5 +119,5 @@ done dmesg -T > dmesg.log -7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} ./report +7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze cp compare.log /output diff --git a/docker/test/performance-comparison/eqmed.sql b/docker/test/performance-comparison/eqmed.sql index 4f8482ea74b..f7f8d6ac40d 100644 --- a/docker/test/performance-comparison/eqmed.sql +++ b/docker/test/performance-comparison/eqmed.sql @@ -1,32 +1,37 @@ --- input is table(query text, run UInt32, version int, time float) +-- input is table(test text, query text, run UInt32, version int, metrics Array(float)) select - floor(original_medians_array.time_by_version[1], 4) l, - floor(original_medians_array.time_by_version[2], 4) r, - floor((r - l) / l, 3) diff_percent, - floor(threshold / l, 3) threshold_percent, - query + arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded, + arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded, + arrayMap(x, y -> floor((y - x) / x, 3), l, r) diff_percent, + arrayMap(x, y -> floor(x / y, 3), threshold, l) threshold_percent, + test, query from ( -- quantiles of randomization distributions - select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold + select quantileExactForEach(0.999)( + arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d + ) threshold ---- uncomment to see what the distribution is really like - --, uniqExact(d) u + --, uniqExact(d.1) u --, arraySort(x->x.1, -- arrayZip( - -- (sumMap([d], [1]) as f).1, + -- (sumMap([d.1], [1]) as f).1, -- f.2)) full_histogram from ( - select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time' + -- make array 'random label' -> '[median metric]' + select virtual_run, groupArrayInsertAt(median_metrics, random_label) metrics_by_label from ( - select medianExact(time) median_time, virtual_run, random_label -- get median times, grouping by random label + -- get [median metric] arrays among virtual runs, grouping by random label + select medianExactForEach(metrics) median_metrics, virtual_run, random_label from ( - select *, toUInt32(rowNumberInAllBlocks() % 2) random_label -- randomly relabel measurements + -- randomly relabel measurements + select *, toUInt32(rowNumberInAllBlocks() % 2) random_label from ( - select time, number virtual_run + select metrics, number virtual_run from -- strip the query away before the join -- it might be several kB long; - (select time, run, version from table) no_query, + (select metrics, run, version from table) no_query, -- duplicate input measurements into many virtual runs numbers(1, 100000) nn -- for each virtual run, randomly reorder measurements @@ -40,19 +45,19 @@ from -- this select aggregates by virtual_run ) rd, ( - select groupArrayInsertAt(median_time, version) time_by_version + select groupArrayInsertAt(median_metrics, version) medians_by_version from ( - select medianExact(time) median_time, version + select medianExactForEach(metrics) median_metrics, version from 
table group by version ) original_medians ) original_medians_array, ( - select any(query) query from table + select any(test) test, any(query) query from table ) any_query, ( - select throwIf(uniq(query) != 1) from table + select throwIf(uniq((test, query)) != 1) from table ) check_single_query -- this subselect checks that there is only one query in the input table; -- written this way so that it is not optimized away (#10523) ; diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index a749b158e83..be8155cdddb 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -29,6 +29,8 @@ parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.') args = parser.parse_args() +test_name = os.path.splitext(os.path.basename(args.file[0].name))[0] + tree = et.parse(args.file[0]) root = tree.getroot() @@ -141,19 +143,25 @@ test_queries = substitute_parameters(test_query_templates) report_stage_end('substitute2') -for i, q in enumerate(test_queries): +for query_index, q in enumerate(test_queries): + query_prefix = f'{test_name}.query{query_index}' + # We have some crazy long queries (about 100kB), so trim them to a sane - # length. + # length. This means we can't use query text as an identifier and have to + # use the test name + the test-wide query index. query_display_name = q if len(query_display_name) > 1000: query_display_name = f'{query_display_name[:1000]}...({i})' + print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}') + # Prewarm: run once on both servers. Helps to bring the data into memory, # precompile the queries, etc. try: for conn_index, c in enumerate(connections): - res = c.execute(q, query_id = f'prewarm {0} {query_display_name}') - print(f'prewarm\t{tsv_escape(query_display_name)}\t{conn_index}\t{c.last_query.elapsed}') + prewarm_id = f'{query_prefix}.prewarm0' + res = c.execute(q, query_id = prewarm_id) + print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') except: # If prewarm fails for some query -- skip it, and try to test the others. 
# This might happen if the new test introduces some function that the @@ -170,13 +178,14 @@ for i, q in enumerate(test_queries): start_seconds = time.perf_counter() server_seconds = 0 for run in range(0, args.runs): + run_id = f'{query_prefix}.run{run}' for conn_index, c in enumerate(connections): - res = c.execute(q) - print(f'query\t{tsv_escape(query_display_name)}\t{run}\t{conn_index}\t{c.last_query.elapsed}') + res = c.execute(q, query_id = run_id) + print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') server_seconds += c.last_query.elapsed client_seconds = time.perf_counter() - start_seconds - print(f'client-time\t{tsv_escape(query_display_name)}\t{client_seconds}\t{server_seconds}') + print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') report_stage_end('benchmark') diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index a6203ec40e6..83c5e7344f7 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -25,6 +25,9 @@ very_unstable_queries = 0 # max seconds to run one query by itself, not counting preparation allowed_single_run_time = 2 +color_bad='#ffb0c0' +color_good='#b0d050' + header_template = """ @@ -188,8 +191,8 @@ if args.report == 'main': print(tableStart('Changes in performance')) columns = [ - 'Old, s.', # 0 - 'New, s.', # 1 + 'Old, s', # 0 + 'New, s', # 1 'Relative difference (new − old) / old', # 2 'p < 0.001 threshold', # 3 # Failed # 4 @@ -205,10 +208,10 @@ if args.report == 'main': if int(row[4]): if float(row[2]) < 0.: faster_queries += 1 - attrs[2] = 'style="background: #00ff00"' + attrs[2] = f'style="background: {color_good}"' else: slower_queries += 1 - attrs[2] = 'style="background: #ff0000"' + attrs[2] = f'style="background: {color_bad}"' else: attrs[2] = '' @@ -221,7 +224,7 @@ if args.report == 'main': slow_on_client_rows = tsvRows('report/slow-on-client.tsv') error_tests += len(slow_on_client_rows) printSimpleTable('Slow on client', - ['Client time, s.', 'Server time, s.', 'Ratio', 'Query'], + ['Client time, s', 'Server time, s', 'Ratio', 'Query'], slow_on_client_rows) def print_unstable_queries(): @@ -252,7 +255,7 @@ if args.report == 'main': for r in unstable_rows: if int(r[4]): very_unstable_queries += 1 - attrs[3] = 'style="background: #ffb0a0"' + attrs[3] = f'style="background: {color_bad}"' else: attrs[3] = '' @@ -266,7 +269,7 @@ if args.report == 'main': error_tests += len(run_error_rows) printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows) - skipped_tests_rows = tsvRows('skipped-tests.tsv') + skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv') printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows) printSimpleTable('Tests with most unstable queries', @@ -281,13 +284,13 @@ if args.report == 'main': columns = [ 'Test', #0 - 'Wall clock time, s.', #1 - 'Total client time, s.', #2 + 'Wall clock time, s', #1 + 'Total client time, s', #2 'Total queries', #3 'Ignored short queries', #4 - 'Longest query
(sum for all runs), s.', #5 - 'Avg wall clock time
(sum for all runs), s.', #6 - 'Shortest query
(sum for all runs), s.', #7 + 'Longest query
(sum for all runs), s', #5 + 'Avg wall clock time
(sum for all runs), s', #6 + 'Shortest query
(sum for all runs), s', #7 ] print(tableStart('Test times')) @@ -300,13 +303,13 @@ if args.report == 'main': if float(r[6]) > 1.5 * total_runs: # FIXME should be 15s max -- investigate parallel_insert slow_average_tests += 1 - attrs[6] = 'style="background: #ffb0a0"' + attrs[6] = f'style="background: {color_bad}"' else: attrs[6] = '' if float(r[5]) > allowed_single_run_time * total_runs: slow_average_tests += 1 - attrs[5] = 'style="background: #ffb0a0"' + attrs[5] = f'style="background: {color_bad}"' else: attrs[5] = '' @@ -320,9 +323,9 @@ if args.report == 'main': print(""" @@ -382,8 +385,8 @@ elif args.report == 'all-queries': columns = [ # Changed #0 # Unstable #1 - 'Old, s.', #2 - 'New, s.', #3 + 'Old, s', #2 + 'New, s', #3 'Relative difference (new − old) / old', #4 'Times speedup / slowdown', #5 'p < 0.001 threshold', #6 @@ -399,21 +402,21 @@ elif args.report == 'all-queries': attrs[1] = None for r in rows: if int(r[1]): - attrs[6] = 'style="background: #ffb0a0"' + attrs[6] = f'style="background: {color_bad}"' else: attrs[6] = '' if int(r[0]): if float(r[4]) > 0.: - attrs[4] = 'style="background: #ffb0a0"' + attrs[4] = f'style="background: {color_bad}"' else: - attrs[4] = 'style="background: #adbdff"' + attrs[4] = f'style="background: {color_good}"' else: attrs[4] = '' if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time: - attrs[2] = 'style="background: #ffb0a0"' - attrs[3] = 'style="background: #ffb0a0"' + attrs[2] = f'style="background: {color_bad}"' + attrs[3] = f'style="background: {color_bad}"' else: attrs[2] = '' attrs[3] = '' @@ -428,9 +431,9 @@ elif args.report == 'all-queries': print(""" From 4f9abad9dd7cc7bde46f44512ff6033be00b7b75 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 09:15:52 +0300 Subject: [PATCH 071/120] Added .arcignore --- .arcignore | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .arcignore diff --git a/.arcignore b/.arcignore new file mode 100644 index 00000000000..cd917b06fb7 --- /dev/null +++ b/.arcignore @@ -0,0 +1,12 @@ +# .arcignore is the same as .gitignore but for Arc VCS. +# Arc VCS is a proprietary VCS in Yandex that is very similar to Git +# from the user perspective but with the following differences: +# 1. Data is stored in distributed object storage. +# 2. Local copy works via FUSE without downloading all the objects. +# For this reason, it is better suited for huge monorepositories that can be found in large companies (e.g. Yandex, Google). +# As ClickHouse developers, we don't use Arc as a VCS (we use Git). +# But the ClickHouse source code is also mirrored into internal monorepository and our collegues are using Arc. +# You can read more about Arc here: https://habr.com/en/company/yandex/blog/482926/ + +# Repository is synchronized without 3rd-party submodules. +contrib From 8c2b7c31a5a79952b323c6080aba43bae0b6ea8a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 21 May 2020 09:49:15 +0300 Subject: [PATCH 072/120] Fix too long perf test. 
--- tests/performance/random_string_utf8.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/performance/random_string_utf8.xml b/tests/performance/random_string_utf8.xml index 60e3b69e79f..0185f519c31 100644 --- a/tests/performance/random_string_utf8.xml +++ b/tests/performance/random_string_utf8.xml @@ -3,10 +3,10 @@ SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(10)) - SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(100)) - SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(1000)) - SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(10000)) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomStringUTF8(rand() % 10)) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomStringUTF8(rand() % 100)) - SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(rand() % 1000)) + SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(100)) + SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(1000)) + SELECT count() FROM zeros(1000) WHERE NOT ignore(randomStringUTF8(10000)) + SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(rand() % 10)) + SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(rand() % 100)) + SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(rand() % 1000)) From 72ca56c43b6f8ca21837860369ba25cf0fd71a4b Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 21 May 2020 07:28:30 +0000 Subject: [PATCH 073/120] Bump protobuf from 3.12.0 to 3.12.1 in /docs/tools Bumps [protobuf](https://github.com/protocolbuffers/protobuf) from 3.12.0 to 3.12.1. - [Release notes](https://github.com/protocolbuffers/protobuf/releases) - [Changelog](https://github.com/protocolbuffers/protobuf/blob/master/generate_changelog.py) - [Commits](https://github.com/protocolbuffers/protobuf/compare/v3.12.0...v3.12.1) Signed-off-by: dependabot-preview[bot] --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index e4751272ebb..adb68cfb2d7 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -21,7 +21,7 @@ mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.9 nltk==3.5 nose==1.3.7 -protobuf==3.12.0 +protobuf==3.12.1 numpy==1.18.4 Pygments==2.5.2 pymdown-extensions==7.1 From 6efc51335a7a6dfc9da258965558f562b5f52860 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 May 2020 10:31:43 +0300 Subject: [PATCH 074/120] Fix build. 
--- src/Processors/Executors/PipelineExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index a5a43a9f546..fb1527e7cfd 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -716,7 +716,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, st #ifndef NDEBUG context->total_time_ns += total_time_watch.elapsed(); - context->wait_time_ns = total_time_ns - execution_time_ns - processing_time_ns; + context->wait_time_ns = context->total_time_ns - context->execution_time_ns - context->processing_time_ns; #endif } From f6d5961915991bf22722160a5a004b23c400b17f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 May 2020 10:40:01 +0300 Subject: [PATCH 075/120] Fix build. --- src/Processors/Executors/PipelineExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index fb1527e7cfd..4552262040b 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -502,7 +502,7 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) if (!is_execution_initialized) initializeExecution(1); - executeStep(yield_flag); + executeStepImpl(0, 1, yield_flag); if (!finished) return true; From adac47a8452e9f07a6cc4ed16059cae735842c68 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 May 2020 10:47:38 +0300 Subject: [PATCH 076/120] Fix build. --- .../Executors/PipelineExecutingBlockInputStream.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp index d39938b57c5..3c127d08c28 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp @@ -89,11 +89,14 @@ inline static void throwIfExecutionNotStarted(bool is_execution_started, const c void PipelineExecutingBlockInputStream::cancel(bool kill) { - throwIfExecutionNotStarted(executor != nullptr, "cancel"); IBlockInputStream::cancel(kill); - executor->cancel(); -} + if (is_execution_started) + { + executor ? 
executor->cancel() + : async_executor->cancel(); + } +} void PipelineExecutingBlockInputStream::setProgressCallback(const ProgressCallback & callback) { From 5da41d5cd6217f7737107226295a6ff01aed8985 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 21 May 2020 10:35:37 +0300 Subject: [PATCH 077/120] Configure jemalloc options via cmake --- contrib/CMakeLists.txt | 1 + contrib/jemalloc-cmake/CMakeLists.txt | 15 +++++++++++++-- .../jemalloc-cmake/include_linux_aarch64/README | 1 + ...nternal_defs.h => jemalloc_internal_defs.h.in} | 2 +- .../jemalloc-cmake/include_linux_x86_64/README | 1 + ...nternal_defs.h => jemalloc_internal_defs.h.in} | 2 +- 6 files changed, 18 insertions(+), 4 deletions(-) rename contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/{jemalloc_internal_defs.h => jemalloc_internal_defs.h.in} (99%) rename contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/{jemalloc_internal_defs.h => jemalloc_internal_defs.h.in} (99%) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1031285eac7..981a60df70a 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -22,6 +22,7 @@ add_subdirectory (consistent-hashing) add_subdirectory (croaring) add_subdirectory (FastMemcpy) add_subdirectory (grpc-cmake) +set(JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu") add_subdirectory (jemalloc-cmake) add_subdirectory (libcpuid-cmake) add_subdirectory (murmurhash) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 5ae09bec8aa..aa005400bd1 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -9,6 +9,9 @@ if (ENABLE_JEMALLOC) option (USE_INTERNAL_JEMALLOC "Use internal jemalloc library" ${NOT_UNBUNDLED}) if (USE_INTERNAL_JEMALLOC) + option (JEMALLOC_CONFIG_MALLOC_CONF "Change default configuration string" "") + message (STATUS "jemalloc malloc_conf: ${JEMALLOC_CONFIG_MALLOC_CONF}") + set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc") set (SRCS @@ -52,11 +55,19 @@ if (ENABLE_JEMALLOC) add_library(jemalloc ${SRCS}) target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include) target_include_directories(jemalloc SYSTEM PUBLIC include) + + set(JEMALLOC_INCLUDE) if (ARCH_AMD64) - target_include_directories(jemalloc SYSTEM PUBLIC include_linux_x86_64) + set(JEMALLOC_INCLUDE_PREFIX include_linux_x86_64) elseif (ARCH_ARM) - target_include_directories(jemalloc SYSTEM PUBLIC include_linux_aarch64) + set(JEMALLOC_INCLUDE_PREFIX include_linux_aarch64) endif () + target_include_directories(jemalloc SYSTEM PUBLIC + ${JEMALLOC_INCLUDE_PREFIX}) + configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in + ${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h) + target_include_directories(jemalloc SYSTEM PRIVATE + ${CMAKE_CURRENT_BINARY_DIR}/${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal) target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/README b/contrib/jemalloc-cmake/include_linux_aarch64/README index 2ab582803a2..3cecf7fa36d 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/README +++ b/contrib/jemalloc-cmake/include_linux_aarch64/README @@ -5,3 +5,4 @@ Added #define GNU_SOURCE Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not. Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard. Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5. 
+Added JEMALLOC_CONFIG_MALLOC_CONF substitution diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in similarity index 99% rename from contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h rename to contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in index e989020d651..089f1a773aa 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -369,7 +369,7 @@ /* #undef JEMALLOC_EXPORT */ /* config.malloc_conf options string. */ -#define JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu" +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ #define JEMALLOC_IS_MALLOC 1 diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/README b/contrib/jemalloc-cmake/include_linux_x86_64/README index bf7663bda8d..8b93e0d4dcd 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/README +++ b/contrib/jemalloc-cmake/include_linux_x86_64/README @@ -5,3 +5,4 @@ Added #define GNU_SOURCE Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not. Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard. Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5. +Added JEMALLOC_CONFIG_MALLOC_CONF substitution diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in similarity index 99% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h rename to contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index 596dffa8d26..63f7f765023 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -360,7 +360,7 @@ /* #undef JEMALLOC_EXPORT */ /* config.malloc_conf options string. */ -#define JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu" +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ #define JEMALLOC_IS_MALLOC 1 From 111fa49df593a29d7b25245e31b1d61ffd093908 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 21 May 2020 10:45:59 +0300 Subject: [PATCH 078/120] Add comment for enabling jemalloc percpu_arena --- contrib/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 981a60df70a..baf1fb0c254 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -22,6 +22,14 @@ add_subdirectory (consistent-hashing) add_subdirectory (croaring) add_subdirectory (FastMemcpy) add_subdirectory (grpc-cmake) +# ThreadPool select job randomly, and there can be some threads that had been +# performed some memory heavy task before and will be inactive for some time, +# but until it will became active again, the memory will not be freed since by +# default each thread has it's own arena, but there should be not more then +# 4*CPU arenas (see opt.nareans description). 
+# +# By enabling percpu_arena number of arenas limited to number of CPUs and hence +# this problem should go away. set(JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu") add_subdirectory (jemalloc-cmake) add_subdirectory (libcpuid-cmake) From cfef21be866e7d25a89f9c94ddfe2864e86a94ab Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 May 2020 11:54:41 +0300 Subject: [PATCH 079/120] Fix build. --- .../Executors/PipelineExecutingBlockInputStream.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp index 3c127d08c28..ba42d858d50 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp @@ -79,14 +79,6 @@ inline static void throwIfExecutionStarted(bool is_execution_started, const char ErrorCodes::LOGICAL_ERROR); } -inline static void throwIfExecutionNotStarted(bool is_execution_started, const char * method) -{ - if (!is_execution_started) - throw Exception(String("Cannot call ") + method + - " for PipelineExecutingBlockInputStream because execution was not started", - ErrorCodes::LOGICAL_ERROR); -} - void PipelineExecutingBlockInputStream::cancel(bool kill) { IBlockInputStream::cancel(kill); From c9d315654c4b55825cea847cc9a7319e0cebf7e2 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Thu, 21 May 2020 17:00:44 +0800 Subject: [PATCH 080/120] Fix Storages/MergeTree typo --- src/Storages/MergeTree/IMergeTreeDataPart.h | 4 ++-- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 2 +- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/MergeTree/MergeTreeDataMergerMutator.h | 2 +- src/Storages/MergeTree/MergeTreeDataPartCompact.h | 4 ++-- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index f0ae8b40c5b..4b2d6102b2f 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -319,7 +319,7 @@ protected: /// checksums.txt and columns.txt. 0 - if not counted; UInt64 bytes_on_disk{0}; - /// Columns description. Cannot be changed, after part initialiation. + /// Columns description. Cannot be changed, after part initialization. NamesAndTypesList columns; const Type part_type; @@ -352,7 +352,7 @@ private: /// For the older format version calculates rows count from the size of a column with a fixed size. void loadRowsCount(); - /// Loads ttl infos in json format from file ttl.txt. If file doesn`t exists assigns ttl infos with all zeros + /// Loads ttl infos in json format from file ttl.txt. If file doesn't exists assigns ttl infos with all zeros void loadTTLInfos(); void loadPartitionAndMinMaxIndex(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index ffdba570544..149aeaa2f0d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -136,7 +136,7 @@ protected: size_t next_mark = 0; size_t next_index_offset = 0; - /// Number of marsk in data from which skip indices have to start + /// Number of marks in data from which skip indices have to start /// aggregation. I.e. it's data mark number, not skip indices mark. 
size_t skip_index_data_mark = 0; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 394260096a7..852b3b68caf 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -70,7 +70,7 @@ namespace ErrorCodes /// [Column].mrk - marks, pointing to seek positions allowing to skip n * k rows. /// /// File structure of tables with custom partitioning (format_version >= 1): -/// Part directory - / partiiton-id _ min-id _ max-id _ level / +/// Part directory - / partition-id _ min-id _ max-id _ level / /// Inside the part directory: /// The same files as for month-partitioned tables, plus /// count.txt - contains total number of rows in this part. diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 431d059ba60..84a3fcf1be2 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -160,7 +160,7 @@ private: NamesAndTypesList storage_columns, const MutationCommands & commands_for_removes); - /// Get skip indcies, that should exists in the resulting data part. + /// Get skip indices, that should exists in the resulting data part. static MergeTreeIndices getIndicesForNewDataPart( const MergeTreeIndices & all_indices, const MutationCommands & commands_for_removes); diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 3ce1fd830d5..b4a2b5fa797 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -6,13 +6,13 @@ namespace DB { /** In compact format all columns are stored in one file (`data.bin`). - * Data is splitted in granules and columns are serialized sequentially in one granule. + * Data is split in granules and columns are serialized sequentially in one granule. * Granules are written one by one in data file. * Marks are also stored in single file (`data.mrk3`). * In compact format one mark is an array of marks for every column and a number of rows in granule. * Format of other data part files is not changed. * It's considered to store only small parts in compact format (up to 10M). - * NOTE: Compact parts aren't supported for tables with non-adaptive granularty. + * NOTE: Compact parts aren't supported for tables with non-adaptive granularity. * NOTE: In compact part compressed and uncompressed size of single column is unknown. */ class MergeTreeDataPartCompact : public IMergeTreeDataPart diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index dfe3643e60c..f29be3d692f 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -28,7 +28,7 @@ struct MergeTreeSettings : public SettingsCollection #define LIST_OF_MERGE_TREE_SETTINGS(M) \ M(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.", 0) \ \ - /** Data storing format settigns. */ \ + /** Data storing format settings. */ \ M(SettingUInt64, min_bytes_for_wide_part, 0, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ M(SettingUInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ \ From 37e5da737cfd51b10ab11715e23a01e1e5697e73 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 May 2020 12:27:55 +0300 Subject: [PATCH 081/120] Fix ya.make. 
--- src/Processors/ya.make | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 4412aa4748c..a17cab9c0a1 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -59,6 +59,7 @@ SRCS( Formats/IRowOutputFormat.cpp Formats/LazyOutputFormat.cpp Formats/OutputStreamToOutputFormat.cpp + Formats/PullingOutputFormat.cpp Formats/RowInputFormatWithDiagnosticInfo.cpp IAccumulatingTransform.cpp IInflatingTransform.cpp From 2fa264d83edf4a1453a64a77d73c0be48638b72e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 12:45:44 +0300 Subject: [PATCH 082/120] Fix style #10591 --- src/Storages/System/StorageSystemParts.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 745d2938d86..83e3c34bfb8 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -145,7 +145,9 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto /// move_ttl_info { - Array expression_array, min_array, max_array; + Array expression_array; + Array min_array; + Array max_array; expression_array.reserve(part->ttl_infos.moves_ttl.size()); min_array.reserve(part->ttl_infos.moves_ttl.size()); max_array.reserve(part->ttl_infos.moves_ttl.size()); From 7e4cd8f8d4d3c093333adef8b44d49a5a5215e8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 12:55:39 +0300 Subject: [PATCH 083/120] Test for TTL info in system.parts --- .../0_stateless/01282_system_parts_ttl_info.reference | 2 ++ .../queries/0_stateless/01282_system_parts_ttl_info.sql | 9 +++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/01282_system_parts_ttl_info.reference create mode 100644 tests/queries/0_stateless/01282_system_parts_ttl_info.sql diff --git a/tests/queries/0_stateless/01282_system_parts_ttl_info.reference b/tests/queries/0_stateless/01282_system_parts_ttl_info.reference new file mode 100644 index 00000000000..1d9fe9eeb36 --- /dev/null +++ b/tests/queries/0_stateless/01282_system_parts_ttl_info.reference @@ -0,0 +1,2 @@ +2 2000-01-11 01:02:03 2000-02-13 04:05:06 [] [] [] +0 0000-00-00 00:00:00 0000-00-00 00:00:00 [] [] [] diff --git a/tests/queries/0_stateless/01282_system_parts_ttl_info.sql b/tests/queries/0_stateless/01282_system_parts_ttl_info.sql new file mode 100644 index 00000000000..3a1b1cc79ce --- /dev/null +++ b/tests/queries/0_stateless/01282_system_parts_ttl_info.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS ttl; +CREATE TABLE ttl (d DateTime) ENGINE = MergeTree ORDER BY tuple() TTL d + INTERVAL 10 DAY; +SYSTEM STOP MERGES; +INSERT INTO ttl VALUES ('2000-01-01 01:02:03'), ('2000-02-03 04:05:06'); +SELECT rows, delete_ttl_info_min, delete_ttl_info_max, move_ttl_info.expression, move_ttl_info.min, move_ttl_info.max FROM system.parts WHERE database = currentDatabase() AND table = 'ttl'; +SYSTEM START MERGES; +OPTIMIZE TABLE ttl FINAL; +SELECT rows, delete_ttl_info_min, delete_ttl_info_max, move_ttl_info.expression, move_ttl_info.min, move_ttl_info.max FROM system.parts WHERE database = currentDatabase() AND table = 'ttl' AND active; +DROP TABLE ttl; From c816916fba75311ae5f30b84ef86c09c6fd4d1e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 13:46:36 +0300 Subject: [PATCH 084/120] Fix typo --- .../0_stateless/01281_group_by_limit_memory_tracking.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql index fc8f81975f0..e2f1671acb9 100644 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql @@ -9,7 +9,7 @@ CREATE MATERIALIZED VIEW trace_log_01281_mv TO trace_log_01281 AS SELECT * FROM CREATE VIEW trace_log_01281_assert AS SELECT *, throwIf(cnt < 0, 'no memory profile captured'), - throwIf(queries != 1, 'too much queries'), + throwIf(queries != 1, 'too many queries'), throwIf(alloc < 100e6, 'minimal allocation had not been done'), throwIf((alloc+free)/alloc > 0.05, 'memory accounting leaked more than 5%') FROM ( From 1fca0294fe9769b997b4aa1afd06d2b999a940fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 13:48:48 +0300 Subject: [PATCH 085/120] Fix logical error in test --- .../0_stateless/01281_group_by_limit_memory_tracking.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql index e2f1671acb9..18d9988551f 100644 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql @@ -9,7 +9,7 @@ CREATE MATERIALIZED VIEW trace_log_01281_mv TO trace_log_01281 AS SELECT * FROM CREATE VIEW trace_log_01281_assert AS SELECT *, throwIf(cnt < 0, 'no memory profile captured'), - throwIf(queries != 1, 'too many queries'), + throwIf(queries > 1, 'too many queries'), throwIf(alloc < 100e6, 'minimal allocation had not been done'), throwIf((alloc+free)/alloc > 0.05, 'memory accounting leaked more than 5%') FROM ( From 886266fdad160c04b441cdb74603523bcacb871a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 13:57:24 +0300 Subject: [PATCH 086/120] Attempt to fix an error --- .../0_stateless/01281_group_by_limit_memory_tracking.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql index 18d9988551f..b9e467ba301 100644 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql @@ -38,6 +38,7 @@ SYSTEM FLUSH LOGS; -- } SELECT * FROM trace_log_01281_assert FORMAT Null; +SYSTEM FLUSH LOGS; TRUNCATE TABLE trace_log_01281; -- single limit { SET max_threads=1; @@ -48,6 +49,7 @@ SYSTEM FLUSH LOGS; -- } SELECT * FROM trace_log_01281_assert FORMAT Null; +SYSTEM FLUSH LOGS; TRUNCATE TABLE trace_log_01281; -- two-level { -- need to have multiple threads for two-level aggregation @@ -59,6 +61,7 @@ SYSTEM FLUSH LOGS; -- } SELECT * FROM trace_log_01281_assert FORMAT Null; +SYSTEM FLUSH LOGS; TRUNCATE TABLE trace_log_01281; -- two-level limit { -- need to have multiple threads for two-level aggregation @@ -70,6 +73,7 @@ SYSTEM FLUSH LOGS; -- } SELECT * FROM trace_log_01281_assert FORMAT Null; +SYSTEM FLUSH LOGS; TRUNCATE TABLE trace_log_01281; -- two-level MEMORY_LIMIT_EXCEEDED { -- need to have multiple threads for two-level aggregation From 80c7e85f38fc815cd952ffe709b41cd7162c282d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 13:58:05 +0300 Subject: [PATCH 087/120] Disable potentially unreliable check --- .../0_stateless/01281_group_by_limit_memory_tracking.sql | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql index b9e467ba301..fa606898fe8 100644 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql @@ -9,7 +9,7 @@ CREATE MATERIALIZED VIEW trace_log_01281_mv TO trace_log_01281 AS SELECT * FROM CREATE VIEW trace_log_01281_assert AS SELECT *, throwIf(cnt < 0, 'no memory profile captured'), - throwIf(queries > 1, 'too many queries'), +-- this check is disabled because it's not reliable: throwIf(queries > 1, 'too many queries'), throwIf(alloc < 100e6, 'minimal allocation had not been done'), throwIf((alloc+free)/alloc > 0.05, 'memory accounting leaked more than 5%') FROM ( From cce805c42cbe742f2fe745913f54d0e3fe9eb1bd Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Thu, 21 May 2020 14:50:12 +0300 Subject: [PATCH 088/120] Revert "Allow to build binaries and tests from deb package build. " --- debian/rules | 11 +---------- docker/packager/deb/build.sh | 11 ----------- docker/packager/packager | 19 ++----------------- programs/CMakeLists.txt | 6 ------ 4 files changed, 3 insertions(+), 44 deletions(-) diff --git a/debian/rules b/debian/rules index 7218e196baa..dabebb516cd 100755 --- a/debian/rules +++ b/debian/rules @@ -24,10 +24,6 @@ DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT) ifndef ENABLE_TESTS CMAKE_FLAGS += -DENABLE_TESTS=0 -else -# To export binaries and from deb build we do not strip them. No need to run tests in deb build as we run them in CI - DEB_BUILD_OPTIONS+= nocheck - DEB_BUILD_OPTIONS+= nostrip endif ifndef MAKE_TARGET @@ -92,19 +88,14 @@ override_dh_auto_build: $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET) override_dh_auto_test: -ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server -endif override_dh_clean: rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs - dh_clean # -X contrib + dh_clean -X contrib override_dh_strip: -#https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options -ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS))) dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg -endif override_dh_install: # Making docs diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index fbaa0151c6b..1efed3628a0 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -10,16 +10,5 @@ mv *.changes /output mv *.buildinfo /output mv /*.rpm /output ||: # if exists mv /*.tgz /output ||: # if exists - -if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;} -then - echo Place $BINARY_OUTPUT to output - mkdir /output/binary ||: # if exists - mv /build/obj-*/programs/clickhouse* /output/binary - if [ "$BINARY_OUTPUT" = "tests" ] - then - mv /build/obj-*/src/unit_tests_dbms /output/binary - fi -fi ccache --show-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: diff --git a/docker/packager/packager b/docker/packager/packager index 8a5bdda60e8..025ca3bf398 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -54,7 +54,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache subprocess.check_call(cmd, shell=True) -def parse_env_variables(build_type, 
compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries): +def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage): CLANG_PREFIX = "clang" DARWIN_SUFFIX = "-darwin" ARM_SUFFIX = "-aarch64" @@ -131,14 +131,6 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if alien_pkgs: result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'") - if with_binaries == "programs": - result.append('BINARY_OUTPUT=programs') - elif with_binaries == "tests": - result.append('ENABLE_TESTS=1') - result.append('BINARY_OUTPUT=tests') - cmake_flags.append('-DENABLE_TESTS=1') - cmake_flags.append('-DUSE_GTEST=1') - if unbundled: # TODO: fix build with ENABLE_RDKAFKA cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0') @@ -187,7 +179,6 @@ if __name__ == "__main__": parser.add_argument("--official", action="store_true") parser.add_argument("--alien-pkgs", nargs='+', default=[]) parser.add_argument("--with-coverage", action="store_true") - parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="") args = parser.parse_args() if not os.path.isabs(args.output_dir): @@ -204,12 +195,6 @@ if __name__ == "__main__": if args.alien_pkgs and not image_type == "deb": raise Exception("Can add alien packages only in deb build") - if args.with_binaries != "" and not image_type == "deb": - raise Exception("Can add additional binaries only in deb build") - - if args.with_binaries != "" and image_type == "deb": - logging.info("Should place {} to output".format(args.with_binaries)) - dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image: if not pull_image(image_name) or args.force_build_image: @@ -217,6 +202,6 @@ if __name__ == "__main__": env_prepared = parse_env_variables( args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy, - args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries) + args.version, args.author, args.official, args.alien_pkgs, args.with_coverage) run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) logging.info("Output placed into {}".format(args.output_dir)) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 7bc31452aa4..7cbe2e7a2a6 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -201,9 +201,3 @@ endif () if (TARGET clickhouse-server AND TARGET copy-headers) add_dependencies(clickhouse-server copy-headers) endif () - -if (ENABLE_TESTS AND USE_GTEST) - set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor expression_analyzer) - add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_ALL_TESTS_TARGETS}) - add_dependencies(clickhouse-bundle clickhouse-tests) -endif() From d7b1615ac4edb5325a8f300824c670388176dc4b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 21 May 2020 14:52:28 +0300 Subject: [PATCH 089/120] Update extended-roadmap.md --- 
docs/ru/whats-new/extended-roadmap.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/whats-new/extended-roadmap.md b/docs/ru/whats-new/extended-roadmap.md index 9c640504aec..b7756f7fb79 100644 --- a/docs/ru/whats-new/extended-roadmap.md +++ b/docs/ru/whats-new/extended-roadmap.md @@ -174,7 +174,7 @@ Upd. Всё ещё ждём удаление старого кода, котор ### 2.3. Перенос столбцового ser/de из DataType в Column {#perenos-stolbtsovogo-serde-iz-datatype-v-column} -В очереди. +В очереди. Антон Попов. ### 2.4. Перевод LowCardinality из DataType в Column. Добавление ColumnSparse {#perevod-lowcardinality-iz-datatype-v-column-dobavlenie-columnsparse} @@ -977,10 +977,10 @@ Q2. [Виталий Баранов](https://github.com/vitlibar) и Денис Глазачев, Altinity. Требует 12.1. -### 12.6. Информация о пользователях и квотах в системной таблице {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse} +### 12.6. + Информация о пользователях и квотах в системной таблице {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse} [Виталий Баранов](https://github.com/vitlibar). Требует 12.1. -Есть pull request. Q2. +Есть pull request. Q2. Готово. ## 13. Разделение ресурсов, multi-tenancy {#razdelenie-resursov-multi-tenancy} From d6ce1ed7b931115c53f066e2c423b98bdd78eef3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 May 2020 14:54:26 +0300 Subject: [PATCH 090/120] Added comments. --- src/Processors/Executors/PipelineExecutingBlockInputStream.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.h b/src/Processors/Executors/PipelineExecutingBlockInputStream.h index 11ccb71f17a..e2e6eda0e24 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.h +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.h @@ -34,8 +34,9 @@ protected: private: std::unique_ptr pipeline; - std::unique_ptr executor; - std::unique_ptr async_executor; + /// One of executors is used. + std::unique_ptr executor; /// for singe thread. + std::unique_ptr async_executor; /// for many threads. bool is_execution_started = false; void createExecutor(); From 3c763908c3570bc73dd18574c9046293fede337a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 15:04:15 +0300 Subject: [PATCH 091/120] Added comment --- src/AggregateFunctions/AggregateFunctionSum.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 52431060aea..1d89b028150 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -134,6 +134,8 @@ struct AggregateFunctionSumKahanData template void NO_INLINE addMany(const Value * __restrict ptr, size_t count) { + /// Less than in ordinary sum, because the algorithm is more complicated and too large loop unrolling is questionable. + /// But this is just a guess. 
constexpr size_t unroll_count = 4; T partial_sums[unroll_count]{}; T partial_compensations[unroll_count]{}; From 581b2358de1219dac615994440944cb7a6becfb5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 May 2020 15:07:12 +0300 Subject: [PATCH 092/120] Added comment --- src/AggregateFunctions/AggregateFunctionSum.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 1d89b028150..9d3d559ecee 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -195,6 +195,8 @@ struct AggregateFunctionSumKahanData { auto raw_sum = to_sum + from_sum; auto rhs_compensated = raw_sum - to_sum; + /// Kahan summation is tricky because it depends on non-associativity of float arithmetic. + /// Do not simplify this expression if you are not sure. auto compensations = ((from_sum - rhs_compensated) + (to_sum - (raw_sum - rhs_compensated))) + compensation + from_compensation; to_sum = raw_sum + compensations; to_compensation = compensations - (to_sum - raw_sum); From 5e388d5519f7e05e6bdeb1e6ffeb6a22d3f97266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E7=92=9E?= Date: Thu, 21 May 2020 20:14:39 +0800 Subject: [PATCH 093/120] table name typo fix (#11100) According to the context, the table name of queue2 should be queue3. --- docs/en/engines/table-engines/integrations/kafka.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 4b29baed70d..d7824443c1d 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -72,7 +72,7 @@ Examples: kafka_format = 'JSONEachRow', kafka_num_consumers = 4; - CREATE TABLE queue2 ( + CREATE TABLE queue3 ( timestamp UInt64, level String, message String From 4e72ede169cfd2f425dccc2307f1d6816ee10d8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=81=E5=AE=A2=E9=9D=92=E5=B9=B4?= Date: Thu, 21 May 2020 20:40:44 +0800 Subject: [PATCH 094/120] Update install.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/ change url | 修正地址 --- docs/zh/getting-started/install.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index 9f06317ebd0..32eb7fa0a82 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -46,7 +46,7 @@ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/ sudo yum install clickhouse-server clickhouse-client ``` -您也可以从此处手动下载和安装软件包:https://repo.yandex.ru/clickhouse/rpm/stable/x86\_64。 +您也可以从此处手动下载和安装软件包:https://repo.yandex.ru/clickhouse/rpm/stable/x86_64。 ### 来自Docker {#from-docker-image} From 403aae91265cfe50f8a5c989147680c68a9e93a0 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 16 May 2020 15:11:17 +0300 Subject: [PATCH 095/120] Fixed parseDateTime64BestEffort implementation Fixed argument resolution issues. Added tests and made sure -orNull and -orZero variants alwo work correctly. 
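The patch takes the DateTime64 scale from the resulting data type and rescales the parsed sub-second digits to it. Below is a rough standalone sketch of that rescaling arithmetic, with illustrative names rather than the actual ClickHouse types; it is not the real implementation, only the idea.

// Rough sketch of combining whole seconds and a parsed sub-second part into a
// DateTime64-style decimal at a requested scale. SubsecondPart and
// combineToDateTime64 are illustrative stand-ins, not ClickHouse code.
#include <cstdint>
#include <iostream>

struct SubsecondPart
{
    int64_t value = 0;    // e.g. 253184 parsed from ".253184"
    uint32_t digits = 0;  // e.g. 6
};

int64_t combineToDateTime64(int64_t whole_seconds, SubsecondPart subsecond, uint32_t scale)
{
    int64_t fractional = subsecond.value;

    // Bring the fraction to exactly `scale` decimal digits.
    if (scale < subsecond.digits)
        for (uint32_t i = 0; i < subsecond.digits - scale; ++i)
            fractional /= 10;   // drop precision that cannot be represented
    else
        for (uint32_t i = 0; i < scale - subsecond.digits; ++i)
            fractional *= 10;   // pad with trailing zeros

    int64_t multiplier = 1;
    for (uint32_t i = 0; i < scale; ++i)
        multiplier *= 10;

    // DateTime64 is a decimal: whole seconds scaled up, plus the fraction.
    return whole_seconds * multiplier + fractional;
}

int main()
{
    SubsecondPart sub{253184, 6};                                   // ".253184"
    std::cout << combineToDateTime64(1589427423, sub, 3) << "\n";   // 2020-05-14 03:37:03 UTC -> 1589427423253
}

For '2020-05-14T03:37:03.253184Z' parsed at scale 3, the six sub-second digits are truncated to 253, which matches the 2020-05-14 03:37:03.253 rows in the new reference file.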
--- src/Functions/FunctionsConversion.h | 139 +++++++++--------- src/IO/parseDateTimeBestEffort.cpp | 13 +- .../01281_parseDateTime64BestEffort.reference | 15 ++ .../01281_parseDateTime64BestEffort.sql | 33 +++++ 4 files changed, 132 insertions(+), 68 deletions(-) create mode 100644 tests/queries/0_stateless/01281_parseDateTime64BestEffort.reference create mode 100644 tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 64708f45598..9d0b764e84b 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1110,6 +1110,8 @@ public: std::is_same_v> || std::is_same_v>; + static constexpr bool to_datetime64 = std::is_same_v; + static FunctionPtr create(const Context &) { return std::make_shared(); } static FunctionPtr create() { return std::make_shared(); } @@ -1126,67 +1128,17 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2)) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + - ", should be 1 or 2. Second argument only make sense for DateTime (time zone, optional) and Decimal (scale).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (!isStringOrFixedString(arguments[0].type)) - { - if (this->getName().find("OrZero") != std::string::npos || - this->getName().find("OrNull") != std::string::npos) - throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + - ". Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - else - throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - if (arguments.size() == 2) - { - if constexpr (std::is_same_v) - { - if (!isString(arguments[1].type)) - throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - else if constexpr (to_decimal) - { - if (!isInteger(arguments[1].type)) - throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (!arguments[1].column) - throw Exception("Second argument for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN); - } - else - { - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 1. 
Second argument makes sense only for DateTime and Decimal.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - } - } - DataTypePtr res; - - if constexpr (std::is_same_v) - res = std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); - else if constexpr (to_decimal) + if constexpr (to_datetime64) { - UInt64 scale = extractToDecimalScale(arguments[1]); + validateFunctionArgumentTypes(*this, arguments, + FunctionArgumentDescriptors{{"string", isStringOrFixedString, nullptr, "String or FixedString"}}, + // optional + FunctionArgumentDescriptors{ + {"precision", isUInt8, isColumnConst, "const UInt8"}, + {"timezone", isStringOrFixedString, isColumnConst, "const String or FixedString"}, + }); - if constexpr (std::is_same_v>) - res = createDecimal(9, scale); - else if constexpr (std::is_same_v>) - res = createDecimal(18, scale); - else if constexpr (std::is_same_v>) - res = createDecimal(38, scale); - - if (!res) - throw Exception("Someting wrong with toDecimalNNOrZero() or toDecimalNNOrNull()", ErrorCodes::LOGICAL_ERROR); - } - else if constexpr (std::is_same_v) - { UInt64 scale = DataTypeDateTime64::default_scale; if (arguments.size() > 1) scale = extractToDecimalScale(arguments[1]); @@ -1194,7 +1146,67 @@ public: res = std::make_shared(scale, timezone); } else - res = std::make_shared(); + { + if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2)) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + + ", should be 1 or 2. Second argument only make sense for DateTime (time zone, optional) and Decimal (scale).", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!isStringOrFixedString(arguments[0].type)) + { + if (this->getName().find("OrZero") != std::string::npos || + this->getName().find("OrNull") != std::string::npos) + throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + + ". Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + else + throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + if (arguments.size() == 2) + { + if constexpr (std::is_same_v) + { + if (!isString(arguments[1].type)) + throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + else if constexpr (to_decimal) + { + if (!isInteger(arguments[1].type)) + throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (!arguments[1].column) + throw Exception("Second argument for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN); + } + else + { + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1. 
Second argument makes sense only for DateTime and Decimal.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + } + + if constexpr (std::is_same_v) + res = std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); + else if constexpr (to_decimal) + { + UInt64 scale = extractToDecimalScale(arguments[1]); + + if constexpr (std::is_same_v>) + res = createDecimal(9, scale); + else if constexpr (std::is_same_v>) + res = createDecimal(18, scale); + else if constexpr (std::is_same_v>) + res = createDecimal(38, scale); + + if (!res) + throw Exception("Someting wrong with toDecimalNNOrZero() or toDecimalNNOrNull()", ErrorCodes::LOGICAL_ERROR); + } + else + res = std::make_shared(); + } if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) res = std::make_shared(res); @@ -1207,12 +1219,9 @@ public: const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); bool ok = true; - if constexpr (to_decimal || std::is_same_v) + if constexpr (to_decimal || to_datetime64) { - if (arguments.size() != 2) - throw Exception{"Function " + getName() + " expects 2 arguments for Decimal.", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; - - UInt32 scale = extractToDecimalScale(block.getByPosition(arguments[1])); + const UInt32 scale = assert_cast(*removeNullable(block.getByPosition(result).type)).getScale(); if (checkAndGetDataType(from_type)) { @@ -1241,7 +1250,6 @@ public: } else ok = false; - } if (!ok) @@ -1252,7 +1260,6 @@ public: } }; - /** Conversion to fixed string is implemented only for strings. */ class FunctionToFixedString : public IFunction diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 7e40909226c..4b6183e9c0b 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -562,8 +562,17 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf { time_t whole; DateTimeSubsecondPart subsecond = {0, 0}; // needs to be explicitly initialized sine it could be missing from input string - if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) - return ReturnType(false); + + if constexpr (std::is_same_v) + { + if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) + return false; + } + else + { + parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond); + } + DateTime64::NativeType fractional = subsecond.value; if (scale < subsecond.digits) diff --git a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.reference b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.reference new file mode 100644 index 00000000000..e55e50c15d5 --- /dev/null +++ b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.reference @@ -0,0 +1,15 @@ +orNull +2020-05-14 03:37:03.253 +\N +orZero +2020-05-14 03:37:03.253 +0000-00-00 00:00:00.000 +non-const +2020-05-14 03:37:03.253 +Timezones +2020-05-14 03:37:03.253 +2020-05-14 06:37:03.253 +Formats +2020-05-14 03:37:03.253 +2020-05-14 03:37:03.000 +2020-05-14 03:37:03.000 diff --git a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql new file mode 100644 index 00000000000..5c0bbe1b4c2 --- /dev/null +++ b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql @@ -0,0 +1,33 @@ +-- Error cases +SELECT parseDateTime64BestEffort(); -- {serverError 42} +SELECT parseDateTime64BestEffort(123); -- {serverError 43} +SELECT parseDateTime64BestEffort('foo'); 
-- {serverError 41} + +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 'bar'); -- {serverError 43} -- invalid scale parameter +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 4); -- {serverError 43} -- invalid timezone parameter +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError 1000} -- unknown timezone + +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 44} -- non-const precision +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, materialize('UTC')); -- {serverError 44} -- non-const timezone + +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184012345678910111213141516171819Z', 3, 'UTC'); -- {serverError 6} + +SELECT 'orNull'; +SELECT parseDateTime64BestEffortOrNull('2020-05-14T03:37:03.253184Z', 3, 'UTC'); +SELECT parseDateTime64BestEffortOrNull('foo', 3, 'UTC'); + +SELECT 'orZero'; +SELECT parseDateTime64BestEffortOrZero('2020-05-14T03:37:03.253184Z', 3, 'UTC'); +SELECT parseDateTime64BestEffortOrZero('bar', 3, 'UTC'); + +SELECT 'non-const'; +SELECT parseDateTime64BestEffort(materialize('2020-05-14T03:37:03.253184Z'), 3, 'UTC'); + +SELECT 'Timezones'; +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'UTC'); +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'Europe/Minsk'); + +SELECT 'Formats'; +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184', 3, 'UTC'); +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03', 3, 'UTC'); +SELECT parseDateTime64BestEffort('2020-05-14 03:37:03', 3, 'UTC'); \ No newline at end of file From 9dc493e063c20459c2941c3d3ff28996da3205e3 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Thu, 21 May 2020 18:02:52 +0300 Subject: [PATCH 096/120] Revert "Revert "Allow to build binaries and tests from deb package build. "" --- debian/rules | 11 ++++++++++- docker/packager/deb/build.sh | 11 +++++++++++ docker/packager/packager | 19 +++++++++++++++++-- programs/CMakeLists.txt | 6 ++++++ 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/debian/rules b/debian/rules index dabebb516cd..7218e196baa 100755 --- a/debian/rules +++ b/debian/rules @@ -24,6 +24,10 @@ DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT) ifndef ENABLE_TESTS CMAKE_FLAGS += -DENABLE_TESTS=0 +else +# To export binaries and from deb build we do not strip them. 
No need to run tests in deb build as we run them in CI + DEB_BUILD_OPTIONS+= nocheck + DEB_BUILD_OPTIONS+= nostrip endif ifndef MAKE_TARGET @@ -88,14 +92,19 @@ override_dh_auto_build: $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET) override_dh_auto_test: +ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server +endif override_dh_clean: rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs - dh_clean -X contrib + dh_clean # -X contrib override_dh_strip: +#https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options +ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS))) dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg +endif override_dh_install: # Making docs diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index 1efed3628a0..fbaa0151c6b 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -10,5 +10,16 @@ mv *.changes /output mv *.buildinfo /output mv /*.rpm /output ||: # if exists mv /*.tgz /output ||: # if exists + +if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;} +then + echo Place $BINARY_OUTPUT to output + mkdir /output/binary ||: # if exists + mv /build/obj-*/programs/clickhouse* /output/binary + if [ "$BINARY_OUTPUT" = "tests" ] + then + mv /build/obj-*/src/unit_tests_dbms /output/binary + fi +fi ccache --show-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: diff --git a/docker/packager/packager b/docker/packager/packager index 025ca3bf398..8a5bdda60e8 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -54,7 +54,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache subprocess.check_call(cmd, shell=True) -def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage): +def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries): CLANG_PREFIX = "clang" DARWIN_SUFFIX = "-darwin" ARM_SUFFIX = "-aarch64" @@ -131,6 +131,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if alien_pkgs: result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'") + if with_binaries == "programs": + result.append('BINARY_OUTPUT=programs') + elif with_binaries == "tests": + result.append('ENABLE_TESTS=1') + result.append('BINARY_OUTPUT=tests') + cmake_flags.append('-DENABLE_TESTS=1') + cmake_flags.append('-DUSE_GTEST=1') + if unbundled: # TODO: fix build with ENABLE_RDKAFKA cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0') @@ -179,6 +187,7 @@ if __name__ == "__main__": parser.add_argument("--official", action="store_true") parser.add_argument("--alien-pkgs", nargs='+', default=[]) parser.add_argument("--with-coverage", action="store_true") + parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="") args = parser.parse_args() if not os.path.isabs(args.output_dir): @@ -195,6 +204,12 @@ if __name__ == "__main__": if args.alien_pkgs and not image_type == "deb": raise Exception("Can add alien packages only in deb build") + if args.with_binaries != "" and not 
image_type == "deb": + raise Exception("Can add additional binaries only in deb build") + + if args.with_binaries != "" and image_type == "deb": + logging.info("Should place {} to output".format(args.with_binaries)) + dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image: if not pull_image(image_name) or args.force_build_image: @@ -202,6 +217,6 @@ if __name__ == "__main__": env_prepared = parse_env_variables( args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy, - args.version, args.author, args.official, args.alien_pkgs, args.with_coverage) + args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries) run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) logging.info("Output placed into {}".format(args.output_dir)) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index a3d3188653b..65742697333 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -205,3 +205,9 @@ endif () if (TARGET clickhouse-server AND TARGET copy-headers) add_dependencies(clickhouse-server copy-headers) endif () + +if (ENABLE_TESTS AND USE_GTEST) + set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor expression_analyzer) + add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_ALL_TESTS_TARGETS}) + add_dependencies(clickhouse-bundle clickhouse-tests) +endif() From 20d41a3f9834d2c2d4f2badfa94b6054bd6c24ae Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 21 May 2020 18:30:08 +0300 Subject: [PATCH 097/120] Update dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) for compatibility with ubuntu eoan packages format --- docker/packager/deb/Dockerfile | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 78f396f5c75..ae70bc8c594 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -5,6 +5,7 @@ RUN apt-get --allow-unauthenticated update -y && apt-get install --yes wget gnup RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - RUN echo "deb [trusted=yes] http://apt.llvm.org/eoan/ llvm-toolchain-eoan-10 main" >> /etc/apt/sources.list + RUN apt-get --allow-unauthenticated update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get --allow-unauthenticated install --yes --no-install-recommends \ @@ -17,6 +18,14 @@ RUN apt-get --allow-unauthenticated update -y \ apt-transport-https \ ca-certificates +# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able +# to compress files using pigz (https://zlib.net/pigz/) instead of gzip. +# Significantly increase deb packaging speed and compatible with old systems +RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb +RUN chmod +x dpkg-deb +RUN cp dpkg-deb /usr/bin + + # Libraries from OS are only needed to test the "unbundled" build (that is not used in production). 
RUN apt-get --allow-unauthenticated update -y \ && env DEBIAN_FRONTEND=noninteractive \ @@ -74,12 +83,6 @@ RUN apt-get --allow-unauthenticated update -y \ libldap2-dev -# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able -# to compress files using pigz (https://zlib.net/pigz/) instead of gzip. -# Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb -RUN chmod +x dpkg-deb -RUN cp dpkg-deb /usr/bin # This symlink required by gcc to find lld compiler RUN ln -s /usr/bin/lld-10 /usr/bin/ld.lld From 5676340f6b4378c953b3462700839eecf6c9dbd2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 21 May 2020 18:52:11 +0300 Subject: [PATCH 098/120] Less verbose logging in mutation finalization task --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 ++ src/Storages/StorageReplicatedMergeTree.cpp | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2e71bc902e9..94497b2a850 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1405,6 +1405,8 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep if (candidates.empty()) return false; + else + LOG_DEBUG(log, "Trying to finalize " << candidates.size() << " mutations"); auto merge_pred = getMergePredicate(zookeeper); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e6fc32ecbf9..f05c078bd57 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2272,7 +2272,6 @@ void StorageReplicatedMergeTree::mergeSelectingTask() void StorageReplicatedMergeTree::mutationsFinalizingTask() { - LOG_DEBUG(log, "Trying to finalize mutations"); bool needs_reschedule = false; try From adfa77be9dd958a109c7481642189ec67775d00f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 May 2020 20:07:22 +0300 Subject: [PATCH 099/120] Added another one comment. --- src/Processors/Executors/PipelineExecutor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 395f42a3316..4b87a1147c6 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -288,6 +288,7 @@ private: void initializeExecution(size_t num_threads); /// Initialize executor contexts and task_queue. void finalizeExecution(); /// Check all processors are finished. + /// Methods connected to execution. 
void executeImpl(size_t num_threads); void executeStepImpl(size_t thread_num, size_t num_threads, std::atomic_bool * yield_flag = nullptr); void executeSingleThread(size_t thread_num, size_t num_threads); From ebb29d48fab6741a436ad80ae2d250899540e54d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 21 May 2020 21:06:08 +0300 Subject: [PATCH 100/120] Update perf.py --- docker/test/performance-comparison/perf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index be8155cdddb..fd16673bd89 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -151,7 +151,7 @@ for query_index, q in enumerate(test_queries): # use the test name + the test-wide query index. query_display_name = q if len(query_display_name) > 1000: - query_display_name = f'{query_display_name[:1000]}...({i})' + query_display_name = f'{query_display_name[:1000]}...({query_index})' print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}') From 23044ac02cff79b629974e6705ca8491214b32f2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 21 May 2020 21:18:34 +0300 Subject: [PATCH 101/120] Disable -Wsequence-point on gcc10 (otherwise it stuck on GatherUtils compiling) clang (10.0.0 is fine BTW) will warn about this anyway on CI. For the debug build gcc10: - before patch: - concat.cpp -> >5m (stuck on cc1plus, not as) - has.cpp -> >10m (stuck on cc1plus, not as) - after this patch: - concat.cpp -> 1m16s - has.cpp -> 4m (and most of the time eats, as from binutils 2.34.50.20200508) Command for build: - ninja src/Functions/GatherUtils/CMakeFiles/clickhouse_functions_gatherutils.dir/concat.cpp.o - ninja src/Functions/GatherUtils/CMakeFiles/clickhouse_functions_gatherutils.dir/has.cpp.o The test case should be reduced and then it can be reported to the gcc bugzilla. P.S. 
Looks like a signal not to switch to gcc10 for now --- cmake/warnings.cmake | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index cc97e727101..63cb153a0b4 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -162,4 +162,10 @@ elseif (COMPILER_GCC) add_cxx_compile_options(-Wunused) # Warn if vector operation is not implemented via SIMD capabilities of the architecture add_cxx_compile_options(-Wvector-operation-performance) + + # XXX: gcc10 stuck with this option while compiling GatherUtils code + # (anyway there are builds with clang, that will warn) + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) + add_cxx_compile_options(-Wno-sequence-point) + endif() endif () From e0668f48e18bd3a44d77e0ead8b1a9200438865b Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 21 May 2020 22:34:50 +0300 Subject: [PATCH 102/120] [docs] fix sitemap (#11113) --- docs/tools/build.py | 11 ----------- website/robots.txt | 2 +- website/{sitemap.xml => sitemap-index.xml} | 2 +- website/{sitemap_static.xml => sitemap-static.xml} | 0 4 files changed, 2 insertions(+), 13 deletions(-) rename website/{sitemap.xml => sitemap-index.xml} (92%) rename website/{sitemap_static.xml => sitemap-static.xml} (100%) diff --git a/docs/tools/build.py b/docs/tools/build.py index 406f5689bc4..95e887f046f 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -58,17 +58,6 @@ def build_for_lang(lang, args): 'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), 'language': lang, 'direction': 'rtl' if lang == 'fa' else 'ltr', - # TODO: cleanup - 'feature': { - 'tabs': False - }, - 'palette': { - 'primary': 'white', - 'accent': 'white' - }, - 'font': False, - 'logo': 'images/logo.svg', - 'favicon': 'assets/images/favicon.ico', 'static_templates': ['404.html'], 'extra': { 'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching diff --git a/website/robots.txt b/website/robots.txt index f9970836f18..fa3a68b6d69 100644 --- a/website/robots.txt +++ b/website/robots.txt @@ -12,4 +12,4 @@ Disallow: /docs/v3* Disallow: /cdn-cgi/ Allow: / Host: https://clickhouse.tech -Sitemap: https://clickhouse.tech/sitemap.xml +Sitemap: https://clickhouse.tech/sitemap-index.xml diff --git a/website/sitemap.xml b/website/sitemap-index.xml similarity index 92% rename from website/sitemap.xml rename to website/sitemap-index.xml index a147404ec6f..e53d6c29c54 100644 --- a/website/sitemap.xml +++ b/website/sitemap-index.xml @@ -22,6 +22,6 @@ https://clickhouse.tech/docs/fa/sitemap.xml - https://clickhouse.tech/sitemap_static.xml + https://clickhouse.tech/sitemap-static.xml diff --git a/website/sitemap_static.xml b/website/sitemap-static.xml similarity index 100% rename from website/sitemap_static.xml rename to website/sitemap-static.xml From c81dd49686693d1eeb29bffe2b95e0f256fbf246 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 May 2020 22:55:27 +0300 Subject: [PATCH 103/120] Try fix build. 
--- src/Processors/Executors/PullingPipelineExecutor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Executors/PullingPipelineExecutor.h b/src/Processors/Executors/PullingPipelineExecutor.h index 67ef4f7cf71..878d66bd3d4 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.h +++ b/src/Processors/Executors/PullingPipelineExecutor.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { From 087d8ab020d0ef11783fe0620767a03666252dd8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 21 May 2020 23:15:18 +0300 Subject: [PATCH 104/120] A follow-up for trace log overflow in perftests The start was in https://github.com/ClickHouse/ClickHouse/pull/11026 It turned out that the problem was due to the incorrect mutate() implementation that lead to quadratic amount of column copying. This problem has since been fixed. Remove the excessively verbose logging, and also change appendToBlock of LogElement's to accept mutable columns instead of accepting a block and mutating it on each call. It looks wasteful, even though it is almost a noop. --- src/Interpreters/MetricLog.cpp | 6 +----- src/Interpreters/MetricLog.h | 2 +- src/Interpreters/PartLog.cpp | 6 +----- src/Interpreters/PartLog.h | 2 +- src/Interpreters/QueryLog.cpp | 6 +----- src/Interpreters/QueryLog.h | 2 +- src/Interpreters/QueryThreadLog.cpp | 6 +----- src/Interpreters/QueryThreadLog.h | 2 +- src/Interpreters/SystemLog.h | 16 +++++----------- src/Interpreters/TextLog.cpp | 6 +----- src/Interpreters/TextLog.h | 2 +- src/Interpreters/TraceLog.cpp | 6 +----- src/Interpreters/TraceLog.h | 2 +- 13 files changed, 17 insertions(+), 47 deletions(-) diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 96fe55c26e6..437d91e3743 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -35,10 +35,8 @@ Block MetricLogElement::createBlock() } -void MetricLogElement::appendToBlock(Block & block) const +void MetricLogElement::appendToBlock(MutableColumns & columns) const { - MutableColumns columns = block.mutateColumns(); - size_t column_idx = 0; columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time)); @@ -50,8 +48,6 @@ void MetricLogElement::appendToBlock(Block & block) const for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) columns[column_idx++]->insert(current_metrics[i]); - - block.setColumns(std::move(columns)); } diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index a90ce923494..7774a45d7e1 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -25,7 +25,7 @@ struct MetricLogElement static std::string name() { return "MetricLog"; } static Block createBlock(); - void appendToBlock(Block & block) const; + void appendToBlock(MutableColumns & columns) const; }; diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 7e04c5f6abb..91b7bd800b6 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -58,10 +58,8 @@ Block PartLogElement::createBlock() }; } -void PartLogElement::appendToBlock(Block & block) const +void PartLogElement::appendToBlock(MutableColumns & columns) const { - MutableColumns columns = block.mutateColumns(); - size_t i = 0; columns[i++]->insert(event_type); @@ -92,8 +90,6 @@ void PartLogElement::appendToBlock(Block & block) const columns[i++]->insert(error); columns[i++]->insert(exception); - - block.setColumns(std::move(columns)); } diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 
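A minimal sketch of the interface shift, using stand-in container types instead of the real Block and MutableColumns so it compiles on its own: each log element appends into caller-owned mutable columns, and the caller materializes and installs the columns once per flushed batch rather than once per element.

// Stand-in types: the real code uses Block, MutableColumns and IColumn from
// src/Core and src/Columns; plain vectors are used here only to keep the
// sketch self-contained.
#include <cstdint>
#include <string>
#include <vector>

using ExampleColumn = std::vector<std::string>;
using ExampleColumns = std::vector<ExampleColumn>;

struct ExampleLogElement
{
    int64_t event_time = 0;
    std::string message;

    // New signature: append into already-mutable columns, no per-element Block mutation.
    void appendToBlock(ExampleColumns & columns) const
    {
        columns[0].push_back(std::to_string(event_time));
        columns[1].push_back(message);
    }
};

// Caller side, mirroring SystemLog::flushImpl in the diff: "mutate" once,
// append N times, "set" once, instead of mutateColumns()/setColumns() inside the loop.
ExampleColumns flushBatch(const std::vector<ExampleLogElement> & to_flush)
{
    ExampleColumns columns(2);          // analogue of block.mutateColumns()
    for (const auto & elem : to_flush)
        elem.appendToBlock(columns);
    return columns;                     // analogue of block.setColumns(std::move(columns))
}

int main()
{
    ExampleLogElement elem;
    elem.event_time = 1589427423;
    elem.message = "example";
    auto columns = flushBatch({elem});
    return columns[0].size() == 1 ? 0 : 1;
}

Even with the underlying mutate() bug fixed, hoisting the mutateColumns()/setColumns() pair out of the per-element loop makes the cost explicit: columns are converted to mutable form once per flushed batch, not once per log element.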
b84138159a2..9c8bf414099 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -50,7 +50,7 @@ struct PartLogElement static std::string name() { return "PartLog"; } static Block createBlock(); - void appendToBlock(Block & block) const; + void appendToBlock(MutableColumns & columns) const; }; class IMergeTreeDataPart; diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 1b104fe1a6c..007a118b4ce 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -85,10 +85,8 @@ Block QueryLogElement::createBlock() } -void QueryLogElement::appendToBlock(Block & block) const +void QueryLogElement::appendToBlock(MutableColumns & columns) const { - MutableColumns columns = block.mutateColumns(); - size_t i = 0; columns[i++]->insert(type); @@ -146,8 +144,6 @@ void QueryLogElement::appendToBlock(Block & block) const columns[i++]->insertDefault(); columns[i++]->insertDefault(); } - - block.setColumns(std::move(columns)); } void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i) diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index ec14f5e97fb..ac7c2365a07 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -62,7 +62,7 @@ struct QueryLogElement static std::string name() { return "QueryLog"; } static Block createBlock(); - void appendToBlock(Block & block) const; + void appendToBlock(MutableColumns & columns) const; static void appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i); }; diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index f539a720449..97d35659d48 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -67,10 +67,8 @@ Block QueryThreadLogElement::createBlock() }; } -void QueryThreadLogElement::appendToBlock(Block & block) const +void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const { - MutableColumns columns = block.mutateColumns(); - size_t i = 0; columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); @@ -107,8 +105,6 @@ void QueryThreadLogElement::appendToBlock(Block & block) const columns[i++]->insertDefault(); columns[i++]->insertDefault(); } - - block.setColumns(std::move(columns)); } } diff --git a/src/Interpreters/QueryThreadLog.h b/src/Interpreters/QueryThreadLog.h index c50daa7bc88..7b4e8ca5787 100644 --- a/src/Interpreters/QueryThreadLog.h +++ b/src/Interpreters/QueryThreadLog.h @@ -43,7 +43,7 @@ struct QueryThreadLogElement static std::string name() { return "QueryThreadLog"; } static Block createBlock(); - void appendToBlock(Block & block) const; + void appendToBlock(MutableColumns & columns) const; }; diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index b2a4eec7883..f32a05123b1 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -52,7 +52,7 @@ namespace DB static std::string name(); static Block createBlock(); - void appendToBlock(Block & block) const; + void appendToBlock(MutableColumns & columns) const; }; */ @@ -340,9 +340,8 @@ void SystemLog::savingThreadFunction() uint64_t to_flush_end = 0; { - LOG_TRACE(log, "Sleeping"); std::unique_lock lock(mutex); - const bool predicate = flush_event.wait_for(lock, + flush_event.wait_for(lock, std::chrono::milliseconds(flush_interval_milliseconds), [&] () { @@ -359,13 +358,6 @@ void SystemLog::savingThreadFunction() queue.swap(to_flush); exit_this_thread = is_shutdown; - - 
LOG_TRACE(log, "Woke up" - << (predicate ? " by condition" : " by timeout (" - + toString(flush_interval_milliseconds) + " ms)") - << ", " << to_flush.size() << " elements to flush" - << " up to " << to_flush_end - << (is_shutdown ? ", shutdown requested" : "")); } if (to_flush.empty()) @@ -399,8 +391,10 @@ void SystemLog::flushImpl(const std::vector & to_flush, prepareTable(); Block block = LogElement::createBlock(); + MutableColumns columns = block.mutateColumns(); for (const auto & elem : to_flush) - elem.appendToBlock(block); + elem.appendToBlock(columns); + block.setColumns(std::move(columns)); /// We write to table indirectly, using InterpreterInsertQuery. /// This is needed to support DEFAULT-columns in table. diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index 35e027616f6..d166b24ef4f 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -46,10 +46,8 @@ Block TextLogElement::createBlock() }; } -void TextLogElement::appendToBlock(Block & block) const +void TextLogElement::appendToBlock(MutableColumns & columns) const { - MutableColumns columns = block.mutateColumns(); - size_t i = 0; columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); @@ -68,8 +66,6 @@ void TextLogElement::appendToBlock(Block & block) const columns[i++]->insert(source_file); columns[i++]->insert(source_line); - - block.setColumns(std::move(columns)); } TextLog::TextLog(Context & context_, const String & database_name_, diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index 73c38429662..d5d1610dfb5 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -25,7 +25,7 @@ struct TextLogElement static std::string name() { return "TextLog"; } static Block createBlock(); - void appendToBlock(Block & block) const; + void appendToBlock(MutableColumns & columns) const; }; class TextLog : public SystemLog diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 2367cc56ac0..3df182a61ca 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -35,10 +35,8 @@ Block TraceLogElement::createBlock() }; } -void TraceLogElement::appendToBlock(Block & block) const +void TraceLogElement::appendToBlock(MutableColumns & columns) const { - MutableColumns columns = block.mutateColumns(); - size_t i = 0; columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); @@ -50,6 +48,4 @@ void TraceLogElement::appendToBlock(Block & block) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); - - block.setColumns(std::move(columns)); } diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index c882ae69420..f97cdaaec40 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -24,7 +24,7 @@ struct TraceLogElement static std::string name() { return "TraceLog"; } static Block createBlock(); - void appendToBlock(Block & block) const; + void appendToBlock(MutableColumns & columns) const; }; class TraceLog : public SystemLog From 7b747fabe20c91c24bc6328f8a605cff47cd6fb9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 22 May 2020 02:03:41 +0300 Subject: [PATCH 105/120] performance comparison --- docker/test/performance-comparison/compare.sh | 60 ++++++++++++++----- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 0c0289d811c..5731b3c88af 100755 --- 
a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -217,13 +217,8 @@ function get_profiles clickhouse-client --port 9002 --query "select 1" } -# Build and analyze randomization distribution for all queries. -function analyze_queries +function build_log_column_definitions { -rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||: -rm -rfv analyze ||: -mkdir analyze ||: - # FIXME This loop builds column definitons from TSVWithNamesAndTypes in an # absolutely atrocious way. This should be done by the file() function itself. for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv @@ -233,6 +228,16 @@ do <(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \ | tr '\n' ', ' | sed 's/,$//' > "$x.columns" done +} + +# Build and analyze randomization distribution for all queries. +function analyze_queries +{ +rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||: +rm -rfv analyze ||: +mkdir analyze ||: + +build_log_column_definitions # Split the raw test output into files suitable for analysis. IFS=$'\n' @@ -278,6 +283,7 @@ create table query_metrics engine File(TSV, -- do not add header -- will parse w ) query_logs right join query_runs using (query_id, version) + order by test, query_index ; " @@ -291,8 +297,8 @@ query_index=1 IFS=$'\n' for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq) do - file="analyze/q$query_index.tmp" - grep -F "$prefix " "analyze/query-run-metrics.tsv" > "$file" & + file="analyze/$(echo "$prefix" | sed 's/\t/_/g').tmp" + grep "^$prefix " "analyze/query-run-metrics.tsv" > "$file" & printf "%s\0\n" \ "clickhouse-local \ --file \"$file\" \ @@ -301,13 +307,11 @@ do >> \"analyze/query-reports.tsv\"" \ 2>> analyze/errors.log \ >> analyze/commands.txt - - query_index=$((query_index + 1)) done wait unset IFS -parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt +parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log } # Analyze results @@ -318,6 +322,8 @@ mkdir report ||: rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||: +build_log_column_definitions + cat analyze/errors.log >> report/errors.log ||: cat profile-errors.log >> report/errors.log ||: @@ -570,8 +576,16 @@ create table stacks engine File(TSV, 'stacks.$version.rep') as select -- first goes the key used to split the file with grep test, query_index, any(query_display_name), + -- next go the stacks in flamegraph format: 'func1;...;funcN count' arrayStringConcat( - arrayMap(x -> joinGet(addresses_join_$version, 'name', x), + arrayMap(addr -> replaceRegexpOne( + joinGet(addresses_join_$version, 'name', addr), + -- This function is at the base of the stack, and its name changes + -- surprisingly often between builds, e.g. '__clone' or 'clone' or + -- even '__GI__clone'. This breaks differential flame graphs, so + -- filter it out here. 
+ '^clone\\.S.*', 'clone.S (name filtered by comparison script)' + ), arrayReverse(trace) ), ';' @@ -580,6 +594,7 @@ create table stacks engine File(TSV, 'stacks.$version.rep') as from trace_log join unstable_query_runs using query_id group by test, query_index, trace + order by test, query_index, trace ; " 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors. done @@ -592,22 +607,37 @@ do for query in $(cut -d' ' -f1,2,3 "stacks.$version.rep" | sort | uniq) do query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g') + echo "$query_file" >> report/query-files.txt # Build separate .svg flamegraph for each query. + # -F is somewhat unsafe because it might match not the beginning of the + # string, but this is unlikely and escaping the query for grep is a pain. grep -F "$query " "stacks.$version.rep" \ - | cut -d' ' -f 2- \ + | cut -f 4- \ | sed 's/\t/ /g' \ | tee "$query_file.stacks.$version.rep" \ - | ~/fg/flamegraph.pl > "$query_file.$version.svg" & + | ~/fg/flamegraph.pl --hash > "$query_file.$version.svg" & # Copy metric stats into separate files as well. + # Ditto the above comment about -F. grep -F "$query " "metric-deviation.$version.rep" \ - | cut -f2- > "$query_file.$version.metrics.rep" & + | cut -f4- > "$query_file.$version.metrics.rep" & done done wait unset IFS +# Create differential flamegraphs. +IFS=$'\n' +for query_file in $(cat report/query-files.txt) +do + ~/fg/difffolded.pl "$query_file.stacks.left.rep" "$query_file.stacks.right.rep" \ + | tee "$query_file.stacks.diff.rep" \ + | ~/fg/flamegraph.pl > "$query_file.diff.svg" & +done +unset IFS +wait + # Remember that grep sets error code when nothing is found, hence the bayan # operator. grep -H -m2 -i '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||: From 322d731d28c1dab7b722867a98d6949d2dbef37a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 22 May 2020 03:01:35 +0300 Subject: [PATCH 106/120] Remove extra copying from addressToLine The line is already serialized to the same arena, so there is no need to copy it. Also add this function to query profiler test, so that it has some coverage. 
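To make the redundancy concrete before the diff: the serialized "file:line" string is built in a buffer that is allocated from the arena in the first place, so copying the finished bytes back into that same arena only duplicated them. A minimal standalone sketch of the idea (not the ClickHouse code; the DemoArena type and serializeLine function are invented for illustration):

    // Stand-in sketch, not ClickHouse code. The buffer is taken from the arena
    // up front, so the arena already owns the serialized bytes and a view of
    // them can be returned directly, with no second copy into the arena.
    #include <deque>
    #include <string>
    #include <string_view>

    struct DemoArena
    {
        std::deque<std::string> chunks;   // deque growth never invalidates references
        std::string & alloc() { return chunks.emplace_back(); }
    };

    std::string_view serializeLine(DemoArena & arena, std::string_view file, unsigned line)
    {
        std::string & buf = arena.alloc();   // buffer already lives in the arena
        buf.append(file);
        buf.push_back(':');
        buf.append(std::to_string(line));
        return buf;                          // returning a view is enough
    }
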
--- src/Functions/addressToLine.cpp | 4 +--- tests/queries/0_stateless/00974_query_profiler.sql | 8 ++++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp index 6489fac3371..420938bc4b1 100644 --- a/src/Functions/addressToLine.cpp +++ b/src/Functions/addressToLine.cpp @@ -118,9 +118,7 @@ private: writeChar(':', out); writeIntText(location.line, out); - StringRef out_str = out.finish(); - out_str.data = arena.insert(out_str.data, out_str.size); - return out_str; + return out.finish(); } else { diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index 9e2723c67d8..c5c073abfa9 100644 --- a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -5,7 +5,9 @@ SET log_queries = 1; SELECT sleep(0.5), ignore('test real time query profiler'); SET log_queries = 0; SYSTEM FLUSH LOGS; -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test real time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%FunctionSleep%'; + +WITH addressToLine(arrayJoin(trace) AS addr) || '#' || demangle(addressToSymbol(addr)) AS symbol +SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test real time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%FunctionSleep%'; SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; @@ -13,4 +15,6 @@ SET log_queries = 1; SELECT count(), ignore('test cpu time query profiler') FROM numbers(1000000000); SET log_queries = 0; SYSTEM FLUSH LOGS; -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test cpu time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%Source%'; + +WITH addressToLine(arrayJoin(trace) AS addr) || '#' || demangle(addressToSymbol(addr)) AS symbol +SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test cpu time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%Source%'; From 432648ca1ee3a5055bc6cd91bc35dbd88457b952 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 22 May 2020 03:51:42 +0300 Subject: [PATCH 107/120] performance comparison --- docker/test/performance-comparison/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 3b8a24ad5da..b425acd1a72 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -82,7 +82,7 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi ) | tee right-commit.txt # Prepare the list of changed tests for use by compare.sh -git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST"~ master)" -- tests/performance | tee changed-tests.txt +git -C ch diff --name-only "$REF_SHA" "$SHA_TO_TEST" -- tests/performance | tee changed-tests.txt # Set python output encoding so that we can print queries with 
Russian letters. export PYTHONIOENCODING=utf-8 From 3073fc9eb00f72c87e39109df43e644b586cf88f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 22 May 2020 05:51:26 +0300 Subject: [PATCH 108/120] performance comparison --- docker/test/performance-comparison/report.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 83c5e7344f7..b171603700d 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -182,6 +182,16 @@ if args.report == 'main': print_tested_commits() + run_error_rows = tsvRows('run-errors.tsv') + error_tests += len(run_error_rows) + printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows) + + slow_on_client_rows = tsvRows('report/slow-on-client.tsv') + error_tests += len(slow_on_client_rows) + printSimpleTable('Slow on client', + ['Client time, s', 'Server time, s', 'Ratio', 'Query'], + slow_on_client_rows) + def print_changes(): rows = tsvRows('report/changed-perf.tsv') if not rows: @@ -221,12 +231,6 @@ if args.report == 'main': print_changes() - slow_on_client_rows = tsvRows('report/slow-on-client.tsv') - error_tests += len(slow_on_client_rows) - printSimpleTable('Slow on client', - ['Client time, s', 'Server time, s', 'Ratio', 'Query'], - slow_on_client_rows) - def print_unstable_queries(): global unstable_queries global very_unstable_queries @@ -265,10 +269,6 @@ if args.report == 'main': print_unstable_queries() - run_error_rows = tsvRows('run-errors.tsv') - error_tests += len(run_error_rows) - printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows) - skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv') printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows) From 6a670ba7ecb73f9b5dc2ad019b58644f5b27bef2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 22 May 2020 08:51:46 +0300 Subject: [PATCH 109/120] Update index.md --- docs/en/faq/index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md index 11395301904..bdbd59f7880 100644 --- a/docs/en/faq/index.md +++ b/docs/en/faq/index.md @@ -1,6 +1,8 @@ --- toc_folder_title: F.A.Q. toc_priority: 76 +toc_title: hidden +toc_hidden: true --- From caa788b4f8035f25835299ddc1fb02801c07ea5c Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Fri, 22 May 2020 07:28:01 +0000 Subject: [PATCH 110/120] Bump six from 1.14.0 to 1.15.0 in /docs/tools Bumps [six](https://github.com/benjaminp/six) from 1.14.0 to 1.15.0. 
- [Release notes](https://github.com/benjaminp/six/releases) - [Changelog](https://github.com/benjaminp/six/blob/master/CHANGES) - [Commits](https://github.com/benjaminp/six/compare/1.14.0...1.15.0) Signed-off-by: dependabot-preview[bot] --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index adb68cfb2d7..4d4e9f98780 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -30,7 +30,7 @@ PyYAML==5.3.1 repackage==0.7.3 requests==2.23.0 singledispatch==3.4.0.3 -six==1.14.0 +six==1.15.0 soupsieve==2.0.1 termcolor==1.1.0 tornado==5.1.1 From cbcdb6219eb14a0f49475edc042def6ec3e988d1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 22 May 2020 11:17:57 +0300 Subject: [PATCH 111/120] Slighty more verbose comment --- src/Storages/AlterCommands.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index a42e7cce646..82090cb1aaf 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -163,8 +163,9 @@ public: bool isCommentAlter() const; /// Return mutation commands which some storages may execute as part of - /// alter. If alter can be performed is pure metadata update, than result is - /// empty. + /// alter. If alter can be performed as pure metadata update, than result is + /// empty. If some TTL changes happened than, depending on materialize_ttl + /// additional mutation command (MATERIALIZE_TTL) will be returned. MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl) const; }; From 86da115018f290e66c7c097149bb079a2c94af9c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 22 May 2020 11:40:02 +0300 Subject: [PATCH 112/120] performance comparison --- docker/test/performance-comparison/download.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index dd030e9f39d..8e09fac77f8 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -46,7 +46,13 @@ function download done mkdir ~/fg ||: - cd ~/fg && wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" && chmod +x ~/fg/flamegraph.pl & + ( + cd ~/fg + wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" + wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/difffolded.pl" + chmod +x ~/fg/difffolded.pl + chmod +x ~/fg/flamegraph.pl + ) & wait } From f04c5c6100231255f871653b1294b2a2d13927bd Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 22 May 2020 11:54:35 +0300 Subject: [PATCH 113/120] performance comparison --- docker/test/performance-comparison/entrypoint.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index b425acd1a72..ef62c8981e9 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -81,8 +81,13 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi fi ) | tee right-commit.txt -# Prepare the list of changed tests for use by compare.sh -git -C ch diff --name-only "$REF_SHA" "$SHA_TO_TEST" -- tests/performance | tee changed-tests.txt +if [ 
"$PR_TO_TEST" != "0" ] +then + # Prepare the list of tests changed in the PR for use by compare.sh. Compare to + # merge base, because master might be far in the future and have unrelated test + # changes. + git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST" master)" -- tests/performance | tee changed-tests.txt +fi # Set python output encoding so that we can print queries with Russian letters. export PYTHONIOENCODING=utf-8 From a2fabd7ff03f32cb5580c2883bbeda08723e62b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 22 May 2020 12:36:26 +0300 Subject: [PATCH 114/120] Remove unreliable test #11119 --- ...1_group_by_limit_memory_tracking.reference | 0 .../01281_group_by_limit_memory_tracking.sql | 88 ------------------- 2 files changed, 88 deletions(-) delete mode 100644 tests/queries/0_stateless/01281_group_by_limit_memory_tracking.reference delete mode 100644 tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.reference b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql deleted file mode 100644 index fa606898fe8..00000000000 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sql +++ /dev/null @@ -1,88 +0,0 @@ -DROP TABLE IF EXISTS trace_log_01281; -DROP TABLE IF EXISTS trace_log_01281_mv; -DROP TABLE IF EXISTS trace_log_01281_assert; - --- better alternative will be to TRUNCATE TABLE system.*_log --- but this will be a separate issue -CREATE TABLE trace_log_01281 AS system.trace_log Engine=Memory(); -CREATE MATERIALIZED VIEW trace_log_01281_mv TO trace_log_01281 AS SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample'; -CREATE VIEW trace_log_01281_assert AS SELECT - *, - throwIf(cnt < 0, 'no memory profile captured'), --- this check is disabled because it's not reliable: throwIf(queries > 1, 'too many queries'), - throwIf(alloc < 100e6, 'minimal allocation had not been done'), - throwIf((alloc+free)/alloc > 0.05, 'memory accounting leaked more than 5%') - FROM ( - SELECT - count() cnt, - uniq(query_id) queries, - sumIf(size, size > 0) alloc, - sumIf(size, size < 0) free - FROM trace_log_01281 - ); - --- --- Basic --- NOTE: 0 (and even 1e6) is too small, will make SYSTEM FLUSH LOGS too slow --- (in debug build at least) --- -SET max_untracked_memory=4e6; - -TRUNCATE TABLE trace_log_01281; --- single { -SET max_threads=1; -SET memory_profiler_sample_probability=1; -SELECT uniqExactState(number) FROM numbers(toUInt64(2e6)) GROUP BY number % 2e5 FORMAT Null; -SET memory_profiler_sample_probability=0; -SYSTEM FLUSH LOGS; --- } -SELECT * FROM trace_log_01281_assert FORMAT Null; - -SYSTEM FLUSH LOGS; -TRUNCATE TABLE trace_log_01281; --- single limit { -SET max_threads=1; -SET memory_profiler_sample_probability=1; -SELECT uniqExactState(number) FROM numbers(toUInt64(2e6)) GROUP BY number % 2e5 LIMIT 10 FORMAT Null; -SET memory_profiler_sample_probability=0; -SYSTEM FLUSH LOGS; --- } -SELECT * FROM trace_log_01281_assert FORMAT Null; - -SYSTEM FLUSH LOGS; -TRUNCATE TABLE trace_log_01281; --- two-level { --- need to have multiple threads for two-level aggregation -SET max_threads=2; -SET memory_profiler_sample_probability=1; -SELECT uniqExactState(number) FROM numbers_mt(toUInt64(2e6)) GROUP BY number % 2e5 FORMAT 
Null; -SET memory_profiler_sample_probability=0; -SYSTEM FLUSH LOGS; --- } -SELECT * FROM trace_log_01281_assert FORMAT Null; - -SYSTEM FLUSH LOGS; -TRUNCATE TABLE trace_log_01281; --- two-level limit { --- need to have multiple threads for two-level aggregation -SET max_threads=2; -SET memory_profiler_sample_probability=1; -SELECT uniqExactState(number) FROM numbers_mt(toUInt64(2e6)) GROUP BY number % 2e5 LIMIT 10 FORMAT Null; -SET memory_profiler_sample_probability=0; -SYSTEM FLUSH LOGS; --- } -SELECT * FROM trace_log_01281_assert FORMAT Null; - -SYSTEM FLUSH LOGS; -TRUNCATE TABLE trace_log_01281; --- two-level MEMORY_LIMIT_EXCEEDED { --- need to have multiple threads for two-level aggregation -SET max_threads=2; -SET memory_profiler_sample_probability=1; -SET max_memory_usage='150M'; -SELECT uniqExactState(number) FROM numbers_mt(toUInt64(10e6)) GROUP BY number % 1e6 FORMAT Null; -- { serverError 241; } -SET memory_profiler_sample_probability=0; -SET max_memory_usage=0; -SYSTEM FLUSH LOGS; --- } -SELECT * FROM trace_log_01281_assert FORMAT Null; From 138e4562f30c650d5ce8cc4181705eb66e3a8a2a Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 22 May 2020 13:38:56 +0300 Subject: [PATCH 115/120] Skip flacky test --- tests/integration/test_replicated_merge_tree_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_merge_tree_s3/test.py b/tests/integration/test_replicated_merge_tree_s3/test.py index d6b6015a388..e38186ead3e 100644 --- a/tests/integration/test_replicated_merge_tree_s3/test.py +++ b/tests/integration/test_replicated_merge_tree_s3/test.py @@ -85,7 +85,7 @@ def drop_table(cluster): for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')): minio.remove_object(cluster.minio_bucket, obj.object_name) - +@pytest.mark.skip(reason="Cannot correctly start server") def test_insert_select_replicated(cluster): create_table(cluster) From 2547874b3badc3bf8254689edf9f3d5fbfdfc5a6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 22 May 2020 14:00:41 +0300 Subject: [PATCH 116/120] Update index.md --- docs/en/engines/index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/engines/index.md b/docs/en/engines/index.md index fe7e46fb534..0ab3b5fe172 100644 --- a/docs/en/engines/index.md +++ b/docs/en/engines/index.md @@ -1,6 +1,8 @@ --- toc_folder_title: Engines toc_priority: 25 +toc_title: hidden +toc_hidden: true --- {## [Original article](https://clickhouse.tech/docs/en/engines/) ##} From b6c04fb6ec881b1baf2c8cb649929c3aea54b5e6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 22 May 2020 14:09:33 +0300 Subject: [PATCH 117/120] Update footer.html --- website/templates/docs/footer.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/docs/footer.html b/website/templates/docs/footer.html index c9432a0b4dc..a99a723c036 100644 --- a/website/templates/docs/footer.html +++ b/website/templates/docs/footer.html @@ -1,7 +1,7 @@ {% if not single_page %}

{{ _('Rating') }}: RATING_VALUE - RATING_COUNT {{ _('votes') }}

-

{% if is_amp %}{{ _('Article Rating') }}{% else %}{{ _('Was this content helpful?') }}{% endif %}

+ {% if is_amp %}{{ _('Article Rating') }}{% else %}{{ _('Was this content helpful?') }}{% endif %}
Date: Fri, 22 May 2020 14:16:24 +0300 Subject: [PATCH 118/120] [docs] tune titles --- website/main.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/main.html b/website/main.html index 07647c24a2d..73693204e5d 100644 --- a/website/main.html +++ b/website/main.html @@ -18,9 +18,9 @@ {% if page and page.meta and page.meta.title %} {% set title = page.meta.title %} {% elif page and page.title and not page.is_homepage and page.title != 'hidden' %} - {% set title = page.title + ' - ' + config.site_name %} + {% set title = page.title + ' | ' + config.site_name %} {% elif page and page.title and not page.is_homepage and page.title == 'hidden' and page.ancestors %} - {% set title = (page.ancestors | first).title + ' - ' + config.site_name %} + {% set title = (page.ancestors | first).title + ' | ' + config.site_name %} {% else %} {% set title = config.site_name %} {% endif %} From 1a5fa0e6fa50286c265f9ce0b6ef1e3558a7c129 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 22 May 2020 14:39:21 +0300 Subject: [PATCH 119/120] Remove totally wrong performance test #10623 --- tests/performance/point_in_polygon.xml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 tests/performance/point_in_polygon.xml diff --git a/tests/performance/point_in_polygon.xml b/tests/performance/point_in_polygon.xml deleted file mode 100644 index d854fb6952b..00000000000 --- a/tests/performance/point_in_polygon.xml +++ /dev/null @@ -1,6 +0,0 @@ - - CREATE TABLE point_in_polygon(`polygon` Array(Array(Float64, Float64))) ENGINE = Log() - insert into point_in_polygon SELECT arrayJoin(arrayMap(y -> [arrayMap(x -> (cos(x / 90. * pi()) * y, sin(x / 90. * pi()) * y), range(180))], arraySlice(range(35000), 2, 35000))) - SELECT pointInPolygon((100, 100), `polygon`) from point_in_polygon - DROP TABLE IF EXISTS point_in_polygon - From e60fee3f3d537356bfedb5e7a8b7f680f2d5c6cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 22 May 2020 14:46:54 +0300 Subject: [PATCH 120/120] Add performance test for non-constant polygons --- tests/performance/point_in_polygon.xml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/performance/point_in_polygon.xml diff --git a/tests/performance/point_in_polygon.xml b/tests/performance/point_in_polygon.xml new file mode 100644 index 00000000000..06e59f13a13 --- /dev/null +++ b/tests/performance/point_in_polygon.xml @@ -0,0 +1,13 @@ + + CREATE TABLE polygons (polygon Array(Array(Tuple(Float64, Float64)))) ENGINE = Memory + + INSERT INTO polygons + WITH number + 1 AS radius + SELECT [arrayMap(x -> (cos(x / 90. * pi()) * radius, sin(x / 90. * pi()) * radius), range(180))] + FROM numbers(1000) + + + SELECT pointInPolygon((100, 100), polygon) FROM polygons + + DROP TABLE IF EXISTS polygons +
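As a brief, hedged illustration of what the new test measures (the table name, values, and expected results below are invented for illustration and are not part of the test): every row supplies its own polygon, so the second argument of pointInPolygon varies from row to row.

    -- Sketch only; not part of the performance test above.
    CREATE TABLE polygons_demo (polygon Array(Array(Tuple(Float64, Float64)))) ENGINE = Memory;

    -- First row: a 10x10 square that contains (5, 5). Second row: a unit square that does not.
    INSERT INTO polygons_demo VALUES ([[(0, 0), (10, 0), (10, 10), (0, 10)]]), ([[(0, 0), (1, 0), (1, 1), (0, 1)]]);

    -- The polygon differs per row, which is the non-constant case the test exercises.
    SELECT pointInPolygon((5, 5), polygon) FROM polygons_demo;
    -- Expected: 1 for the first row, 0 for the second.

    DROP TABLE polygons_demo;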