From 2f6e66c8e3809d80bcab0afe7bf5d9d03761640d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Aug 2023 03:19:24 +0200 Subject: [PATCH 01/16] Remove unnecessary templates --- src/Functions/array/arrayAUC.cpp | 122 +++++++++++++++++++++++-------- 1 file changed, 90 insertions(+), 32 deletions(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 4d2b8175f5b..a0aca7a563b 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -1,7 +1,9 @@ -#include #include +#include +#include +#include +#include #include -#include "arrayScalarProduct.h" namespace DB @@ -10,6 +12,8 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; } @@ -70,44 +74,32 @@ namespace ErrorCodes * The "curve" will be present by a line that moves one step either towards right or top on each threshold change. */ - -struct NameArrayAUC -{ - static constexpr auto name = "arrayAUC"; -}; - - -class ArrayAUCImpl +class FunctionArrayAUC : public IFunction { public: - using ResultType = Float64; + static constexpr auto name = "arrayAUC"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static DataTypePtr getReturnType(const DataTypePtr & /* score_type */, const DataTypePtr & label_type) - { - if (!(isNumber(label_type) || isEnum(label_type))) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} label must have numeric type.", std::string(NameArrayAUC::name)); - - return std::make_shared>(); - } - - template - static ResultType apply( - const T * scores, - const U * labels, - size_t size) +private: + static Float64 apply( + const IColumn & data1, + const IColumn & data2, + ColumnArray::Offset current_offset, + ColumnArray::Offset next_offset) { struct ScoreLabel { - T score; + Float64 score; bool label; }; + size_t size = next_offset - current_offset; PODArrayWithStackMemory sorted_labels(size); for (size_t i = 0; i < size; ++i) { - bool label = labels[i] > 0; - sorted_labels[i].score = scores[i]; + bool label = data2.getFloat64(current_offset + i) > 0; + sorted_labels[i].score = data1.getFloat64(current_offset + i); sorted_labels[i].label = label; } @@ -129,18 +121,84 @@ public: /// Then divide the area to the area of rectangle. if (count_positive == 0 || count_positive == size) - return std::numeric_limits::quiet_NaN(); + return std::numeric_limits::quiet_NaN(); - return static_cast(area) / count_positive / (size - count_positive); + return static_cast(area) / count_positive / (size - count_positive); + } + + static void vector( + const IColumn & data1, + const IColumn & data2, + const ColumnArray::Offsets & offsets, + PaddedPODArray & result) + { + size_t size = offsets.size(); + result.resize(size); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + auto next_offset = offsets[i]; + result[i] = apply(data1, data2, current_offset, next_offset); + current_offset = next_offset; + } + } + +public: + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + // Basic type check + std::vector nested_types(2, nullptr); + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All arguments for function {} must be an array.", getName()); + + const auto & nested_type = array_type->getNestedType(); + if (!isNativeNumber(nested_type) && !isEnum(nested_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot process values of type {}", + getName(), nested_type->getName()); + nested_types[i] = nested_type; + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + { + ColumnPtr col1 = arguments[0].column->convertToFullColumnIfConst(); + ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst(); + + const ColumnArray * col_array1 = checkAndGetColumn(col1.get()); + const ColumnArray * col_array2 = checkAndGetColumn(col2.get()); + if (!col_array1 || !col_array2) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); + + if (!col_array1->hasEqualOffsets(*col_array2)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); + + auto col_res = ColumnVector::create(); + + vector( + col_array1->getData(), + col_array2->getData(), + col_array1->getOffsets(), + col_res->getData()); + + return col_res; } }; -/// auc(array_score, array_label) - Calculate AUC with array of score and label -using FunctionArrayAUC = FunctionArrayScalarProduct; - REGISTER_FUNCTION(ArrayAUC) { factory.registerFunction(); } + } From da01d756f9c17d27b35bfcb3381b479b5c1e5a7f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Aug 2023 20:20:25 +0200 Subject: [PATCH 02/16] Addressed review comments --- src/Functions/array/arrayAUC.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index a0aca7a563b..caf929ba038 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -82,8 +82,8 @@ public: private: static Float64 apply( - const IColumn & data1, - const IColumn & data2, + const IColumn & scores, + const IColumn & labels, ColumnArray::Offset current_offset, ColumnArray::Offset next_offset) { @@ -98,8 +98,8 @@ private: for (size_t i = 0; i < size; ++i) { - bool label = data2.getFloat64(current_offset + i) > 0; - sorted_labels[i].score = data1.getFloat64(current_offset + i); + bool label = labels.getFloat64(current_offset + i) > 0; + sorted_labels[i].score = scores.getFloat64(current_offset + i); sorted_labels[i].label = label; } @@ -151,8 +151,6 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - // Basic type check - std::vector nested_types(2, nullptr); for (size_t i = 0; i < getNumberOfArguments(); ++i) { const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); @@ -163,7 +161,6 @@ public: if (!isNativeNumber(nested_type) && !isEnum(nested_type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot process values of type {}", getName(), nested_type->getName()); - nested_types[i] = nested_type; } return std::make_shared(); @@ -175,11 +172,15 @@ public: ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst(); const ColumnArray * col_array1 = checkAndGetColumn(col1.get()); - const ColumnArray * col_array2 = checkAndGetColumn(col2.get()); - if (!col_array1 || !col_array2) + if (!col_array1) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); + const ColumnArray * col_array2 = checkAndGetColumn(col2.get()); + if (!col_array2) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of second argument of function {}", arguments[1].column->getName(), getName()); + if (!col_array1->hasEqualOffsets(*col_array2)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); From 6a3267ad3770187ed5d253e952c02f282cd1cb3f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Aug 2023 20:21:51 +0200 Subject: [PATCH 03/16] A fix for clang-17 --- src/Storages/StorageFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index e3908c75a58..5c03540de9a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -396,9 +396,9 @@ std::unique_ptr createReadBuffer( throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", current_path, matcher->error()); - return reader->readFile([matcher = std::move(matcher)](const std::string & path) + return reader->readFile([my_matcher = std::move(matcher)](const std::string & path) { - return re2::RE2::FullMatch(path, *matcher); + return re2::RE2::FullMatch(path, *my_matcher); }); } else From 5cdeacf4cf61c0ac228eb63b7178392a6436d41c Mon Sep 17 00:00:00 2001 From: Ruslan Mardugalliamov Date: Sun, 6 Aug 2023 15:33:36 -0400 Subject: [PATCH 04/16] Add hints for HTTP handlers Add hints to HTTP handlers to help users avoid misspellings. For example, if a user mistakenly writes `/dashboad` instead of `/dashboard`, they will now get a hint that /dashboard is the correct handler. This change will improve the user experience by making it easier for users to find the correct handlers. #47662 --- programs/keeper/CMakeLists.txt | 1 + src/Server/HTTPHandlerFactory.cpp | 4 ++++ src/Server/HTTPPathHints.cpp | 16 ++++++++++++++ src/Server/HTTPPathHints.h | 22 +++++++++++++++++++ src/Server/HTTPRequestHandlerFactoryMain.cpp | 2 +- src/Server/HTTPRequestHandlerFactoryMain.h | 4 ++++ src/Server/NotFoundHandler.cpp | 3 ++- src/Server/NotFoundHandler.h | 3 +++ ...ggest_http_page_in_error_message.reference | 4 ++++ ...2842_suggest_http_page_in_error_message.sh | 12 ++++++++++ 10 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 src/Server/HTTPPathHints.cpp create mode 100644 src/Server/HTTPPathHints.h create mode 100644 tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference create mode 100755 tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 43a8d84b513..a43a312ba54 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -57,6 +57,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 78e374ee9e0..1c911034da1 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -132,21 +132,25 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS auto ping_handler = std::make_shared>(server, ping_response_expression); ping_handler->attachStrictPath("/ping"); ping_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/ping"); factory.addHandler(ping_handler); auto replicas_status_handler = std::make_shared>(server); replicas_status_handler->attachNonStrictPath("/replicas_status"); replicas_status_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/replicas_status"); factory.addHandler(replicas_status_handler); auto play_handler = std::make_shared>(server); play_handler->attachNonStrictPath("/play"); play_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/play"); factory.addHandler(play_handler); auto dashboard_handler = std::make_shared>(server); dashboard_handler->attachNonStrictPath("/dashboard"); dashboard_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/dashboard"); factory.addHandler(dashboard_handler); auto js_handler = std::make_shared>(server); diff --git a/src/Server/HTTPPathHints.cpp b/src/Server/HTTPPathHints.cpp new file mode 100644 index 00000000000..51ef3eabffe --- /dev/null +++ b/src/Server/HTTPPathHints.cpp @@ -0,0 +1,16 @@ +#include + +namespace DB +{ + +void HTTPPathHints::add(const String & http_path) +{ + http_paths.push_back(http_path); +} + +std::vector HTTPPathHints::getAllRegisteredNames() const +{ + return http_paths; +} + +} diff --git a/src/Server/HTTPPathHints.h b/src/Server/HTTPPathHints.h new file mode 100644 index 00000000000..708816ebf07 --- /dev/null +++ b/src/Server/HTTPPathHints.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class HTTPPathHints : public IHints<1, HTTPPathHints> +{ +public: + std::vector getAllRegisteredNames() const override; + void add(const String & http_path); + +private: + std::vector http_paths; +}; + +using HTTPPathHintsPtr = std::shared_ptr; + +} diff --git a/src/Server/HTTPRequestHandlerFactoryMain.cpp b/src/Server/HTTPRequestHandlerFactoryMain.cpp index 61a2909d30f..5481bcd5083 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.cpp +++ b/src/Server/HTTPRequestHandlerFactoryMain.cpp @@ -29,7 +29,7 @@ std::unique_ptr HTTPRequestHandlerFactoryMain::createRequest || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { - return std::unique_ptr(new NotFoundHandler); + return std::unique_ptr(new NotFoundHandler(hints.getHints(request.getURI()))); } return nullptr; diff --git a/src/Server/HTTPRequestHandlerFactoryMain.h b/src/Server/HTTPRequestHandlerFactoryMain.h index b0e57bd6b3b..07b278d831c 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.h +++ b/src/Server/HTTPRequestHandlerFactoryMain.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -15,11 +16,14 @@ public: void addHandler(HTTPRequestHandlerFactoryPtr child_factory) { child_factories.emplace_back(child_factory); } + void addPathToHints(const std::string & http_path) { hints.add(http_path); } + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: Poco::Logger * log; std::string name; + HTTPPathHints hints; std::vector child_factories; }; diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp index 3181708b9b7..5b1db508551 100644 --- a/src/Server/NotFoundHandler.cpp +++ b/src/Server/NotFoundHandler.cpp @@ -10,7 +10,8 @@ void NotFoundHandler::handleRequest(HTTPServerRequest & request, HTTPServerRespo try { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); - *response.send() << "There is no handle " << request.getURI() << "\n\n" + *response.send() << "There is no handle " << request.getURI() + << (!hints.empty() ? fmt::format(". Maybe you meant {}.", hints.front()) : "") << "\n\n" << "Use / or /ping for health checks.\n" << "Or /replicas_status for more sophisticated health checks.\n\n" << "Send queries from your program with POST method or GET /?query=...\n\n" diff --git a/src/Server/NotFoundHandler.h b/src/Server/NotFoundHandler.h index 749ac388c4d..1cbfcd57f8f 100644 --- a/src/Server/NotFoundHandler.h +++ b/src/Server/NotFoundHandler.h @@ -9,7 +9,10 @@ namespace DB class NotFoundHandler : public HTTPRequestHandler { public: + NotFoundHandler(std::vector hints_) : hints(std::move(hints_)) {} void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; +private: + std::vector hints; }; } diff --git a/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference new file mode 100644 index 00000000000..0025187be30 --- /dev/null +++ b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference @@ -0,0 +1,4 @@ +There is no handle /sashboards. Maybe you meant /dashboard +There is no handle /sashboard. Maybe you meant /dashboard +There is no handle /sashboarb. Maybe you meant /dashboard +There is no handle /sashboaxb. Maybe you meant /dashboard diff --git a/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh new file mode 100755 index 00000000000..cf69c742777 --- /dev/null +++ b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export CLICKHOUSE_URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/" + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboards" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboard" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboarb" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboaxb" | grep -o ".* Maybe you meant /dashboard" From 1377d86ed9d6aaf17878b8c7d2960a0053b1111d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 10 Aug 2023 17:01:09 +0300 Subject: [PATCH 05/16] Update src/Functions/array/arrayAUC.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- src/Functions/array/arrayAUC.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index caf929ba038..b7bd7dcc0ad 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -127,8 +127,8 @@ private: } static void vector( - const IColumn & data1, - const IColumn & data2, + const IColumn & scores, + const IColumn & labels, const ColumnArray::Offsets & offsets, PaddedPODArray & result) { From 3aca2408548bc149f933379506250e49238a24de Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 10 Aug 2023 20:59:33 +0200 Subject: [PATCH 06/16] Change the default of max_concurrent_queries from 100 to 1000 --- programs/server/config.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 14b8954fc39..85cdda63558 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -317,7 +317,7 @@ 0 - 100 + 1000