From 4bc26fe45fa78fbfd1394d22f73a86cf1f83e586 Mon Sep 17 00:00:00 2001 From: Bhavna Jindal Date: Wed, 31 Jan 2024 10:35:08 -0800 Subject: [PATCH] Added support for custom percentiles and K --- .../functions/time-series-functions.md | 41 ++-- src/Functions/seriesOutliersTukey.cpp | 184 +++++++++++------- .../02813_seriesOutliersTukey.reference | 20 +- .../0_stateless/02813_seriesOutliersTukey.sql | 26 ++- .../aspell-ignore/en/aspell-dict.txt | 3 + 5 files changed, 168 insertions(+), 106 deletions(-) diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index 9eea5a8eb1f..bd50ef556f7 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -6,7 +6,7 @@ sidebar_label: Time Series # Time Series Functions -Below functions are used for time series analysis. +Below functions are used for series data analysis. ## seriesOutliersDetectTukey @@ -16,16 +16,27 @@ Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.o ``` sql seriesOutliersDetectTukey(series); +seriesOutliersDetectTukey(series, kind, min_percentile, max_percentile, K); ``` **Arguments** -- `series` - An array of numeric values +- `series` - An array of numeric values. +- `kind` - Kind of algorithm to use. Supported values are 'tukey' for standard tukey and 'ctukey' for custom tukey algorithm. The default is 'ctukey'. +- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 10. This value is only supported for 'ctukey'. +- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 90. This value is only supported for 'ctukey'. +- `K` - Non-negative constant value to detect mild or stronger outliers. 
The default value is 1.5 + +At least four data points are required in `series` to detect outliers. + +Default quantile range: +- `tukey` - 25%/75% +- `ctukey` - 10%/90% **Returned value** -- Returns an array of the same length where each value represents a modified Z-score of possible anomaly of corresponding element in the series. -- A value greater than 3 or lesser than -3 indicates a possible anomaly. +- Returns an array of the same length where each value represents score of possible anomaly of corresponding element in the series. +- A non-zero score indicates a possible anomaly. Type: [Array](../../sql-reference/data-types/array.md). @@ -34,34 +45,34 @@ Type: [Array](../../sql-reference/data-types/array.md). Query: ``` sql -seriesOutliersDetectTukey([-3,2.4,15,3.9,5,6,4.5,5.2,3,4,5,16,7,5,5,4]) AS print_0; +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6]) AS print_0; ``` Result: ``` text -┌───────────print_0──────────────────────────────────────────────────────────────────┐ -│[-2.7121212121212137,0,4.196969696969699,0,0,0,0,0,0,0,0,4.803030303030305,0,0,0,0] │ -└────────────────────────────────────────────────────────────────────────────────────┘ +┌───────────print_0───────────────────┐ +│[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0] │ +└─────────────────────────────────────┘ ``` Query: ``` sql -seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30))) AS print_0; +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 20, 80, 1.5) AS print_0; ``` Result: ``` text -┌───────────print_0────────────────────────────────────────────┐ -│[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] │ -└──────────────────────────────────────────────────────────────┘ +┌─print_0────────────────────────────┐ +│ [0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0] │ +└────────────────────────────────────┘ ``` ## seriesPeriodDetectFFT -Finds the period of the given time series data using FFT +Finds the 
period of the given series data using FFT FFT - [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform) **Syntax** @@ -76,7 +87,7 @@ seriesPeriodDetectFFT(series); **Returned value** -- A real value equal to the period of time series +- A real value equal to the period of series data - Returns NAN when number of data points are less than four. Type: [Float64](../../sql-reference/data-types/float.md). @@ -111,7 +122,7 @@ Result: ## seriesDecomposeSTL -Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component. +Decomposes series data using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component. **Syntax** diff --git a/src/Functions/seriesOutliersTukey.cpp b/src/Functions/seriesOutliersTukey.cpp index 72a16949605..4c2c1ccd882 100644 --- a/src/Functions/seriesOutliersTukey.cpp +++ b/src/Functions/seriesOutliersTukey.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -7,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -16,7 +16,7 @@ extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; } -//Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences) +///Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences) class FunctionSeriesOutliersDetectTukey : public IFunction { public: @@ -40,28 +40,28 @@ public: FunctionArgumentDescriptors optional_args{ {"kind", &isString, isColumnConst, "const String"}, {"min_percentile", &isNativeNumber, isColumnConst, "Number"}, - {"max_percentile", &isNativeNumber, isColumnConst, "Number"} - }; + {"max_percentile", &isNativeNumber, isColumnConst, "Number"}, + {"k", &isNativeNumber, isColumnConst, "Number"}}; validateFunctionArgumentTypes(*this, 
arguments, mandatory_args, optional_args); return std::make_shared(std::make_shared()); } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1,2,3}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3, 4}; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override { - ColumnPtr array_ptr = arguments[0].column; - const ColumnArray * array = checkAndGetColumn(array_ptr.get()); + ColumnPtr col = arguments[0].column; + const ColumnArray * col_arr = checkAndGetColumn(col.get()); - const IColumn & src_data = array->getData(); - const ColumnArray::Offsets & src_offsets = array->getOffsets(); + const IColumn & arr_data = col_arr->getData(); + const ColumnArray::Offsets & arr_offsets = col_arr->getOffsets(); - Float64 min_percentile = 0.25; - Float64 max_percentile = 0.75; + Float64 min_percentile = 0.10; //default 10th percentile + Float64 max_percentile = 0.90; //default 90th percentile - if(arguments.size() > 1) + if (arguments.size() > 1) { //const IColumn * arg_column = arguments[1].column.get(); const ColumnConst * arg_string = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); @@ -70,44 +70,62 @@ public: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The second argument of function {} must be constant String", getName()); String kind = arg_string->getValue(); - if(kind == "ctukey"){ - min_percentile = 0.10; //default 10th percentile - max_percentile = 0.90; //default 90th percentile - - if(arguments.size() > 2) + if (kind == "ctukey") + { + if (arguments.size() > 2) { Float64 p_min = arguments[2].column->getFloat64(0); - if(p_min >= 2.0 && p_min <= 98.0) - min_percentile = p_min/100; + if (p_min >= 2.0 && p_min <= 98.0) + min_percentile = p_min / 100; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The third argumet of function {} must be in range [2.0, 98.0]", getName()); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "The third 
argumet of function {} must be in range [2.0, 98.0]", getName()); } - if(arguments.size() == 4) + if (arguments.size() > 3) { Float64 p_max = arguments[3].column->getFloat64(0); - if(p_max >= 2.0 && p_max <= 98.0 && p_max > min_percentile*100) - max_percentile = p_max/100; + if (p_max >= 2.0 && p_max <= 98.0 && p_max > min_percentile * 100) + max_percentile = p_max / 100; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argumet of function {} must be in range [2.0, 98.0]", getName()); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "The fourth argumet of function {} must be in range [2.0, 98.0]", getName()); } - } - else - { - if(kind != "tukey") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} can only be 'tukey' or 'ctukey'.", getName()); } + else if (kind == "tukey") + { + min_percentile = 0.25; + max_percentile = 0.75; + } + else + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} can only be 'tukey' or 'ctukey'.", getName()); } - ColumnPtr res; - - if (executeNumber(src_data, src_offsets, min_percentile, max_percentile, res) || executeNumber(src_data, src_offsets,min_percentile, max_percentile, res) - || executeNumber(src_data, src_offsets, min_percentile, max_percentile,res) || executeNumber(src_data, src_offsets,min_percentile, max_percentile, res) - || executeNumber(src_data, src_offsets,min_percentile, max_percentile, res) || executeNumber(src_data, src_offsets,min_percentile, max_percentile, res) - || executeNumber(src_data, src_offsets,min_percentile, max_percentile, res) || executeNumber(src_data, src_offsets, min_percentile, max_percentile,res) - || executeNumber(src_data, src_offsets,min_percentile, max_percentile, res) || executeNumber(src_data, src_offsets,min_percentile, max_percentile, res)) + Float64 K = 1.50; + if (arguments.size() == 5) { - return res; + auto k_val = arguments[4].column->getFloat64(0); + if (k_val >= 0.0) + K = k_val; + else + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "The fifth argumet of function {} must be a positive number", getName()); + } + + ColumnPtr col_res; + + if (executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res) + || executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)) + { + return col_res; } else throw Exception( @@ -117,14 +135,17 @@ public: getName()); } +private: template - bool executeNumber(const IColumn & src_data, - const ColumnArray::Offsets & src_offsets, - Float64 min_percentile, - Float64 max_percentile, - ColumnPtr & res_ptr) const + bool executeNumber( + const IColumn & arr_data, + const ColumnArray::Offsets & arr_offsets, + Float64 min_percentile, + Float64 max_percentile, + Float64 K, + ColumnPtr & res_ptr) const { - const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data); + const ColumnVector * src_data_concrete = checkAndGetColumn>(&arr_data); if (!src_data_concrete) return false; @@ -136,53 +157,57 @@ public: ColumnArray::ColumnOffsets::MutablePtr res_offsets = ColumnArray::ColumnOffsets::create(); auto & res_offsets_data = res_offsets->getData(); + std::vector src_sorted; + ColumnArray::Offset prev_src_offset = 0; - for (auto curr_src_offset : src_offsets) + for (auto src_offset : arr_offsets) { - 
chassert(prev_src_offset <= curr_src_offset); - size_t len = curr_src_offset - prev_src_offset; + chassert(prev_src_offset <= src_offset); + size_t len = src_offset - prev_src_offset; if (len < 4) throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least four data points are needed for function {}", getName()); - std::vector src_sorted(src_vec.begin() + prev_src_offset, src_vec.begin() + curr_src_offset); + src_sorted.assign(src_vec.begin() + prev_src_offset, src_vec.begin() + src_offset); std::sort(src_sorted.begin(), src_sorted.end()); Float64 q1, q2; auto p1 = len * min_percentile; - if(p1 == static_cast(p1)){ - size_t index = static_cast(p1)-1; - q1 = (src_sorted[index] + src_sorted[index+1])/2; + if (p1 == static_cast(p1)) + { + size_t index = static_cast(p1) - 1; + q1 = (src_sorted[index] + src_sorted[index + 1]) / 2; } else - { - size_t index = static_cast(std::ceil(p1))-1; + { + size_t index = static_cast(std::ceil(p1)) - 1; q1 = src_sorted[index]; } auto p2 = len * max_percentile; - if(p2 == static_cast(p2)){ - size_t index = static_cast(p2)-1; - q2 = (src_sorted[index] + src_sorted[index+1])/2; + if (p2 == static_cast(p2)) + { + size_t index = static_cast(p2) - 1; + q2 = (src_sorted[index] + src_sorted[index + 1]) / 2; } else - { - size_t index = static_cast(std::ceil(p2))-1; + { + size_t index = static_cast(std::ceil(p2)) - 1; q2 = src_sorted[index]; } - Float64 iqr = q2 - q1; + Float64 iqr = q2 - q1; /// interquantile range - Float64 lower_fence = q1 - 1.5 * iqr; - Float64 upper_fence = q2 + 1.5 * iqr; + Float64 lower_fence = q1 - K * iqr; + Float64 upper_fence = q2 + K * iqr; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) + for (ColumnArray::Offset j = prev_src_offset; j < src_offset; ++j) { - auto score = std::min((src_vec[j] - lower_fence) / iqr, 0.0) + std::max((src_vec[j] - upper_fence) / iqr, 0.0); + auto score = std::min((src_vec[j] - lower_fence), 0.0) + std::max((src_vec[j] - upper_fence), 0.0); 
outlier_data.push_back(score); } res_offsets_data.push_back(outlier_data.size()); - prev_src_offset = curr_src_offset; + prev_src_offset = src_offset; } res_ptr = ColumnArray::create(std::move(outliers), std::move(res_offsets)); @@ -196,20 +221,31 @@ REGISTER_FUNCTION(SeriesOutliersDetectTukey) .description = R"( Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences). +Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences). + **Syntax** ``` sql seriesOutliersDetectTukey(series); +seriesOutliersDetectTukey(series, kind, min_percentile, max_percentile, K); ``` **Arguments** -- `series` - An array of numeric values +- `series` - An array of numeric values. +- `kind` - Kind of algorithm to use. Supported values are 'tukey' for standard tukey and 'ctukey' for custom tukey algorithm. The default is 'ctukey'. +- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 10. This value is only supported for 'ctukey'. +- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 90. This value is only supported for 'ctukey'. +- `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5 + +Default quantile range: +- `tukey` - 25%/75% +- `ctukey` - 10%/90% **Returned value** -- Returns an array of the same length where each value represents a modified Z-score of possible anomaly of corresponding element in the series. -- A value greater than 3 or lesser than -3 indicates a possible anomaly. +- Returns an array of the same length where each value represents score of possible anomaly of corresponding element in the series. +- A non-zero score indicates a possible anomaly. Type: [Array](../../sql-reference/data-types/array.md). 
@@ -218,29 +254,29 @@ Type: [Array](../../sql-reference/data-types/array.md). Query: ``` sql -seriesOutliersDetectTukey([-3,2.4,15,3.9,5,6,4.5,5.2,3,4,5,16,7,5,5,4]) AS print_0; +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6]) AS print_0; ``` Result: ``` text -┌───────────print_0──────────────────────────────────────────────────────────────────┐ -│[-2.7121212121212137,0,4.196969696969699,0,0,0,0,0,0,0,0,4.803030303030305,0,0,0,0] │ -└────────────────────────────────────────────────────────────────────────────────────┘ +┌───────────print_0───────────────────┐ +│[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0] │ +└─────────────────────────────────────┘ ``` Query: ``` sql -seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30))) AS print_0; +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 20, 80, 1.5) AS print_0; ``` Result: ``` text -┌───────────print_0────────────────────────────────────────────┐ -│[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] │ -└──────────────────────────────────────────────────────────────┘ +┌─print_0────────────────────────────┐ +│ [0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0] │ +└────────────────────────────────────┘ ```)", .categories{"Time series analysis"}}); } diff --git a/tests/queries/0_stateless/02813_seriesOutliersTukey.reference b/tests/queries/0_stateless/02813_seriesOutliersTukey.reference index 990c8c11e9e..bdcde0419a4 100644 --- a/tests/queries/0_stateless/02813_seriesOutliersTukey.reference +++ b/tests/queries/0_stateless/02813_seriesOutliersTukey.reference @@ -1,12 +1,14 @@ -[-2.7121212121212137,0,4.196969696969699,0,0,0,0,0,0,0,0,4.803030303030305,0,0,0,0] -[0,0,0,0,0,0,0,0,0,5.228971962616823,0,0,0,0,0,0] -[-2.7121212121212137,0,4.196969696969699,0,0,0,0,0,0,0,0,4.803030303030305,0,0,0,0] -[0,0,0,0,0,0,0,0,0,5.228971962616823,0,0,0,0,0,0] -[0,0,0,0,0,0,0,0,0,4.706896551724138,0,0,0,0,0,0] 
-[-0.9615384615384613,0,1.9615384615384612,0,0,0,0,0,0.8076923076923075,19.26923076923077,0.8076923076923075,0,0,0,0,0,0,0] -[0,0,0,0,0,0,0,0,0,4.706896551724138,0,0,0,0,0,0] -[0,0,0,0,0,0,0,0,0,0.8076923076923077,0,0,0,0,0,0] -[0,0,0,0,0,0,0,0,0,4.706896551724138,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,11.100000000000001,0,0,0,0,0,0] +[-4.475000000000001,0,6.925000000000001,0,0,0,0,0,0,0,0,7.925000000000001,0,0,0,0] +[0,0,0,0,0,0,0,0,0,27.975,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,26.1,0,0,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,27.3,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,27.3,0,0,0,0,0,0] [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] [0,0,0,0] [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0] diff --git a/tests/queries/0_stateless/02813_seriesOutliersTukey.sql b/tests/queries/0_stateless/02813_seriesOutliersTukey.sql index f8debc7b6db..7efe4903249 100644 --- a/tests/queries/0_stateless/02813_seriesOutliersTukey.sql +++ b/tests/queries/0_stateless/02813_seriesOutliersTukey.sql @@ -1,19 +1,29 @@ DROP TABLE IF EXISTS tb1; CREATE TABLE tb1 (n UInt32, a Array(Float64)) engine=Memory; -INSERT INTO tb1 VALUES (1, [-3,2.4,15,3.9,5,6,4.5,5.2,3,4,5,16,7,5,5,4]), (2, [-3,2.4,15,3.9,5,6,4.5,5.2,12,45,12,3.4,3,4,5,6]); +INSERT INTO tb1 VALUES (1, [-3,2.40,15,3.90,5,6,4.50,5.20,3,4,5,16,7,5,5,4]), (2, [-3,2.40,15,3.90,5,6,4.50,5.20,12,45,12,3.40,3,4,5,6]); +-- non-const inputs SELECT seriesOutliersDetectTukey(a) FROM tb1 ORDER BY n; SELECT seriesOutliersDetectTukey(a,'ctukey', 25,75) FROM tb1 ORDER BY n; DROP TABLE IF EXISTS tb1; -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6]); -SELECT seriesOutliersDetectTukey([-3, 2.4, 15, 3.9, 5, 6, 4.5, 5.2, 12, 60, 12, 3.4, 3, 4, 5, 6, 3.4, 2.7]); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6], 
'ctukey', 25, 75); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6], 'ctukey', 10, 90); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6], 'tukey', 10, 90); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6], 'ctukey', 2, 98) + +-- const inputs +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6]); +SELECT seriesOutliersDetectTukey([-3, 2.40, 15, 3.90, 5, 6, 4.50, 5.20, 12, 60, 12, 3.40, 3, 4, 5, 6, 3.40, 2.7]); + +-- const inputs with optional arguments +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 25, 75); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 10, 90); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'tukey', 10, 90); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 2, 98); SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 'ctukey', 2, 98); -SELECT seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30))); +SELECT seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30)), 'tukey'); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, 1.5); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, 3); + +-- negative tests +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, -1); -- { serverError BAD_ARGUMENTS} SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 'xyz', 33, 53); -- { serverError BAD_ARGUMENTS} SELECT seriesOutliersDetectTukey([-3, 2.4, 15, NULL]); -- { serverError ILLEGAL_COLUMN} SELECT seriesOutliersDetectTukey([]); -- { serverError ILLEGAL_COLUMN} diff --git 
a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 64327aba2d1..3c8ec34de8d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1332,6 +1332,7 @@ cryptographic csv csvwithnames csvwithnamesandtypes +ctukey curdate currentDatabase currentProfiles @@ -2271,6 +2272,7 @@ seektable sequenceCount sequenceMatch sequenceNextNode +seriesOutliersDetectTukey seriesDecomposeSTL seriesPeriodDetectFFT serverTimeZone @@ -2564,6 +2566,7 @@ tryPunycodeDecode tskv tsv tui +tukey tumbleEnd tumbleStart tupleConcat