diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index ce36c89f473..e80a3fa9860 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -22,8 +22,8 @@ seriesOutliersDetectTukey(series, min_percentile, max_percentile, K); **Arguments** - `series` - An array of numeric values. -- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25. -- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75. +- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [0.02,0.98]. The default is 0.25. +- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [0.02,0.98]. The default is 0.75. - `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5. At least four data points are required in `series` to detect outliers. 
@@ -53,7 +53,7 @@ Result: Query: ``` sql -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0; +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 0.2, 0.8, 1.5) AS print_0; ``` Result: diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 3d28b16364c..5bc8edf3a54 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -24,9 +24,6 @@ class FunctionSeriesOutliersDetectTukey : public IFunction public: static constexpr auto name = "seriesOutliersDetectTukey"; - static constexpr Float64 min_quartile = 0.02; - static constexpr Float64 max_quartile = 0.98; - static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSeriesOutliersDetectTukey>(); } std::string getName() const override { return name; } @@ -50,8 +47,8 @@ public: FunctionArgumentDescriptors mandatory_args{{"time_series", &isArray, nullptr, "Array"}}; FunctionArgumentDescriptors optional_args{ - {"min_percentile", &isNativeNumber, isColumnConst, "Number"}, - {"max_percentile", &isNativeNumber, isColumnConst, "Number"}, + {"min_percentile", &isFloat, isColumnConst, "Number"}, + {"max_percentile", &isFloat, isColumnConst, "Number"}, {"k", &isNativeNumber, isColumnConst, "Number"}}; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -79,23 +76,20 @@ public: if (arguments.size() > 1) { - Float64 p_min = arguments[1].column->getFloat64(0); - if (isnan(p_min) || !isFinite(p_min) || p_min < min_quartile|| p_min > max_quartile) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} must be in range [2.0, 98.0]", getName()); + static constexpr Float64 min_percentile_lower_bound = 0.02; + static constexpr Float64 max_percentile_upper_bound = 0.98; - min_percentile = p_min; + min_percentile = arguments[1].column->getFloat64(0); + if (isnan(min_percentile) || 
!isFinite(min_percentile) || min_percentile < min_percentile_lower_bound|| min_percentile > max_percentile_upper_bound) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} must be in range [0.02, 0.98]", getName()); - Float64 p_max = arguments[2].column->getFloat64(0); - if (isnan(p_max) || !isFinite(p_max) || p_max < min_quartile || p_max > max_quartile || p_max < min_percentile) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The third argument of function {} must be in range [2.0, 98.0]", getName()); + max_percentile = arguments[2].column->getFloat64(0); + if (isnan(max_percentile) || !isFinite(max_percentile) || max_percentile < min_percentile_lower_bound || max_percentile > max_percentile_upper_bound || max_percentile < min_percentile) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The third argument of function {} must be in range [0.02, 0.98]", getName()); - max_percentile = p_max; - - auto k_val = arguments[3].column->getFloat64(0); - if (k_val < 0.0 || isnan(k_val) || !isFinite(k_val)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argument of function {} must be a positive number", getName()); - - k = k_val; + k = arguments[3].column->getFloat64(0); + if (k < 0.0 || isnan(k) || !isFinite(k)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argument of function {} must be a non-negative number", getName()); } if (executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res) @@ -216,7 +210,7 @@ seriesOutliersDetectTukey(series, min_percentile, max_percentile, k); **Arguments** - `series` - An array of numeric values. -- `min_quantile` - The minimum quantile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25. +- `min_quantile` - The minimum quantile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [0.02, 0.98]. 
The default is 0.25. - `max_quantile` - The maximum quantile to be used to calculate inter-quantile range (IQR). The value must be in range [0.02, 0.98]. The default is 0.75. - `k` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5 @@ -247,7 +241,7 @@ Result: Query: ``` sql -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], .2, .8, 1.5) AS print_0; +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 0.2, 0.8, 1.5) AS print_0; ``` Result: diff --git a/tests/queries/0_stateless/02813_seriesOutliersDetectTukey.sql b/tests/queries/0_stateless/02813_seriesOutliersDetectTukey.sql index e4edeacf20a..0030929e6a3 100644 --- a/tests/queries/0_stateless/02813_seriesOutliersDetectTukey.sql +++ b/tests/queries/0_stateless/02813_seriesOutliersDetectTukey.sql @@ -19,7 +19,7 @@ SELECT seriesOutliersDetectTukey([-3, 2.40, 15, 3.90, 5, 6, 4.50, 5.20, 12, 60, SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], .25, .75, 1.5); SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], .10, .90, 1.5); SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], .02, .98, 1.5); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 2, 98, 1.5); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 0.02, 0.98, 1.5); SELECT seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30))); SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], .25, .75, 3);