mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
removed unnecessary method overloading and fixed documentation
This commit is contained in:
parent
6d24ffc976
commit
ce31fa912b
@ -10,33 +10,27 @@ Below functions are used for series data analysis.
|
||||
|
||||
## seriesOutliersDetectTukey
|
||||
|
||||
Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
|
||||
Detects outliers in series data using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
seriesOutliersDetectTukey(series);
|
||||
seriesOutliersDetectTukey(series, kind, min_percentile, max_percentile, K);
|
||||
seriesOutliersDetectTukey(series, min_percentile, max_percentile, K);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `series` - An array of numeric values.
|
||||
- `kind` - Kind of algorithm to use. Supported values are 'tukey' for standard tukey and 'ctukey' for custom tukey algorithm. The default is 'ctukey'.
|
||||
- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 10. This value is only supported for 'ctukey'.
|
||||
- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 90. This value is only supported for 'ctukey'.
|
||||
- `min_percentile` - The minimum percentile to be used to calculate the inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in the range [2,98]. The default is 25.
|
||||
- `max_percentile` - The maximum percentile to be used to calculate the inter-quantile range (IQR). The value must be in the range [2,98]. The default is 75.
|
||||
- `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5.
|
||||
|
||||
At least four data points are required in `series` to detect outliers.
|
||||
|
||||
Default quantile range:
|
||||
- `tukey` - 25%/75%
|
||||
- `ctukey` - 10%/90%
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns an array of the same length where each value represents score of possible anomaly of corresponding element in the series.
|
||||
- A non-zero score indicates a possible anomaly.
|
||||
- Returns an array of the same length as the input array, where each value represents the score of a possible anomaly for the corresponding element in the series. A non-zero score indicates a possible anomaly.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md).
|
||||
|
||||
@ -51,23 +45,23 @@ SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4,
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────print_0───────────────────┐
|
||||
│[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0] │
|
||||
└─────────────────────────────────────┘
|
||||
┌───────────print_0─────────────────┐
|
||||
│[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0] │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 20, 80, 1.5) AS print_0;
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─print_0────────────────────────────┐
|
||||
│ [0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0] │
|
||||
└────────────────────────────────────┘
|
||||
┌─print_0──────────────────────────────┐
|
||||
│ [0,0,0,0,0,0,0,0,0,19.5,0,0,0,0,0,0] │
|
||||
└──────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## seriesPeriodDetectFFT
|
||||
|
@ -14,9 +14,10 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
///Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences)
|
||||
/// Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences)
|
||||
class FunctionSeriesOutliersDetectTukey : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -36,9 +37,15 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.size() != 1 && arguments.size() != 4)
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Function {} needs either 1 or 4 arguments; passed {}.",
|
||||
getName(),
|
||||
arguments.size());
|
||||
|
||||
FunctionArgumentDescriptors mandatory_args{{"time_series", &isArray<IDataType>, nullptr, "Array"}};
|
||||
FunctionArgumentDescriptors optional_args{
|
||||
{"kind", &isString<IDataType>, isColumnConst, "const String"},
|
||||
{"min_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"},
|
||||
{"max_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"},
|
||||
{"k", &isNativeNumber<IDataType>, isColumnConst, "Number"}};
|
||||
@ -48,9 +55,9 @@ public:
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>());
|
||||
}
|
||||
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3, 4}; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
ColumnPtr col = arguments[0].column;
|
||||
const ColumnArray * col_arr = checkAndGetColumn<ColumnArray>(col.get());
|
||||
@ -58,62 +65,36 @@ public:
|
||||
const IColumn & arr_data = col_arr->getData();
|
||||
const ColumnArray::Offsets & arr_offsets = col_arr->getOffsets();
|
||||
|
||||
Float64 min_percentile = 0.10; //default 10th percentile
|
||||
Float64 max_percentile = 0.90; //default 90th percentile
|
||||
ColumnPtr col_res;
|
||||
if (input_rows_count == 0)
|
||||
return ColumnArray::create(ColumnFloat64::create());
|
||||
|
||||
|
||||
Float64 min_percentile = 0.25; /// default 25th percentile
|
||||
Float64 max_percentile = 0.75; /// default 75th percentile
|
||||
Float64 K = 1.50;
|
||||
|
||||
if (arguments.size() > 1)
|
||||
{
|
||||
//const IColumn * arg_column = arguments[1].column.get();
|
||||
const ColumnConst * arg_string = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
|
||||
Float64 p_min = arguments[1].column->getFloat64(0);
|
||||
if (p_min < 2.0 || p_min > 98.0)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} must be in range [2.0, 98.0]", getName());
|
||||
|
||||
if (!arg_string)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The second argument of function {} must be constant String", getName());
|
||||
min_percentile = p_min / 100;
|
||||
|
||||
String kind = arg_string->getValue<String>();
|
||||
if (kind == "ctukey")
|
||||
{
|
||||
if (arguments.size() > 2)
|
||||
{
|
||||
Float64 p_min = arguments[2].column->getFloat64(0);
|
||||
if (p_min >= 2.0 && p_min <= 98.0)
|
||||
min_percentile = p_min / 100;
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "The third argument of function {} must be in range [2.0, 98.0]", getName());
|
||||
}
|
||||
Float64 p_max = arguments[2].column->getFloat64(0);
|
||||
if (p_max < 2.0 || p_max > 98.0 || p_max < min_percentile * 100)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The third argument of function {} must be in range [2.0, 98.0]", getName());
|
||||
|
||||
if (arguments.size() > 3)
|
||||
{
|
||||
Float64 p_max = arguments[3].column->getFloat64(0);
|
||||
if (p_max >= 2.0 && p_max <= 98.0 && p_max > min_percentile * 100)
|
||||
max_percentile = p_max / 100;
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "The fourth argument of function {} must be in range [2.0, 98.0]", getName());
|
||||
}
|
||||
}
|
||||
else if (kind == "tukey")
|
||||
{
|
||||
min_percentile = 0.25;
|
||||
max_percentile = 0.75;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} can only be 'tukey' or 'ctukey'.", getName());
|
||||
max_percentile = p_max / 100;
|
||||
|
||||
auto k_val = arguments[3].column->getFloat64(0);
|
||||
if (k_val < 0.0)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argument of function {} must be a positive number", getName());
|
||||
|
||||
K = k_val;
|
||||
}
|
||||
|
||||
Float64 K = 1.50;
|
||||
if (arguments.size() == 5)
|
||||
{
|
||||
auto k_val = arguments[4].column->getFloat64(0);
|
||||
if (k_val >= 0.0)
|
||||
K = k_val;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fifth argument of function {} must be a positive number", getName());
|
||||
}
|
||||
|
||||
ColumnPtr col_res;
|
||||
|
||||
if (executeNumber<UInt8>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<UInt16>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<UInt32>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
@ -172,7 +153,7 @@ private:
|
||||
|
||||
Float64 q1, q2;
|
||||
|
||||
auto p1 = len * min_percentile;
|
||||
Float64 p1 = len * min_percentile;
|
||||
if (p1 == static_cast<Int64>(p1))
|
||||
{
|
||||
size_t index = static_cast<size_t>(p1) - 1;
|
||||
@ -184,7 +165,7 @@ private:
|
||||
q1 = src_sorted[index];
|
||||
}
|
||||
|
||||
auto p2 = len * max_percentile;
|
||||
Float64 p2 = len * max_percentile;
|
||||
if (p2 == static_cast<Int64>(p2))
|
||||
{
|
||||
size_t index = static_cast<size_t>(p2) - 1;
|
||||
@ -219,33 +200,27 @@ REGISTER_FUNCTION(SeriesOutliersDetectTukey)
|
||||
{
|
||||
factory.registerFunction<FunctionSeriesOutliersDetectTukey>(FunctionDocumentation{
|
||||
.description = R"(
|
||||
Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
|
||||
|
||||
Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
|
||||
Detects outliers in series data using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
seriesOutliersDetectTukey(series);
|
||||
seriesOutliersDetectTukey(series, kind, min_percentile, max_percentile, K);
|
||||
seriesOutliersDetectTukey(series, min_percentile, max_percentile, K);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `series` - An array of numeric values.
|
||||
- `kind` - Kind of algorithm to use. Supported values are 'tukey' for standard tukey and 'ctukey' for custom tukey algorithm. The default is 'ctukey'.
|
||||
- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 10. This value is only supported for 'ctukey'.
|
||||
- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 90. This value is only supported for 'ctukey'.
|
||||
- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25.
|
||||
- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75.
|
||||
- `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5
|
||||
|
||||
Default quantile range:
|
||||
- `tukey` - 25%/75%
|
||||
- `ctukey` - 10%/90%
|
||||
At least four data points are required in `series` to detect outliers.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns an array of the same length where each value represents score of possible anomaly of corresponding element in the series.
|
||||
- A non-zero score indicates a possible anomaly.
|
||||
- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md).
|
||||
|
||||
@ -260,23 +235,23 @@ SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4,
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────print_0───────────────────┐
|
||||
│[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0] │
|
||||
└─────────────────────────────────────┘
|
||||
┌───────────print_0─────────────────┐
|
||||
│[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0] │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 20, 80, 1.5) AS print_0;
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─print_0────────────────────────────┐
|
||||
│ [0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0] │
|
||||
└────────────────────────────────────┘
|
||||
┌─print_0──────────────────────────────┐
|
||||
│ [0,0,0,0,0,0,0,0,0,19.5,0,0,0,0,0,0] │
|
||||
└──────────────────────────────────────┘
|
||||
```)",
|
||||
.categories{"Time series analysis"}});
|
||||
}
|
||||
|
@ -1,14 +1,12 @@
|
||||
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,11.100000000000001,0,0,0,0,0,0]
|
||||
[-4.475000000000001,0,6.925000000000001,0,0,0,0,0,0,0,0,7.925000000000001,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27.975,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,26.1,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,11.100000000000001,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27.3,0,0,0,0,0,0]
|
||||
[-2.4999999999999996,0,5.1,0,0,0,0,0,2.0999999999999996,50.1,2.0999999999999996,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27.3,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27.3,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0]
|
||||
|
@ -1,11 +1,11 @@
|
||||
DROP TABLE IF EXISTS tb1;
|
||||
|
||||
CREATE TABLE tb1 (n UInt32, a Array(Float64)) engine=Memory;
|
||||
INSERT INTO tb1 VALUES (1, [-3,2.40,15,3.90,5,6,4.50,5.20,3,4,5,16,7,5,5,4]), (2, [-3,2.40,15,3.90,5,6,4.50,5.20,12,45,12,3.40,3,4,5,6]);
|
||||
INSERT INTO tb1 VALUES (1, [-3, 2.40, 15, 3.90, 5, 6, 4.50, 5.20, 3, 4, 5, 16, 7, 5, 5, 4]), (2, [-3, 2.40, 15, 3.90, 5, 6, 4.50, 5.20, 12, 45, 12, 3.40, 3, 4, 5, 6]);
|
||||
|
||||
-- non-const inputs
|
||||
SELECT seriesOutliersDetectTukey(a) FROM tb1 ORDER BY n;
|
||||
SELECT seriesOutliersDetectTukey(a,'ctukey', 25,75) FROM tb1 ORDER BY n;
|
||||
SELECT seriesOutliersDetectTukey(a,10,90,1.5) FROM tb1 ORDER BY n;
|
||||
DROP TABLE IF EXISTS tb1;
|
||||
|
||||
-- const inputs
|
||||
@ -13,18 +13,17 @@ SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40,
|
||||
SELECT seriesOutliersDetectTukey([-3, 2.40, 15, 3.90, 5, 6, 4.50, 5.20, 12, 60, 12, 3.40, 3, 4, 5, 6, 3.40, 2.7]);
|
||||
|
||||
-- const inputs with optional arguments
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 25, 75);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 10, 90);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'tukey', 10, 90);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 2, 98);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 'ctukey', 2, 98);
|
||||
SELECT seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30)), 'tukey');
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, 1.5);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, 3);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 25, 75, 1.5);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 10, 90, 1.5);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 2, 98, 1.5);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 2, 98, 1.5);
|
||||
SELECT seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30)));
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 25, 75, 3);
|
||||
|
||||
-- negative tests
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, -1); -- { serverError BAD_ARGUMENTS}
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 'xyz', 33, 53); -- { serverError BAD_ARGUMENTS}
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 25, 75, -1); -- { serverError BAD_ARGUMENTS}
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 33, 53); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 33); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
|
||||
SELECT seriesOutliersDetectTukey([-3, 2.4, 15, NULL]); -- { serverError ILLEGAL_COLUMN}
|
||||
SELECT seriesOutliersDetectTukey([]); -- { serverError ILLEGAL_COLUMN}
|
||||
SELECT seriesOutliersDetectTukey([-3, 2.4, 15]); -- { serverError BAD_ARGUMENTS}
|
Loading…
Reference in New Issue
Block a user