mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Added support for custom percentiles and K
This commit is contained in:
parent
487ef67e97
commit
4bc26fe45f
@ -6,7 +6,7 @@ sidebar_label: Time Series
|
||||
|
||||
# Time Series Functions
|
||||
|
||||
Below functions are used for time series analysis.
|
||||
The functions below are used for series data analysis.
|
||||
|
||||
## seriesOutliersDetectTukey
|
||||
|
||||
@ -16,16 +16,27 @@ Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.o
|
||||
|
||||
``` sql
|
||||
seriesOutliersDetectTukey(series);
|
||||
seriesOutliersDetectTukey(series, kind, min_percentile, max_percentile, K);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `series` - An array of numeric values
|
||||
- `series` - An array of numeric values.
|
||||
- `kind` - Kind of algorithm to use. Supported values are 'tukey' for the standard Tukey algorithm and 'ctukey' for the custom Tukey algorithm. The default is 'ctukey'.
|
||||
- `min_percentile` - The minimum percentile to be used to calculate the inter-quantile range (IQR). The value must be in the range [2,98]. The default is 10. This value is only supported for 'ctukey'.
|
||||
- `max_percentile` - The maximum percentile to be used to calculate the inter-quantile range (IQR). The value must be in the range [2,98] and greater than `min_percentile`. The default is 90. This value is only supported for 'ctukey'.
|
||||
- `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5.
|
||||
|
||||
At least four data points are required in `series` to detect outliers.
|
||||
|
||||
Default quantile range:
|
||||
- `tukey` - 25%/75%
|
||||
- `ctukey` - 10%/90%
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns an array of the same length where each value represents a modified Z-score of possible anomaly of corresponding element in the series.
|
||||
- A value greater than 3 or lesser than -3 indicates a possible anomaly.
|
||||
- Returns an array of the same length where each value represents the score of a possible anomaly of the corresponding element in the series.
|
||||
- A non-zero score indicates a possible anomaly.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md).
|
||||
|
||||
@ -34,34 +45,34 @@ Type: [Array](../../sql-reference/data-types/array.md).
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
seriesOutliersDetectTukey([-3,2.4,15,3.9,5,6,4.5,5.2,3,4,5,16,7,5,5,4]) AS print_0;
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6]) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────print_0──────────────────────────────────────────────────────────────────┐
|
||||
│[-2.7121212121212137,0,4.196969696969699,0,0,0,0,0,0,0,0,4.803030303030305,0,0,0,0] │
|
||||
└────────────────────────────────────────────────────────────────────────────────────┘
|
||||
┌───────────print_0───────────────────┐
|
||||
│[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0] │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30))) AS print_0;
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 20, 80, 1.5) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────print_0────────────────────────────────────────────┐
|
||||
│[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] │
|
||||
└──────────────────────────────────────────────────────────────┘
|
||||
┌─print_0────────────────────────────┐
|
||||
│ [0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0] │
|
||||
└────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## seriesPeriodDetectFFT
|
||||
|
||||
Finds the period of the given time series data using FFT
|
||||
Finds the period of the given series data using FFT
|
||||
FFT - [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform)
|
||||
|
||||
**Syntax**
|
||||
@ -76,7 +87,7 @@ seriesPeriodDetectFFT(series);
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A real value equal to the period of time series
|
||||
- A real value equal to the period of the series data
|
||||
- Returns NAN when the number of data points is less than four.
|
||||
|
||||
Type: [Float64](../../sql-reference/data-types/float.md).
|
||||
@ -111,7 +122,7 @@ Result:
|
||||
|
||||
## seriesDecomposeSTL
|
||||
|
||||
Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component.
|
||||
Decomposes series data using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
#include <cmath>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
@ -7,6 +6,7 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <cmath>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -16,7 +16,7 @@ extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
//Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences)
|
||||
///Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences)
|
||||
class FunctionSeriesOutliersDetectTukey : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -40,26 +40,26 @@ public:
|
||||
FunctionArgumentDescriptors optional_args{
|
||||
{"kind", &isString<IDataType>, isColumnConst, "const String"},
|
||||
{"min_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"},
|
||||
{"max_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"}
|
||||
};
|
||||
{"max_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"},
|
||||
{"k", &isNativeNumber<IDataType>, isColumnConst, "Number"}};
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
|
||||
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>());
|
||||
}
|
||||
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1,2,3}; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3, 4}; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
{
|
||||
ColumnPtr array_ptr = arguments[0].column;
|
||||
const ColumnArray * array = checkAndGetColumn<ColumnArray>(array_ptr.get());
|
||||
ColumnPtr col = arguments[0].column;
|
||||
const ColumnArray * col_arr = checkAndGetColumn<ColumnArray>(col.get());
|
||||
|
||||
const IColumn & src_data = array->getData();
|
||||
const ColumnArray::Offsets & src_offsets = array->getOffsets();
|
||||
const IColumn & arr_data = col_arr->getData();
|
||||
const ColumnArray::Offsets & arr_offsets = col_arr->getOffsets();
|
||||
|
||||
Float64 min_percentile = 0.25;
|
||||
Float64 max_percentile = 0.75;
|
||||
Float64 min_percentile = 0.10; //default 10th percentile
|
||||
Float64 max_percentile = 0.90; //default 90th percentile
|
||||
|
||||
if (arguments.size() > 1)
|
||||
{
|
||||
@ -70,44 +70,62 @@ public:
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The second argument of function {} must be constant String", getName());
|
||||
|
||||
String kind = arg_string->getValue<String>();
|
||||
if(kind == "ctukey"){
|
||||
min_percentile = 0.10; //default 10th percentile
|
||||
max_percentile = 0.90; //default 90th percentile
|
||||
|
||||
if (kind == "ctukey")
|
||||
{
|
||||
if (arguments.size() > 2)
|
||||
{
|
||||
Float64 p_min = arguments[2].column->getFloat64(0);
|
||||
if (p_min >= 2.0 && p_min <= 98.0)
|
||||
min_percentile = p_min / 100;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The third argumet of function {} must be in range [2.0, 98.0]", getName());
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "The third argumet of function {} must be in range [2.0, 98.0]", getName());
|
||||
}
|
||||
|
||||
if(arguments.size() == 4)
|
||||
if (arguments.size() > 3)
|
||||
{
|
||||
Float64 p_max = arguments[3].column->getFloat64(0);
|
||||
if (p_max >= 2.0 && p_max <= 98.0 && p_max > min_percentile * 100)
|
||||
max_percentile = p_max / 100;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argumet of function {} must be in range [2.0, 98.0]", getName());
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "The fourth argumet of function {} must be in range [2.0, 98.0]", getName());
|
||||
}
|
||||
}
|
||||
else if (kind == "tukey")
|
||||
{
|
||||
min_percentile = 0.25;
|
||||
max_percentile = 0.75;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(kind != "tukey")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} can only be 'tukey' or 'ctukey'.", getName());
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} can only be 'tukey' or 'ctukey'.", getName());
|
||||
}
|
||||
|
||||
ColumnPtr res;
|
||||
|
||||
if (executeNumber<UInt8>(src_data, src_offsets, min_percentile, max_percentile, res) || executeNumber<UInt16>(src_data, src_offsets,min_percentile, max_percentile, res)
|
||||
|| executeNumber<UInt32>(src_data, src_offsets, min_percentile, max_percentile,res) || executeNumber<UInt64>(src_data, src_offsets,min_percentile, max_percentile, res)
|
||||
|| executeNumber<Int8>(src_data, src_offsets,min_percentile, max_percentile, res) || executeNumber<Int16>(src_data, src_offsets,min_percentile, max_percentile, res)
|
||||
|| executeNumber<Int32>(src_data, src_offsets,min_percentile, max_percentile, res) || executeNumber<Int64>(src_data, src_offsets, min_percentile, max_percentile,res)
|
||||
|| executeNumber<Float32>(src_data, src_offsets,min_percentile, max_percentile, res) || executeNumber<Float64>(src_data, src_offsets,min_percentile, max_percentile, res))
|
||||
Float64 K = 1.50;
|
||||
if (arguments.size() == 5)
|
||||
{
|
||||
return res;
|
||||
auto k_val = arguments[4].column->getFloat64(0);
|
||||
if (k_val >= 0.0)
|
||||
K = k_val;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fifth argumet of function {} must be a positive number", getName());
|
||||
}
|
||||
|
||||
ColumnPtr col_res;
|
||||
|
||||
if (executeNumber<UInt8>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<UInt16>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<UInt32>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<UInt64>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<Int8>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<Int16>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<Int32>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<Int64>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<Float32>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res)
|
||||
|| executeNumber<Float64>(arr_data, arr_offsets, min_percentile, max_percentile, K, col_res))
|
||||
{
|
||||
return col_res;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
@ -117,14 +135,17 @@ public:
|
||||
getName());
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
bool executeNumber(const IColumn & src_data,
|
||||
const ColumnArray::Offsets & src_offsets,
|
||||
bool executeNumber(
|
||||
const IColumn & arr_data,
|
||||
const ColumnArray::Offsets & arr_offsets,
|
||||
Float64 min_percentile,
|
||||
Float64 max_percentile,
|
||||
Float64 K,
|
||||
ColumnPtr & res_ptr) const
|
||||
{
|
||||
const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data);
|
||||
const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&arr_data);
|
||||
if (!src_data_concrete)
|
||||
return false;
|
||||
|
||||
@ -136,21 +157,24 @@ public:
|
||||
ColumnArray::ColumnOffsets::MutablePtr res_offsets = ColumnArray::ColumnOffsets::create();
|
||||
auto & res_offsets_data = res_offsets->getData();
|
||||
|
||||
std::vector<Float64> src_sorted;
|
||||
|
||||
ColumnArray::Offset prev_src_offset = 0;
|
||||
for (auto curr_src_offset : src_offsets)
|
||||
for (auto src_offset : arr_offsets)
|
||||
{
|
||||
chassert(prev_src_offset <= curr_src_offset);
|
||||
size_t len = curr_src_offset - prev_src_offset;
|
||||
chassert(prev_src_offset <= src_offset);
|
||||
size_t len = src_offset - prev_src_offset;
|
||||
if (len < 4)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least four data points are needed for function {}", getName());
|
||||
|
||||
std::vector<Float64> src_sorted(src_vec.begin() + prev_src_offset, src_vec.begin() + curr_src_offset);
|
||||
src_sorted.assign(src_vec.begin() + prev_src_offset, src_vec.begin() + src_offset);
|
||||
std::sort(src_sorted.begin(), src_sorted.end());
|
||||
|
||||
Float64 q1, q2;
|
||||
|
||||
auto p1 = len * min_percentile;
|
||||
if(p1 == static_cast<Int64>(p1)){
|
||||
if (p1 == static_cast<Int64>(p1))
|
||||
{
|
||||
size_t index = static_cast<size_t>(p1) - 1;
|
||||
q1 = (src_sorted[index] + src_sorted[index + 1]) / 2;
|
||||
}
|
||||
@ -161,7 +185,8 @@ public:
|
||||
}
|
||||
|
||||
auto p2 = len * max_percentile;
|
||||
if(p2 == static_cast<Int64>(p2)){
|
||||
if (p2 == static_cast<Int64>(p2))
|
||||
{
|
||||
size_t index = static_cast<size_t>(p2) - 1;
|
||||
q2 = (src_sorted[index] + src_sorted[index + 1]) / 2;
|
||||
}
|
||||
@ -171,18 +196,18 @@ public:
|
||||
q2 = src_sorted[index];
|
||||
}
|
||||
|
||||
Float64 iqr = q2 - q1;
|
||||
Float64 iqr = q2 - q1; /// interquantile range
|
||||
|
||||
Float64 lower_fence = q1 - 1.5 * iqr;
|
||||
Float64 upper_fence = q2 + 1.5 * iqr;
|
||||
Float64 lower_fence = q1 - K * iqr;
|
||||
Float64 upper_fence = q2 + K * iqr;
|
||||
|
||||
for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j)
|
||||
for (ColumnArray::Offset j = prev_src_offset; j < src_offset; ++j)
|
||||
{
|
||||
auto score = std::min((src_vec[j] - lower_fence) / iqr, 0.0) + std::max((src_vec[j] - upper_fence) / iqr, 0.0);
|
||||
auto score = std::min((src_vec[j] - lower_fence), 0.0) + std::max((src_vec[j] - upper_fence), 0.0);
|
||||
outlier_data.push_back(score);
|
||||
}
|
||||
res_offsets_data.push_back(outlier_data.size());
|
||||
prev_src_offset = curr_src_offset;
|
||||
prev_src_offset = src_offset;
|
||||
}
|
||||
|
||||
res_ptr = ColumnArray::create(std::move(outliers), std::move(res_offsets));
|
||||
@ -196,20 +221,31 @@ REGISTER_FUNCTION(SeriesOutliersDetectTukey)
|
||||
.description = R"(
|
||||
Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
|
||||
|
||||
Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
seriesOutliersDetectTukey(series);
|
||||
seriesOutliersDetectTukey(series, kind, min_percentile, max_percentile, K);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `series` - An array of numeric values
|
||||
- `series` - An array of numeric values.
|
||||
- `kind` - Kind of algorithm to use. Supported values are 'tukey' for standard tukey and 'ctukey' for custom tukey algorithm. The default is 'ctukey'.
|
||||
- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 10. This value is only supported for 'ctukey'.
|
||||
- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range(IQR). The value must be in range [2,98]. The default is 90. This value is only supported for 'ctukey'.
|
||||
- `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5
|
||||
|
||||
Default quantile range:
|
||||
- `tukey` - 25%/75%
|
||||
- `ctukey` - 10%/90%
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns an array of the same length where each value represents a modified Z-score of possible anomaly of corresponding element in the series.
|
||||
- A value greater than 3 or lesser than -3 indicates a possible anomaly.
|
||||
- Returns an array of the same length where each value represents score of possible anomaly of corresponding element in the series.
|
||||
- A non-zero score indicates a possible anomaly.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md).
|
||||
|
||||
@ -218,29 +254,29 @@ Type: [Array](../../sql-reference/data-types/array.md).
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
seriesOutliersDetectTukey([-3,2.4,15,3.9,5,6,4.5,5.2,3,4,5,16,7,5,5,4]) AS print_0;
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6]) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────print_0──────────────────────────────────────────────────────────────────┐
|
||||
│[-2.7121212121212137,0,4.196969696969699,0,0,0,0,0,0,0,0,4.803030303030305,0,0,0,0] │
|
||||
└────────────────────────────────────────────────────────────────────────────────────┘
|
||||
┌───────────print_0───────────────────┐
|
||||
│[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0] │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30))) AS print_0;
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 20, 80, 1.5) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────print_0────────────────────────────────────────────┐
|
||||
│[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] │
|
||||
└──────────────────────────────────────────────────────────────┘
|
||||
┌─print_0────────────────────────────┐
|
||||
│ [0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0] │
|
||||
└────────────────────────────────────┘
|
||||
```)",
|
||||
.categories{"Time series analysis"}});
|
||||
}
|
||||
|
@ -1,12 +1,14 @@
|
||||
[-2.7121212121212137,0,4.196969696969699,0,0,0,0,0,0,0,0,4.803030303030305,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,5.228971962616823,0,0,0,0,0,0]
|
||||
[-2.7121212121212137,0,4.196969696969699,0,0,0,0,0,0,0,0,4.803030303030305,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,5.228971962616823,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,4.706896551724138,0,0,0,0,0,0]
|
||||
[-0.9615384615384613,0,1.9615384615384612,0,0,0,0,0,0.8076923076923075,19.26923076923077,0.8076923076923075,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,4.706896551724138,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,0.8076923076923077,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,4.706896551724138,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,11.100000000000001,0,0,0,0,0,0]
|
||||
[-4.475000000000001,0,6.925000000000001,0,0,0,0,0,0,0,0,7.925000000000001,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27.975,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,26.1,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27.3,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,10.5,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27.3,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0]
|
||||
[0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0]
|
||||
|
@ -1,19 +1,29 @@
|
||||
DROP TABLE IF EXISTS tb1;
|
||||
|
||||
CREATE TABLE tb1 (n UInt32, a Array(Float64)) engine=Memory;
|
||||
INSERT INTO tb1 VALUES (1, [-3,2.4,15,3.9,5,6,4.5,5.2,3,4,5,16,7,5,5,4]), (2, [-3,2.4,15,3.9,5,6,4.5,5.2,12,45,12,3.4,3,4,5,6]);
|
||||
INSERT INTO tb1 VALUES (1, [-3,2.40,15,3.90,5,6,4.50,5.20,3,4,5,16,7,5,5,4]), (2, [-3,2.40,15,3.90,5,6,4.50,5.20,12,45,12,3.40,3,4,5,6]);
|
||||
|
||||
-- non-const inputs
|
||||
SELECT seriesOutliersDetectTukey(a) FROM tb1 ORDER BY n;
|
||||
SELECT seriesOutliersDetectTukey(a,'ctukey', 25,75) FROM tb1 ORDER BY n;
|
||||
DROP TABLE IF EXISTS tb1;
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6]);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2.4, 15, 3.9, 5, 6, 4.5, 5.2, 12, 60, 12, 3.4, 3, 4, 5, 6, 3.4, 2.7]);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6], 'ctukey', 25, 75);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6], 'ctukey', 10, 90);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6], 'tukey', 10, 90);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6], 'ctukey', 2, 98)
|
||||
|
||||
-- const inputs
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6]);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2.40, 15, 3.90, 5, 6, 4.50, 5.20, 12, 60, 12, 3.40, 3, 4, 5, 6, 3.40, 2.7]);
|
||||
|
||||
-- const inputs with optional arguments
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 25, 75);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 10, 90);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'tukey', 10, 90);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 'ctukey', 2, 98);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 'ctukey', 2, 98);
|
||||
SELECT seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30)));
|
||||
SELECT seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30)), 'tukey');
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, 1.5);
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, 3);
|
||||
|
||||
-- negative tests
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 'tukey', 25, 75, -1); -- { serverError BAD_ARGUMENTS}
|
||||
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 'xyz', 33, 53); -- { serverError BAD_ARGUMENTS}
|
||||
SELECT seriesOutliersDetectTukey([-3, 2.4, 15, NULL]); -- { serverError ILLEGAL_COLUMN}
|
||||
SELECT seriesOutliersDetectTukey([]); -- { serverError ILLEGAL_COLUMN}
|
||||
|
@ -1332,6 +1332,7 @@ cryptographic
|
||||
csv
|
||||
csvwithnames
|
||||
csvwithnamesandtypes
|
||||
ctukey
|
||||
curdate
|
||||
currentDatabase
|
||||
currentProfiles
|
||||
@ -2271,6 +2272,7 @@ seektable
|
||||
sequenceCount
|
||||
sequenceMatch
|
||||
sequenceNextNode
|
||||
seriesOutliersDetectTukey
|
||||
seriesDecomposeSTL
|
||||
seriesPeriodDetectFFT
|
||||
serverTimeZone
|
||||
@ -2564,6 +2566,7 @@ tryPunycodeDecode
|
||||
tskv
|
||||
tsv
|
||||
tui
|
||||
tukey
|
||||
tumbleEnd
|
||||
tumbleStart
|
||||
tupleConcat
|
||||
|
Loading…
Reference in New Issue
Block a user