changed 2nd argument type to UInt and other cosmetic changes

This commit is contained in:
Bhavna Jindal 2024-01-05 11:39:52 -08:00
parent 0adac742f8
commit 22474f6b38
6 changed files with 41 additions and 62 deletions

View File

@ -4,7 +4,7 @@ sidebar_position: 172
sidebar_label: Time Series
---
#Time Series Functions
# Time Series Functions
Below functions are used for time series analysis.
@ -60,10 +60,7 @@ Result:
## seriesDecomposeSTL
Decompose time series data based on STL(Seasonal-Trend Decomposition Procedure Based on Loess)
Returns an array of three arrays where the first array include seasonal components, the second array - trend,
and the third array - residue component.
https://www.wessa.net/download/stl.pdf
Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component.
**Syntax**
@ -80,7 +77,8 @@ The number of data points in `series` should be at least twice the value of `per
**Returned value**
- Array of arrays
- An array of three arrays where the first array includes seasonal components, the second array - trend,
and the third array - residue component.
Type: [Array](../../sql-reference/data-types/array.md).

View File

@ -59,7 +59,6 @@
#cmakedefine01 USE_PRQL
#cmakedefine01 USE_OPENSSL_INTREE
#cmakedefine01 USE_ULID
#cmakedefine01 USE_SEASONAL
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_BCRYPT
#cmakedefine01 USE_LIBARCHIVE

View File

@ -1,5 +1,3 @@
#include "config.h"
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
@ -54,8 +52,8 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
{"time-series", &isArray<IDataType>, nullptr, "Array"},
{"period", &isNativeNumber<IDataType>, nullptr, "Number"},
{"time_series", &isArray<IDataType>, nullptr, "Array"},
{"period", &isNativeUInt<IDataType>, nullptr, "Unsigned Integer"},
};
validateFunctionArgumentTypes(*this, arguments, args);
@ -68,8 +66,7 @@ public:
const ColumnArray * array = checkAndGetColumn<ColumnArray>(array_ptr.get());
if (!array)
{
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(
arguments[0].column.get());
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(arguments[0].column.get());
if (!const_array)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
arguments[0].column->getName(), getName());
@ -81,13 +78,13 @@ public:
const IColumn & src_data = array->getData();
const ColumnArray::Offsets & src_offsets = array->getOffsets();
Float64 period;
UInt64 period;
auto ret = ColumnFloat32::create();
auto & res_data = ret->getData();
auto res = ColumnFloat32::create();
auto & res_data = res->getData();
ColumnArray::ColumnOffsets::MutablePtr col_offsets = ColumnArray::ColumnOffsets::create();
auto & col_offsets_data = col_offsets->getData();
ColumnArray::ColumnOffsets::MutablePtr res_col_offsets = ColumnArray::ColumnOffsets::create();
auto & res_col_offsets_data = res_col_offsets->getData();
auto root_offsets = ColumnArray::ColumnOffsets::create();
auto & root_offsets_data = root_offsets->getData();
@ -97,19 +94,11 @@ public:
for (size_t i = 0; i < src_offsets.size(); ++i)
{
auto period_ptr = arguments[1].column->convertToFullColumnIfConst();
if (checkAndGetColumn<ColumnUInt8>(period_ptr.get()) || checkAndGetColumn<ColumnUInt16>(period_ptr.get())
|| checkAndGetColumn<ColumnUInt32>(period_ptr.get()) || checkAndGetColumn<ColumnUInt64>(period_ptr.get()))
if (checkAndGetColumn<ColumnUInt8>(period_ptr.get())
|| checkAndGetColumn<ColumnUInt16>(period_ptr.get())
|| checkAndGetColumn<ColumnUInt32>(period_ptr.get())
|| checkAndGetColumn<ColumnUInt64>(period_ptr.get()))
period = period_ptr->getUInt(i);
else if (checkAndGetColumn<ColumnFloat32>(period_ptr.get()) || checkAndGetColumn<ColumnFloat64>(period_ptr.get()))
{
period = period_ptr->getFloat64(i);
if (isNaN(period) || !std::isfinite(period) || period < 0)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal value {} for second argument of function {}. Should be a positive number",
period,
getName());
}
else
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
@ -136,15 +125,15 @@ public:
|| executeNumber<Float64>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue))
{
res_data.insert(res_data.end(), seasonal.begin(), seasonal.end());
col_offsets_data.push_back(res_data.size());
res_col_offsets_data.push_back(res_data.size());
res_data.insert(res_data.end(), trend.begin(), trend.end());
col_offsets_data.push_back(res_data.size());
res_col_offsets_data.push_back(res_data.size());
res_data.insert(res_data.end(), residue.begin(), residue.end());
col_offsets_data.push_back(res_data.size());
res_col_offsets_data.push_back(res_data.size());
root_offsets_data.push_back(col_offsets->size());
root_offsets_data.push_back(res_col_offsets->size());
prev_src_offset = curr_offset;
}
@ -155,14 +144,14 @@ public:
arguments[0].column->getName(),
getName());
}
ColumnArray::MutablePtr nested_array_col = ColumnArray::create(std::move(ret), std::move(col_offsets));
ColumnArray::MutablePtr nested_array_col = ColumnArray::create(std::move(res), std::move(res_col_offsets));
return ColumnArray::create(std::move(nested_array_col), std::move(root_offsets));
}
template <typename T>
bool executeNumber(
const IColumn & src_data,
Float64 period,
UInt64 period,
ColumnArray::Offset & start,
ColumnArray::Offset & end,
std::vector<Float32> & seasonal,
@ -179,38 +168,28 @@ public:
size_t len = end - start;
if (len < 4)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least four data points are needed for function {}", getName());
else if (period > (len / 2.0))
if (period > (len / 2))
throw Exception(
ErrorCodes::BAD_ARGUMENTS, "The series should have data of at least two period lengths for function {}", getName());
std::vector<float> src((src_vec.begin() + start), (src_vec.begin() + end));
std::vector<float> src(src_vec.begin() + start, src_vec.begin() + end);
try
{
auto res = stl::params().fit(src, static_cast<size_t>(std::round(period)));
auto res = stl::params().fit(src, static_cast<size_t>(period));
if (res.seasonal.empty())
return false;
seasonal = res.seasonal;
trend = res.trend;
residue = res.remainder;
return true;
}
catch (...)
{
if (res.seasonal.empty())
return false;
}
seasonal = std::move(res.seasonal);
trend = std::move(res.trend);
residue = std::move(res.remainder);
return true;
}
};
REGISTER_FUNCTION(seriesDecomposeSTL)
{
factory.registerFunction<FunctionSeriesDecomposeSTL>(FunctionDocumentation{
.description = R"(
Decompose time series data based on STL(Seasonal-Trend Decomposition Procedure Based on Loess)
Returns an array of three arrays where the first array include seasonal components, the second array - trend,
and the third array - residue component.
https://www.wessa.net/download/stl.pdf
Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component.
**Syntax**
@ -227,7 +206,7 @@ The number of data points in `series` should be at least twice the value of `per
**Returned value**
- Array of arrays
- An array of three arrays where the first array include seasonal components, the second array - trend, and the third array - residue component.
Type: [Array](../../sql-reference/data-types/array.md).

View File

@ -1,3 +1,6 @@
// Dump of https://github.com/ankane/stl-cpp/blob/3b1b3a3e9335cda26c8b0797d8b8d24ac8e350ad/include/stl.hpp.
// Added to ClickHouse source code and not referenced as a submodule because it's easier to maintain and modify/customize.
/*!
* STL C++ v0.1.3
* https://github.com/ankane/stl-cpp

View File

@ -1,18 +1,18 @@
-- Tags: no-fasttest, no-cpu-aarch64
-- Tags: no-cpu-aarch64
-- Tag no-cpu-aarch64: values generated are slightly different on aarch64
DROP TABLE IF EXISTS tb2;
CREATE TABLE tb2 (`n` UInt32, `a` Array(Float64)) ENGINE = Memory;
CREATE TABLE tb2 (`period` UInt32, `ts` Array(Float64)) ENGINE = Memory;
INSERT INTO tb2 VALUES (3,[10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34]);
INSERT INTO tb2 VALUES (14, [139, 87, 110, 68, 54, 50, 51, 53, 133, 86, 141, 97, 156, 94, 149, 95, 140, 77, 61, 50, 54, 47, 133, 72, 152, 94, 148, 105, 162, 101, 160, 87, 63, 53, 55, 54, 151, 103, 189, 108, 183, 113, 175, 113, 178, 90, 71, 62, 62, 65, 165, 109, 181, 115, 182, 121, 178, 114, 170]);
SELECT seriesDecomposeSTL([10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34], 3);
SELECT seriesDecomposeSTL([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], 0);
SELECT seriesDecomposeSTL(a, n) FROM tb2;
SELECT seriesDecomposeSTL(ts, period) FROM tb2 ORDER BY period;
DROP TABLE IF EXISTS tb2;
SELECT seriesDecomposeSTL([2,2,2,2,2,2,2,2,2,2,2,2,2,2], -5); -- { serverError ILLEGAL_COLUMN}
SELECT seriesDecomposeSTL([2,2,2,2,2,2,2,2,2,2,2,2,2,2], -5.2); --{ serverError ILLEGAL_COLUMN}
SELECT seriesDecomposeSTL([2,2,2,2,2,2,2,2,2,2,2,2,2,2], -5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT}
SELECT seriesDecomposeSTL([2,2,2,2,2,2,2,2,2,2,2,2,2,2], -5.2); --{ serverError ILLEGAL_TYPE_OF_ARGUMENT}
SELECT seriesDecomposeSTL(); --{ serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
SELECT seriesDecomposeSTL([]); --{ serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
SELECT seriesDecomposeSTL([1,2,3], 2); --{ serverError BAD_ARGUMENTS}