mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Merge pull request #57574 from ClibMouse/dev-seriesPeriodDetect
Revert "Revert "Implemented series period detect method using pocketfft lib""
This commit is contained in:
commit
7a5b40563a
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -354,6 +354,9 @@
|
||||
[submodule "contrib/aklomp-base64"]
|
||||
path = contrib/aklomp-base64
|
||||
url = https://github.com/aklomp/base64.git
|
||||
[submodule "contrib/pocketfft"]
|
||||
path = contrib/pocketfft
|
||||
url = https://github.com/mreineck/pocketfft.git
|
||||
[submodule "contrib/sqids-cpp"]
|
||||
path = contrib/sqids-cpp
|
||||
url = https://github.com/sqids/sqids-cpp.git
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -44,6 +44,7 @@ else ()
|
||||
endif ()
|
||||
add_contrib (miniselect-cmake miniselect)
|
||||
add_contrib (pdqsort-cmake pdqsort)
|
||||
add_contrib (pocketfft-cmake pocketfft)
|
||||
add_contrib (crc32-vpmsum-cmake crc32-vpmsum)
|
||||
add_contrib (sparsehash-c11-cmake sparsehash-c11)
|
||||
add_contrib (abseil-cpp-cmake abseil-cpp)
|
||||
|
1
contrib/pocketfft
vendored
Submodule
1
contrib/pocketfft
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546
|
10
contrib/pocketfft-cmake/CMakeLists.txt
Normal file
10
contrib/pocketfft-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
||||
# Build glue for the bundled pocketfft sources (contrib/pocketfft).
# When enabled, the library is exposed as ch_contrib::pocketfft: an INTERFACE
# target that compiles nothing itself and only carries the include path.
option (ENABLE_POCKETFFT "Enable pocketfft" ${ENABLE_LIBRARIES})

if (ENABLE_POCKETFFT)
    set (POCKETFFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/pocketfft")

    add_library (_pocketfft INTERFACE)
    target_include_directories (_pocketfft INTERFACE "${POCKETFFT_INCLUDE_DIR}")

    # Consumers test for and link against the namespaced alias.
    add_library (ch_contrib::pocketfft ALIAS _pocketfft)
else ()
    message (STATUS "Not using pocketfft")
endif ()
|
59
docs/en/sql-reference/functions/time-series-functions.md
Normal file
59
docs/en/sql-reference/functions/time-series-functions.md
Normal file
@ -0,0 +1,59 @@
|
||||
---
|
||||
slug: /en/sql-reference/functions/time-series-functions
|
||||
sidebar_position: 172
|
||||
sidebar_label: Time Series
|
||||
---
|
||||
|
||||
# Time Series Functions
|
||||
|
||||
The functions below are used for time series analysis.
|
||||
|
||||
## seriesPeriodDetectFFT
|
||||
|
||||
Finds the period of the given time series data using FFT.
|
||||
FFT - [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform)
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
seriesPeriodDetectFFT(series);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `series` - An array of numeric values
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A real value equal to the period of the time series.
|
||||
- Returns NaN when the number of data points is less than four.
|
||||
|
||||
Type: [Float64](../../sql-reference/data-types/float.md).
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT seriesPeriodDetectFFT([1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6]) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────print_0──────┐
|
||||
│ 3 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT seriesPeriodDetectFFT(arrayMap(x -> abs((x % 6) - 3), range(1000))) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─print_0─┐
|
||||
│ 6 │
|
||||
└─────────┘
|
||||
```
|
@ -436,6 +436,10 @@ dbms_target_link_libraries(PRIVATE ch_contrib::zstd)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::zstd)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::xz)
|
||||
|
||||
if (TARGET ch_contrib::pocketfft)
|
||||
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::pocketfft)
|
||||
endif ()
|
||||
|
||||
if (TARGET ch_contrib::icu)
|
||||
dbms_target_link_libraries (PRIVATE ch_contrib::icu)
|
||||
endif ()
|
||||
|
@ -62,6 +62,7 @@
|
||||
#cmakedefine01 FIU_ENABLE
|
||||
#cmakedefine01 USE_BCRYPT
|
||||
#cmakedefine01 USE_LIBARCHIVE
|
||||
#cmakedefine01 USE_POCKETFFT
|
||||
|
||||
/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO.
|
||||
/// That's why we use absolute paths.
|
||||
|
@ -99,6 +99,10 @@ if (TARGET ch_contrib::rapidjson)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::rapidjson)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::pocketfft)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::pocketfft)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::crc32-vpmsum)
|
||||
list (APPEND PUBLIC_LIBS ch_contrib::crc32-vpmsum)
|
||||
endif()
|
||||
|
227
src/Functions/seriesPeriodDetectFFT.cpp
Normal file
227
src/Functions/seriesPeriodDetectFFT.cpp
Normal file
@ -0,0 +1,227 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_POCKETFFT
|
||||
# ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wshadow"
|
||||
# pragma clang diagnostic ignored "-Wextra-semi-stmt"
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
# endif
|
||||
|
||||
# include <pocketfft_hdronly.h>
|
||||
|
||||
# ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
# endif
|
||||
|
||||
# include <cmath>
|
||||
# include <Columns/ColumnArray.h>
|
||||
# include <Columns/ColumnsNumber.h>
|
||||
# include <DataTypes/DataTypeArray.h>
|
||||
# include <DataTypes/DataTypesNumber.h>
|
||||
# include <Functions/FunctionFactory.h>
|
||||
# include <Functions/FunctionHelpers.h>
|
||||
# include <Functions/IFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
/*Detect Period in time series data using FFT.
|
||||
* FFT - Fast Fourier transform (https://en.wikipedia.org/wiki/Fast_Fourier_transform)
|
||||
* 1. Convert time series data to frequency domain using FFT.
|
||||
* 2. Remove the 0th(the Dc component) and n/2th the Nyquist frequency
|
||||
* 3. Find the peak value (highest) for dominant frequency component.
|
||||
* 4. Inverse of the dominant frequency component is the period.
|
||||
*/
|
||||
|
||||
class FunctionSeriesPeriodDetectFFT : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "seriesPeriodDetectFFT";
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSeriesPeriodDetectFFT>(); }
|
||||
|
||||
std::string getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
FunctionArgumentDescriptors args{{"time_series", &isArray<IDataType>, nullptr, "Array"}};
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
ColumnPtr array_ptr = arguments[0].column;
|
||||
const ColumnArray * array = checkAndGetColumn<ColumnArray>(array_ptr.get());
|
||||
|
||||
const IColumn & src_data = array->getData();
|
||||
const ColumnArray::Offsets & offsets = array->getOffsets();
|
||||
|
||||
auto res = ColumnFloat64::create(input_rows_count);
|
||||
auto & res_data = res->getData();
|
||||
|
||||
ColumnArray::Offset prev_src_offset = 0;
|
||||
|
||||
Float64 period;
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
ColumnArray::Offset curr_offset = offsets[i];
|
||||
if (executeNumbers<UInt8>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<UInt16>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<UInt32>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<UInt64>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<Int8>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<Int16>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<Int32>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<Int64>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<Float32>(src_data, period, prev_src_offset, curr_offset)
|
||||
|| executeNumbers<Float64>(src_data, period, prev_src_offset, curr_offset))
|
||||
{
|
||||
res_data[i] = period;
|
||||
prev_src_offset = curr_offset;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal column {} of first argument of function {}",
|
||||
arguments[0].column->getName(),
|
||||
getName());
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool executeNumbers(const IColumn & src_data, Float64 & period, ColumnArray::Offset & start, ColumnArray::Offset & end) const
|
||||
{
|
||||
const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data);
|
||||
if (!src_data_concrete)
|
||||
return false;
|
||||
|
||||
const PaddedPODArray<T> & src_vec = src_data_concrete->getData();
|
||||
|
||||
chassert(start <= end);
|
||||
size_t len = end - start;
|
||||
if (len < 4)
|
||||
{
|
||||
period = NAN; // At least four data points are required to detect period
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<Float64> src((src_vec.begin() + start), (src_vec.begin() + end));
|
||||
std::vector<std::complex<double>> out((len / 2) + 1);
|
||||
|
||||
pocketfft::shape_t shape{len};
|
||||
|
||||
pocketfft::shape_t axes;
|
||||
axes.reserve(shape.size());
|
||||
for (size_t i = 0; i < shape.size(); ++i)
|
||||
axes.push_back(i);
|
||||
|
||||
pocketfft::stride_t stride_src{sizeof(double)};
|
||||
pocketfft::stride_t stride_out{sizeof(std::complex<double>)};
|
||||
|
||||
pocketfft::r2c(shape, stride_src, stride_out, axes, pocketfft::FORWARD, src.data(), out.data(), static_cast<double>(1));
|
||||
|
||||
size_t spec_len = (len - 1) / 2; //removing the nyquist element when len is even
|
||||
|
||||
double max_mag = 0;
|
||||
size_t idx = 1;
|
||||
for (size_t i = 1; i < spec_len; ++i)
|
||||
{
|
||||
double magnitude = sqrt(out[i].real() * out[i].real() + out[i].imag() * out[i].imag());
|
||||
if (magnitude > max_mag)
|
||||
{
|
||||
max_mag = magnitude;
|
||||
idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
// In case all FFT values are zero, it means the input signal is flat.
|
||||
// It implies the period of the series should be 0.
|
||||
if (max_mag == 0)
|
||||
{
|
||||
period = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<double> xfreq(spec_len);
|
||||
double step = 0.5 / (spec_len - 1);
|
||||
for (size_t i = 0; i < spec_len; ++i)
|
||||
xfreq[i] = i * step;
|
||||
|
||||
auto freq = xfreq[idx];
|
||||
|
||||
period = std::round(1 / freq);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/// Registers seriesPeriodDetectFFT in the function factory together with its embedded documentation.
REGISTER_FUNCTION(SeriesPeriodDetectFFT)
{
    factory.registerFunction<FunctionSeriesPeriodDetectFFT>(FunctionDocumentation{
        .description = R"(
Finds the period of the given time series data using FFT.
FFT - Fast Fourier transform (https://en.wikipedia.org/wiki/Fast_Fourier_transform)

**Syntax**

``` sql
seriesPeriodDetectFFT(series);
```

**Arguments**

- `series` - An array of numeric values.

**Returned value**

- A real value equal to the period of the time series.
- Returns NaN when the number of data points is less than four.

Type: [Float64](../../sql-reference/data-types/float.md).

**Examples**

Query:

``` sql
SELECT seriesPeriodDetectFFT([1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6]) AS print_0;
```

Result:

``` text
┌───────────print_0──────┐
│                      3 │
└────────────────────────┘
```

``` sql
SELECT seriesPeriodDetectFFT(arrayMap(x -> abs((x % 6) - 3), range(1000))) AS print_0;
```

Result:

``` text
┌─print_0─┐
│       6 │
└─────────┘
```
)",
        .categories{"Time series analysis"}});
}
|
||||
}
|
||||
#endif
|
@ -169,5 +169,8 @@ endif()
|
||||
if (TARGET ch_contrib::libarchive)
|
||||
set(USE_LIBARCHIVE 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::pocketfft)
|
||||
set(USE_POCKETFFT 1)
|
||||
endif()
|
||||
|
||||
set(SOURCE_DIR ${PROJECT_SOURCE_DIR})
|
||||
|
@ -0,0 +1,12 @@
|
||||
14
|
||||
3
|
||||
3
|
||||
3
|
||||
0
|
||||
62
|
||||
6
|
||||
6
|
||||
nan
|
||||
3
|
||||
0
|
||||
nan
|
22
tests/queries/0_stateless/02813_series_period_detect.sql
Normal file
22
tests/queries/0_stateless/02813_series_period_detect.sql
Normal file
@ -0,0 +1,22 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
DROP TABLE IF EXISTS tb1;
|
||||
|
||||
CREATE TABLE tb1 (n UInt32, a Array(Int32)) engine=Memory;
|
||||
INSERT INTO tb1 VALUES (1, [10, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30]), (2, [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), (3, [6, 3, 4]);
|
||||
|
||||
SELECT seriesPeriodDetectFFT([139, 87, 110, 68, 54, 50, 51, 53, 133, 86, 141, 97, 156, 94, 149, 95, 140, 77, 61, 50, 54, 47, 133, 72, 152, 94, 148, 105, 162, 101, 160, 87, 63, 53, 55, 54, 151, 103, 189, 108, 183, 113, 175, 113, 178, 90, 71, 62, 62, 65, 165, 109, 181, 115, 182, 121, 178, 114, 170]);
|
||||
SELECT seriesPeriodDetectFFT([10, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30]);
|
||||
SELECT seriesPeriodDetectFFT([10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34]);
|
||||
SELECT seriesPeriodDetectFFT([10.1, 10, 400, 10.1, 10, 400, 10.1, 10, 400, 10.1, 10, 400, 10.1, 10, 400, 10.1, 10, 400, 10.1, 10, 400, 10.1, 10, 400]);
|
||||
SELECT seriesPeriodDetectFFT([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]);
|
||||
SELECT seriesPeriodDetectFFT(arrayMap(x -> sin(x / 10), range(1000)));
|
||||
SELECT seriesPeriodDetectFFT(arrayMap(x -> abs((x % 6) - 3), range(1000)));
|
||||
SELECT seriesPeriodDetectFFT(arrayMap(x -> if((x % 6) < 3, 3, 0), range(1000)));
|
||||
SELECT seriesPeriodDetectFFT([1,2,3]);
|
||||
SELECT seriesPeriodDetectFFT(a) FROM tb1;
|
||||
DROP TABLE IF EXISTS tb1;
|
||||
SELECT seriesPeriodDetectFFT(); --{ serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
|
||||
SELECT seriesPeriodDetectFFT([]); -- { serverError ILLEGAL_COLUMN}
|
||||
SELECT seriesPeriodDetectFFT([NULL, NULL, NULL]); -- { serverError ILLEGAL_COLUMN}
|
||||
SELECT seriesPeriodDetectFFT([10, 20, 30, 10, 202, 30, NULL]); -- { serverError ILLEGAL_COLUMN }
|
@ -2233,6 +2233,7 @@ seektable
|
||||
sequenceCount
|
||||
sequenceMatch
|
||||
sequenceNextNode
|
||||
seriesPeriodDetectFFT
|
||||
serverTimeZone
|
||||
serverTimezone
|
||||
serverUUID
|
||||
|
Loading…
Reference in New Issue
Block a user