mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Revert "Revert "Implemented series period detect method using pocketfft lib""
This reverts commit d7d83c99e5
.
This commit is contained in:
parent
05bc8ef1e0
commit
dad268c33b
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -354,3 +354,6 @@
|
||||
[submodule "contrib/aklomp-base64"]
|
||||
path = contrib/aklomp-base64
|
||||
url = https://github.com/aklomp/base64.git
|
||||
[submodule "contrib/pocketfft"]
|
||||
path = contrib/pocketfft
|
||||
url = https://github.com/mreineck/pocketfft.git
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -44,6 +44,7 @@ else ()
|
||||
endif ()
|
||||
add_contrib (miniselect-cmake miniselect)
|
||||
add_contrib (pdqsort-cmake pdqsort)
|
||||
add_contrib (pocketfft-cmake pocketfft)
|
||||
add_contrib (crc32-vpmsum-cmake crc32-vpmsum)
|
||||
add_contrib (sparsehash-c11-cmake sparsehash-c11)
|
||||
add_contrib (abseil-cpp-cmake abseil-cpp)
|
||||
|
1
contrib/pocketfft
vendored
Submodule
1
contrib/pocketfft
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546
|
10
contrib/pocketfft-cmake/CMakeLists.txt
Normal file
10
contrib/pocketfft-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
||||
# pocketfft: third-party FFT library consumed through its header
# (pocketfft_hdronly.h). Exposed to the rest of the build as the
# ch_contrib::pocketfft alias target.
option (ENABLE_POCKETFFT "Enable pocketfft" ${ENABLE_LIBRARIES})

# When disabled, no target is created; consumers detect this with
# `if (TARGET ch_contrib::pocketfft)`.
if (NOT ENABLE_POCKETFFT)
    message(STATUS "Not using pocketfft")
    return()
endif()

# An INTERFACE target carries only usage requirements (the include path);
# nothing is compiled for the library itself.
add_library(_pocketfft INTERFACE)

# SYSTEM marks the directory as a system include path so that warnings raised
# inside the third-party header are not reported against the project's own code.
target_include_directories(_pocketfft SYSTEM INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/pocketfft")

add_library(ch_contrib::pocketfft ALIAS _pocketfft)
|
47
docs/en/sql-reference/functions/time-series-functions.md
Normal file
47
docs/en/sql-reference/functions/time-series-functions.md
Normal file
@ -0,0 +1,47 @@
|
||||
---
|
||||
slug: /en/sql-reference/functions/time-series-functions
|
||||
sidebar_position: 172
|
||||
sidebar_label: Time Series
|
||||
---
|
||||
|
||||
# Time Series Functions
|
||||
|
||||
The functions below are used for time series analysis.
|
||||
|
||||
## seriesPeriodDetectFFT
|
||||
|
||||
Finds the period of the given time series data using FFT.
|
||||
The period is taken as the inverse of the dominant frequency component in the FFT of the series.
|
||||
FFT - [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
seriesPeriodDetectFFT(series);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `series` - An array of numeric values
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A real value equal to the period of the time series.
|
||||
|
||||
Type: [Float64](../../sql-reference/data-types/float.md).
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT seriesPeriodDetectFFT([1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6]) AS print_0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────print_0──────┐
|
||||
│ 3 │
|
||||
└────────────────────────┘
|
||||
```
|
@ -436,6 +436,10 @@ dbms_target_link_libraries(PRIVATE ch_contrib::zstd)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::zstd)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::xz)
|
||||
|
||||
if (TARGET ch_contrib::pocketfft)
|
||||
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::pocketfft)
|
||||
endif ()
|
||||
|
||||
if (TARGET ch_contrib::icu)
|
||||
dbms_target_link_libraries (PRIVATE ch_contrib::icu)
|
||||
endif ()
|
||||
|
@ -61,6 +61,7 @@
|
||||
#cmakedefine01 FIU_ENABLE
|
||||
#cmakedefine01 USE_BCRYPT
|
||||
#cmakedefine01 USE_LIBARCHIVE
|
||||
#cmakedefine01 USE_POCKETFFT
|
||||
|
||||
/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO.
|
||||
/// That's why we use absolute paths.
|
||||
|
@ -95,6 +95,10 @@ if (TARGET ch_contrib::rapidjson)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::rapidjson)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::pocketfft)
|
||||
list (APPEND PRIVATE_LIBS ch_contrib::pocketfft)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::crc32-vpmsum)
|
||||
list (APPEND PUBLIC_LIBS ch_contrib::crc32-vpmsum)
|
||||
endif()
|
||||
|
164
src/Functions/seriesPeriodDetectFFT.cpp
Normal file
164
src/Functions/seriesPeriodDetectFFT.cpp
Normal file
@ -0,0 +1,164 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_POCKETFFT
|
||||
# ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wshadow"
|
||||
# pragma clang diagnostic ignored "-Wextra-semi-stmt"
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
# endif
|
||||
|
||||
# include <pocketfft_hdronly.h>
|
||||
|
||||
# ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
# endif
|
||||
|
||||
# include <cmath>
|
||||
# include <Columns/ColumnArray.h>
|
||||
# include <Columns/ColumnsNumber.h>
|
||||
# include <DataTypes/DataTypeArray.h>
|
||||
# include <DataTypes/DataTypesNumber.h>
|
||||
# include <Functions/FunctionFactory.h>
|
||||
# include <Functions/FunctionHelpers.h>
|
||||
# include <Functions/IFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
/*Detect Period in time series data using FFT.
|
||||
* FFT - Fast Fourier transform (https://en.wikipedia.org/wiki/Fast_Fourier_transform)
|
||||
* 1. Convert time series data to frequency domain using FFT.
|
||||
* 2. Remove the 0th(the Dc component) and n/2th the Nyquist frequency
|
||||
* 3. Find the peak value (highest) for dominant frequency component.
|
||||
* 4. Inverse of the dominant frequency component is the period.
|
||||
*/
|
||||
|
||||
class FunctionSeriesPeriodDetectFFT : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "seriesPeriodDetectFFT";
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSeriesPeriodDetectFFT>(); }
|
||||
|
||||
std::string getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
FunctionArgumentDescriptors args{{"time_series", &isArray<IDataType>, nullptr, "Array"}};
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
{
|
||||
ColumnPtr array_ptr = arguments[0].column;
|
||||
const ColumnArray * array = checkAndGetColumn<ColumnArray>(array_ptr.get());
|
||||
|
||||
const IColumn & src_data = array->getData();
|
||||
|
||||
auto res = ColumnFloat64::create(1);
|
||||
auto & res_data = res->getData();
|
||||
|
||||
Float64 period;
|
||||
|
||||
if (executeNumber<UInt8>(src_data, period) || executeNumber<UInt16>(src_data, period) || executeNumber<UInt32>(src_data, period)
|
||||
|| executeNumber<UInt64>(src_data, period) || executeNumber<Int8>(src_data, period) || executeNumber<Int16>(src_data, period)
|
||||
|| executeNumber<Int32>(src_data, period) || executeNumber<Int64>(src_data, period) || executeNumber<Float32>(src_data, period)
|
||||
|| executeNumber<Float64>(src_data, period))
|
||||
{
|
||||
res_data[0] = period;
|
||||
return res;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal column {} of first argument of function {}",
|
||||
arguments[0].column->getName(),
|
||||
getName());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool executeNumber(const IColumn & src_data, Float64 & period) const
|
||||
{
|
||||
const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data);
|
||||
if (!src_data_concrete)
|
||||
return false;
|
||||
|
||||
const PaddedPODArray<T> & src_vec = src_data_concrete->getData();
|
||||
|
||||
size_t len = src_vec.size();
|
||||
if (len < 4)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least four data points are needed for function {}", getName());
|
||||
|
||||
std::vector<Float64> src(src_vec.begin(), src_vec.end());
|
||||
std::vector<std::complex<double>> out((len / 2) + 1);
|
||||
|
||||
pocketfft::shape_t shape{len};
|
||||
|
||||
pocketfft::shape_t axes;
|
||||
axes.reserve(shape.size());
|
||||
for (size_t i = 0; i < shape.size(); ++i)
|
||||
axes.push_back(i);
|
||||
|
||||
pocketfft::stride_t stride_src{sizeof(double)};
|
||||
pocketfft::stride_t stride_out{sizeof(std::complex<double>)};
|
||||
|
||||
pocketfft::r2c(shape, stride_src, stride_out, axes, pocketfft::FORWARD, src.data(), out.data(), static_cast<double>(1));
|
||||
|
||||
size_t spec_len = (len - 1) / 2; //removing the nyquist element when len is even
|
||||
|
||||
double max_mag = 0;
|
||||
size_t idx = 1;
|
||||
for (size_t i = 1; i < spec_len; ++i)
|
||||
{
|
||||
double magnitude = sqrt(out[i].real() * out[i].real() + out[i].imag() * out[i].imag());
|
||||
if (magnitude > max_mag)
|
||||
{
|
||||
max_mag = magnitude;
|
||||
idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
// In case all FFT values are zero, it means the input signal is flat.
|
||||
// It implies the period of the series should be 0.
|
||||
if (max_mag == 0)
|
||||
{
|
||||
period = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<double> xfreq(spec_len);
|
||||
double step = 0.5 / (spec_len - 1);
|
||||
for (size_t i = 0; i < spec_len; ++i)
|
||||
xfreq[i] = i * step;
|
||||
|
||||
auto freq = xfreq[idx];
|
||||
|
||||
period = std::round(1 / freq);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_FUNCTION(SeriesPeriodDetectFFT)
|
||||
{
|
||||
factory.registerFunction<FunctionSeriesPeriodDetectFFT>(FunctionDocumentation{
|
||||
.description = R"(
|
||||
Detects period in time series data using FFT.)",
|
||||
.categories{"Time series analysis"}});
|
||||
}
|
||||
}
|
||||
#endif
|
@ -166,5 +166,8 @@ endif()
|
||||
if (TARGET ch_contrib::libarchive)
|
||||
set(USE_LIBARCHIVE 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::pocketfft)
|
||||
set(USE_POCKETFFT 1)
|
||||
endif()
|
||||
|
||||
set(SOURCE_DIR ${PROJECT_SOURCE_DIR})
|
||||
|
@ -0,0 +1,5 @@
|
||||
14
|
||||
3
|
||||
3
|
||||
3
|
||||
0
|
12
tests/queries/0_stateless/02813_series_period_detect.sql
Normal file
12
tests/queries/0_stateless/02813_series_period_detect.sql
Normal file
@ -0,0 +1,12 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
SELECT seriesPeriodDetectFFT([139, 87, 110, 68, 54, 50, 51, 53, 133, 86, 141, 97, 156, 94, 149, 95, 140, 77, 61, 50, 54, 47, 133, 72, 152, 94, 148, 105, 162, 101, 160, 87, 63, 53, 55, 54, 151, 103, 189, 108, 183, 113, 175, 113, 178, 90, 71, 62, 62, 65, 165, 109, 181, 115, 182, 121, 178, 114, 170]);
|
||||
SELECT seriesPeriodDetectFFT([10,20,30,10,20,30,10,20,30, 10,20,30,10,20,30,10,20,30,10,20,30]);
|
||||
SELECT seriesPeriodDetectFFT([10.1, 20.45, 40.34, 10.1, 20.45, 40.34,10.1, 20.45, 40.34,10.1, 20.45, 40.34,10.1, 20.45, 40.34,10.1, 20.45, 40.34,10.1, 20.45, 40.34, 10.1, 20.45, 40.34]);
|
||||
SELECT seriesPeriodDetectFFT([10.1, 10, 400, 10.1, 10, 400, 10.1, 10, 400,10.1, 10, 400,10.1, 10, 400,10.1, 10, 400,10.1, 10, 400,10.1, 10, 400]);
|
||||
SELECT seriesPeriodDetectFFT([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]);
|
||||
SELECT seriesPeriodDetectFFT([1,2,3]); -- { serverError BAD_ARGUMENTS}
|
||||
SELECT seriesPeriodDetectFFT(); --{ serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
|
||||
SELECT seriesPeriodDetectFFT([]); -- { serverError ILLEGAL_COLUMN}
|
||||
SELECT seriesPeriodDetectFFT([NULL, NULL, NULL]); -- { serverError ILLEGAL_COLUMN}
|
||||
SELECT seriesPeriodDetectFFT([10,20,30,10,202,30,NULL]); -- { serverError ILLEGAL_COLUMN }
|
@ -2231,6 +2231,7 @@ seektable
|
||||
sequenceCount
|
||||
sequenceMatch
|
||||
sequenceNextNode
|
||||
seriesPeriodDetectFFT
|
||||
serverTimeZone
|
||||
serverTimezone
|
||||
serverUUID
|
||||
|
Loading…
Reference in New Issue
Block a user