Merge branch 'master' into nulls

This commit is contained in:
chertus 2019-05-16 14:02:12 +03:00
commit 2739a3f6d0
40 changed files with 573 additions and 81 deletions

View File

@ -310,6 +310,7 @@ include (cmake/find_rt.cmake)
include (cmake/find_execinfo.cmake)
include (cmake/find_readline_edit.cmake)
include (cmake/find_re2.cmake)
include (cmake/find_libgsasl.cmake)
include (cmake/find_rdkafka.cmake)
include (cmake/find_capnp.cmake)
include (cmake/find_llvm.cmake)
@ -317,7 +318,6 @@ include (cmake/find_cpuid.cmake) # Freebsd, bundled
if (NOT USE_CPUID)
include (cmake/find_cpuinfo.cmake) # Debian
endif()
include (cmake/find_libgsasl.cmake)
include (cmake/find_libxml2.cmake)
include (cmake/find_brotli.cmake)
include (cmake/find_protobuf.cmake)

View File

@ -1,9 +1,12 @@
option (USE_INTERNAL_BOOST_LIBRARY "Set to FALSE to use system boost library instead of bundled" ${NOT_UNBUNDLED})
# Test random file existing in all package variants
if (USE_INTERNAL_BOOST_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/system/src/error_code.cpp")
message (WARNING "submodules in contrib/boost is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_INTERNAL_BOOST_LIBRARY 0)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/system/src/error_code.cpp")
if(USE_INTERNAL_BOOST_LIBRARY)
message(WARNING "submodules in contrib/boost is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set (USE_INTERNAL_BOOST_LIBRARY 0)
set (MISSING_INTERNAL_BOOST_LIBRARY 1)
endif ()
if (NOT USE_INTERNAL_BOOST_LIBRARY)
@ -21,10 +24,9 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
set (Boost_INCLUDE_DIRS "")
set (Boost_SYSTEM_LIBRARY "")
endif ()
endif ()
if (NOT Boost_SYSTEM_LIBRARY)
if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY)
set (USE_INTERNAL_BOOST_LIBRARY 1)
set (Boost_SYSTEM_LIBRARY boost_system_internal)
set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal)
@ -44,7 +46,6 @@ if (NOT Boost_SYSTEM_LIBRARY)
# For packaged version:
list (APPEND Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/boost")
endif ()
message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_REGEX_LIBRARY}")

View File

@ -22,4 +22,8 @@ elseif (NOT MISSING_INTERNAL_LIBGSASL_LIBRARY AND NOT APPLE AND NOT ARCH_32)
set (LIBGSASL_LIBRARY libgsasl)
endif ()
message (STATUS "Using libgsasl: ${LIBGSASL_INCLUDE_DIR} : ${LIBGSASL_LIBRARY}")
if(LIBGSASL_LIBRARY AND LIBGSASL_INCLUDE_DIR)
set (USE_LIBGSASL 1)
endif()
message (STATUS "Using libgsasl=${USE_LIBGSASL}: ${LIBGSASL_INCLUDE_DIR} : ${LIBGSASL_LIBRARY}")

View File

@ -10,7 +10,7 @@ endif ()
if (ENABLE_RDKAFKA)
if (OS_LINUX AND NOT ARCH_ARM)
if (OS_LINUX AND NOT ARCH_ARM AND USE_LIBGSASL)
option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
endif ()

View File

@ -1,5 +1,13 @@
option (USE_INTERNAL_RE2_LIBRARY "Set to FALSE to use system re2 library instead of bundled [slower]" ${NOT_UNBUNDLED})
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/re2/CMakeLists.txt")
if(USE_INTERNAL_RE2_LIBRARY)
message(WARNING "submodule contrib/re2 is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set(USE_INTERNAL_RE2_LIBRARY 0)
set(MISSING_INTERNAL_RE2_LIBRARY 1)
endif()
if (NOT USE_INTERNAL_RE2_LIBRARY)
find_library (RE2_LIBRARY re2)
find_path (RE2_INCLUDE_DIR NAMES re2/re2.h PATHS ${RE2_INCLUDE_PATHS})

View File

@ -9,6 +9,6 @@ if (NOT HAVE_AVX2)
endif ()
option (USE_SIMDJSON "Use simdjson" ON)
set (SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set (SIMDJSON_LIBRARY "simdjson")
message(STATUS "Using simdjson=${USE_SIMDJSON}: ${SIMDJSON_LIBRARY}")

View File

@ -2,20 +2,28 @@ if (NOT OS_FREEBSD AND NOT ARCH_32)
option (USE_INTERNAL_ZLIB_LIBRARY "Set to FALSE to use system zlib library instead of bundled" ${NOT_UNBUNDLED})
endif ()
if (NOT MSVC)
set (INTERNAL_ZLIB_NAME "zlib-ng" CACHE INTERNAL "")
else ()
set (INTERNAL_ZLIB_NAME "zlib" CACHE INTERNAL "")
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}")
message (WARNING "Will use standard zlib, please clone manually:\n git clone https://github.com/madler/zlib.git ${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}")
endif ()
endif ()
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}/zlib.h")
if(USE_INTERNAL_ZLIB_LIBRARY)
message(WARNING "submodule contrib/${INTERNAL_ZLIB_NAME} is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set(USE_INTERNAL_ZLIB_LIBRARY 0)
set(MISSING_INTERNAL_ZLIB_LIBRARY 1)
endif()
if (NOT USE_INTERNAL_ZLIB_LIBRARY)
find_package (ZLIB)
endif ()
if (NOT ZLIB_FOUND)
if (NOT MSVC)
set (INTERNAL_ZLIB_NAME "zlib-ng" CACHE INTERNAL "")
else ()
set (INTERNAL_ZLIB_NAME "zlib" CACHE INTERNAL "")
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}")
message (WARNING "Will use standard zlib, please clone manually:\n git clone https://github.com/madler/zlib.git ${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}")
endif ()
endif ()
if (NOT ZLIB_FOUND AND NOT MISSING_INTERNAL_ZLIB_LIBRARY)
set (USE_INTERNAL_ZLIB_LIBRARY 1)
set (ZLIB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}" "${ClickHouse_BINARY_DIR}/contrib/${INTERNAL_ZLIB_NAME}" CACHE INTERNAL "") # generated zconf.h
set (ZLIB_INCLUDE_DIRS ${ZLIB_INCLUDE_DIR}) # for poco

View File

@ -1,9 +1,12 @@
option (USE_INTERNAL_ZSTD_LIBRARY "Set to FALSE to use system zstd library instead of bundled" ${NOT_UNBUNDLED})
if (USE_INTERNAL_ZSTD_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib/zstd.h")
message (WARNING "submodule contrib/zstd is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_INTERNAL_ZSTD_LIBRARY 0)
endif ()
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib/zstd.h")
if(USE_INTERNAL_ZSTD_LIBRARY)
message(WARNING "submodule contrib/zstd is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set(USE_INTERNAL_ZSTD_LIBRARY 0)
set(MISSING_INTERNAL_ZSTD_LIBRARY 1)
endif()
if (NOT USE_INTERNAL_ZSTD_LIBRARY)
find_library (ZSTD_LIBRARY zstd)
@ -11,7 +14,7 @@ if (NOT USE_INTERNAL_ZSTD_LIBRARY)
endif ()
if (ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
else ()
elseif (NOT MISSING_INTERNAL_ZSTD_LIBRARY)
set (USE_INTERNAL_ZSTD_LIBRARY 1)
set (ZSTD_LIBRARY zstd)
set (ZSTD_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/zstd/lib)

View File

@ -59,7 +59,7 @@ add_library(rdkafka ${SRCS})
target_include_directories(rdkafka SYSTEM PUBLIC include)
target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used.
target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common) # Because wrong path to "zstd_errors.h" is used.
target_link_libraries(rdkafka PUBLIC ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LZ4_LIBRARY} libgsasl)
target_link_libraries(rdkafka PUBLIC ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LZ4_LIBRARY} ${LIBGSASL_LIBRARY})
if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY)
target_link_libraries(rdkafka PUBLIC ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY})
endif()

View File

@ -1,15 +1,7 @@
if (NOT HAVE_AVX2)
message (FATAL_ERROR "No AVX2 support")
endif ()
if(MAKE_STATIC_LIBRARIES)
set(SIMDJSON_LIB_TYPE STATIC)
MESSAGE(STATUS "Building static library ${SIMDJSON_LIBRARY}")
else()
set(SIMDJSON_LIB_TYPE SHARED)
MESSAGE(STATUS "Building dynamic library ${SIMDJSON_LIBRARY}")
endif()
set(SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set(SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src")
set(SIMDJSON_SRC
${SIMDJSON_SRC_DIR}/jsonioutil.cpp
@ -21,6 +13,6 @@ set(SIMDJSON_SRC
${SIMDJSON_SRC_DIR}/parsedjsoniterator.cpp
)
add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC})
target_include_directories(${SIMDJSON_LIBRARY} PRIVATE "${SIMDJSON_INCLUDE_DIR}")
add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_SRC})
target_include_directories(${SIMDJSON_LIBRARY} PUBLIC "${SIMDJSON_INCLUDE_DIR}")
target_compile_options(${SIMDJSON_LIBRARY} PRIVATE -mavx2 -mbmi -mbmi2 -mpclmul)

View File

@ -189,8 +189,17 @@ target_link_libraries (clickhouse_common_io
${Poco_Net_LIBRARY}
${Poco_Util_LIBRARY}
${Poco_Foundation_LIBRARY}
${RE2_LIBRARY}
${RE2_ST_LIBRARY}
)
if(RE2_LIBRARY)
target_link_libraries(clickhouse_common_io PUBLIC ${RE2_LIBRARY})
endif()
if(RE2_ST_LIBRARY)
target_link_libraries(clickhouse_common_io PUBLIC ${RE2_ST_LIBRARY})
endif()
target_link_libraries(clickhouse_common_io
PUBLIC
${CITYHASH_LIBRARIES}
PRIVATE
${ZLIB_LIBRARIES}
@ -208,7 +217,9 @@ target_link_libraries (clickhouse_common_io
)
target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR})
if(RE2_INCLUDE_DIR)
target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR})
endif()
if (USE_LFALLOC)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${LFALLOC_INCLUDE_DIR})

View File

@ -0,0 +1,30 @@
#include "AggregateFunctionTSGroupSum.h"
#include "AggregateFunctionFactory.h"
#include "FactoryHelpers.h"
#include "Helpers.h"
namespace DB
{
namespace
{
template <bool rate>
AggregateFunctionPtr createAggregateFunctionTSgroupSum(const std::string & name, const DataTypes & arguments, const Array & params)
{
assertNoParameters(name, params);
if (arguments.size() < 3)
throw Exception("Not enough event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return std::make_shared<AggregateFunctionTSgroupSum<rate>>(arguments);
}
}
void registerAggregateFunctionTSgroupSum(AggregateFunctionFactory & factory)
{
factory.registerFunction("TSgroupSum", createAggregateFunctionTSgroupSum<false>, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("TSgroupRateSum", createAggregateFunctionTSgroupSum<true>, AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,287 @@
#pragma once
#include <bitset>
#include <iostream>
#include <map>
#include <queue>
#include <sstream>
#include <unordered_set>
#include <utility>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/ArenaAllocator.h>
#include <ext/range.h>
#include "IAggregateFunction.h"
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
}
template <bool rate>
struct AggregateFunctionTSgroupSumData
{
using DataPoint = std::pair<Int64, Float64>;
struct Points
{
using Dps = std::queue<DataPoint>;
Dps dps;
void add(Int64 t, Float64 v)
{
dps.push(std::make_pair(t, v));
if (dps.size() > 2)
dps.pop();
}
Float64 getval(Int64 t)
{
Int64 t1, t2;
Float64 v1, v2;
if (rate)
{
if (dps.size() < 2)
return 0;
t1 = dps.back().first;
t2 = dps.front().first;
v1 = dps.back().second;
v2 = dps.front().second;
return (v1 - v2) / Float64(t1 - t2);
}
else
{
if (dps.size() == 1 && t == dps.front().first)
return dps.front().second;
t1 = dps.back().first;
t2 = dps.front().first;
v1 = dps.back().second;
v2 = dps.front().second;
return v2 + ((v1 - v2) * Float64(t - t2)) / Float64(t1 - t2);
}
}
};
static constexpr size_t bytes_on_stack = 128;
typedef std::map<UInt64, Points> Series;
typedef PODArray<DataPoint, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>> AggSeries;
Series ss;
AggSeries result;
void add(UInt64 uid, Int64 t, Float64 v)
{ //suppose t is coming asc
typename Series::iterator it_ss;
if (ss.count(uid) == 0)
{ //time series not exist, insert new one
Points tmp;
tmp.add(t, v);
ss.emplace(uid, tmp);
it_ss = ss.find(uid);
}
else
{
it_ss = ss.find(uid);
it_ss->second.add(t, v);
}
if (result.size() > 0 && t < result.back().first)
throw Exception{"TSgroupSum or TSgroupRateSum must order by timestamp asc!!!", ErrorCodes::LOGICAL_ERROR};
if (result.size() > 0 && t == result.back().first)
{
//do not add new point
if (rate)
result.back().second += it_ss->second.getval(t);
else
result.back().second += v;
}
else
{
if (rate)
result.emplace_back(std::make_pair(t, it_ss->second.getval(t)));
else
result.emplace_back(std::make_pair(t, v));
}
size_t i = result.size() - 1;
//reverse find out the index of timestamp that more than previous timestamp of t
while (result[i].first > it_ss->second.dps.front().first && i >= 0)
i--;
i++;
while (i < result.size() - 1)
{
result[i].second += it_ss->second.getval(result[i].first);
i++;
}
}
void merge(const AggregateFunctionTSgroupSumData & other)
{
//if ts has overlap, then aggregate two series by interpolation;
AggSeries tmp;
tmp.reserve(other.result.size() + result.size());
size_t i = 0, j = 0;
Int64 t1, t2;
Float64 v1, v2;
while (i < result.size() && j < other.result.size())
{
if (result[i].first < other.result[j].first)
{
if (j == 0)
{
tmp.emplace_back(result[i]);
}
else
{
t1 = other.result[j].first;
t2 = other.result[j - 1].first;
v1 = other.result[j].second;
v2 = other.result[j - 1].second;
Float64 value = result[i].second + v2 + (v1 - v2) * (Float64(result[i].first - t2)) / Float64(t1 - t2);
tmp.emplace_back(std::make_pair(result[i].first, value));
}
i++;
}
else if (result[i].first > other.result[j].first)
{
if (i == 0)
{
tmp.emplace_back(other.result[j]);
}
else
{
t1 = result[i].first;
t2 = result[i - 1].first;
v1 = result[i].second;
v2 = result[i - 1].second;
Float64 value = other.result[j].second + v2 + (v1 - v2) * (Float64(other.result[j].first - t2)) / Float64(t1 - t2);
tmp.emplace_back(std::make_pair(other.result[j].first, value));
}
j++;
}
else
{
tmp.emplace_back(std::make_pair(result[i].first, result[i].second + other.result[j].second));
i++;
j++;
}
}
while (i < result.size())
{
tmp.emplace_back(result[i]);
i++;
}
while (j < other.result.size())
{
tmp.push_back(other.result[j]);
j++;
}
swap(result, tmp);
}
void serialize(WriteBuffer & buf) const
{
size_t size = result.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(result.data()), sizeof(result[0]));
}
void deserialize(ReadBuffer & buf)
{
size_t size = 0;
readVarUInt(size, buf);
result.resize(size);
buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
}
};
template <bool rate>
class AggregateFunctionTSgroupSum final
: public IAggregateFunctionDataHelper<AggregateFunctionTSgroupSumData<rate>, AggregateFunctionTSgroupSum<rate>>
{
private:
public:
String getName() const override { return rate ? "TSgroupRateSum" : "TSgroupSum"; }
AggregateFunctionTSgroupSum(const DataTypes & arguments)
: IAggregateFunctionDataHelper<AggregateFunctionTSgroupSumData<rate>, AggregateFunctionTSgroupSum<rate>>(arguments, {})
{
if (!WhichDataType(arguments[0].get()).isUInt64())
throw Exception{"Illegal type " + arguments[0].get()->getName() + " of argument 1 of aggregate function " + getName()
+ ", must be UInt64",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (!WhichDataType(arguments[1].get()).isInt64())
throw Exception{"Illegal type " + arguments[1].get()->getName() + " of argument 2 of aggregate function " + getName()
+ ", must be Int64",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (!WhichDataType(arguments[2].get()).isFloat64())
throw Exception{"Illegal type " + arguments[2].get()->getName() + " of argument 3 of aggregate function " + getName()
+ ", must be Float64",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
DataTypePtr getReturnType() const override
{
auto datatypes = std::vector<DataTypePtr>();
datatypes.push_back(std::make_shared<DataTypeInt64>());
datatypes.push_back(std::make_shared<DataTypeFloat64>());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(datatypes));
}
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
{
auto uid = static_cast<const ColumnVector<UInt64> *>(columns[0])->getData()[row_num];
auto ts = static_cast<const ColumnVector<Int64> *>(columns[1])->getData()[row_num];
auto val = static_cast<const ColumnVector<Float64> *>(columns[2])->getData()[row_num];
if (uid && ts && val)
{
this->data(place).add(uid, ts, val);
}
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); }
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).serialize(buf); }
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); }
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{
const auto & value = this->data(place).result;
size_t size = value.size();
ColumnArray & arr_to = static_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
size_t old_size = offsets_to.back();
offsets_to.push_back(offsets_to.back() + size);
if (size)
{
typename ColumnInt64::Container & ts_to
= static_cast<ColumnInt64 &>(static_cast<ColumnTuple &>(arr_to.getData()).getColumn(0)).getData();
typename ColumnFloat64::Container & val_to
= static_cast<ColumnFloat64 &>(static_cast<ColumnTuple &>(arr_to.getData()).getColumn(1)).getData();
ts_to.reserve(old_size + size);
val_to.reserve(old_size + size);
size_t i = 0;
while (i < this->data(place).result.size())
{
ts_to.push_back(this->data(place).result[i].first);
val_to.push_back(this->data(place).result[i].second);
i++;
}
}
}
bool allocatesMemoryInArena() const override { return true; }
const char * getHeaderFilePath() const override { return __FILE__; }
};
}

View File

@ -41,7 +41,7 @@ void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory
void registerAggregateFunctionHistogram(AggregateFunctionFactory & factory);
void registerAggregateFunctionRetention(AggregateFunctionFactory & factory);
void registerAggregateFunctionTSgroupSum(AggregateFunctionFactory & factory);
void registerAggregateFunctions()
{
{
@ -70,6 +70,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsMaxIntersections(factory);
registerAggregateFunctionHistogram(factory);
registerAggregateFunctionRetention(factory);
registerAggregateFunctionTSgroupSum(factory);
registerAggregateFunctionMLMethod(factory);
registerAggregateFunctionEntropy(factory);
registerAggregateFunctionLeastSqr(factory);

View File

@ -1,14 +1,18 @@
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(clickhouse_compression .)
add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources})
target_link_libraries(clickhouse_compression PRIVATE clickhouse_parsers clickhouse_common_io ${ZSTD_LIBRARY} ${LZ4_LIBRARY} ${CITYHASH_LIBRARIES})
target_link_libraries(clickhouse_compression PRIVATE clickhouse_parsers clickhouse_common_io ${LZ4_LIBRARY} ${CITYHASH_LIBRARIES})
if(ZSTD_LIBRARY)
target_link_libraries(clickhouse_compression PRIVATE ${ZSTD_LIBRARY})
endif()
target_include_directories(clickhouse_compression PUBLIC ${DBMS_INCLUDE_DIR})
target_include_directories(clickhouse_compression SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
if (NOT USE_INTERNAL_LZ4_LIBRARY)
target_include_directories(clickhouse_compression SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR})
endif ()
if (NOT USE_INTERNAL_ZSTD_LIBRARY)
if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
target_include_directories(clickhouse_compression SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR})
endif ()

View File

@ -61,10 +61,13 @@ bool DataTypeDecimal<T>::tryReadText(T & x, ReadBuffer & istr, UInt32 precision,
}
template <typename T>
void DataTypeDecimal<T>::readText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale)
void DataTypeDecimal<T>::readText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale, bool csv)
{
UInt32 unread_scale = scale;
readDecimalText(istr, x, precision, unread_scale);
if (csv)
readCSVDecimalText(istr, x, precision, unread_scale);
else
readDecimalText(istr, x, precision, unread_scale);
x *= getScaleMultiplier(unread_scale);
}
@ -76,6 +79,13 @@ void DataTypeDecimal<T>::deserializeText(IColumn & column, ReadBuffer & istr, co
static_cast<ColumnType &>(column).getData().push_back(x);
}
template <typename T>
void DataTypeDecimal<T>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
T x;
readText(x, istr, true);
static_cast<ColumnType &>(column).getData().push_back(x);
}
template <typename T>
T DataTypeDecimal<T>::parseFromString(const String & str) const

View File

@ -91,6 +91,7 @@ public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
@ -175,8 +176,8 @@ public:
T parseFromString(const String & str) const;
void readText(T & x, ReadBuffer & istr) const { readText(x, istr, precision, scale); }
static void readText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale);
void readText(T & x, ReadBuffer & istr, bool csv = false) const { readText(x, istr, precision, scale, csv); }
static void readText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale, bool csv = false);
static bool tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale);
static T getScaleMultiplier(UInt32 scale);

View File

@ -63,7 +63,7 @@ Block BlockInputStreamFromRowInputStream::readImpl()
if (rows_portion_size && batch == rows_portion_size)
{
batch = 0;
if (!checkTimeLimit())
if (!checkTimeLimit() || isCancelled())
break;
}

View File

@ -60,17 +60,16 @@ if(USE_BASE64)
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR})
endif()
if (USE_XXHASH)
if(USE_XXHASH)
target_link_libraries(clickhouse_functions PRIVATE ${XXHASH_LIBRARY})
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${XXHASH_INCLUDE_DIR})
endif()
if (USE_HYPERSCAN)
target_link_libraries (clickhouse_functions PRIVATE ${HYPERSCAN_LIBRARY})
target_include_directories (clickhouse_functions SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR})
endif ()
if(USE_HYPERSCAN)
target_link_libraries(clickhouse_functions PRIVATE ${HYPERSCAN_LIBRARY})
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR})
endif()
if (USE_SIMDJSON)
if(USE_SIMDJSON)
target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
target_include_directories(clickhouse_functions PRIVATE ${SIMDJSON_INCLUDE_DIR})
endif ()
endif()

View File

@ -697,6 +697,23 @@ inline bool tryReadDecimalText(ReadBuffer & buf, T & x, unsigned int precision,
return true;
}
template <typename T>
inline void readCSVDecimalText(ReadBuffer & buf, T & x, unsigned int precision, unsigned int & scale)
{
if (buf.eof())
throwReadAfterEOF();
char maybe_quote = *buf.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
++buf.position();
readDecimalText(buf, x, precision, scale, false);
if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, buf);
}
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }

View File

@ -27,6 +27,8 @@ void ReadBufferFromKafkaConsumer::subscribe(const Names & topics)
consumer->poll(5s);
consumer->resume();
}
stalled = false;
}
void ReadBufferFromKafkaConsumer::unsubscribe()
@ -38,6 +40,12 @@ void ReadBufferFromKafkaConsumer::unsubscribe()
/// Do commit messages implicitly after we processed the previous batch.
bool ReadBufferFromKafkaConsumer::nextImpl()
{
/// NOTE: ReadBuffer was implemented with a immutable buffer contents in mind.
/// If we failed to poll any message once - don't try again.
/// Otherwise, the |poll_timeout| expectations get flawn.
if (stalled)
return false;
if (current == messages.end())
{
commit();
@ -48,7 +56,10 @@ bool ReadBufferFromKafkaConsumer::nextImpl()
}
if (messages.empty() || current == messages.end())
{
stalled = true;
return false;
}
if (auto err = current->get_error())
{

View File

@ -38,6 +38,7 @@ private:
Poco::Logger * log;
const size_t batch_size = 1;
const size_t poll_timeout = 0;
bool stalled = false;
Messages messages;
Messages::const_iterator current;

View File

@ -37,11 +37,14 @@ const char * auto_config_build[]
"USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@",
"USE_JEMALLOC", "@USE_JEMALLOC@",
"USE_TCMALLOC", "@USE_TCMALLOC@",
"USE_LFALLOC", "@USE_LFALLOC@",
"USE_LFALLOC_RANDOM_HINT", "@USE_LFALLOC_RANDOM_HINT@",
"USE_UNWIND", "@USE_UNWIND@",
"USE_ICU", "@USE_ICU@",
"USE_MYSQL", "@USE_MYSQL@",
"USE_RE2_ST", "@USE_RE2_ST@",
"USE_VECTORCLASS", "@USE_VECTORCLASS@",
"USE_LIBGSASL", "@USE_LIBGSASL@",
"USE_RDKAFKA", "@USE_RDKAFKA@",
"USE_CAPNP", "@USE_CAPNP@",
"USE_POCO_SQLODBC", "@USE_POCO_SQLODBC@",
@ -57,8 +60,7 @@ const char * auto_config_build[]
"USE_BROTLI", "@USE_BROTLI@",
"USE_SSL", "@USE_SSL@",
"USE_HYPERSCAN", "@USE_HYPERSCAN@",
"USE_LFALLOC", "@USE_LFALLOC@",
"USE_LFALLOC_RANDOM_HINT", "@USE_LFALLOC_RANDOM_HINT@",
"USE_SIMDJSON", "@USE_SIMDJSON@",
nullptr, nullptr
};

View File

@ -4,7 +4,7 @@
<stop_conditions>
<any_of>
<average_speed_not_changing_for_ms>1000</average_speed_not_changing_for_ms>
<average_speed_not_changing_for_ms>5000</average_speed_not_changing_for_ms>
<total_time_ms>10000</total_time_ms>
</any_of>
</stop_conditions>

View File

@ -32,6 +32,6 @@
</substitution>
</substitutions>
<query>SELECT {hash_func}(number, {buckets}) FROM system.numbers LIMIT 1000000000</query>
<query>SELECT sumburConsistentHash(toUInt32(number), {buckets}) FROM system.numbers LIMIT 10000</query>
<query>SELECT {hash_func}(number, {buckets}) FROM system.numbers</query>
<query>SELECT sumburConsistentHash(toUInt32(number), {buckets}) FROM system.numbers</query>
</test>

View File

@ -46,5 +46,5 @@
</substitution>
</substitutions>
<query>SELECT ignore({crypto_hash_func}({string})) FROM system.{table} LIMIT 10000000</query>
<query>SELECT ignore({crypto_hash_func}({string})) FROM system.{table}</query>
</test>

View File

@ -4,8 +4,8 @@
<stop_conditions>
<any_of>
<average_speed_not_changing_for_ms>300</average_speed_not_changing_for_ms>
<total_time_ms>1000</total_time_ms>
<average_speed_not_changing_for_ms>1000</average_speed_not_changing_for_ms>
<total_time_ms>5000</total_time_ms>
</any_of>
</stop_conditions>

View File

@ -51,5 +51,5 @@
</substitution>
</substitutions>
<query>SELECT ignore({gp_hash_func}({string})) FROM system.{table} LIMIT 10000000</query>
<query>SELECT ignore({gp_hash_func}({string})) FROM system.{table}</query>
</test>

View File

@ -1,7 +1,7 @@
<test>
<name>Simple Join Query</name>
<type>once</type>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -0,0 +1,5 @@
1 1.00 1.00 1.00
2 -1.00 -1.00 -1.00
3 1.00 1.00 1.00
4 -0.10 -0.10 -0.10
5 0.01 0.01 0.01

View File

@ -0,0 +1,17 @@
DROP TABLE IF EXISTS test;
CREATE TABLE test (key UInt64, d32 Decimal32(2), d64 Decimal64(2), d128 Decimal128(2)) ENGINE = Memory;
INSERT INTO test FORMAT CSV "1","1","1","1"
;
INSERT INTO test FORMAT CSV "2","-1","-1","-1"
;
INSERT INTO test FORMAT CSV "3","1.0","1.0","1.0"
;
INSERT INTO test FORMAT CSV "4","-0.1","-0.1","-0.1"
;
INSERT INTO test FORMAT CSV "5","0.010","0.010","0.010"
;
SELECT * FROM test ORDER BY key;
DROP TABLE test;

View File

@ -0,0 +1,2 @@
[(2,0.2),(3,0.8999999999999999),(7,2.0999999999999996),(8,2.4),(12,3.5999999999999996),(17,5.1000000000000005),(18,5.4),(24,7.199999999999999),(25,2.5)]
[(2,0),(3,0.09999999999999999),(7,0.3),(8,0.30000000000000004),(12,0.29999999999999993),(17,0.30000000000000004),(18,0.30000000000000004),(24,0.29999999999999993),(25,0.1)]

View File

@ -0,0 +1,10 @@
drop table if exists tsgroupsum_test;
create table tsgroupsum_test (uid UInt64, ts Int64, value Float64) engine=Memory;
insert into tsgroupsum_test values (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5);
insert into tsgroupsum_test values (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
select TSgroupSum(uid, ts, value) from (select * from tsgroupsum_test order by ts asc);
select TSgroupRateSum(uid, ts, value) from (select * from tsgroupsum_test order by ts asc);
drop table tsgroupsum_test;

View File

@ -14,4 +14,5 @@ fi
df -h
ccache --show-stats
ccache -M ${CCACHE_SIZE:=32G}
ccache --zero-stats
ccache --max-size=${CCACHE_SIZE:=32G}

View File

@ -4,10 +4,11 @@ set -x -e
mkdir -p build/build_docker
cd build/build_docker
ccache -s ||:
ccache --show-stats ||:
ccache --zero-stats ||:
rm -f CMakeCache.txt
cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS
ninja
ccache -s ||:
ccache --show-stats ||:
mv ./dbms/programs/clickhouse* /output
mv ./dbms/unit_tests_dbms /output

View File

@ -2,9 +2,10 @@
set -x -e
ccache -s ||:
ccache --show-stats ||:
ccache --zero-stats ||:
build/release --no-pbuilder
mv /*.deb /output
mv *.changes /output
mv *.buildinfo /output
ccache -s ||:
ccache --show-stats ||:

View File

@ -301,6 +301,60 @@ GROUP BY timeslot
└─────────────────────┴──────────────────────────────────────────────┘
```
## TSgroupSum(uid, timestamp, value) {#agg_function-tsgroupsum}
TSgroupSum can aggregate different time series that sample timestamp not alignment.
It will use linear interpolation between two sample timestamp and then sum time-series together.
`uid` is the time series unique id, UInt64.
`timestamp` is Int64 type in order to support millisecond or microsecond.
`value` is the metric.
Before use this function, timestamp should be in ascend order
Example:
```
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
```
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT TSgroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
And the result will be:
```
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
## TSgroupRateSum(uid, ts, val) {#agg_function-tsgroupratesum}
Similarly TSgroupRateSum, TSgroupRateSum will Calculate the rate of time-series and then sum rates together.
Also, timestamp should be in ascend order before use this function.
Use this function, the result above case will be:
```
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
## avg(x) {#agg_function-avg}
Calculates the average.

View File

@ -87,7 +87,10 @@ endif ()
find_package (Threads)
target_include_directories (common BEFORE PRIVATE ${CCTZ_INCLUDE_DIR})
if(CCTZ_INCLUDE_DIR)
target_include_directories(common BEFORE PRIVATE ${CCTZ_INCLUDE_DIR})
endif()
target_include_directories (common PUBLIC ${COMMON_INCLUDE_DIR})
if (NOT USE_INTERNAL_BOOST_LIBRARY)
@ -98,12 +101,15 @@ if(NOT USE_INTERNAL_POCO_LIBRARY)
target_include_directories (common SYSTEM BEFORE PUBLIC ${Poco_Foundation_INCLUDE_DIR})
endif()
if(CCTZ_LIBRARY)
target_link_libraries(common PRIVATE ${CCTZ_LIBRARY})
endif()
target_link_libraries (common
PUBLIC
${Poco_Foundation_LIBRARY}
${CITYHASH_LIBRARIES}
PRIVATE
${CCTZ_LIBRARY}
${Boost_FILESYSTEM_LIBRARY}
PUBLIC
${Boost_SYSTEM_LIBRARY}

View File

@ -1,10 +1,12 @@
option (USE_INTERNAL_CCTZ_LIBRARY "Set to FALSE to use system cctz library instead of bundled" ${NOT_UNBUNDLED})
if (USE_INTERNAL_CCTZ_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cctz/include/cctz/time_zone.h")
message (WARNING "submodule contrib/cctz is missing. to fix try run: \n git submodule update --init --recursive")
set (MISSING_INTERNAL_CCTZ_LIBRARY 1)
set (USE_INTERNAL_CCTZ_LIBRARY 0)
endif ()
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cctz/include/cctz/time_zone.h")
if(USE_INTERNAL_CCTZ_LIBRARY)
message(WARNING "submodule contrib/cctz is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set(USE_INTERNAL_CCTZ_LIBRARY 0)
set(MISSING_INTERNAL_CCTZ_LIBRARY 1)
endif()
if (NOT USE_INTERNAL_CCTZ_LIBRARY)
find_library (CCTZ_LIBRARY cctz)

View File

@ -1,7 +1,10 @@
find_package (Threads)
add_executable (zstd_test zstd_test.cpp)
target_link_libraries (zstd_test PRIVATE ${ZSTD_LIBRARY} common Threads::Threads)
if(ZSTD_LIBRARY)
target_link_libraries(zstd_test PRIVATE ${ZSTD_LIBRARY})
endif()
target_link_libraries (zstd_test PRIVATE common Threads::Threads)
add_executable (mutator mutator.cpp)
target_link_libraries(mutator PRIVATE clickhouse_common_io)
@ -9,6 +12,6 @@ target_link_libraries(mutator PRIVATE clickhouse_common_io)
add_executable (decompress_perf decompress_perf.cpp)
target_link_libraries(decompress_perf PRIVATE clickhouse_common_io clickhouse_compression ${LZ4_LIBRARY})
if (NOT USE_INTERNAL_ZSTD_LIBRARY)
if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
target_include_directories (zstd_test BEFORE PRIVATE ${ZSTD_INCLUDE_DIR})
endif ()