Merge branch 'master' into alter_on_top_of_mutations

alesapin committed 2020-01-14 11:26:27 +03:00
commit e7aa209bf3
33 changed files with 1253 additions and 285 deletions

View File

@ -1,4 +1,4 @@
include (CheckCXXSourceCompiles)
include (CheckCXXCompilerFlag)
# Try to add -Wflag if compiler supports it
macro (add_warning flag)

View File

@ -1,5 +1,12 @@
set(USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT ${NOT_UNBUNDLED})
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxx/CMakeLists.txt")
message(WARNING "submodule contrib/libcxx is missing. to fix try run: \n git submodule update --init --recursive")
set(USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT 0)
endif()
option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++" ${NOT_UNBUNDLED})
option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx and libcxxabi libraries instead of bundled" ${NOT_UNBUNDLED})
option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx and libcxxabi libraries instead of bundled" ${USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT})
if (USE_LIBCXX)
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.

View File

@ -437,8 +437,12 @@ ${ICU_SOURCE_DIR}/i18n/formattedval_iterimpl.cpp
${ICU_SOURCE_DIR}/i18n/formattedval_sbimpl.cpp
${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp)
file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/empty.cpp CONTENT " ")
enable_language(ASM)
set(ICUDATA_SOURCES ${ICUDATA_SOURCE_DIR}/icudt66l_dat.S)
set(ICUDATA_SOURCES
${ICUDATA_SOURCE_DIR}/icudt66l_dat.S
${CMAKE_CURRENT_BINARY_DIR}/empty.cpp # Without this, cmake can incorrectly detect the library type (OBJECT) instead of SHARED/STATIC
)
# Note that we don't like any kind of binary plugins (because of runtime dependencies, vulnerabilities, ABI incompatibilities).
add_definitions(-D_REENTRANT -DU_HAVE_ELF_H=1 -DU_HAVE_STRTOD_L=1 -DU_HAVE_XLOCALE_H=0 -DDEFAULT_ICU_PLUGINS="/dev/null")
@ -447,8 +451,8 @@ add_library(icuuc ${ICUUC_SOURCES})
add_library(icui18n ${ICUI18N_SOURCES})
add_library(icudata ${ICUDATA_SOURCES})
target_link_libraries(icuuc icudata)
target_link_libraries(icui18n icuuc)
target_link_libraries(icuuc PRIVATE icudata)
target_link_libraries(icui18n PRIVATE icuuc)
target_include_directories(icuuc SYSTEM PUBLIC ${ICU_SOURCE_DIR}/common/)
target_include_directories(icui18n SYSTEM PUBLIC ${ICU_SOURCE_DIR}/i18n/)

View File

@ -1,11 +1,11 @@
# This strings autochanged from release_lib.sh:
set(VERSION_REVISION 54431)
set(VERSION_REVISION 54432)
set(VERSION_MAJOR 20)
set(VERSION_MINOR 1)
set(VERSION_MINOR 2)
set(VERSION_PATCH 1)
set(VERSION_GITHASH 51d4c8a53be94504e3607b2232e12e5ef7a8ec28)
set(VERSION_DESCRIBE v20.1.1.1-prestable)
set(VERSION_STRING 20.1.1.1)
set(VERSION_GITHASH 4b9acaaa9099e71c36e5c818031149c5cba2bbdb)
set(VERSION_DESCRIBE v20.2.1.1-prestable)
set(VERSION_STRING 20.2.1.1)
# end of autochange
set(VERSION_EXTRA "" CACHE STRING "")

View File

@ -29,17 +29,23 @@ static IAggregateFunction * createWithNumericOrTimeType(const IDataType & argume
}
template <typename has_limit, typename ... TArgs>
template <typename Trait, typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataTypePtr & argument_type, TArgs ... args)
{
if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, has_limit>(*argument_type, argument_type, std::forward<TArgs>(args)...))
if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, Trait>(*argument_type, argument_type, std::forward<TArgs>(args)...))
return AggregateFunctionPtr(res);
WhichDataType which(argument_type);
if (which.idx == TypeIndex::String)
return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeString, has_limit::value>>(argument_type, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeString, Trait>>(argument_type, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeGeneral, has_limit::value>>(argument_type, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeGeneral, Trait>>(argument_type, std::forward<TArgs>(args)...);
// Linked list implementation doesn't show a noticeable performance improvement
// if (which.idx == TypeIndex::String)
// return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeString, Trait>>(argument_type, std::forward<TArgs>(args)...);
// return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeGeneral, Trait>>(argument_type, std::forward<TArgs>(args)...);
}
@ -72,9 +78,38 @@ static AggregateFunctionPtr createAggregateFunctionGroupArray(const std::string
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!limit_size)
return createAggregateFunctionGroupArrayImpl<std::false_type>(argument_types[0]);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<false, Sampler::NONE>>(argument_types[0]);
else
return createAggregateFunctionGroupArrayImpl<std::true_type>(argument_types[0], max_elems);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::NONE>>(argument_types[0], max_elems);
}
static AggregateFunctionPtr
createAggregateFunctionGroupArraySample(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertUnary(name, argument_types);
UInt64 max_elems = std::numeric_limits<UInt64>::max();
UInt64 seed = 123456;
UInt64 * params[2] = {&max_elems, &seed};
if (parameters.size() != 1 && parameters.size() != 2)
throw Exception("Incorrect number of parameters for aggregate function " + name + ", should be 1 or 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (auto i = 0ul; i < parameters.size(); ++i)
{
auto type = parameters[i].getType();
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception("Parameter for aggregate function " + name + " should be positive number", ErrorCodes::BAD_ARGUMENTS);
if ((type == Field::Types::Int64 && parameters[i].get<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[i].get<UInt64>() == 0))
throw Exception("Parameter for aggregate function " + name + " should be positive number", ErrorCodes::BAD_ARGUMENTS);
*params[i] = parameters[i].get<UInt64>();
}
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::RNG>>(argument_types[0], max_elems, seed);
}
}
@ -83,6 +118,7 @@ static AggregateFunctionPtr createAggregateFunctionGroupArray(const std::string
void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
{
factory.registerFunction("groupArray", createAggregateFunctionGroupArray);
factory.registerFunction("groupArraySample", createAggregateFunctionGroupArraySample);
}
}

View File

@ -1,40 +1,97 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <common/likely.h>
#include <type_traits>
#include <common/likely.h>
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE 0xFFFFFF
namespace DB
{
namespace ErrorCodes
{
extern const int TOO_LARGE_ARRAY_SIZE;
extern const int LOGICAL_ERROR;
}
enum class Sampler
{
NONE,
RNG,
DETERMINATOR // TODO
};
template <bool Thas_limit, Sampler Tsampler>
struct GroupArrayTrait
{
static constexpr bool has_limit = Thas_limit;
static constexpr Sampler sampler = Tsampler;
};
template <typename Trait>
static constexpr const char * getNameByTrait()
{
if (Trait::sampler == Sampler::NONE)
return "groupArray";
else if (Trait::sampler == Sampler::RNG)
return "groupArraySample";
// else if (Trait::sampler == Sampler::DETERMINATOR) // TODO
__builtin_unreachable();
}
template <typename T>
struct GroupArraySamplerData
{
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array value;
size_t total_values = 0;
pcg32_fast rng;
UInt64 genRandom(size_t lim)
{
/// With a large number of values, we will generate random numbers several times slower.
if (lim <= static_cast<UInt64>(rng.max()))
return static_cast<UInt32>(rng()) % static_cast<UInt32>(lim);
else
return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(rng.max()) + 1ULL) + static_cast<UInt64>(rng())) % lim;
}
void randomShuffle()
{
for (size_t i = 1; i < value.size(); ++i)
{
size_t j = genRandom(i + 1);
std::swap(value[i], value[j]);
}
}
};
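
For readers unfamiliar with the pattern, here is the genRandom() idea in isolation: when the limit can exceed the 32-bit generator's range, two draws are combined into a 64-bit value before the modulo reduction. This is a standalone sketch using std::mt19937 as a stand-in for pcg32_fast; the names and types are illustrative only.

```cpp
// Sketch of genRandom(): draw a uniform index in [0, lim) from a 32-bit generator,
// composing two draws when lim exceeds the generator's range.
#include <cstdint>
#include <iostream>
#include <random>

uint64_t gen_random(std::mt19937 & rng, uint64_t lim)
{
    if (lim <= static_cast<uint64_t>(rng.max()))
        return static_cast<uint32_t>(rng()) % static_cast<uint32_t>(lim);
    /// Two 32-bit draws give a value in [0, (max + 1)^2), which is then reduced modulo lim.
    return (static_cast<uint64_t>(rng()) * (static_cast<uint64_t>(rng.max()) + 1ULL)
            + static_cast<uint64_t>(rng())) % lim;
}

int main()
{
    std::mt19937 rng(123456);
    std::cout << gen_random(rng, 10) << '\n';            // small limit, single draw
    std::cout << gen_random(rng, 1ULL << 40) << '\n';    // limit above rng.max(), two draws
}
```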
/// A particular case is an implementation for numeric types.
template <typename T, bool has_sampler>
struct GroupArrayNumericData;
template <typename T>
struct GroupArrayNumericData
struct GroupArrayNumericData<T, false>
{
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
@ -43,51 +100,140 @@ struct GroupArrayNumericData
Array value;
};
template <typename T, typename Tlimit_num_elems>
class GroupArrayNumericImpl final
: public IAggregateFunctionDataHelper<GroupArrayNumericData<T>, GroupArrayNumericImpl<T, Tlimit_num_elems>>
template <typename T>
struct GroupArrayNumericData<T, true> : public GroupArraySamplerData<T>
{
static constexpr bool limit_num_elems = Tlimit_num_elems::value;
};
template <typename T, typename Trait>
class GroupArrayNumericImpl final
: public IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>
{
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
static constexpr bool limit_num_elems = Trait::has_limit;
DataTypePtr & data_type;
UInt64 max_elems;
UInt64 seed;
public:
explicit GroupArrayNumericImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<GroupArrayNumericData<T>, GroupArrayNumericImpl<T, Tlimit_num_elems>>({data_type_}, {})
, data_type(this->argument_types[0]), max_elems(max_elems_) {}
String getName() const override { return "groupArray"; }
DataTypePtr getReturnType() const override
explicit GroupArrayNumericImpl(
const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, {})
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
{
return std::make_shared<DataTypeArray>(data_type);
}
String getName() const override { return getNameByTrait<Trait>(); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
void insert(Data & a, const T & v, Arena * arena) const
{
++a.total_values;
if (a.value.size() < max_elems)
a.value.push_back(v, arena);
else
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < max_elems)
a.value[rnd] = v;
}
}
void create(AggregateDataPtr place) const override
{
[[maybe_unused]] auto a = new (place) Data;
if constexpr (Trait::sampler == Sampler::RNG)
a->rng.seed(seed);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
if (limit_num_elems && this->data(place).value.size() >= max_elems)
return;
if constexpr (Trait::sampler == Sampler::NONE)
{
if (limit_num_elems && this->data(place).value.size() >= max_elems)
return;
this->data(place).value.push_back(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num], arena);
this->data(place).value.push_back(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num], arena);
}
if constexpr (Trait::sampler == Sampler::RNG)
{
auto & a = this->data(place);
++a.total_values;
if (a.value.size() < max_elems)
a.value.push_back(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num], arena);
else
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < max_elems)
a.value[rnd] = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
}
}
// TODO
// if constexpr (Trait::sampler == Sampler::DETERMINATOR)
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
if constexpr (Trait::sampler == Sampler::NONE)
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
if (!limit_num_elems)
{
if (rhs_elems.value.size())
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
if (!limit_num_elems)
{
if (rhs_elems.value.size())
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
}
else
{
UInt64 elems_to_insert = std::min(static_cast<size_t>(max_elems) - cur_elems.value.size(), rhs_elems.value.size());
if (elems_to_insert)
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.begin() + elems_to_insert, arena);
}
}
else
if constexpr (Trait::sampler == Sampler::RNG)
{
UInt64 elems_to_insert = std::min(static_cast<size_t>(max_elems) - cur_elems.value.size(), rhs_elems.value.size());
if (elems_to_insert)
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.begin() + elems_to_insert, arena);
if (this->data(rhs).value.empty()) /// rhs state is empty
return;
auto & a = this->data(place);
auto & b = this->data(rhs);
if (b.total_values <= max_elems)
{
for (size_t i = 0; i < b.value.size(); ++i)
insert(a, b.value[i], arena);
}
else if (a.total_values <= max_elems)
{
decltype(a.value) from;
from.swap(a.value, arena);
a.value.assign(b.value.begin(), b.value.end(), arena);
a.total_values = b.total_values;
for (size_t i = 0; i < from.size(); ++i)
insert(a, from[i], arena);
}
else
{
a.randomShuffle();
a.total_values += b.total_values;
for (size_t i = 0; i < max_elems; ++i)
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < b.total_values)
a.value[i] = b.value[i];
}
}
}
// TODO
// if constexpr (Trait::sampler == Sampler::DETERMINATOR)
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
@ -96,6 +242,17 @@ public:
size_t size = value.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
if constexpr (Trait::sampler == Sampler::RNG)
{
DB::writeIntBinary<size_t>(this->data(place).total_values, buf);
std::ostringstream rng_stream;
rng_stream << this->data(place).rng;
DB::writeStringBinary(rng_stream.str(), buf);
}
// TODO
// if constexpr (Trait::sampler == Sampler::DETERMINATOR)
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
@ -113,6 +270,18 @@ public:
value.resize(size, arena);
buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
if constexpr (Trait::sampler == Sampler::RNG)
{
DB::readIntBinary<size_t>(this->data(place).total_values, buf);
std::string rng_string;
DB::readStringBinary(rng_string, buf);
std::istringstream rng_stream(rng_string);
rng_stream >> this->data(place).rng;
}
// TODO
// if constexpr (Trait::sampler == Sampler::DETERMINATOR)
}
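
The serialize()/deserialize() pair above streams the generator state through a string. The sketch below shows the same round-trip with std::mt19937, which (like pcg32_fast) supports stream insertion and extraction of its state; it is an illustration, not the ClickHouse code. The textual state is variable-length, which is why the real code frames it with writeStringBinary/readStringBinary.

```cpp
// Round-tripping a random engine's state through a string, as serialize()/deserialize() do.
#include <cassert>
#include <random>
#include <sstream>
#include <string>

int main()
{
    std::mt19937 rng(123456);
    rng();  // advance the state a bit

    std::ostringstream out;
    out << rng;                       // textual engine state
    std::string saved = out.str();

    std::mt19937 restored;
    std::istringstream in(saved);
    in >> restored;                   // restore the exact state

    assert(rng() == restored());      // both engines now produce the same sequence
}
```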
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
@ -132,10 +301,7 @@ public:
}
}
bool allocatesMemoryInArena() const override
{
return true;
}
bool allocatesMemoryInArena() const override { return true; }
};
@ -145,26 +311,24 @@ public:
/// Nodes used to implement a linked list for storage of groupArray states
template <typename Node>
struct GroupArrayListNodeBase
struct GroupArrayNodeBase
{
Node * next;
UInt64 size; // size of payload
/// Returns pointer to actual payload
char * data()
{
static_assert(sizeof(GroupArrayListNodeBase) == sizeof(Node));
return reinterpret_cast<char *>(this) + sizeof(Node);
}
char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }
const char * data() const { return reinterpret_cast<const char *>(this) + sizeof(Node); }
/// Clones existing node (does not modify next field)
Node * clone(Arena * arena)
Node * clone(Arena * arena) const
{
return reinterpret_cast<Node *>(const_cast<char *>(arena->alignedInsert(reinterpret_cast<char *>(this), sizeof(Node) + size, alignof(Node))));
return reinterpret_cast<Node *>(
const_cast<char *>(arena->alignedInsert(reinterpret_cast<const char *>(this), sizeof(Node) + size, alignof(Node))));
}
/// Write node to buffer
void write(WriteBuffer & buf)
void write(WriteBuffer & buf) const
{
writeVarUInt(size, buf);
buf.write(data(), size);
@ -183,6 +347,290 @@ struct GroupArrayListNodeBase
}
};
struct GroupArrayNodeString : public GroupArrayNodeBase<GroupArrayNodeString>
{
using Node = GroupArrayNodeString;
/// Create node from string
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
{
StringRef string = assert_cast<const ColumnString &>(column).getDataAt(row_num);
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + string.size, alignof(Node)));
node->size = string.size;
memcpy(node->data(), string.data, string.size);
return node;
}
void insertInto(IColumn & column)
{
assert_cast<ColumnString &>(column).insertData(data(), size);
}
};
struct GroupArrayNodeGeneral : public GroupArrayNodeBase<GroupArrayNodeGeneral>
{
using Node = GroupArrayNodeGeneral;
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
{
const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node));
StringRef value = column.serializeValueIntoArena(row_num, *arena, begin);
Node * node = reinterpret_cast<Node *>(const_cast<char *>(begin));
node->size = value.size;
return node;
}
void insertInto(IColumn & column) { column.deserializeAndInsertFromArena(data()); }
};
template <typename Node, bool has_sampler>
struct GroupArrayGeneralData;
template <typename Node>
struct GroupArrayGeneralData<Node, false>
{
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(Node *), 4096>;
using Array = PODArray<Node *, 32, Allocator>;
Array value;
};
template <typename Node>
struct GroupArrayGeneralData<Node, true> : public GroupArraySamplerData<Node *>
{
};
/// Implementation of groupArray for String or any ComplexObject via Array
template <typename Node, typename Trait>
class GroupArrayGeneralImpl final
: public IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>
{
static constexpr bool limit_num_elems = Trait::has_limit;
using Data = GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>;
static Data & data(AggregateDataPtr place) { return *reinterpret_cast<Data *>(place); }
static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast<const Data *>(place); }
DataTypePtr & data_type;
UInt64 max_elems;
UInt64 seed;
public:
GroupArrayGeneralImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>(
{data_type_}, {})
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
{
}
String getName() const override { return getNameByTrait<Trait>(); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
void insert(Data & a, const Node * v, Arena * arena) const
{
++a.total_values;
if (a.value.size() < max_elems)
a.value.push_back(v->clone(arena), arena);
else
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < max_elems)
a.value[rnd] = v->clone(arena);
}
}
void create(AggregateDataPtr place) const override
{
[[maybe_unused]] auto a = new (place) Data;
if constexpr (Trait::sampler == Sampler::RNG)
a->rng.seed(seed);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
if constexpr (Trait::sampler == Sampler::NONE)
{
if (limit_num_elems && data(place).value.size() >= max_elems)
return;
Node * node = Node::allocate(*columns[0], row_num, arena);
data(place).value.push_back(node, arena);
}
if constexpr (Trait::sampler == Sampler::RNG)
{
auto & a = data(place);
++a.total_values;
if (a.value.size() < max_elems)
a.value.push_back(Node::allocate(*columns[0], row_num, arena), arena);
else
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < max_elems)
a.value[rnd] = Node::allocate(*columns[0], row_num, arena);
}
}
// TODO
// if constexpr (Trait::sampler == Sampler::DETERMINATOR)
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
if constexpr (Trait::sampler == Sampler::NONE)
mergeNoSampler(place, rhs, arena);
else if constexpr (Trait::sampler == Sampler::RNG)
mergeWithRNGSampler(place, rhs, arena);
// TODO
// else if constexpr (Trait::sampler == Sampler::DETERMINATOR)
}
void ALWAYS_INLINE mergeNoSampler(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const
{
if (data(rhs).value.empty()) /// rhs state is empty
return;
UInt64 new_elems;
if (limit_num_elems)
{
if (data(place).value.size() >= max_elems)
return;
new_elems = std::min(data(rhs).value.size(), static_cast<size_t>(max_elems) - data(place).value.size());
}
else
new_elems = data(rhs).value.size();
auto & a = data(place).value;
auto & b = data(rhs).value;
for (UInt64 i = 0; i < new_elems; ++i)
a.push_back(b[i]->clone(arena), arena);
}
void ALWAYS_INLINE mergeWithRNGSampler(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const
{
if (data(rhs).value.empty()) /// rhs state is empty
return;
auto & a = data(place);
auto & b = data(rhs);
if (b.total_values <= max_elems)
{
for (size_t i = 0; i < b.value.size(); ++i)
insert(a, b.value[i], arena);
}
else if (a.total_values <= max_elems)
{
decltype(a.value) from;
from.swap(a.value, arena);
for (auto & node : b.value)
a.value.push_back(node->clone(arena), arena);
a.total_values = b.total_values;
for (size_t i = 0; i < from.size(); ++i)
insert(a, from[i], arena);
}
else
{
a.randomShuffle();
a.total_values += b.total_values;
for (size_t i = 0; i < max_elems; ++i)
{
UInt64 rnd = a.genRandom(a.total_values);
if (rnd < b.total_values)
a.value[i] = b.value[i]->clone(arena);
}
}
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
writeVarUInt(data(place).value.size(), buf);
auto & value = data(place).value;
for (auto & node : value)
node->write(buf);
if constexpr (Trait::sampler == Sampler::RNG)
{
DB::writeIntBinary<size_t>(data(place).total_values, buf);
std::ostringstream rng_stream;
rng_stream << data(place).rng;
DB::writeStringBinary(rng_stream.str(), buf);
}
// TODO
// if constexpr (Trait::sampler == Sampler::DETERMINATOR)
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
{
UInt64 elems;
readVarUInt(elems, buf);
if (unlikely(elems == 0))
return;
if (unlikely(elems > AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE))
throw Exception("Too large array size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
if (limit_num_elems && unlikely(elems > max_elems))
throw Exception("Too large array size, it should not exceed " + toString(max_elems), ErrorCodes::TOO_LARGE_ARRAY_SIZE);
auto & value = data(place).value;
value.resize(elems, arena);
for (UInt64 i = 0; i < elems; ++i)
value[i] = Node::read(buf, arena);
if constexpr (Trait::sampler == Sampler::RNG)
{
DB::readIntBinary<size_t>(data(place).total_values, buf);
std::string rng_string;
DB::readStringBinary(rng_string, buf);
std::istringstream rng_stream(rng_string);
rng_stream >> data(place).rng;
}
// TODO
// if constexpr (Trait::sampler == Sampler::DETERMINATOR)
}
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{
auto & column_array = assert_cast<ColumnArray &>(to);
auto & offsets = column_array.getOffsets();
offsets.push_back(offsets.back() + data(place).value.size());
auto & column_data = column_array.getData();
if (std::is_same_v<Node, GroupArrayNodeString>)
{
auto & string_offsets = assert_cast<ColumnString &>(column_data).getOffsets();
string_offsets.reserve(string_offsets.size() + data(place).value.size());
}
auto & value = data(place).value;
for (auto & node : value)
node->insertInto(column_data);
}
bool allocatesMemoryInArena() const override { return true; }
};
template <typename Node>
struct GroupArrayListNodeBase : public GroupArrayNodeBase<Node>
{
Node * next;
};
struct GroupArrayListNodeString : public GroupArrayListNodeBase<GroupArrayListNodeString>
{
using Node = GroupArrayListNodeString;
@ -200,10 +648,7 @@ struct GroupArrayListNodeString : public GroupArrayListNodeBase<GroupArrayListNo
return node;
}
void insertInto(IColumn & column)
{
assert_cast<ColumnString &>(column).insertData(data(), size);
}
void insertInto(IColumn & column) { assert_cast<ColumnString &>(column).insertData(data(), size); }
};
struct GroupArrayListNodeGeneral : public GroupArrayListNodeBase<GroupArrayListNodeGeneral>
@ -222,10 +667,7 @@ struct GroupArrayListNodeGeneral : public GroupArrayListNodeBase<GroupArrayListN
return node;
}
void insertInto(IColumn & column)
{
column.deserializeAndInsertFromArena(data());
}
void insertInto(IColumn & column) { column.deserializeAndInsertFromArena(data()); }
};
@ -240,23 +682,27 @@ struct GroupArrayGeneralListData
/// Implementation of groupArray for String or any ComplexObject via linked list
/// It has poor performance in case of many small objects
template <typename Node, bool limit_num_elems>
template <typename Node, typename Trait>
class GroupArrayGeneralListImpl final
: public IAggregateFunctionDataHelper<GroupArrayGeneralListData<Node>, GroupArrayGeneralListImpl<Node, limit_num_elems>>
: public IAggregateFunctionDataHelper<GroupArrayGeneralListData<Node>, GroupArrayGeneralListImpl<Node, Trait>>
{
static constexpr bool limit_num_elems = Trait::has_limit;
using Data = GroupArrayGeneralListData<Node>;
static Data & data(AggregateDataPtr place) { return *reinterpret_cast<Data*>(place); }
static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast<const Data*>(place); }
static Data & data(AggregateDataPtr place) { return *reinterpret_cast<Data *>(place); }
static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast<const Data *>(place); }
DataTypePtr & data_type;
UInt64 max_elems;
public:
GroupArrayGeneralListImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<GroupArrayGeneralListData<Node>, GroupArrayGeneralListImpl<Node, limit_num_elems>>({data_type_}, {})
, data_type(this->argument_types[0]), max_elems(max_elems_) {}
: IAggregateFunctionDataHelper<GroupArrayGeneralListData<Node>, GroupArrayGeneralListImpl<Node, Trait>>({data_type_}, {})
, data_type(this->argument_types[0])
, max_elems(max_elems_)
{
}
String getName() const override { return "groupArray"; }
String getName() const override { return getNameByTrait<Trait>(); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
@ -295,7 +741,7 @@ public:
if (data(place).elems >= max_elems)
return;
new_elems = std::min(data(place).elems + data(rhs).elems, max_elems);
new_elems = std::min(data(place).elems + data(rhs).elems, static_cast<size_t>(max_elems));
}
else
{
@ -394,10 +840,7 @@ public:
}
}
bool allocatesMemoryInArena() const override
{
return true;
}
bool allocatesMemoryInArena() const override { return true; }
};
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE
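
The Sampler::RNG path implemented in this header is reservoir sampling: the first max_elems values are stored directly, and each later value replaces a random slot with decreasing probability. A minimal standalone sketch of that idea, with hypothetical names and plain standard-library types:

```cpp
// Minimal reservoir-sampling sketch mirroring the Sampler::RNG path:
// the first `capacity` values are stored directly; afterwards each new value
// replaces a random slot with probability capacity / total_values.
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

struct Reservoir
{
    size_t capacity;
    size_t total_values = 0;
    std::vector<uint64_t> value;
    std::mt19937_64 rng{123456};

    void insert(uint64_t v)
    {
        ++total_values;
        if (value.size() < capacity)
            value.push_back(v);
        else
        {
            uint64_t rnd = rng() % total_values;   // index in [0, total_values)
            if (rnd < capacity)
                value[rnd] = v;                    // keep v with probability capacity / total_values
        }
    }
};

int main()
{
    Reservoir sample{10};
    for (uint64_t i = 0; i < 1024; ++i)
        sample.insert(i);
    for (uint64_t v : sample.value)
        std::cout << v << ' ';
    std::cout << '\n';  // 10 values drawn roughly uniformly from 0..1023
}
```

Merging two such states, as merge() above does, additionally has to combine total_values and reshuffle so that the merged reservoir remains a uniform sample.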

View File

@ -35,6 +35,12 @@ public:
{
// Do nothing, trash in arena remains.
}
protected:
static constexpr size_t getStackThreshold()
{
return 0;
}
};
@ -66,6 +72,12 @@ public:
static void free(void * /*buf*/, size_t /*size*/)
{
}
protected:
static constexpr size_t getStackThreshold()
{
return 0;
}
};
@ -100,6 +112,12 @@ public:
if (size >= REAL_ALLOCATION_TRESHOLD)
TRealAllocator::free(buf, size);
}
protected:
static constexpr size_t getStackThreshold()
{
return 0;
}
};
@ -136,6 +154,12 @@ public:
}
void free(void * /*buf*/, size_t /*size*/) {}
protected:
static constexpr size_t getStackThreshold()
{
return N;
}
};
}
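
The new getStackThreshold() hook lets a container ask its allocator whether small buffers live in an inline stack array or always on the heap. A toy sketch of the pattern, with hypothetical names rather than the real ClickHouse allocators:

```cpp
// Toy illustration of getStackThreshold(): allocators report the size below which
// storage sits in an inline buffer, and a container uses that to detect stack-backed data.
#include <cstdlib>
#include <iostream>

template <size_t N>
struct AllocatorWithInlineBuffer
{
    alignas(16) char stack_memory[N];

    void * alloc(size_t size) { return size <= N ? stack_memory : std::malloc(size); }
    void free(void * buf, size_t size) { if (size > N) std::free(buf); }

    static constexpr size_t getStackThreshold() { return N; }
};

struct HeapAllocator
{
    void * alloc(size_t size) { return std::malloc(size); }
    void free(void * buf, size_t) { std::free(buf); }

    static constexpr size_t getStackThreshold() { return 0; }  // never on the stack
};

template <typename Allocator>
bool isAllocatedFromStack(size_t allocated_bytes)
{
    constexpr size_t stack_threshold = Allocator::getStackThreshold();
    return stack_threshold > 0 && allocated_bytes <= stack_threshold;
}

int main()
{
    std::cout << isAllocatedFromStack<AllocatorWithInlineBuffer<64>>(32) << '\n';  // 1
    std::cout << isAllocatedFromStack<HeapAllocator>(32) << '\n';                  // 0
}
```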

View File

@ -150,7 +150,7 @@ protected:
bool isAllocatedFromStack() const
{
constexpr size_t stack_threshold = TAllocator::getStackThreshold();
static constexpr size_t stack_threshold = TAllocator::getStackThreshold();
return (stack_threshold > 0) && (allocated_bytes() <= stack_threshold);
}
@ -453,7 +453,8 @@ public:
this->c_end += bytes_to_copy;
}
void swap(PODArray & rhs)
template <typename... TAllocatorParams>
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params)
{
#ifndef NDEBUG
this->unprotect();
@ -463,7 +464,7 @@ public:
/// Swap two PODArray objects, arr1 and arr2, that satisfy the following conditions:
/// - The elements of arr1 are stored on stack.
/// - The elements of arr2 are stored on heap.
auto swap_stack_heap = [this](PODArray & arr1, PODArray & arr2)
auto swap_stack_heap = [&](PODArray & arr1, PODArray & arr2)
{
size_t stack_size = arr1.size();
size_t stack_allocated = arr1.allocated_bytes();
@ -480,18 +481,18 @@ public:
arr1.c_end = arr1.c_start + this->byte_size(heap_size);
/// Allocate stack space for arr2.
arr2.alloc(stack_allocated);
arr2.alloc(stack_allocated, std::forward<TAllocatorParams>(allocator_params)...);
/// Copy the stack content.
memcpy(arr2.c_start, stack_c_start, this->byte_size(stack_size));
arr2.c_end = arr2.c_start + this->byte_size(stack_size);
};
auto do_move = [this](PODArray & src, PODArray & dest)
auto do_move = [&](PODArray & src, PODArray & dest)
{
if (src.isAllocatedFromStack())
{
dest.dealloc();
dest.alloc(src.allocated_bytes());
dest.alloc(src.allocated_bytes(), std::forward<TAllocatorParams>(allocator_params)...);
memcpy(dest.c_start, src.c_start, this->byte_size(src.size()));
dest.c_end = dest.c_start + (src.c_end - src.c_start);
@ -569,24 +570,26 @@ public:
}
}
void assign(size_t n, const T & x)
template <typename... TAllocatorParams>
void assign(size_t n, const T & x, TAllocatorParams &&... allocator_params)
{
this->resize(n);
this->resize(n, std::forward<TAllocatorParams>(allocator_params)...);
std::fill(begin(), end(), x);
}
template <typename It1, typename It2>
void assign(It1 from_begin, It2 from_end)
template <typename It1, typename It2, typename... TAllocatorParams>
void assign(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
{
size_t required_capacity = from_end - from_begin;
if (required_capacity > this->capacity())
this->reserve(roundUpToPowerOfTwoOrZero(required_capacity));
this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward<TAllocatorParams>(allocator_params)...);
size_t bytes_to_copy = this->byte_size(required_capacity);
memcpy(this->c_start, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
this->c_end = this->c_start + bytes_to_copy;
}
// ISO C++ has strict ambiguity rules, thus we cannot apply TAllocatorParams here.
void assign(const PODArray & from)
{
assign(from.begin(), from.end());

View File

@ -14,7 +14,7 @@ static bool operator==(const IDataType & left, const IDataType & right)
return left.equals(right);
}
static std::ostream & operator<<(std::ostream & ostr, const IDataType & dt)
std::ostream & operator<<(std::ostream & ostr, const IDataType & dt)
{
return ostr << dt.getName();
}

View File

@ -149,7 +149,7 @@ protected:
}
while (true)
{
UInt64 timestamp_usec = static_cast<UInt64>(timestamp.epochMicroseconds());
UInt64 timestamp_usec = static_cast<UInt64>(Poco::Timestamp().epochMicroseconds());
/// Or spurious wakeup.
bool signaled = std::cv_status::no_timeout == storage->condition.wait_for(lock,
@ -166,7 +166,7 @@ protected:
else
{
// heartbeat
last_event_timestamp_usec = static_cast<UInt64>(timestamp.epochMicroseconds());
last_event_timestamp_usec = static_cast<UInt64>(Poco::Timestamp().epochMicroseconds());
return { getHeader(), true };
}
}
@ -185,7 +185,7 @@ protected:
num_updates += 1;
}
last_event_timestamp_usec = static_cast<UInt64>(timestamp.epochMicroseconds());
last_event_timestamp_usec = static_cast<UInt64>(Poco::Timestamp().epochMicroseconds());
return { res, true };
}
@ -207,7 +207,6 @@ private:
UInt64 heartbeat_interval_usec;
UInt64 temporary_live_view_timeout_sec;
UInt64 last_event_timestamp_usec = 0;
Poco::Timestamp timestamp;
};
}

View File

@ -682,6 +682,57 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new
}
void MergeTreeData::setStoragePolicy(const String & new_storage_policy_name, bool only_check)
{
const auto old_storage_policy = getStoragePolicy();
const auto & new_storage_policy = global_context.getStoragePolicySelector()[new_storage_policy_name];
std::unordered_set<String> new_volume_names;
for (const auto & volume : new_storage_policy->getVolumes())
new_volume_names.insert(volume->getName());
for (const auto & volume : old_storage_policy->getVolumes())
{
if (new_volume_names.count(volume->getName()) == 0)
throw Exception("New storage policy shall contain volumes of old one", ErrorCodes::LOGICAL_ERROR);
std::unordered_set<String> new_disk_names;
for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->disks)
new_disk_names.insert(disk->getName());
for (const auto & disk : volume->disks)
if (new_disk_names.count(disk->getName()) == 0)
throw Exception("New storage policy shall contain disks of old one", ErrorCodes::LOGICAL_ERROR);
}
std::unordered_set<String> all_diff_disk_names;
for (const auto & disk : new_storage_policy->getDisks())
all_diff_disk_names.insert(disk->getName());
for (const auto & disk : old_storage_policy->getDisks())
all_diff_disk_names.erase(disk->getName());
for (const String & disk_name : all_diff_disk_names)
{
const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name));
if (Poco::File(path).exists())
throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR);
}
if (!only_check)
{
for (const String & disk_name : all_diff_disk_names)
{
const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name));
Poco::File(path).createDirectories();
Poco::File(path + "detached").createDirectory();
}
storage_policy = new_storage_policy;
/// TODO: Query lock is fine but what about background moves??? And downloading of parts?
}
}
void MergeTreeData::MergingParams::check(const NamesAndTypesList & columns) const
{
if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing)
@ -1130,7 +1181,7 @@ void MergeTreeData::clearOldTemporaryDirectories(ssize_t custom_directories_life
}
MergeTreeData::DataPartsVector MergeTreeData::grabOldParts()
MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force)
{
DataPartsVector res;
@ -1153,8 +1204,8 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts()
auto part_remove_time = part->remove_time.load(std::memory_order_relaxed);
if (part.unique() && /// Grab only parts that are not used by anyone (SELECTs for example).
part_remove_time < now &&
now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds())
((part_remove_time < now &&
now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds()) || force))
{
parts_to_delete.emplace_back(it);
}
@ -1229,9 +1280,9 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa
}
}
void MergeTreeData::clearOldPartsFromFilesystem()
void MergeTreeData::clearOldPartsFromFilesystem(bool force)
{
DataPartsVector parts_to_remove = grabOldParts();
DataPartsVector parts_to_remove = grabOldParts(force);
clearPartsFromFilesystem(parts_to_remove);
removePartsFinally(parts_to_remove);
}
@ -1479,6 +1530,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S
throw Exception{"Setting '" + changed_setting.name + "' is readonly for storage '" + getName() + "'",
ErrorCodes::READONLY_SETTING};
}
if (changed_setting.name == "storage_policy")
setStoragePolicy(changed_setting.value.safeGet<String>(), /* only_check = */ true);
}
}
@ -1816,6 +1870,10 @@ void MergeTreeData::changeSettings(
copy.applyChanges(new_changes);
storage_settings.set(std::make_unique<const MergeTreeSettings>(copy));
settings_ast = new_settings;
for (const auto & change : new_changes)
if (change.name == "storage_policy")
setStoragePolicy(change.value.safeGet<String>());
}
}

View File

@ -506,9 +506,9 @@ public:
/// If the part is Obsolete and not used by anybody else, immediately delete it from filesystem and remove from memory.
void tryRemovePartImmediately(DataPartPtr && part);
/// Returns old inactive parts that can be deleted. At the same time removes them from the list of parts
/// but not from the disk.
DataPartsVector grabOldParts();
/// Returns old inactive parts that can be deleted. At the same time removes them from the list of parts but not from the disk.
/// If 'force' - don't wait for old_parts_lifetime.
DataPartsVector grabOldParts(bool force = false);
/// Reverts the changes made by grabOldParts(), parts should be in Deleting state.
void rollbackDeletingParts(const DataPartsVector & parts);
@ -517,7 +517,8 @@ public:
void removePartsFinally(const DataPartsVector & parts);
/// Delete irrelevant parts from memory and disk.
void clearOldPartsFromFilesystem();
/// If 'force' - don't wait for old_parts_lifetime.
void clearOldPartsFromFilesystem(bool force = false);
void clearPartsFromFilesystem(const DataPartsVector & parts);
/// Delete all directories which names begin with "tmp"
@ -901,6 +902,8 @@ protected:
void setTTLExpressions(const ColumnsDescription::ColumnTTLs & new_column_ttls,
const ASTPtr & new_ttl_table_ast, bool only_check = false);
void setStoragePolicy(const String & new_storage_policy_name, bool only_check = false);
/// Expression for column type conversion.
/// If no conversions are needed, out_expression=nullptr.
/// out_rename_map maps column files for the out_expression onto new table files.

View File

@ -105,7 +105,7 @@ struct MergeTreeSettings : public SettingsCollection<MergeTreeSettings>
/// We check settings after storage creation
static bool isReadonlySetting(const String & name)
{
return name == "index_granularity" || name == "index_granularity_bytes" || name == "storage_policy";
return name == "index_granularity" || name == "index_granularity_bytes";
}
};

View File

@ -110,6 +110,7 @@ void StorageMergeTree::shutdown()
if (shutdown_called)
return;
shutdown_called = true;
clearOldPartsFromFilesystem(true);
merger_mutator.merges_blocker.cancelForever();
parts_mover.moves_blocker.cancelForever();

View File

@ -3006,6 +3006,7 @@ void StorageReplicatedMergeTree::startup()
void StorageReplicatedMergeTree::shutdown()
{
clearOldPartsFromFilesystem(true);
/// Cancel fetches, merges and mutations to force the queue_task to finish ASAP.
fetcher.blocker.cancelForever();
merger_mutator.merges_blocker.cancelForever();

View File

@ -109,6 +109,7 @@ const char * auto_contributors[] {
"Derek Perkins",
"Ding Xiang Fei",
"Dmitrii Kovalkov",
"Dmitrii Raev",
"Dmitry Bilunov",
"Dmitry Galuza",
"Dmitry Luhtionov",
@ -118,6 +119,7 @@ const char * auto_contributors[] {
"Dmitry Rubashkin",
"Dmitry S..ky / skype: dvska-at-skype",
"Doge",
"Dr. Strange Looker",
"Eldar Zaitov",
"Elena Baskakova",
"Elghazal Ahmed",
@ -176,10 +178,12 @@ const char * auto_contributors[] {
"Ivan Zhukov",
"JaosnHsieh",
"Jason",
"Javi santana bot",
"Jean Baptiste Favre",
"Jonatas Freitas",
"Karl Pietrzak",
"Keiji Yoshida",
"Kiran",
"Kirill Danshin",
"Kirill Malev",
"Kirill Shvakov",
@ -187,6 +191,7 @@ const char * auto_contributors[] {
"KochetovNicolai",
"Konstantin Grabar",
"Konstantin Lebedev",
"Konstantin Malanchev",
"Konstantin Podshumok",
"Korviakov Andrey",
"Leonardo Cecchi",
@ -210,6 +215,7 @@ const char * auto_contributors[] {
"Marti Raudsepp",
"Martijn Bakker",
"Masha",
"Matwey V. Kornilov",
"Max",
"Max Akhmedov",
"Max Vetrov",
@ -220,6 +226,7 @@ const char * auto_contributors[] {
"Maxim Kuznetsov",
"Maxim Nikulin",
"Maxim Sabyanin",
"Maxim Ulanovskiy",
"MaximAL",
"MeiK",
"Metehan Çetinkaya",
@ -234,6 +241,7 @@ const char * auto_contributors[] {
"Mikhail Fandyushin",
"Mikhail Filimonov",
"Mikhail Korotov",
"Mikhail Nacharov",
"Mikhail Salosin",
"Mikhail Surin",
"Mikhail f. Shiryaev",
@ -277,6 +285,7 @@ const char * auto_contributors[] {
"Persiyanov Dmitriy Andreevich",
"Pervakov Grigory",
"Philippe Ombredanne",
"Pradeep Chhetri",
"Quid37",
"Rafael David Tinoco",
"Ramazan Polat",
@ -288,6 +297,7 @@ const char * auto_contributors[] {
"Roman Nozdrin",
"Roman Peshkurov",
"Roman Tsisyk",
"Ruslan",
"Sabyanin Maxim",
"SaltTan",
"Samuel Chou",
@ -298,8 +308,10 @@ const char * auto_contributors[] {
"Sergei Tsetlin (rekub)",
"Sergey Elantsev",
"Sergey Fedorov",
"Sergey Kononenko",
"Sergey Lazarev",
"Sergey Magidovich",
"Sergey Shtykov",
"Sergey V. Galtsev",
"Sergey Zaikin",
"Sergi Vladykin",
@ -358,6 +370,7 @@ const char * auto_contributors[] {
"William Shallum",
"Winter Zhang",
"Yangkuan Liu",
"Yatsishin Ilya",
"Yegor Andreenko",
"Yiğit Konur",
"Yuri Dyachenko",
@ -406,6 +419,7 @@ const char * auto_contributors[] {
"decaseal",
"dependabot[bot]",
"dimarub2000",
"dinosaur",
"dmitrii",
"dmitriiut",
"dmitry kuzmin",
@ -448,19 +462,24 @@ const char * auto_contributors[] {
"liu-bov",
"liuyangkuan",
"liuyimin",
"liyang",
"lomberts",
"luc1ph3r",
"maiha",
"malkfilipp",
"maqroll",
"maxim",
"maxkuzn",
"maxulan",
"memo",
"meo",
"mf5137",
"mfridental",
"miha-g",
"millb",
"morty",
"moscas",
"nagorny",
"never lee",
"nicelulu",
"nonexistence",
@ -495,17 +514,22 @@ const char * auto_contributors[] {
"sundyli",
"svladykin",
"tai",
"taiyang-li",
"tavplubix",
"topvisor",
"tyrionhuang",
"unknown",
"urgordeadbeef",
"velom",
"vicdashkov",
"vinity",
"vxider",
"vzakaznikov",
"wangchao",
"zamulla",
"zhang2014",
"zhangxiao018",
"zhen ni",
"zhukai",
"Šimon Podlipský",
"Артем Стрельцов",

View File

@ -30,6 +30,17 @@
</volumes>
</small_jbod_with_external>
<one_more_small_jbod_with_external>
<volumes>
<m>
<disk>jbod1</disk>
</m>
<e>
<disk>external</disk>
</e>
</volumes>
</one_more_small_jbod_with_external>
<!-- store on JBOD by default (round-robin), store big parts on external -->
<jbods_with_external>
<volumes>

View File

@ -84,6 +84,22 @@ def test_system_tables(start_cluster):
"max_data_part_size": "0",
"move_factor": 0.1,
},
{
"policy_name": "one_more_small_jbod_with_external",
"volume_name": "m",
"volume_priority": "1",
"disks": ["jbod1"],
"max_data_part_size": "0",
"move_factor": 0.1,
},
{
"policy_name": "one_more_small_jbod_with_external",
"volume_name": "e",
"volume_priority": "2",
"disks": ["external"],
"max_data_part_size": "0",
"move_factor": 0.1,
},
{
"policy_name": "jbods_with_external",
"volume_name": "main",
@ -223,6 +239,40 @@ def test_query_parser(start_cluster):
node1.query("DROP TABLE IF EXISTS table_with_normal_policy")
@pytest.mark.parametrize("name,engine", [
("test_alter_policy","MergeTree()"),
("replicated_test_alter_policy","ReplicatedMergeTree('/clickhouse/test_alter_policy', '1')",),
])
def test_alter_policy(start_cluster, name, engine):
try:
node1.query("""
CREATE TABLE {name} (
d UInt64
) ENGINE = {engine}
ORDER BY d
SETTINGS storage_policy='small_jbod_with_external'
""".format(name=name, engine=engine))
assert node1.query("""SELECT storage_policy FROM system.tables WHERE name = '{name}'""".format(name=name)) == "small_jbod_with_external\n"
with pytest.raises(QueryRuntimeException):
node1.query("""ALTER TABLE {name} MODIFY SETTING storage_policy='one_more_small_jbod_with_external'""".format(name=name))
assert node1.query("""SELECT storage_policy FROM system.tables WHERE name = '{name}'""".format(name=name)) == "small_jbod_with_external\n"
node1.query("""ALTER TABLE {name} MODIFY SETTING storage_policy='jbods_with_external'""".format(name=name))
assert node1.query("""SELECT storage_policy FROM system.tables WHERE name = '{name}'""".format(name=name)) == "jbods_with_external\n"
with pytest.raises(QueryRuntimeException):
node1.query("""ALTER TABLE {name} MODIFY SETTING storage_policy='small_jbod_with_external'""".format(name=name))
assert node1.query("""SELECT storage_policy FROM system.tables WHERE name = '{name}'""".format(name=name)) == "jbods_with_external\n"
finally:
node1.query("DROP TABLE IF EXISTS {name}".format(name=name))
def get_random_string(length):
return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length))

View File

@ -0,0 +1,8 @@
0 [576,800,64,936,552,216,252,808,920,780]
1 [577,801,65,937,553,217,253,809,921,781]
2 [578,802,66,938,554,218,254,810,922,782]
3 [579,803,67,939,555,219,255,811,923,783]
0 [128,184,304,140,568,528,772,452,176,648]
1 [129,185,305,141,569,529,773,453,177,649]
2 [130,186,306,142,570,530,774,454,178,650]
3 [131,187,307,143,571,531,775,455,179,651]

View File

@ -0,0 +1,4 @@
select k, groupArraySample(10)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k;
-- different seed
select k, groupArraySample(10, 1)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k;

View File

@ -0,0 +1,3 @@
10000000
0
10000000

View File

@ -0,0 +1,16 @@
CREATE TABLE IF NOT EXISTS test Engine = MergeTree ORDER BY number AS SELECT number, toString(rand()) x from numbers(10000000);
SELECT count() FROM test;
ALTER TABLE test detach partition tuple();
SELECT count() FROM test;
DETACH TABLE test;
ATTACH TABLE test;
ALTER TABLE test ATTACH PARTITION tuple();
SELECT count() FROM test;
DROP TABLE test;

debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (19.19.1.1) unstable; urgency=low
clickhouse (20.2.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Fri, 06 Dec 2019 17:21:55 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 13 Jan 2020 22:21:05 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.19.1.*
ARG version=20.2.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.19.1.*
ARG version=20.2.1.*
ARG gosu_ver=1.10
RUN apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.19.1.*
ARG version=20.2.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -34,7 +34,7 @@ Possible values:
Default value: 1.
**Usage**
Usage
Consider the following queries:
@ -47,7 +47,7 @@ If `enable_optimize_predicate_expression = 0`, then the execution time of the se
## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries}
Forces a query to an out-of-date replica if updated data is not available. See "[Replication](../../operations/table_engines/replication.md)".
Forces a query to an out-of-date replica if updated data is not available. See [Replication](../../operations/table_engines/replication.md).
ClickHouse selects the most relevant from the outdated replicas of the table.
@ -61,7 +61,7 @@ Disables query execution if the index can't be used by date.
Works with tables in the MergeTree family.
If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see "[MergeTree](../../operations/table_engines/mergetree.md)".
If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see [MergeTree](../../operations/table_engines/mergetree.md).
## force_primary_key
@ -70,7 +70,7 @@ Disables query execution if indexing by the primary key is not possible.
Works with tables in the MergeTree family.
If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For more information about data ranges in MergeTree tables, see "[MergeTree](../../operations/table_engines/mergetree.md)".
If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../../operations/table_engines/mergetree.md).
## format_schema
@ -183,7 +183,7 @@ Possible values:
Default value: 1.
**Example of Use**
Example of Use
Insert the [DateTime](../../data_types/datetime.md) type value with the different settings.
@ -191,6 +191,7 @@ Insert the [DateTime](../../data_types/datetime.md) type value with the differen
SET input_format_values_interpret_expressions = 0;
INSERT INTO datetime_t VALUES (now())
```
```text
Exception on client:
Code: 27. DB::Exception: Cannot parse input: expected ) before: now()): (at row 1)
@ -200,6 +201,7 @@ Code: 27. DB::Exception: Cannot parse input: expected ) before: now()): (at row
SET input_format_values_interpret_expressions = 1;
INSERT INTO datetime_t VALUES (now())
```
```text
Ok.
```
@ -210,28 +212,34 @@ The last query is equivalent to the following:
SET input_format_values_interpret_expressions = 0;
INSERT INTO datetime_t SELECT now()
```
```text
Ok.
```
## input_format_values_deduce_templates_of_expressions {#settings-input_format_values_deduce_templates_of_expressions}
Enables or disables template deduction for an SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows to parse and interpret expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse will try to deduce template of an expression, parse the following rows using this template and evaluate the expression on batch of successfully parsed rows. For the following query:
```sql
INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ...
```
- if `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0` expressions will be interpreted separately for each row (this is very slow for large number of rows)
- if `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1` expressions in the first, second and third rows will be parsed using template `lower(String)` and interpreted together, expression is the forth row will be parsed with another template (`upper(String)`)
- if `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1` - the same as in previous case, but also allows fallback to interpreting expressions separately if it's not possible to deduce template.
- if `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0` expressions will be interpreted separately for each row (this is very slow for large number of rows)
- if `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1` expressions in the first, second and third rows will be parsed using template `lower(String)` and interpreted together, expression is the forth row will be parsed with another template (`upper(String)`)
- if `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1` - the same as in previous case, but also allows fallback to interpreting expressions separately if it's not possible to deduce template.
Enabled by default.
## input_format_values_accurate_types_of_literals {#settings-input_format_values_accurate_types_of_literals}
This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. It can happen, that expressions for some column have the same structure, but contain numeric literals of different types, e.g
```sql
(..., abs(0), ...), -- UInt64 literal
(..., abs(3.141592654), ...), -- Float64 literal
(..., abs(-1), ...), -- Int64 literal
```
When this setting is enabled, ClickHouse will check actual type of literal and will use expression template of the corresponding type. In some cases it may significantly slow down expression evaluation in `Values`.
When disabled, ClickHouse may use more general type for some literals (e.g. `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues.
Enabled by default.
@ -296,7 +304,7 @@ Possible values:
Default value: 0.
**See Also**
See also:
- [Usage of Nested Structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format.
@ -336,7 +344,7 @@ Possible values:
Default value: `'basic'`.
**See Also**
See also:
- [DateTime data type.](../../data_types/datetime.md)
- [Functions for working with dates and times.](../../query_language/functions/date_time_functions.md)
@ -368,7 +376,7 @@ Possible values:
Default value: 0.
**See Also**
See also:
- [JOIN clause](../../query_language/select.md#select-join)
- [Join table engine](../table_engines/join.md)
@ -435,14 +443,13 @@ Default value: 163840.
## merge_tree_min_bytes_for_concurrent_read {#setting-merge_tree_min_bytes_for_concurrent_read}
If the number of bytes to read from one file of a [MergeTree*](../table_engines/mergetree.md)-engine table exceeds `merge_tree_min_bytes_for_concurrent_read`, then ClickHouse tries to concurrently read from this file from several threads.
If the number of bytes to read from one file of a [MergeTree*](../table_engines/mergetree.md)-engine table exceeds `merge_tree_min_bytes_for_concurrent_read`, then ClickHouse tries to concurrently read from this file in several threads.
Possible values:
Possible value:
- Any positive integer.
Default value: 240 ✕ 1024 ✕ 1024.
Default value: 251658240.
## merge_tree_min_rows_for_seek {#setting-merge_tree_min_rows_for_seek}
@ -487,19 +494,17 @@ Possible values:
Default value: 128 ✕ 8192.
## merge_tree_max_bytes_to_use_cache {#setting-merge_tree_max_bytes_to_use_cache}
If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it doesn't use the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
Possible values:
Possible value:
- Any positive integer.
Default value: 1920 ✕ 1024 ✕ 1024.
Default value: 2013265920.
## min_bytes_to_use_direct_io {#settings-min_bytes_to_use_direct_io}
@ -507,12 +512,12 @@ The minimum data volume required for using direct I/O access to the storage disk
ClickHouse uses this setting when reading data from tables. If the total storage volume of all the data to be read exceeds `min_bytes_to_use_direct_io` bytes, then ClickHouse reads the data from the storage disk with the `O_DIRECT` option.
**Possible values**
Possible values:
- 0 — Direct I/O is disabled.
- Positive integer.
**Default value**: 0.
Default value: 0.
## log_queries {#settings-log-queries}
@ -520,9 +525,11 @@ Setting up query logging.
Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../server_settings/settings.md#server_settings-query-log) server configuration parameter.
**Example**:
Example:
log_queries=1
```text
log_queries=1
```
## log_query_threads {#settings-log-query-threads}
@ -530,9 +537,11 @@ Setting up query threads logging.
Queries' threads runned by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../server_settings/settings.md#server_settings-query-thread-log) server configuration parameter.
**Example**:
Example:
log_query_threads=1
```text
log_query_threads=1
```
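Similarly, per-thread records end up in `system.query_thread_log`. The exact set of columns depends on the server version, so treat the column list below as an assumption for illustration:

```sql
SET log_query_threads = 1;
SELECT 1;
-- Column names are assumptions; check DESCRIBE system.query_thread_log on your server
SELECT event_time, query
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 5;
```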
## max_insert_block_size {#settings-max_insert_block_size}
@ -548,7 +557,7 @@ The default is slightly more than `max_block_size`. The reason for this is becau
## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries}
Disables lagging replicas for distributed queries. See "[Replication](../../operations/table_engines/replication.md)".
Disables lagging replicas for distributed queries. See [Replication](../../operations/table_engines/replication.md).
Sets the time in seconds. If a replica lags more than the set value, this replica is not used.
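A minimal sketch, assuming the usual `SET` syntax; 300 seconds is only an illustrative value:

```sql
-- Replicas lagging by more than 5 minutes are skipped for distributed queries
SET max_replica_delay_for_distributed_queries = 300;
```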
@ -783,7 +792,6 @@ If the value is 1 or more, compilation occurs asynchronously in a separate threa
Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause.
The results of compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results, since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade, when the old results are deleted.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
If the value is true, Int64 and UInt64 integers appear in quotes when using JSON\* formats (for compatibility with most JavaScript implementations); otherwise, integers are output without quotes.
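A short sketch showing the effect on a single value (the alias `x` is arbitrary):

```sql
SET output_format_json_quote_64bit_integers = 1;
SELECT toUInt64(42) AS x FORMAT JSON;  -- "x": "42"
SET output_format_json_quote_64bit_integers = 0;
SELECT toUInt64(42) AS x FORMAT JSON;  -- "x": 42
```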
@ -800,12 +808,12 @@ For CSV input format enables or disables parsing of unquoted `NULL` as literal (
Enables quorum writes.
- If `insert_quorum < 2`, the quorum writes are disabled.
- If `insert_quorum >= 2`, the quorum writes are enabled.
- If `insert_quorum < 2`, the quorum writes are disabled.
- If `insert_quorum >= 2`, the quorum writes are enabled.
Default value: 0.
**Quorum writes**
Quorum writes
`INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written.
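A minimal sketch of a quorum write; `replicated_table` is a hypothetical ReplicatedMergeTree table with at least two replicas:

```sql
SET insert_quorum = 2;  -- the INSERT succeeds only after 2 replicas confirm the write
INSERT INTO replicated_table VALUES (1, 'a');
```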
@ -813,24 +821,23 @@ All the replicas in the quorum are consistent, i.e., they contain data from all
When reading the data written from the `insert_quorum`, you can use the [select_sequential_consistency](#settings-select_sequential_consistency) option.
**ClickHouse generates an exception**
ClickHouse generates an exception
- If the number of available replicas at the time of the query is less than the `insert_quorum`.
- At an attempt to write data when the previous block has not yet been inserted in the `insert_quorum` of replicas. This situation may occur if the user tries to perform an `INSERT` before the previous one with the `insert_quorum` is completed.
**See also the following parameters:**
See also:
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_timeout {#settings-insert_quorum_timeout}
Quorum write timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
Default value: 60 seconds.
**See also the following parameters:**
See also:
- [insert_quorum](#settings-insert_quorum)
- [select_sequential_consistency](#settings-select_sequential_consistency)
@ -847,11 +854,11 @@ Possible values:
Default value: 0.
**Usage**
Usage
When sequential consistency is enabled, ClickHouse allows the client to execute the `SELECT` query only for those replicas that contain data from all previous `INSERT` queries executed with `insert_quorum`. If the client refers to a partial replica, ClickHouse will generate an exception. The SELECT query will not include data that has not yet been written to the quorum of replicas.
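A sketch of a sequentially consistent read; `replicated_table` is again a hypothetical table written with `insert_quorum`:

```sql
SET select_sequential_consistency = 1;
-- Throws an exception if this replica has not yet received all quorum inserts
SELECT count() FROM replicated_table;
```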
**See Also**
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
@ -913,11 +920,10 @@ Possible values:
Default value: 1.
**See Also**
See also:
- [Multiple JOIN](../../query_language/select.md#select-join)
## count_distinct_implementation {#settings-count_distinct_implementation}
Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../query_language/agg_functions/reference.md#agg_function-count) construction.
@ -983,11 +989,10 @@ Default value: 0.
Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed_replica_error_half_life is set to 1 second, then the replica is considered normal 3 seconds after the last error.
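The "3 seconds" figure follows if the error counter is assumed to decay exponentially with the configured half-life (an assumption based on the setting's name): starting from 5 errors, the counter drops below 1 once

```latex
% Assumed exponential decay of the error counter with half-life = 1 s
5 \cdot 2^{-t / 1\,\text{s}} < 1 \;\Longrightarrow\; t > \log_2 5 \approx 2.32\,\text{s}
```

so the first whole second at which the replica counts as normal is t = 3 s.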
** See also **
See also:
- [Table engine Distributed](../../operations/table_engines/distributed.md)
- [`distributed_replica_error_cap`](#settings-distributed_replica_error_cap)
- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
## distributed_replica_error_cap {#settings-distributed_replica_error_cap}
@ -996,11 +1001,10 @@ Controls how fast errors in distributed tables are zeroed. If a replica is unava
The error count of each replica is capped at this value, preventing a single replica from accumulating too many errors.
** See also **
See also:
- [Table engine Distributed](../../operations/table_engines/distributed.md)
- [`distributed_replica_error_half_life`](#settings-distributed_replica_error_half_life)
- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)
## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms}
@ -1012,7 +1016,6 @@ Possible values:
Default value: 100 milliseconds.
## distributed_directory_monitor_max_sleep_time_ms {#distributed_directory_monitor_max_sleep_time_ms}
Maximum interval for the [Distributed](../table_engines/distributed.md) table engine to send data. Limits exponential growth of the interval set in the [distributed_directory_monitor_sleep_time_ms](#distributed_directory_monitor_sleep_time_ms) setting.
@ -1051,7 +1054,6 @@ Lower values mean higher priority. Threads with low `nice` priority values are e
Default value: 0.
## query_profiler_real_time_period_ns {#query_profiler_real_time_period_ns}
Sets the period for a real clock timer of the query profiler. Real clock timer counts wall-clock time.
@ -1071,7 +1073,7 @@ Type: [UInt64](../../data_types/int_uint.md).
Default value: 1000000000 nanoseconds (once a second).
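A minimal sketch of increasing the sampling frequency and checking that samples arrive in `system.trace_log`; `numbers(...)` is just a built-in source of synthetic load:

```sql
SET query_profiler_real_time_period_ns = 10000000;  -- sample every 10 ms instead of once a second
SELECT count() FROM numbers(100000000) WHERE NOT ignore(rand());  -- some work to profile
SELECT count() FROM system.trace_log;  -- collected stack samples land here
```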
**See Also**
See also:
- [system.trace_log](../system_tables.md#system_tables-trace_log)
@ -1094,7 +1096,7 @@ Type: [UInt64](../../data_types/int_uint.md).
Default value: 1000000000 nanoseconds.
**See Also**
See also:
- [system.trace_log](../system_tables.md#system_tables-trace_log)

View File

@ -14,11 +14,11 @@ SELECT [DISTINCT] expr_list
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]
[LIMIT [offset_value, ]n BY columns]
```
All the clauses are optional, except for the required list of expressions immediately after SELECT.

View File

@ -16,8 +16,8 @@ ClickHouse применяет настройку в тех случаях, ко
Возможные значения:
- `deny` — значение по умолчанию. Запрещает использование таких подзапросов (При попытке использование вернет исключение "Double-distributed IN/JOIN subqueries is denied");
- `local` — заменяет базу данных и таблицу в подзапросе на локальные для конечного сервера (шарда), оставив обычный `IN` / `JOIN.`
- `global` — заменяет запрос `IN` / `JOIN` на `GLOBAL IN` / `GLOBAL JOIN.`
- `local` — заменяет базу данных и таблицу в подзапросе на локальные для конечного сервера (шарда), оставив обычный `IN`/`JOIN.`
- `global` — заменяет запрос `IN`/`JOIN` на `GLOBAL IN`/`GLOBAL JOIN.`
- `allow` — разрешает использование таких подзапросов.
## enable_optimize_predicate_expression
@ -31,9 +31,9 @@ ClickHouse применяет настройку в тех случаях, ко
- 0 — выключена.
- 1 — включена.
Значение по умолчанию 1.
Значение по умолчанию: 1.
**Использование**
Использование
Рассмотрим следующие запросы:
@ -46,7 +46,7 @@ ClickHouse применяет настройку в тех случаях, ко
## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries}
Форсирует запрос в устаревшую реплику в случае, если актуальные данные недоступны. Смотрите "[Репликация](../../operations/table_engines/replication.md)".
Форсирует запрос в устаревшую реплику в случае, если актуальные данные недоступны. См. [Репликация](../../operations/table_engines/replication.md).
Из устаревших реплик таблицы ClickHouse выбирает наиболее актуальную.
@ -60,7 +60,7 @@ ClickHouse применяет настройку в тех случаях, ко
Работает с таблицами семейства MergeTree.
При `force_index_by_date=1` ClickHouse проверяет, есть ли в запросе условие на ключ даты, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. При этом не проверяется, действительно ли условие уменьшает объём данных для чтения. Например, условие `Date != '2000-01-01'` подходит даже в том случае, когда соответствует всем данным в таблице (т.е. для выполнения запроса требуется full scan). Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе "[MergeTree](../../operations/table_engines/mergetree.md)".
При `force_index_by_date=1` ClickHouse проверяет, есть ли в запросе условие на ключ даты, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. При этом не проверяется, действительно ли условие уменьшает объём данных для чтения. Например, условие `Date != '2000-01-01'` подходит даже в том случае, когда соответствует всем данным в таблице (т.е. для выполнения запроса требуется full scan). Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе [MergeTree](../../operations/table_engines/mergetree.md).
## force_primary_key {#settings-force_primary_key}
@ -68,7 +68,7 @@ ClickHouse применяет настройку в тех случаях, ко
Работает с таблицами семейства MergeTree.
При `force_primary_key=1` ClickHouse проверяет, есть ли в запросе условие на первичный ключ, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. При этом не проверяется, действительно ли условие уменьшает объём данных для чтения. Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе "[MergeTree](../../operations/table_engines/mergetree.md)".
При `force_primary_key=1` ClickHouse проверяет, есть ли в запросе условие на первичный ключ, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. При этом не проверяется, действительно ли условие уменьшает объём данных для чтения. Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе [MergeTree](../../operations/table_engines/mergetree.md).
## format_schema
@ -91,7 +91,7 @@ ClickHouse применяет настройку в тех случаях, ко
- 0 — выключена.
- 1 — включена.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## http_zlib_compression_level {#settings-http_zlib_compression_level}
@ -112,7 +112,7 @@ ClickHouse применяет настройку в тех случаях, ко
- 0 — выключена.
- 1 — включена.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## send_progress_in_http_headers {#settings-send_progress_in_http_headers}
@ -125,7 +125,7 @@ ClickHouse применяет настройку в тех случаях, ко
- 0 — выключена.
- 1 — включена.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## max_http_get_redirects {#setting-max_http_get_redirects}
@ -179,7 +179,7 @@ ClickHouse применяет настройку в тех случаях, ко
Значение по умолчанию: 1.
**Пример использования**
Пример использования:
Вставим значение типа [DateTime](../../data_types/datetime.md) при разных значениях настройки.
@ -228,7 +228,7 @@ Ok.
- 0 — выключена.
- 1 — включена.
Значение по умолчанию 1.
Значение по умолчанию: 1.
## input_format_null_as_default {#settings-input_format_null_as_default}
@ -252,7 +252,7 @@ Ok.
- 0 — выключена.
- 1 — включена.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## input_format_import_nested_json {#settings-input_format_import_nested_json}
@ -267,9 +267,9 @@ Ok.
- 0 — выключена.
- 1 — включена.
Значение по умолчанию 0.
Значение по умолчанию: 0.
**Смотрите также**
См. также:
- [Использование вложенных структур](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format.
@ -307,9 +307,9 @@ Ok.
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS`. Например, `'2019-08-20 10:18:56'`.
Значение по умолчанию `'basic'`.
Значение по умолчанию: `'basic'`.
**Смотрите также**
См. также:
- [Тип данных DateTime.](../../data_types/datetime.md)
- [Функции для работы с датой и временем.](../../query_language/functions/date_time_functions.md)
@ -318,13 +318,13 @@ Ok.
Устанавливает строгость по умолчанию для [JOIN](../../query_language/select.md#select-join).
**Возможные значения**
Возможные значения
- `ALL` — если в правой таблице несколько совпадающих строк, данные умножаются на количество этих строк. Это нормальное поведение `JOIN` как в стандартном SQL.
- `ANY` — если в правой таблице несколько соответствующих строк, то соединяется только первая найденная. Если в "правой" таблице есть не более одной подходящей строки, то результаты `ANY` и `ALL` совпадают.
- `Пустая строка` — если `ALL` или `ANY` не указаны в запросе, то ClickHouse генерирует исключение.
Значение по умолчанию `ALL`.
Значение по умолчанию: `ALL`.
## join_any_take_last_row {#settings-join_any_take_last_row}
@ -338,9 +338,9 @@ Ok.
- 0 — если в правой таблице несколько соответствующих строк, то присоединяется только первая найденная строка.
- 1 — если в правой таблице несколько соответствующих строк, то присоединяется только последняя найденная строка.
Значение по умолчанию 0.
Значение по умолчанию: 0.
**Смотрите также**
См. также:
- [Секция JOIN](../../query_language/select.md#select-join)
- [Движок таблиц Join](../table_engines/join.md)
@ -358,9 +358,9 @@ Ok.
- 0 — если в правой таблице несколько соответствующих строк, то присоединяется только первая найденная.
- 1 — если в правой таблице несколько соответствующих строк, то присоединяется только последняя найденная строка.
Значение по умолчанию 0.
Значение по умолчанию: 0.
**Смотрите также**
См. также:
- [Секция JOIN](../../query_language/select.md#select-join)
- [Движок таблиц Join](../table_engines/join.md)
@ -370,12 +370,12 @@ Ok.
Устанавливает тип поведения [JOIN](../../query_language/select.md). При объединении таблиц могут появиться пустые ячейки. ClickHouse заполняет их по-разному в зависимости от настроек.
**Возможные значения**
Возможные значения
- 0 — пустые ячейки заполняются значением по умолчанию соответствующего типа поля.
- 1 — `JOIN` ведёт себя как в стандартном SQL. Тип соответствующего поля преобразуется в [Nullable](../../data_types/nullable.md#data_type-nullable), а пустые ячейки заполняются значениями [NULL](../../query_language/syntax.md).
**Значение по умолчанию**: 0.
Значение по умолчанию: 0.
## max_block_size
@ -400,7 +400,7 @@ Ok.
- 0 — не использовать равномерное распределение заданий на чтение.
- 1 — использовать равномерное распределение заданий на чтение.
Значение по умолчанию 1.
Значение по умолчанию: 1.
## merge_tree_min_rows_for_concurrent_read {#setting-merge_tree_min_rows_for_concurrent_read}
@ -410,17 +410,17 @@ Ok.
- Любое положительное целое число.
Значение по умолчанию 163840.
Значение по умолчанию: 163840.
## merge_tree_min_bytes_for_concurrent_read {#setting-merge_tree_min_bytes_for_concurrent_read}
Если число байтов, которые должны быть прочитаны из одного файла таблицы с движком [MergeTree*](../table_engines/mergetree.md) превышает `merge_tree_min_bytes_for_concurrent_read`, то ClickHouse пытается выполнить конкурентное чтение в несколько потоков из этого файла.
Если число байтов, которое должно быть прочитано из одного файла таблицы с движком [MergeTree*](../table_engines/mergetree.md), превышает значение `merge_tree_min_bytes_for_concurrent_read`, то ClickHouse выполняет одновременное чтение в несколько потоков из этого файла.
Возможные значения:
Возможное значение:
- Положительное целое число.
Значение по умолчанию — 240 ✕ 1024 ✕ 1024.
Значение по умолчанию: 251658240.
## merge_tree_min_rows_for_seek {#setting-merge_tree_min_rows_for_seek}
@ -430,7 +430,7 @@ Ok.
- Положительное целое число.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## merge_tree_min_bytes_for_seek {#setting-merge_tree_min_bytes_for_seek}
@ -440,7 +440,7 @@ Ok.
- Положительное целое число.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## merge_tree_coarse_index_granularity {#setting-merge_tree_coarse_index_granularity}
@ -450,7 +450,7 @@ Ok.
- Положительное целое число.
Значение по умолчанию 8.
Значение по умолчанию: 8.
## merge_tree_max_rows_to_use_cache {#setting-merge_tree_max_rows_to_use_cache}
@ -462,19 +462,19 @@ Ok.
- Положительное целое число.
Значение по умолчанию 128 ✕ 8192.
Значение по умолчанию: 128 ✕ 8192.
## merge_tree_max_bytes_to_use_cache {#setting-merge_tree_max_bytes_to_use_cache}
Если требуется прочитать более, чем `merge_tree_max_bytes_to_use_cache` байтов в одном запросе, ClickHouse не используют кэш несжатых блоков.
Кэш несжатых блоков хранит данные, извлечённые при выполнении запросов. ClickHouse использует этот кэш для ускорения ответов на повторяющиеся небольшие запросы. Настройка защищает кэш от замусоривания запросами, для выполнения которых необходимо извлечь большое количество данных. Настройка сервера [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) определяет размер кэша несжатых блоков.
Кэш несжатых блоков хранит данные, извлечённые при выполнении запросов. ClickHouse использует кэш для ускорения ответов на повторяющиеся небольшие запросы. Настройка защищает кэш от переполнения. Настройка сервера [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) определяет размер кэша несжатых блоков.
Возможные значения:
Возможное значение:
- Положительное целое число.
Значение по умолчанию — 1920 ✕ 1024 ✕ 1024.
Значение по умолчанию: 2013265920.
## min_bytes_to_use_direct_io {#settings-min_bytes_to_use_direct_io}
@ -482,12 +482,12 @@ Ok.
ClickHouse использует этот параметр при чтении данных из таблиц. Если общий объем хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска.
**Возможные значения**
Возможные значения:
- 0 — прямой ввод-вывод отключен.
- Положительное целое число.
**Значение по умолчанию**: 0.
Значение по умолчанию: 0.
## log_queries {#settings-log-queries}
@ -495,9 +495,11 @@ ClickHouse использует этот параметр при чтении д
Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../server_settings/settings.md#server_settings-query-log).
**Пример** :
Пример:
log_queries=1
```text
log_queries=1
```
## log_query_threads {#settings-log-query-threads}
@ -505,9 +507,11 @@ ClickHouse использует этот параметр при чтении д
Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server_settings/settings.md#server_settings-query-thread-log).
**Пример** :
Пример:
log_query_threads=1
```text
log_query_threads=1
```
## max_insert_block_size {#settings-max_insert_block_size}
@ -523,7 +527,7 @@ ClickHouse использует этот параметр при чтении д
## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries}
Отключает отстающие реплики при распределенных запросах. Смотрите "[Репликация](../../operations/table_engines/replication.md)".
Отключает отстающие реплики при распределенных запросах. См. [Репликация](../../operations/table_engines/replication.md).
Устанавливает время в секундах. Если отставание реплики больше установленного значения, то реплика не используется.
@ -554,7 +558,7 @@ ClickHouse использует этот параметр при чтении д
## min_compress_block_size
Для таблиц типа "[MergeTree](../../operations/table_engines/mergetree.md)". В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше min_compress_block_size. По умолчанию - 65 536.
Для таблиц типа [MergeTree](../../operations/table_engines/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше min_compress_block_size. По умолчанию - 65 536.
Реальный размер блока, если несжатых данных меньше max_compress_block_size, будет не меньше этого значения и не меньше объёма данных на одну засечку.
@ -652,7 +656,7 @@ ClickHouse использует этот параметр при чтении д
Работает для таблиц со стриммингом в случае тайм-аута, или когда поток генерирует [max_insert_block_size](#settings-max_insert_block_size) строк.
Значение по умолчанию - 7500.
Значение по умолчанию: 7500.
Чем меньше значение, тем чаще данные сбрасываются в таблицу. Установка слишком низкого значения приводит к снижению производительности.
@ -776,12 +780,12 @@ load_balancing = first_or_random
Включает кворумную запись.
- Если `insert_quorum < 2`, то кворумная запись выключена.
- Если `insert_quorum >= 2`, то кворумная запись включена.
- Если `insert_quorum < 2`, то кворумная запись выключена.
- Если `insert_quorum >= 2`, то кворумная запись включена.
Значение по умолчанию: 0.
**Кворумная запись**
Кворумная запись
`INSERT` завершается успешно только в том случае, когда ClickHouse смог без ошибки записать данные в `insert_quorum` реплик за время `insert_quorum_timeout`. Если по любой причине количество реплик с успешной записью не достигнет `insert_quorum`, то запись считается не состоявшейся и ClickHouse удалит вставленный блок из всех реплик, куда уже успел записать данные.
@ -789,12 +793,12 @@ load_balancing = first_or_random
При чтении данных, записанных с `insert_quorum` можно использовать настройку [select_sequential_consistency](#settings-select_sequential_consistency).
**ClickHouse генерирует исключение**
ClickHouse генерирует исключение
- Если количество доступных реплик на момент запроса меньше `insert_quorum`.
- При попытке записать данные в момент, когда предыдущий блок ещё не вставлен в `insert_quorum` реплик. Эта ситуация может возникнуть, если пользователь вызвал `INSERT` прежде, чем завершился предыдущий с `insert_quorum`.
**См. также параметры:**
См. также:
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
@ -805,7 +809,7 @@ load_balancing = first_or_random
Значение по умолчанию: 60 секунд.
**См. также параметры:**
См. также:
- [insert_quorum](#settings-insert_quorum)
- [select_sequential_consistency](#settings-select_sequential_consistency)
@ -821,11 +825,11 @@ load_balancing = first_or_random
Значение по умолчанию: 0.
**Использование**
Использование
Когда последовательная консистентность включена, то ClickHouse позволит клиенту выполнить запрос `SELECT` только к тем репликам, которые содержат данные всех предыдущих запросов `INSERT`, выполненных с `insert_quorum`. Если клиент обратится к неполной реплике, то ClickHouse сгенерирует исключение. В запросе SELECT не будут участвовать данные, которые ещё не были записаны на кворум реплик.
**Смотрите также**
См. также:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
@ -842,7 +846,7 @@ load_balancing = first_or_random
- [uniqHLL12](../../query_language/agg_functions/reference.md#agg_function-uniqhll12)
- [uniqExact](../../query_language/agg_functions/reference.md#agg_function-uniqexact)
Значение по умолчанию `uniqExact`.
Значение по умолчанию: `uniqExact`.
## max_network_bytes {#settings-max_network_bytes}
@ -853,7 +857,7 @@ load_balancing = first_or_random
- Положительное целое число.
- 0 — контроль объема данных отключен.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## max_network_bandwidth {#settings-max_network_bandwidth}
@ -864,7 +868,7 @@ load_balancing = first_or_random
- Положительное целое число.
- 0 — контроль скорости передачи данных отключен.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## max_network_bandwidth_for_user {#settings-max_network_bandwidth_for_user}
@ -875,7 +879,7 @@ load_balancing = first_or_random
- Положительное целое число.
- 0 — управление скоростью передачи данных отключено.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## max_network_bandwidth_for_all_users {#settings-max_network_bandwidth_for_all_users}
@ -886,7 +890,7 @@ load_balancing = first_or_random
- Положительное целое число.
- 0 — управление скоростью передачи данных отключено.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## allow_experimental_cross_to_join_conversion {#settings-allow_experimental_cross_to_join_conversion}
@ -903,7 +907,7 @@ load_balancing = first_or_random
Значение по умолчанию: 1.
**Смотрите также**
См. также
- [Множественный JOIN](../../query_language/select.md#select-join)
@ -936,7 +940,7 @@ load_balancing = first_or_random
Если шард недоступен, то ClickHouse генерирует исключение.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## optimize_throw_if_noop {#setting-optimize_throw_if_noop}
@ -949,7 +953,7 @@ load_balancing = first_or_random
- 1 — генерирование исключения включено.
- 0 — генерирование исключения выключено.
Значение по умолчанию 0.
Значение по умолчанию: 0.
## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms}
@ -998,7 +1002,7 @@ load_balancing = first_or_random
Более низкие значения означают более высокий приоритет. Потоки с низкими значениями приоритета `nice` выполняются чаще, чем потоки с более высокими значениями. Высокие значения предпочтительно использовать для долгих неинтерактивных запросов, поскольку это позволяет быстро выделить ресурс в пользу коротких интерактивных запросов.
Значение по умолчанию 0.
Значение по умолчанию: 0.
[Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/settings/settings/) <!--hide-->

View File

@ -14,11 +14,11 @@ SELECT [DISTINCT] expr_list
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]
[LIMIT [offset_value, ]n BY columns]
```
Все секции, кроме списка выражений сразу после SELECT, являются необязательными.

View File

@ -13,11 +13,11 @@ SELECT [DISTINCT] expr_list
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[LIMIT n BY columns]
[LIMIT [n, ]m]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]
[LIMIT n BY columns]
```
所有的子句都是可选的除了SELECT之后的表达式列表(expr_list)。

View File

@ -1,4 +1,4 @@
if(OS_LINUX)
if(OS_LINUX AND OPENSSL_FOUND)
option(ENABLE_MYSQL "Enable MySQL" ${ENABLE_LIBRARIES})
else ()
option(ENABLE_MYSQL "Enable MySQL" FALSE)

View File

@ -32,7 +32,7 @@ h1, h2 {
}
p {
line-height: 20px;
line-height: 1.5;
white-space: pre-wrap;
}
@ -49,7 +49,6 @@ a:hover {
.island {
background-color: #FFF;
box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1);
/* border-radius: 5px;*/
padding: 10px;
width: 90%;
margin: 10px auto 10px auto;
@ -180,12 +179,14 @@ th {
border: 1px solid #DDD;
background-color: #EEE;
padding: 1px 5px 1px 5px;
margin: 1px 5px 1px 5px;
cursor: pointer;
white-space: nowrap;
}
#selectors p span.selected {
border: 1px solid #F60;
background-color: #F80;
border: 1px solid #FC9;
background-color: #FC9;
}
#selectors p span.disabled {
@ -212,6 +213,9 @@ th {
border-bottom: 1px solid #EEE;
}
.diagram-system-name {
text-align: right; width: 20%;
}
</style>
@ -487,58 +491,6 @@ var results =
]
},
{
"system": "Lenovo B580 Laptop (i5-3210M)",
"data_size": 100000000,
"time": "2020-01-11 00:00:00",
"result":
[
[0.035, 0.003, 0.005],
[0.093, 0.064, 0.060],
[0.265, 0.170, 0.167],
[0.880, 0.251, 0.266],
[0.954, 0.593, 0.561],
[2.140, 1.506, 1.525],
[0.148, 0.096, 0.105],
[0.064, 0.048, 0.044],
[2.727, 2.330, 2.280],
[3.386, 3.210, 2.951],
[1.218, 0.787, 0.749],
[1.293, 0.915, 0.904],
[3.713, 3.224, 3.190],
[4.943, 4.338, 4.310],
[4.503, 3.999, 3.918],
[4.001, 3.686, 4.144],
[10.714, 10.011, 10.035],
[7.456, 6.556, 6.675],
[20.201, 19.238, 19.135],
[0.888, 0.217, 0.209],
[9.685, 4.144, 4.023],
[11.201, 4.648, 4.636],
[21.037, 10.712, 10.571],
[18.186, 4.743, 4.743],
[2.844, 1.379, 1.358],
[1.623, 1.138, 1.130],
[2.861, 1.394, 1.417],
[9.691, 4.191, 4.129],
[10.285, 7.381, 7.379],
[6.879, 6.871, 6.829],
[4.131, 3.336, 3.240],
[7.157, 4.666, 4.616],
[29.371, 36.392, 29.946],
[17.929, 14.223, 14.127],
[17.058, 13.998, 14.055],
[5.667, 5.460, 5.408],
[0.325, 0.230, 0.217],
[0.115, 0.101, 0.094],
[0.148, 0.093, 0.084],
[0.585, 0.464, 0.459],
[0.078, 0.042, 0.035],
[0.057, 0.038, 0.032],
[0.024, 0.011, 0.010]
]
},
{
"system": "Yandex Cloud Cascade Lake, 64 vCPU (32 threads), 128 GB RAM, 400 GB SSD",
"data_size": 100000000,
@ -591,6 +543,214 @@ var results =
]
},
{
"system": "Yandex Cloud Cascade Lake, 64 vCPU (32 threads), 128 GB RAM, 4 TB SSD",
"data_size": 100000000,
"time": "2020-01-13 00:00:00",
"result":
[
[0.054, 0.002, 0.002],
[0.140, 0.009, 0.015],
[0.139, 0.017, 0.020],
[0.430, 0.022, 0.022],
[0.453, 0.083, 0.082],
[0.839, 0.160, 0.159],
[0.058, 0.010, 0.010],
[0.048, 0.009, 0.008],
[0.706, 0.307, 0.288],
[0.821, 0.328, 0.301],
[0.509, 0.108, 0.106],
[0.534, 0.117, 0.116],
[0.905, 0.318, 0.313],
[1.573, 0.429, 0.413],
[0.960, 0.410, 0.403],
[0.769, 0.619, 0.521],
[1.914, 1.335, 1.272],
[1.279, 0.657, 1.215],
[3.839, 2.264, 2.481],
[0.425, 0.064, 0.027],
[5.605, 0.344, 0.367],
[6.389, 0.382, 0.403],
[11.794, 0.894, 0.878],
[11.730, 0.536, 0.436],
[1.540, 0.120, 0.109],
[0.715, 0.091, 0.106],
[1.553, 0.132, 0.132],
[5.580, 0.375, 0.350],
[4.720, 0.511, 0.480],
[1.025, 0.953, 1.008],
[1.475, 0.359, 0.357],
[3.457, 0.504, 0.495],
[4.688, 3.581, 3.673],
[6.325, 1.913, 1.865],
[6.338, 1.933, 2.030],
[0.961, 0.785, 0.847],
[0.267, 0.221, 0.215],
[0.095, 0.071, 0.078],
[0.148, 0.065, 0.071],
[0.516, 0.471, 0.432],
[0.076, 0.028, 0.025],
[0.053, 0.018, 0.021],
[0.034, 0.004, 0.004]
]
},
{
"system": "Yandex Cloud Cascade Lake, 4 vCPU (2 threads), 16 GB RAM, 30 GB SSD",
"data_size": 100000000,
"time": "2020-01-13 00:00:00",
"result":
[
[0.621, 0.002, 0.002],
[0.288, 0.035, 0.030],
[1.023, 0.126, 0.132],
[5.152, 0.219, 0.194],
[0.458, 0.427, 0.447],
[6.848, 1.223, 1.232],
[0.271, 0.077, 0.058],
[0.130, 0.044, 0.032],
[3.722, 2.145, 2.159],
[2.571, 2.459, 2.490],
[0.764, 0.679, 0.721],
[0.892, 0.816, 0.816],
[5.743, 3.467, 3.294],
[5.177, 4.540, 4.596],
[5.294, 4.565, 4.510],
[5.109, 3.902, 3.845],
[14.256, 12.943, 12.882],
[8.741, 8.056, 9.738],
[30.649, 26.987, 26.702],
[2.063, 0.183, 0.239],
[54.740, 3.602, 3.559],
[54.077, 6.038, 4.264],
[107.285, 11.156, 9.986],
[114.734, 4.735, 4.673],
[15.581, 1.257, 1.249],
[3.779, 1.002, 0.992],
[4.864, 1.305, 1.305],
[55.450, 3.348, 3.230],
[46.372, 5.424, 5.263],
[6.437, 6.404, 6.179],
[11.933, 3.524, 3.546],
[20.803, 5.352, 5.216],
[43.065, 41.106, 41.870],
[58.396, 16.545, 16.610],
[51.752, 16.329, 16.221],
[6.722, 6.256, 6.391],
[0.533, 0.241, 0.237],
[0.113, 0.085, 0.077],
[0.093, 0.083, 0.074],
[0.624, 0.497, 0.492],
[0.286, 0.036, 0.028],
[0.088, 0.022, 0.021],
[0.099, 0.005, 0.005]
]
},
{
"system": "Dell PowerEdge™ R6415 DX180 AMD EPYC™ 7551P 32-Core Naples (Zen), 128 GB RAM, 2x SSD 960 GB RAID 1",
"data_size": 100000000,
"time": "2020-01-13 00:00:00",
"result":
[
[0.007, 0.002, 0.001],
[0.030, 0.016, 0.014],
[0.042, 0.026, 0.026],
[0.078, 0.043, 0.042],
[0.143, 0.120, 0.117],
[0.239, 0.198, 0.198],
[0.022, 0.014, 0.014],
[0.016, 0.013, 0.015],
[0.388, 0.380, 0.384],
[0.476, 0.429, 0.411],
[0.201, 0.192, 0.191],
[0.204, 0.207, 0.192],
[0.676, 0.654, 0.637],
[0.890, 0.932, 0.940],
[0.730, 0.789, 0.738],
[0.658, 0.641, 0.678],
[1.556, 1.430, 1.529],
[0.819, 1.096, 0.906],
[3.569, 3.626, 3.508],
[0.083, 0.047, 0.077],
[0.812, 1.010, 0.601],
[1.097, 0.847, 0.864],
[2.654, 3.146, 3.169],
[1.595, 0.922, 0.877],
[0.259, 0.227, 0.236],
[0.206, 0.187, 0.181],
[0.245, 0.235, 0.232],
[0.974, 1.018, 1.012],
[1.280, 1.398, 1.243],
[2.171, 2.270, 2.284],
[0.594, 0.592, 0.602],
[0.976, 0.946, 0.966],
[4.543, 4.471, 4.364],
[3.844, 4.052, 3.858],
[3.932, 3.961, 3.982],
[1.128, 1.117, 1.146],
[0.233, 0.216, 0.221],
[0.088, 0.082, 0.085],
[0.075, 0.070, 0.070],
[0.465, 0.445, 0.435],
[0.036, 0.026, 0.031],
[0.028, 0.024, 0.021],
[0.010, 0.006, 0.006]
]
},
{
"system": "Dell PowerEdge™ R640 DX292 2x Xeon SP Gold 16-Core 2.10GHz, 196 GB RAM, 2x SSD 960 GB RAID 1",
"data_size": 100000000,
"time": "2020-01-13 00:00:00",
"result":
[
[0.005, 0.003, 0.003],
[0.035, 0.013, 0.016],
[0.043, 0.023, 0.023],
[0.076, 0.030, 0.027],
[0.109, 0.087, 0.098],
[0.184, 0.154, 0.151],
[0.030, 0.017, 0.016],
[0.018, 0.017, 0.016],
[0.346, 0.357, 0.375],
[0.467, 0.397, 0.410],
[0.165, 0.135, 0.137],
[0.166, 0.146, 0.143],
[0.452, 0.432, 0.415],
[0.543, 0.523, 0.527],
[0.508, 0.489, 0.472],
[0.638, 0.551, 0.549],
[1.280, 1.231, 1.272],
[0.680, 0.748, 0.611],
[2.380, 2.465, 2.351],
[0.073, 0.065, 0.040],
[0.724, 0.371, 0.376],
[0.805, 0.474, 0.450],
[1.547, 1.064, 1.117],
[1.798, 0.543, 0.507],
[0.217, 0.145, 0.142],
[0.139, 0.122, 0.133],
[0.221, 0.161, 0.159],
[0.730, 0.440, 0.449],
[0.875, 0.744, 0.721],
[1.307, 1.259, 1.318],
[0.457, 0.401, 0.404],
[0.716, 0.688, 0.617],
[4.147, 4.251, 3.844],
[2.082, 1.950, 2.187],
[2.109, 2.095, 1.930],
[0.875, 0.851, 0.848],
[0.233, 0.235, 0.221],
[0.103, 0.087, 0.086],
[0.087, 0.078, 0.078],
[0.452, 0.407, 0.403],
[0.047, 0.041, 0.054],
[0.036, 0.034, 0.035],
[0.013, 0.010, 0.010]
]
},
{
"system": "E5-2650 v2 @ 2.60GHz, 2 sockets, 16 threads, 8xHDD RAID-5",
"data_size": 100000000,
@ -642,6 +802,110 @@ var results =
[0.075, 0.013, 0.013]
]
},
{
"system": "Time4vps.eu VPS (KVM) Linux Ubuntu 4 Core (Skylake) 16GB RAM 160GB Disk",
"data_size": 100000000,
"time": "2020-01-13 00:00:00",
"result":
[
[0.068, 0.002, 0.002],
[0.124, 0.021, 0.025],
[0.594, 0.089, 0.077],
[2.300, 0.133, 0.090],
[2.710, 0.205, 0.212],
[5.203, 0.603, 0.610],
[0.090, 0.029, 0.036],
[0.118, 0.021, 0.022],
[5.977, 1.295, 1.206],
[3.909, 1.415, 1.452],
[2.551, 0.336, 0.324],
[3.123, 0.446, 0.409],
[4.075, 1.743, 1.661],
[6.427, 2.499, 2.487],
[5.775, 2.156, 2.431],
[3.322, 2.288, 2.276],
[8.642, 6.463, 6.690],
[6.365, 3.852, 3.757],
[20.426, 13.849, 13.695],
[2.507, 0.105, 0.100],
[30.691, 1.747, 1.699],
[30.206, 2.010, 1.943],
[57.155, 4.699, 4.859],
[50.924, 2.173, 2.119],
[10.907, 0.660, 0.686],
[3.636, 0.505, 0.524],
[8.388, 0.683, 0.627],
[27.423, 1.650, 1.703],
[21.309, 2.824, 2.821],
[4.227, 4.053, 4.037],
[8.198, 1.797, 1.776],
[18.853, 2.927, 2.881],
[22.254, 21.156, 20.854],
[29.323, 8.728, 8.621],
[27.889, 8.759, 9.063],
[4.121, 3.837, 3.934],
[0.452, 0.292, 0.247],
[0.221, 0.093, 0.090],
[0.331, 0.069, 0.074],
[0.703, 0.469, 0.506],
[0.211, 0.026, 0.027],
[0.134, 0.021, 0.021],
[0.121, 0.007, 0.006]
]
},
{
"system": "Lenovo B580 Laptop (i5-3210M)",
"data_size": 100000000,
"time": "2020-01-11 00:00:00",
"result":
[
[0.035, 0.003, 0.005],
[0.093, 0.064, 0.060],
[0.265, 0.170, 0.167],
[0.880, 0.251, 0.266],
[0.954, 0.593, 0.561],
[2.140, 1.506, 1.525],
[0.148, 0.096, 0.105],
[0.064, 0.048, 0.044],
[2.727, 2.330, 2.280],
[3.386, 3.210, 2.951],
[1.218, 0.787, 0.749],
[1.293, 0.915, 0.904],
[3.713, 3.224, 3.190],
[4.943, 4.338, 4.310],
[4.503, 3.999, 3.918],
[4.001, 3.686, 4.144],
[10.714, 10.011, 10.035],
[7.456, 6.556, 6.675],
[20.201, 19.238, 19.135],
[0.888, 0.217, 0.209],
[9.685, 4.144, 4.023],
[11.201, 4.648, 4.636],
[21.037, 10.712, 10.571],
[18.186, 4.743, 4.743],
[2.844, 1.379, 1.358],
[1.623, 1.138, 1.130],
[2.861, 1.394, 1.417],
[9.691, 4.191, 4.129],
[10.285, 7.381, 7.379],
[6.879, 6.871, 6.829],
[4.131, 3.336, 3.240],
[7.157, 4.666, 4.616],
[29.371, 36.392, 29.946],
[17.929, 14.223, 14.127],
[17.058, 13.998, 14.055],
[5.667, 5.460, 5.408],
[0.325, 0.230, 0.217],
[0.115, 0.101, 0.094],
[0.148, 0.093, 0.084],
[0.585, 0.464, 0.459],
[0.078, 0.042, 0.035],
[0.057, 0.038, 0.032],
[0.024, 0.011, 0.010]
]
},
];
</script>
@ -991,7 +1255,7 @@ function generate_diagram() {
var total_ratio = +$("#absolute_totals" + j).attr("data-ratio");
html += "<tr>";
html += "<td style='text-align: right;'><b>" + filtered_results[j].system + "</b>" +
html += "<td class='diagram-system-name'>" + filtered_results[j].system +
(filtered_results[j].version ? "<br />(" + filtered_results[j].version.replace(/ /g, '&nbsp;') + ")" : "") + "</td>";
html += "<td style='width: 100%; padding-right: 20px;'>";
@ -1081,6 +1345,9 @@ try { var yaCounter18343495 = new Ya.Metrika({id:18343495,
<div class='island'>
Results for Lenovo B580 Laptop are from <b>Ragıp Ünal</b>. 16GB RAM 1600 GHz, 240GB SSD, Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz (2 Core / 4 HT)<br/>
Results for Time4vps.eu are from <b>Ragıp Ünal</b>.<br/>
Results for Dell PowerEdge™ (in Hetzner) are from <b>Dmitry Titov</b>.<br/>
Xeon Gold 6230 server is using 4 x SAMSUNG datacenter class SSD in RAID 10.<br/>
Submit your own results: <a href="https://clickhouse.yandex/docs/en/operations/performance_test/">https://clickhouse.yandex/docs/en/operations/performance_test/</a>
</div>