Evgeniy Gatov 2014-11-01 21:34:42 +03:00
commit d23613a7e3
65 changed files with 2351 additions and 499 deletions

View File

@ -113,21 +113,21 @@ public:
return res;
}
std::string getName() const { return "ColumnAggregateFunction"; }
std::string getName() const override { return "ColumnAggregateFunction"; }
size_t sizeOfField() const { return sizeof(getData()[0]); }
size_t sizeOfField() const override { return sizeof(getData()[0]); }
size_t size() const
size_t size() const override
{
return getData().size();
}
ColumnPtr cloneEmpty() const
ColumnPtr cloneEmpty() const override
{
return new ColumnAggregateFunction(holder->func, Arenas(1, new Arena));
};
Field operator[](size_t n) const
Field operator[](size_t n) const override
{
Field field = String();
{
@ -137,7 +137,7 @@ public:
return field;
}
void get(size_t n, Field & res) const
void get(size_t n, Field & res) const override
{
res = String();
{
@ -146,17 +146,17 @@ public:
}
}
StringRef getDataAt(size_t n) const
StringRef getDataAt(size_t n) const override
{
return StringRef(reinterpret_cast<const char *>(&getData()[n]), sizeof(getData()[n]));
}
void insertData(const char * pos, size_t length)
void insertData(const char * pos, size_t length) override
{
getData().push_back(*reinterpret_cast<const AggregateDataPtr *>(pos));
}
void insertFrom(const IColumn & src, size_t n)
void insertFrom(const IColumn & src, size_t n) override
{
getData().push_back(static_cast<const ColumnAggregateFunction &>(src).getData()[n]);
}
@ -167,7 +167,7 @@ public:
holder.get()->func.get()->merge(getData().back(), static_cast<const ColumnAggregateFunction &>(src).getData()[n]);
}
void insert(const Field & x)
void insert(const Field & x) override
{
IAggregateFunction * function = holder.get()->func;
@ -177,17 +177,17 @@ public:
function->deserializeMerge(getData().back(), read_buffer);
}
void insertDefault()
void insertDefault() override
{
throw Exception("Method insertDefault is not supported for ColumnAggregateFunction.", ErrorCodes::NOT_IMPLEMENTED);
}
size_t byteSize() const
size_t byteSize() const override
{
return getData().size() * sizeof(getData()[0]);
}
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
if (start + length > getData().size())
throw Exception("Parameters start = "
@ -205,7 +205,7 @@ public:
return res;
}
ColumnPtr filter(const Filter & filter) const
ColumnPtr filter(const Filter & filter) const override
{
size_t size = getData().size();
if (size != filter.size())
@ -225,7 +225,7 @@ public:
return res;
}
ColumnPtr permute(const Permutation & perm, size_t limit) const
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
size_t size = getData().size();
@ -247,22 +247,22 @@ public:
return res;
}
ColumnPtr replicate(const Offsets_t & offsets) const
ColumnPtr replicate(const Offsets_t & offsets) const override
{
throw Exception("Method replicate is not supported for ColumnAggregateFunction.", ErrorCodes::NOT_IMPLEMENTED);
}
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
throw Exception("Method getExtremes is not supported for ColumnAggregateFunction.", ErrorCodes::NOT_IMPLEMENTED);
}
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
{
return 0;
}
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
size_t s = getData().size();
res.resize(s);
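Nearly every change in this commit annotates virtual functions with the C++11 override specifier, so that a mismatch with the IColumn base signature becomes a compile-time error instead of silently declaring a new, unrelated virtual. A minimal sketch, with hypothetical names, of what the specifier catches:

struct IColumnSketch
{
    virtual size_t size() const = 0;
    virtual ~IColumnSketch() = default;
};

struct GoodColumn : IColumnSketch
{
    size_t size() const override { return 0; }  // OK: matches the base signature exactly
};

struct BadColumn : IColumnSketch
{
    // Does not compile if uncommented: "marked 'override', but does not override".
    // Without the keyword, the missing 'const' would silently create a second virtual.
    // size_t size() override { return 0; }
};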

View File

@ -42,19 +42,19 @@ public:
}
}
std::string getName() const { return "ColumnArray(" + data->getName() + ")"; }
std::string getName() const override { return "ColumnArray(" + data->getName() + ")"; }
ColumnPtr cloneEmpty() const
ColumnPtr cloneEmpty() const override
{
return new ColumnArray(data->cloneEmpty());
}
size_t size() const
size_t size() const override
{
return getOffsets().size();
}
Field operator[](size_t n) const
Field operator[](size_t n) const override
{
size_t offset = offsetAt(n);
size_t size = sizeAt(n);
@ -66,7 +66,7 @@ public:
return res;
}
void get(size_t n, Field & res) const
void get(size_t n, Field & res) const override
{
size_t offset = offsetAt(n);
size_t size = sizeAt(n);
@ -77,7 +77,7 @@ public:
data->get(offset + i, res_arr[i]);
}
StringRef getDataAt(size_t n) const
StringRef getDataAt(size_t n) const override
{
/** Works for arrays of fixed-length values.
* For arrays of strings and arrays of arrays, the resulting chunk of memory may not correspond one-to-one to their elements.
@ -87,7 +87,7 @@ public:
return StringRef(begin.data, end.data - begin.data);
}
void insertData(const char * pos, size_t length)
void insertData(const char * pos, size_t length) override
{
/** Similarly - only for arrays of fixed-length values.
*/
@ -98,14 +98,17 @@ public:
size_t field_size = data_->sizeOfField();
const char * end = pos + length;
for (; pos + field_size <= end; pos += field_size)
size_t elems = 0;
for (; pos + field_size <= end; pos += field_size, ++elems)
data_->insertData(pos, field_size);
if (pos != end)
throw Exception("Incorrect length argument for method ColumnArray::insertData", ErrorCodes::BAD_ARGUMENTS);
getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + elems);
}
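Besides adding override, this hunk fixes a bug: insertData appended elements to the nested column but never pushed a new offsets entry, so the array column's size() (which is getOffsets().size()) did not grow. A self-contained sketch of the corrected bookkeeping, using plain std::vector stand-ins for the original PODArray-based containers:

#include <cstdint>
#include <cstring>
#include <vector>

// Append whole fixed-size elements from a raw byte buffer, then record
// exactly one cumulative offset so the column gains exactly one array row.
void insert_data(std::vector<uint32_t> & data, std::vector<uint64_t> & offsets,
                 const char * pos, size_t length)
{
    const size_t field_size = sizeof(uint32_t);
    const char * end = pos + length;
    size_t elems = 0;
    for (; pos + field_size <= end; pos += field_size, ++elems)
    {
        uint32_t value;
        std::memcpy(&value, pos, field_size);
        data.push_back(value);
    }
    // The line this commit adds - previously the elements were appended
    // but the row count (offsets.size()) never grew.
    offsets.push_back((offsets.empty() ? 0 : offsets.back()) + elems);
}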
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
if (length == 0)
return new ColumnArray(data);
@ -138,7 +141,7 @@ public:
return res;
}
void insert(const Field & x)
void insert(const Field & x) override
{
const Array & array = DB::get<const Array &>(x);
size_t size = array.size();
@ -147,7 +150,7 @@ public:
getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + size);
}
void insertFrom(const IColumn & src_, size_t n)
void insertFrom(const IColumn & src_, size_t n) override
{
const ColumnArray & src = static_cast<const ColumnArray &>(src_);
size_t size = src.sizeAt(n);
@ -159,12 +162,12 @@ public:
getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + size);
}
void insertDefault()
void insertDefault() override
{
getOffsets().push_back(getOffsets().size() == 0 ? 0 : getOffsets().back());
}
ColumnPtr filter(const Filter & filt) const
ColumnPtr filter(const Filter & filt) const override
{
size_t size = getOffsets().size();
if (size != filt.size())
@ -203,7 +206,7 @@ public:
return res;
}
ColumnPtr permute(const Permutation & perm, size_t limit) const
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
size_t size = getOffsets().size();
@ -241,7 +244,7 @@ public:
return res;
}
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const final
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
{
const ColumnArray & rhs = static_cast<const ColumnArray &>(rhs_);
@ -276,7 +279,7 @@ public:
}
};
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
size_t s = size();
if (limit >= s)
@ -302,18 +305,18 @@ public:
}
}
void reserve(size_t n)
void reserve(size_t n) override
{
getOffsets().reserve(n);
getData().reserve(n); /// The average size of the arrays is not taken into account here in any way. Or rather, it is assumed to be no more than one.
}
size_t byteSize() const
size_t byteSize() const override
{
return data->byteSize() + getOffsets().size() * sizeof(getOffsets()[0]);
}
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
min = Array();
max = Array();
@ -350,7 +353,7 @@ public:
const ColumnPtr & getOffsetsColumn() const { return offsets; }
ColumnPtr replicate(const Offsets_t & replicate_offsets) const
ColumnPtr replicate(const Offsets_t & replicate_offsets) const override
{
/// Cannot be implemented in the general case.

View File

@ -19,7 +19,7 @@ using Poco::SharedPtr;
class IColumnConst : public IColumn
{
public:
bool isConst() const { return true; }
bool isConst() const override { return true; }
virtual ColumnPtr convertToFullColumn() const = 0;
};
@ -37,21 +37,21 @@ public:
/// For ColumnConst<String>, data_type_ must be non-null if the data type is FixedString.
ColumnConst(size_t s_, const T & data_, DataTypePtr data_type_ = DataTypePtr()) : s(s_), data(data_), data_type(data_type_) {}
std::string getName() const { return "ColumnConst<" + TypeName<T>::get() + ">"; }
bool isNumeric() const { return IsNumber<T>::value; }
bool isFixed() const { return IsNumber<T>::value; }
size_t sizeOfField() const { return sizeof(T); }
ColumnPtr cloneResized(size_t s_) const { return new ColumnConst(s_, data); }
size_t size() const { return s; }
Field operator[](size_t n) const { return FieldType(data); }
void get(size_t n, Field & res) const { res = FieldType(data); }
std::string getName() const override { return "ColumnConst<" + TypeName<T>::get() + ">"; }
bool isNumeric() const override { return IsNumber<T>::value; }
bool isFixed() const override { return IsNumber<T>::value; }
size_t sizeOfField() const override { return sizeof(T); }
ColumnPtr cloneResized(size_t s_) const override { return new ColumnConst(s_, data); }
size_t size() const override { return s; }
Field operator[](size_t n) const override { return FieldType(data); }
void get(size_t n, Field & res) const override { res = FieldType(data); }
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
return new ColumnConst<T>(length, data, data_type);
}
void insert(const Field & x)
void insert(const Field & x) override
{
if (x.get<FieldType>() != FieldType(data))
throw Exception("Cannot insert different element into constant column " + getName(),
@ -59,12 +59,12 @@ public:
++s;
}
void insertData(const char * pos, size_t length)
void insertData(const char * pos, size_t length) override
{
throw Exception("Cannot insert element into constant column " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void insertFrom(const IColumn & src, size_t n)
void insertFrom(const IColumn & src, size_t n) override
{
if (data != static_cast<const ColumnConst<T> &>(src).data)
throw Exception("Cannot insert different element into constant column " + getName(),
@ -72,9 +72,9 @@ public:
++s;
}
void insertDefault() { ++s; }
void insertDefault() override { ++s; }
ColumnPtr filter(const Filter & filt) const
ColumnPtr filter(const Filter & filt) const override
{
if (s != filt.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
@ -82,7 +82,7 @@ public:
return new ColumnConst<T>(countBytesInFilter(filt), data, data_type);
}
ColumnPtr replicate(const Offsets_t & offsets) const
ColumnPtr replicate(const Offsets_t & offsets) const override
{
if (s != offsets.size())
throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
@ -91,9 +91,9 @@ public:
return new ColumnConst<T>(replicated_size, data, data_type);
}
size_t byteSize() const { return sizeof(data) + sizeof(s); }
size_t byteSize() const override { return sizeof(data) + sizeof(s); }
ColumnPtr permute(const Permutation & perm, size_t limit) const
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
if (limit == 0)
limit = s;
@ -106,7 +106,7 @@ public:
return new ColumnConst<T>(limit, data, data_type);
}
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
{
const ColumnConst<T> & rhs = static_cast<const ColumnConst<T> &>(rhs_);
return data < rhs.data /// TODO: proper comparison of NaNs in constant columns.
@ -116,30 +116,33 @@ public:
: 1);
}
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
res.resize(s);
for (size_t i = 0; i < s; ++i)
res[i] = i;
}
StringRef getDataAt(size_t n) const;
StringRef getDataAtWithTerminatingZero(size_t n) const;
UInt64 get64(size_t n) const;
StringRef getDataAt(size_t n) const override;
StringRef getDataAtWithTerminatingZero(size_t n) const override;
UInt64 get64(size_t n) const override;
/** More efficient methods of manipulation */
T & getData() { return data; }
const T & getData() const { return data; }
/** Conversion from a constant to a full column */
ColumnPtr convertToFullColumn() const;
ColumnPtr convertToFullColumn() const override;
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
min = FieldType(data);
max = FieldType(data);
}
DataTypePtr & getDataType() { return data_type; }
const DataTypePtr & getDataType() const { return data_type; }
private:
size_t s;
T data;

View File

@ -19,8 +19,8 @@ public:
ColumnExpression(size_t s_, ExpressionActionsPtr expression_, const NamesAndTypesList & arguments_, DataTypePtr return_type_, std::string return_name_)
: IColumnDummy(s_), expression(expression_), arguments(arguments_.begin(), arguments_.end()), return_type(return_type_), return_name(return_name_) {}
std::string getName() const { return "ColumnExpression"; }
ColumnPtr cloneDummy(size_t s_) const { return new ColumnExpression(s_, expression, arguments, return_type, return_name); }
std::string getName() const override { return "ColumnExpression"; }
ColumnPtr cloneDummy(size_t s_) const override { return new ColumnExpression(s_, expression, arguments, return_type, return_name); }
const ExpressionActionsPtr & getExpression() const { return expression; }
const DataTypePtr & getReturnType() const { return return_type; }

View File

@ -27,49 +27,49 @@ public:
/** Create an empty column of fixed-length strings, each of length n */
ColumnFixedString(size_t n_) : n(n_) {}
std::string getName() const { return "ColumnFixedString"; }
std::string getName() const override { return "ColumnFixedString"; }
ColumnPtr cloneEmpty() const
ColumnPtr cloneEmpty() const override
{
return new ColumnFixedString(n);
}
size_t size() const
size_t size() const override
{
return chars.size() / n;
}
size_t sizeOfField() const
size_t sizeOfField() const override
{
return n;
}
bool isFixed() const
bool isFixed() const override
{
return true;
}
size_t byteSize() const
size_t byteSize() const override
{
return chars.size() + sizeof(n);
}
Field operator[](size_t index) const
Field operator[](size_t index) const override
{
return String(reinterpret_cast<const char *>(&chars[n * index]), n);
}
void get(size_t index, Field & res) const
void get(size_t index, Field & res) const override
{
res.assignString(reinterpret_cast<const char *>(&chars[n * index]), n);
}
StringRef getDataAt(size_t index) const
StringRef getDataAt(size_t index) const override
{
return StringRef(&chars[n * index], n);
}
void insert(const Field & x)
void insert(const Field & x) override
{
const String & s = DB::get<const String &>(x);
@ -81,7 +81,7 @@ public:
memcpy(&chars[old_size], s.data(), s.size());
}
void insertFrom(const IColumn & src_, size_t index)
void insertFrom(const IColumn & src_, size_t index) override
{
const ColumnFixedString & src = static_cast<const ColumnFixedString &>(src_);
@ -93,7 +93,7 @@ public:
memcpy(&chars[old_size], &src.chars[n * index], n);
}
void insertData(const char * pos, size_t length)
void insertData(const char * pos, size_t length) override
{
if (length > n)
throw Exception("Too large string for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE);
@ -103,12 +103,12 @@ public:
memcpy(&chars[old_size], pos, length);
}
void insertDefault()
void insertDefault() override
{
chars.resize_fill(chars.size() + n);
}
int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int nan_direction_hint) const
int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int nan_direction_hint) const override
{
const ColumnFixedString & rhs = static_cast<const ColumnFixedString &>(rhs_);
return memcmp(&chars[p1 * n], &rhs.chars[p2 * n], n);
@ -127,7 +127,7 @@ public:
}
};
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
size_t s = size();
res.resize(s);
@ -153,7 +153,7 @@ public:
}
}
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
ColumnFixedString * res_ = new ColumnFixedString(n);
ColumnPtr res = res_;
@ -162,7 +162,7 @@ public:
return res;
}
ColumnPtr filter(const IColumn::Filter & filt) const
ColumnPtr filter(const IColumn::Filter & filt) const override
{
size_t col_size = size();
if (col_size != filt.size())
@ -185,7 +185,7 @@ public:
return res;
}
ColumnPtr permute(const Permutation & perm, size_t limit) const
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
size_t col_size = size();
@ -214,7 +214,7 @@ public:
return res;
}
ColumnPtr replicate(const Offsets_t & offsets) const
ColumnPtr replicate(const Offsets_t & offsets) const override
{
size_t col_size = size();
if (col_size != offsets.size())
@ -243,7 +243,7 @@ public:
return res;
}
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
min = String();
max = String();

View File

@ -47,7 +47,7 @@ public:
}
}
std::string getName() const
std::string getName() const override
{
std::string res;
{
@ -63,7 +63,7 @@ public:
return "ColumnNested(" + res + ")";
}
ColumnPtr cloneEmpty() const
ColumnPtr cloneEmpty() const override
{
Columns res(data.size());
for (size_t i = 0; i < data.size(); ++i)
@ -71,32 +71,32 @@ public:
return new ColumnNested(res);
}
size_t size() const
size_t size() const override
{
return getOffsets().size();
}
Field operator[](size_t n) const
Field operator[](size_t n) const override
{
throw Exception("Method operator[] is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void get(size_t n, Field & res) const
void get(size_t n, Field & res) const override
{
throw Exception("Method get is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
StringRef getDataAt(size_t n) const
StringRef getDataAt(size_t n) const override
{
throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void insertData(const char * pos, size_t length)
void insertData(const char * pos, size_t length) override
{
throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
if (length == 0)
return new ColumnNested(data);
@ -131,12 +131,12 @@ public:
return res;
}
void insert(const Field & x)
void insert(const Field & x) override
{
throw Exception("Method insert is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void insertFrom(const IColumn & src_, size_t n)
void insertFrom(const IColumn & src_, size_t n) override
{
const ColumnNested & src = static_cast<const ColumnNested &>(src_);
@ -158,14 +158,14 @@ public:
getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + size);
}
void insertDefault()
void insertDefault() override
{
for (size_t i = 0; i < data.size(); ++i)
data[i]->insertDefault();
getOffsets().push_back(getOffsets().size() == 0 ? 1 : (getOffsets().back() + 1));
}
ColumnPtr filter(const Filter & filt) const
ColumnPtr filter(const Filter & filt) const override
{
size_t size = getOffsets().size();
if (size != filt.size())
@ -205,12 +205,12 @@ public:
return res;
}
ColumnPtr replicate(const Offsets_t & offsets) const
ColumnPtr replicate(const Offsets_t & offsets) const override
{
throw Exception("Replication of ColumnNested is not implemented.", ErrorCodes::NOT_IMPLEMENTED);
}
ColumnPtr permute(const Permutation & perm, size_t limit) const
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
size_t size = getOffsets().size();
if (size != perm.size())
@ -255,24 +255,24 @@ public:
return res;
}
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
{
throw Exception("Method compareAt is not supported for ColumnNested.", ErrorCodes::NOT_IMPLEMENTED);
}
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
throw Exception("Method getPermutation is not supported for ColumnNested.", ErrorCodes::NOT_IMPLEMENTED);
}
void reserve(size_t n)
void reserve(size_t n) override
{
getOffsets().reserve(n);
for (Columns::iterator it = data.begin(); it != data.end(); ++it)
(*it)->reserve(n);
}
size_t byteSize() const
size_t byteSize() const override
{
size_t size = getOffsets().size() * sizeof(getOffsets()[0]);
for (Columns::const_iterator it = data.begin(); it != data.end(); ++it)
@ -280,7 +280,7 @@ public:
return size;
}
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
throw Exception("Method getExtremes is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
@ -289,9 +289,6 @@ public:
Columns & getData() { return data; }
const Columns & getData() const { return data; }
// ColumnPtr & getDataPtr() { return data; }
// const ColumnPtr & getDataPtr() const { return data; }
Offsets_t & ALWAYS_INLINE getOffsets()
{
return static_cast<ColumnOffsets_t &>(*offsets.get()).getData();

View File

@ -14,8 +14,8 @@ class ColumnReplicated final : public IColumnDummy
{
public:
ColumnReplicated(size_t s_, ColumnPtr nested_) : IColumnDummy(s_), nested(nested_) {}
std::string getName() const { return "ColumnReplicated"; }
ColumnPtr cloneDummy(size_t s_) const { return new ColumnReplicated(s_, nested); }
std::string getName() const override { return "ColumnReplicated"; }
ColumnPtr cloneDummy(size_t s_) const override { return new ColumnReplicated(s_, nested); }
ColumnPtr & getData() { return nested; }
private:

View File

@ -17,10 +17,10 @@ public:
ColumnSet(size_t s_, SetPtr data_) : IColumnDummy(s_), data(data_) {}
/// The column is not constant. Otherwise, the column would be used in calculations in ExpressionActions::prepare when the set from the subquery is not ready yet.
bool isConst() const { return false; }
bool isConst() const override { return false; }
std::string getName() const { return "ColumnSet"; }
ColumnPtr cloneDummy(size_t s_) const { return new ColumnSet(s_, data); }
std::string getName() const override { return "ColumnSet"; }
ColumnPtr cloneDummy(size_t s_) const override { return new ColumnSet(s_, data); }
SetPtr & getData() { return data; }
const SetPtr & getData() const { return data; }

View File

@ -36,44 +36,44 @@ public:
/** Create an empty column of strings */
ColumnString() {}
std::string getName() const { return "ColumnString"; }
std::string getName() const override { return "ColumnString"; }
size_t size() const
size_t size() const override
{
return offsets.size();
}
size_t byteSize() const
size_t byteSize() const override
{
return chars.size() + offsets.size() * sizeof(offsets[0]);
}
ColumnPtr cloneEmpty() const
ColumnPtr cloneEmpty() const override
{
return new ColumnString;
}
Field operator[](size_t n) const
Field operator[](size_t n) const override
{
return Field(&chars[offsetAt(n)], sizeAt(n) - 1);
}
void get(size_t n, Field & res) const
void get(size_t n, Field & res) const override
{
res.assignString(&chars[offsetAt(n)], sizeAt(n) - 1);
}
StringRef getDataAt(size_t n) const
StringRef getDataAt(size_t n) const override
{
return StringRef(&chars[offsetAt(n)], sizeAt(n) - 1);
}
StringRef getDataAtWithTerminatingZero(size_t n) const
StringRef getDataAtWithTerminatingZero(size_t n) const override
{
return StringRef(&chars[offsetAt(n)], sizeAt(n));
}
void insert(const Field & x)
void insert(const Field & x) override
{
const String & s = DB::get<const String &>(x);
size_t old_size = chars.size();
@ -84,7 +84,7 @@ public:
offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + size_to_append);
}
void insertFrom(const IColumn & src_, size_t n)
void insertFrom(const IColumn & src_, size_t n) override
{
const ColumnString & src = static_cast<const ColumnString &>(src_);
size_t old_size = chars.size();
@ -96,7 +96,7 @@ public:
offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + size_to_append);
}
void insertData(const char * pos, size_t length)
void insertData(const char * pos, size_t length) override
{
size_t old_size = chars.size();
@ -106,7 +106,7 @@ public:
offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + length + 1);
}
void insertDataWithTerminatingZero(const char * pos, size_t length)
void insertDataWithTerminatingZero(const char * pos, size_t length) override
{
size_t old_size = chars.size();
@ -115,7 +115,7 @@ public:
offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + length);
}
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
if (length == 0)
return new ColumnString;
@ -150,7 +150,7 @@ public:
return res;
}
ColumnPtr filter(const Filter & filt) const
ColumnPtr filter(const Filter & filt) const override
{
const size_t size = offsets.size();
if (size != filt.size())
@ -257,7 +257,7 @@ public:
return res_;
}
ColumnPtr permute(const Permutation & perm, size_t limit) const
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
size_t size = offsets.size();
@ -300,13 +300,13 @@ public:
return res;
}
void insertDefault()
void insertDefault() override
{
chars.push_back(0);
offsets.push_back(offsets.size() == 0 ? 1 : (offsets.back() + 1));
}
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
{
const ColumnString & rhs = static_cast<const ColumnString &>(rhs_);
@ -344,7 +344,7 @@ public:
}
};
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
size_t s = offsets.size();
res.resize(s);
@ -415,7 +415,7 @@ public:
}
}
ColumnPtr replicate(const Offsets_t & replicate_offsets) const
ColumnPtr replicate(const Offsets_t & replicate_offsets) const override
{
size_t col_size = size();
if (col_size != replicate_offsets.size())
@ -457,13 +457,13 @@ public:
return res;
}
void reserve(size_t n)
void reserve(size_t n) override
{
offsets.reserve(n);
chars.reserve(n * DBMS_APPROX_STRING_SIZE);
}
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
min = String();
max = String();

View File

@ -26,21 +26,19 @@ public:
columns[i] = data.getByPosition(i).column;
}
std::string getName() const { return "Tuple"; }
std::string getName() const override { return "Tuple"; }
SharedPtr<IColumn> cloneEmpty() const
SharedPtr<IColumn> cloneEmpty() const override
{
return new ColumnTuple(data.cloneEmpty());
}
size_t size() const
size_t size() const override
{
return data.rows();
}
bool empty() const { return size() == 0; }
Field operator[](size_t n) const
Field operator[](size_t n) const override
{
Array res;
@ -50,7 +48,7 @@ public:
return res;
}
void get(size_t n, Field & res) const
void get(size_t n, Field & res) const override
{
size_t size = columns.size();
res = Array(size);
@ -59,17 +57,17 @@ public:
columns[i]->get(n, res_arr[i]);
}
StringRef getDataAt(size_t n) const
StringRef getDataAt(size_t n) const override
{
throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void insertData(const char * pos, size_t length)
void insertData(const char * pos, size_t length) override
{
throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void insert(const Field & x)
void insert(const Field & x) override
{
const Array & arr = DB::get<const Array &>(x);
@ -81,7 +79,7 @@ public:
columns[i]->insert(arr[i]);
}
void insertFrom(const IColumn & src_, size_t n)
void insertFrom(const IColumn & src_, size_t n) override
{
const ColumnTuple & src = static_cast<const ColumnTuple &>(src_);
@ -93,14 +91,14 @@ public:
columns[i]->insertFrom(*src.columns[i], n);
}
void insertDefault()
void insertDefault() override
{
for (Columns::iterator it = columns.begin(); it != columns.end(); ++it)
(*it)->insertDefault();
}
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
Block res_block = data.cloneEmpty();
@ -110,7 +108,7 @@ public:
return new ColumnTuple(res_block);
}
ColumnPtr filter(const Filter & filt) const
ColumnPtr filter(const Filter & filt) const override
{
Block res_block = data.cloneEmpty();
@ -120,7 +118,7 @@ public:
return new ColumnTuple(res_block);
}
ColumnPtr permute(const Permutation & perm, size_t limit) const
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
Block res_block = data.cloneEmpty();
@ -130,7 +128,7 @@ public:
return new ColumnTuple(res_block);
}
ColumnPtr replicate(const Offsets_t & offsets) const
ColumnPtr replicate(const Offsets_t & offsets) const override
{
Block res_block = data.cloneEmpty();
@ -140,7 +138,7 @@ public:
return new ColumnTuple(res_block);
}
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override
{
size_t size = columns.size();
for (size_t i = 0; i < size; ++i)
@ -175,7 +173,7 @@ public:
}
};
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
size_t rows = size();
res.resize(rows);
@ -201,13 +199,13 @@ public:
}
}
void reserve(size_t n)
void reserve(size_t n) override
{
for (Columns::iterator it = columns.begin(); it != columns.end(); ++it)
(*it)->reserve(n);
}
size_t byteSize() const
size_t byteSize() const override
{
size_t res = 0;
for (Columns::const_iterator it = columns.begin(); it != columns.end(); ++it)
@ -215,7 +213,7 @@ public:
return res;
}
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
throw Exception("Method getExtremes is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

View File

@ -77,7 +77,7 @@ template <> struct CompareHelper<Float64> : public FloatCompareHelper<Float64> {
/** A template for columns that use a plain array for storage.
*/
template <typename T>
class ColumnVector : public IColumn
class ColumnVector final : public IColumn
{
private:
typedef ColumnVector<T> Self;
@ -86,44 +86,45 @@ public:
typedef PODArray<value_type> Container_t;
ColumnVector() {}
ColumnVector(size_t n) : data(n) {}
ColumnVector(const size_t n) : data{n} {}
ColumnVector(const size_t n, const value_type x) : data{n, x} {}
bool isNumeric() const { return IsNumber<T>::value; }
bool isFixed() const { return IsNumber<T>::value; }
bool isNumeric() const override { return IsNumber<T>::value; }
bool isFixed() const override { return IsNumber<T>::value; }
size_t sizeOfField() const { return sizeof(T); }
size_t sizeOfField() const override { return sizeof(T); }
size_t size() const
size_t size() const override
{
return data.size();
}
StringRef getDataAt(size_t n) const
StringRef getDataAt(size_t n) const override
{
return StringRef(reinterpret_cast<const char *>(&data[n]), sizeof(data[n]));
}
void insertFrom(const IColumn & src, size_t n)
void insertFrom(const IColumn & src, size_t n) override
{
data.push_back(static_cast<const Self &>(src).getData()[n]);
}
void insertData(const char * pos, size_t length)
void insertData(const char * pos, size_t length) override
{
data.push_back(*reinterpret_cast<const T *>(pos));
}
void insertDefault()
void insertDefault() override
{
data.push_back(T());
}
size_t byteSize() const
size_t byteSize() const override
{
return data.size() * sizeof(data[0]);
}
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
{
return CompareHelper<T>::compare(data[n], static_cast<const Self &>(rhs_).data[m], nan_direction_hint);
}
@ -142,7 +143,7 @@ public:
bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::greater(parent.data[lhs], parent.data[rhs]); }
};
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
size_t s = data.size();
res.resize(s);
@ -168,36 +169,36 @@ public:
}
}
void reserve(size_t n)
void reserve(size_t n) override
{
data.reserve(n);
}
std::string getName() const { return "ColumnVector<" + TypeName<T>::get() + ">"; }
std::string getName() const override { return "ColumnVector<" + TypeName<T>::get() + ">"; }
ColumnPtr cloneEmpty() const
ColumnPtr cloneEmpty() const override
{
return new ColumnVector<T>;
}
Field operator[](size_t n) const
Field operator[](size_t n) const override
{
return typename NearestFieldType<T>::Type(data[n]);
}
void get(size_t n, Field & res) const
void get(size_t n, Field & res) const override
{
res = typename NearestFieldType<T>::Type(data[n]);
}
UInt64 get64(size_t n) const;
UInt64 get64(size_t n) const override;
void insert(const Field & x)
void insert(const Field & x) override
{
data.push_back(DB::get<typename NearestFieldType<T>::Type>(x));
}
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
if (start + length > data.size())
throw Exception("Parameters start = "
@ -211,7 +212,7 @@ public:
return res;
}
ColumnPtr filter(const IColumn::Filter & filt) const
ColumnPtr filter(const IColumn::Filter & filt) const override
{
size_t size = data.size();
if (size != filt.size())
@ -269,7 +270,7 @@ public:
return res;
}
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override
{
size_t size = data.size();
@ -290,7 +291,7 @@ public:
return res;
}
ColumnPtr replicate(const IColumn::Offsets_t & offsets) const
ColumnPtr replicate(const IColumn::Offsets_t & offsets) const override
{
size_t size = data.size();
if (size != offsets.size())
@ -317,7 +318,7 @@ public:
return res;
}
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
size_t size = data.size();

View File

@ -17,30 +17,30 @@ public:
virtual ColumnPtr cloneDummy(size_t s_) const = 0;
ColumnPtr cloneResized(size_t s_) const { return cloneDummy(s_); }
bool isConst() const { return true; }
size_t size() const { return s; }
void insertDefault() { ++s; }
size_t byteSize() const { return 0; }
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const { return 0; }
ColumnPtr cloneResized(size_t s_) const override { return cloneDummy(s_); }
bool isConst() const override { return true; }
size_t size() const override { return s; }
void insertDefault() override { ++s; }
size_t byteSize() const override { return 0; }
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { return 0; }
Field operator[](size_t n) const { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
void get(size_t n, Field & res) const { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); };
void insert(const Field & x) { throw Exception("Cannot insert element into " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
StringRef getDataAt(size_t n) const { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
void insertData(const char * pos, size_t length) { throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
Field operator[](size_t n) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
void get(size_t n, Field & res) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); };
void insert(const Field & x) override { throw Exception("Cannot insert element into " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
StringRef getDataAt(size_t n) const override { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
void insertData(const char * pos, size_t length) override { throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
void getExtremes(Field & min, Field & max) const
void getExtremes(Field & min, Field & max) const override
{
throw Exception("Method getExtremes is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
ColumnPtr cut(size_t start, size_t length) const
ColumnPtr cut(size_t start, size_t length) const override
{
return cloneDummy(length);
}
ColumnPtr filter(const Filter & filt) const
ColumnPtr filter(const Filter & filt) const override
{
size_t new_size = 0;
for (Filter::const_iterator it = filt.begin(); it != filt.end(); ++it)
@ -50,7 +50,7 @@ public:
return cloneDummy(new_size);
}
ColumnPtr permute(const Permutation & perm, size_t limit) const
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
if (s != perm.size())
throw Exception("Size of permutation doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
@ -58,14 +58,14 @@ public:
return cloneDummy(limit ? std::min(s, limit) : s);
}
void getPermutation(bool reverse, size_t limit, Permutation & res) const
void getPermutation(bool reverse, size_t limit, Permutation & res) const override
{
res.resize(s);
for (size_t i = 0; i < s; ++i)
res[i] = i;
}
ColumnPtr replicate(const Offsets_t & offsets) const
ColumnPtr replicate(const Offsets_t & offsets) const override
{
if (s != offsets.size())
throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

View File

@ -455,7 +455,7 @@ public:
const_iterator begin() const
{
if (this->hasZero())
return const_iterator(this, this->zeroValue());
return iteratorToZero();
const Cell * ptr = buf;
while (ptr < buf + grower.bufSize() && ptr->isZero(*this))
@ -467,7 +467,7 @@ public:
iterator begin()
{
if (this->hasZero())
return iterator(this, this->zeroValue());
return iteratorToZero();
Cell * ptr = buf;
while (ptr < buf + grower.bufSize() && ptr->isZero(*this))
@ -481,6 +481,10 @@ public:
protected:
const_iterator iteratorToZero() const { return const_iterator(this, this->zeroValue()); }
iterator iteratorToZero() { return iterator(this, this->zeroValue()); }
/// If the key is zero, insert it into a special place and return true.
bool emplaceIfZero(Key x, iterator & it, bool & inserted)
{
@ -490,17 +494,17 @@ protected:
if (Cell::isZero(x, *this))
{
it = iteratorToZero();
if (!this->hasZero())
{
++m_size;
this->setHasZero();
it.ptr->setHash(hash(x));
inserted = true;
}
else
inserted = false;
it = begin();
it.ptr->setHash(hash(x));
return true;
}
@ -583,7 +587,7 @@ public:
iterator find(Key x)
{
if (Cell::isZero(x, *this))
return this->hasZero() ? begin() : end();
return this->hasZero() ? iteratorToZero() : end();
size_t place_value = findCell(x, grower.place(hash(x)));
@ -594,7 +598,7 @@ public:
const_iterator find(Key x) const
{
if (Cell::isZero(x, *this))
return this->hasZero() ? begin() : end();
return this->hasZero() ? iteratorToZero() : end();
size_t place_value = findCell(x, grower.place(hash(x)));

View File

@ -158,7 +158,14 @@ public:
#undef SIPROUND
inline uint64_t sipHash64(const char * data, size_t size)
inline void sipHash128(const char * data, const size_t size, char * out)
{
SipHash hash;
hash.update(data, size);
hash.get128(out);
}
inline uint64_t sipHash64(const char * data, const size_t size)
{
SipHash hash;
hash.update(data, size);
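The hunk adds a 128-bit convenience wrapper alongside the existing 64-bit one; both feed the whole buffer into a SipHash instance and extract the digest. A hedged usage sketch, relying only on the two helpers shown above:

char digest[16];
sipHash128("example", 7, digest);             // fills 16 bytes of output
const uint64_t h = sipHash64("example", 7);   // 64-bit variant over the same input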

View File

@ -0,0 +1,62 @@
#pragma once
#include <cstring>
#include <cmath>
#include <string>
#define UNICODE_BAR_CHAR_SIZE (strlen("█"))
/** Allows drawing a unicode-art bar whose width is displayed with a resolution of 1/8 of a character.
*/
namespace UnicodeBar
{
template <typename T>
double getWidth(T x, int64_t min, int64_t max, double max_width)
{
if (x <= min)
return 0;
if (x >= max)
return max_width;
return (x - min) * max_width / (max - min);
}
inline size_t getWidthInBytes(double width)
{
return ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE;
}
/// dst must have room for getWidthInBytes(width) bytes and a terminating zero.
inline void render(double width, char * dst)
{
size_t floor_width = floor(width);
for (size_t i = 0; i < floor_width; ++i)
{
memcpy(dst, "", UNICODE_BAR_CHAR_SIZE);
dst += UNICODE_BAR_CHAR_SIZE;
}
size_t remainder = floor((width - floor_width) * 8);
if (remainder)
{
memcpy(dst, &"▏▎▍▌▋▋▊▉"[(remainder - 1) * UNICODE_BAR_CHAR_SIZE], UNICODE_BAR_CHAR_SIZE);
dst += UNICODE_BAR_CHAR_SIZE;
}
*dst = 0;
}
inline std::string render(double width)
{
std::string res(getWidthInBytes(width), '\0');
render(width, &res[0]);
return res;
}
}
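A hedged usage sketch of the new header: getWidth maps a value onto a width in character cells, and render draws it, emitting a partial-block character for the fractional eighths.

// The value 37 on a 0..100 scale, at most 10 characters wide:
double width = UnicodeBar::getWidth(37, 0, 100, 10);  // 3.7 cells
std::string bar = UnicodeBar::render(width);          // three full blocks plus a 5/8 block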

View File

@ -0,0 +1,13 @@
#pragma once
#include <string>
/// Prints the given size in bytes in the form 123.45 GiB.
std::string formatReadableSizeWithBinarySuffix(double value, int precision = 2);
/// Prints the given size in bytes in the form 132.55 GB.
std::string formatReadableSizeWithDecimalSuffix(double value, int precision = 2);
/// Prints a number in the form 123.45 billion.
std::string formatReadableQuantity(double value, int precision = 2);
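A hedged sketch of the three declarations (declared here, defined elsewhere):

formatReadableSizeWithBinarySuffix(132553500000.0);   // "123.45 GiB" - powers of 1024
formatReadableSizeWithDecimalSuffix(132550000000.0);  // "132.55 GB" - powers of 1000
formatReadableQuantity(123450000000.0);               // "123.45 billion"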

View File

@ -64,7 +64,9 @@
#define DBMS_MIN_REVISION_WITH_TOTALS_EXTREMES 35265
#define DBMS_MIN_REVISION_WITH_STRING_QUERY_ID 39002
#define DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES 50264
#define DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS 51554
#define DBMS_DISTRIBUTED_DIRECTORY_MONITOR_SLEEP_TIME_MS 100
#define ALWAYS_INLINE __attribute__((__always_inline__))
#define NO_INLINE __attribute__((__noinline__))

View File

@ -1,5 +1,6 @@
#pragma once
#include <DB/Core/Defines.h>
#include <DB/IO/ReadBuffer.h>
#include <DB/IO/WriteBuffer.h>
#include <DB/IO/ReadHelpers.h>
@ -10,25 +11,72 @@ namespace DB
{
/// Query execution progress
/** Query execution progress.
* The values transmitted over the network are deltas - how much was done since the previous value that was sent.
* The same object is also used to sum up the received values.
*/
struct Progress
{
size_t rows; /// Rows processed.
size_t bytes; /// Bytes processed.
size_t rows = 0; /// Rows processed.
size_t bytes = 0; /// Bytes processed.
Progress() : rows(0), bytes(0) {}
Progress(size_t rows_, size_t bytes_) : rows(rows_), bytes(bytes_) {}
/** Approximately how many more rows need to be processed. A non-zero value is sent whenever information about some new part of the work arrives.
* The received values should be summed to obtain an estimate of the total number of rows to process.
* Used to display a progress bar on the client.
*/
size_t total_rows = 0;
void read(ReadBuffer & in)
Progress() {}
Progress(size_t rows_, size_t bytes_, size_t total_rows_ = 0)
: rows(rows_), bytes(bytes_), total_rows(total_rows_) {}
void read(ReadBuffer & in, UInt64 server_revision)
{
readVarUInt(rows, in);
readVarUInt(bytes, in);
if (server_revision >= DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS)
readVarUInt(total_rows, in);
}
void write(WriteBuffer & out)
void write(WriteBuffer & out, UInt64 client_revision) const
{
writeVarUInt(rows, out);
writeVarUInt(bytes, out);
if (client_revision >= DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS)
writeVarUInt(total_rows, out);
}
void increment(const Progress & rhs)
{
rows += rhs.rows;
bytes += rhs.bytes;
total_rows += rhs.total_rows;
}
/// Each individual value is modified atomically.
void incrementPiecewiseAtomically(const Progress & rhs)
{
__sync_add_and_fetch(&rows, rhs.rows);
__sync_add_and_fetch(&bytes, rhs.bytes);
__sync_add_and_fetch(&total_rows, rhs.total_rows);
}
void reset()
{
*this = Progress();
}
Progress fetchAndResetPiecewiseAtomically()
{
Progress res;
res.rows = __sync_fetch_and_and(&rows, 0);
res.bytes = __sync_fetch_and_and(&bytes, 0);
res.total_rows = __sync_fetch_and_and(&total_rows, 0);
return res;
}
};
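Since the values sent over the wire are increments, a receiver accumulates them with increment(), and the __sync_* builtins let one thread publish progress while another consumes it. A hedged sketch of the accumulation contract, using only the struct shown above:

Progress total;
total.increment(Progress(1000, 65536));         // first packet: 1000 rows, 64 KiB
total.increment(Progress(500, 32768, 200000));  // second packet also announces new work
// Now total.rows == 1500, total.bytes == 98304, total.total_rows == 200000.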

View File

@ -5,6 +5,7 @@
#include <Poco/SharedPtr.h>
#include <DB/Core/Block.h>
#include <DB/Core/Progress.h>
#include <DB/Storages/IStorage.h>
@ -18,7 +19,7 @@ using Poco::SharedPtr;
* The function receives the number of rows in the last block and the number of bytes in the last block.
* Keep in mind that the callback may be invoked from different threads.
*/
typedef std::function<void(size_t, size_t)> ProgressCallback;
typedef std::function<void(const Progress & progress)> ProgressCallback;
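A hedged sketch of the new callback shape - one aggregated Progress value per notification instead of two size_t arguments:

ProgressCallback callback = [](const Progress & progress)
{
    // progress.rows and progress.bytes are deltas since the previous call.
};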
/** An interface for a stream that reads data from the database block by block.

View File

@ -5,6 +5,7 @@
#include <statdaemons/Stopwatch.h>
#include <DB/Core/Names.h>
#include <DB/Core/Progress.h>
#include <DB/Interpreters/Limits.h>
#include <DB/Interpreters/Quota.h>
@ -109,8 +110,8 @@ public:
* - limits and quotas are checked; namely, those that must be checked not within a single source,
* but against the total amount of resources spent across all sources at once (the information in the ProcessList).
*/
virtual void progress(size_t rows, size_t bytes) { progressImpl(rows, bytes); }
void progressImpl(size_t rows, size_t bytes);
virtual void progress(const Progress & value) { progressImpl(value); }
void progressImpl(const Progress & value);
/** Установить указатель на элемент списка процессов.

View File

@ -66,7 +66,7 @@ public:
/** We cancel the default progress notification,
* since the progress callback is invoked on its own.
*/
void progress(size_t rows, size_t bytes) {}
void progress(const Progress & value) override {}
void cancel()
@ -156,7 +156,7 @@ protected:
* limits (for example, the minimum query execution speed)
* and quotas (for example, on the number of rows to read).
*/
progressImpl(packet.progress.rows, packet.progress.bytes);
progressImpl(packet.progress);
if (!was_cancelled && !finished && isCancelled())
cancel();

View File

@ -16,6 +16,8 @@ using Poco::SharedPtr;
class DataTypeString : public IDataType
{
public:
using FieldType = String;
std::string getName() const
{
return "String";

View File

@ -2,6 +2,10 @@
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/DataTypes/DataTypeDate.h>
#include <DB/DataTypes/DataTypeDateTime.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/Columns/ColumnArray.h>
#include <DB/Columns/ColumnString.h>
@ -1190,6 +1194,43 @@ private:
};
template <typename Type> struct TypeToColumnType { using ColumnType = ColumnVector<Type>; };
template <> struct TypeToColumnType<String> { using ColumnType = ColumnString; };
template <typename DataType> struct DataTypeToName : TypeName<typename DataType::FieldType> { };
template <> struct DataTypeToName<DataTypeDate> { static std::string get() { return "Date"; } };
template <> struct DataTypeToName<DataTypeDateTime> { static std::string get() { return "DateTime"; } };
template <typename DataType>
struct EmptyArray : public IFunction
{
String getName() const
{
return "emptyArray" + DataTypeToName<DataType>::get();
}
DataTypePtr getReturnType(const DataTypes & arguments) const
{
if (arguments.size() != 0)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 0.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return new DataTypeArray{new DataType{}};
}
void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
using UnderlyingColumnType = typename TypeToColumnType<typename DataType::FieldType>::ColumnType;
block.getByPosition(result).column = new ColumnArray{
new UnderlyingColumnType,
new ColumnArray::ColumnOffsets_t{block.rowsInFirstColumn(), 0}
};
}
};
struct NameHas { static const char * get() { return "has"; } };
struct NameIndexOf { static const char * get() { return "indexOf"; } };
struct NameCountEqual { static const char * get() { return "countEqual"; } };
@ -1198,5 +1239,19 @@ typedef FunctionArrayIndex<IndexToOne, NameHas> FunctionHas;
typedef FunctionArrayIndex<IndexIdentity, NameIndexOf> FunctionIndexOf;
typedef FunctionArrayIndex<IndexCount, NameCountEqual> FunctionCountEqual;
using FunctionEmptyArrayUInt8 = EmptyArray<DataTypeUInt8>;
using FunctionEmptyArrayUInt16 = EmptyArray<DataTypeUInt16>;
using FunctionEmptyArrayUInt32 = EmptyArray<DataTypeUInt32>;
using FunctionEmptyArrayUInt64 = EmptyArray<DataTypeUInt64>;
using FunctionEmptyArrayInt8 = EmptyArray<DataTypeInt8>;
using FunctionEmptyArrayInt16 = EmptyArray<DataTypeInt16>;
using FunctionEmptyArrayInt32 = EmptyArray<DataTypeInt32>;
using FunctionEmptyArrayInt64 = EmptyArray<DataTypeInt64>;
using FunctionEmptyArrayFloat32 = EmptyArray<DataTypeFloat32>;
using FunctionEmptyArrayFloat64 = EmptyArray<DataTypeFloat64>;
using FunctionEmptyArrayDate = EmptyArray<DataTypeDate>;
using FunctionEmptyArrayDateTime = EmptyArray<DataTypeDateTime>;
using FunctionEmptyArrayString = EmptyArray<DataTypeString>;
}
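Each emptyArrayT function takes no arguments and returns one empty array per input row: execute allocates an empty nested column plus an offsets column of rowsInFirstColumn() zeros. A hedged usage sketch:

FunctionEmptyArrayUInt8 empty_array;
// For a block holding N rows and an output position result:
//     empty_array.execute(block, {}, result);
// block.getByPosition(result).column then holds N rows, each an empty Array(UInt8).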

View File

@ -13,6 +13,10 @@
#include <DB/Columns/ColumnConst.h>
#include <DB/Functions/IFunction.h>
#include <arpa/inet.h>
#include <statdaemons/ext/range.hpp>
#include <array>
namespace DB
{
@ -35,6 +39,426 @@ namespace DB
/// Including the terminating null character.
#define MAX_UINT_HEX_LENGTH 20
const auto ipv4_bytes_length = 4;
const auto ipv6_bytes_length = 16;
class FunctionIPv6NumToString : public IFunction
{
public:
String getName() const { return "IPv6NumToString"; }
DataTypePtr getReturnType(const DataTypes & arguments) const
{
if (arguments.size() != 1)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const auto ptr = typeid_cast<const DataTypeFixedString *>(arguments[0].get());
if (!ptr || ptr->getN() != ipv6_bytes_length)
throw Exception("Illegal type " + arguments[0]->getName() +
" of argument of function " + getName() +
", expected FixedString(" + toString(ipv6_bytes_length) + ")",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return new DataTypeString;
}
/// integer logarithm; returns ceil(log(value, base)) (the smallest integer greater than or equal to log(value, base))
static constexpr uint32_t int_log(const uint32_t value, const uint32_t base, const bool carry = false)
{
return value >= base ? 1 + int_log(value / base, base, value % base || carry) : value % base > 1 || carry;
}
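/// Worked examples: int_log(256, 10) == 3 (one byte needs at most three decimal digits)
/// and int_log(256, 16) == 2, so the print_integer buffer below,
/// sizeof(T) * int_log(256, base) bytes, always fits the longest printed value.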
/// mapping of digits up to base 16
static constexpr auto && digits = "0123456789abcdef";
/// print integer in desired base, faster than sprintf
template <uint32_t base, typename T, uint32_t buffer_size = sizeof(T) * int_log(256, base)>
static void print_integer(char *& out, T value)
{
if (value == 0)
*out++ = '0';
else
{
char buf[buffer_size];
auto ptr = buf;
while (value > 0)
{
*ptr++ = digits[value % base];
value /= base;
}
while (ptr != buf)
*out++ = *--ptr;
}
}
/// print IPv4 address as %u.%u.%u.%u
static void ipv4_format(const unsigned char * src, char *& dst)
{
constexpr auto size = sizeof(UInt32);
for (const auto i : ext::range(0, size))
{
print_integer<10, UInt8>(dst, src[i]);
if (i != size - 1)
*dst++ = '.';
}
}
/** rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
* performs significantly faster than the reference implementation due to the absence of sprintf calls,
* bounds checking, unnecessary string copying and length calculation */
static const void ipv6_format(const unsigned char * src, char *& dst)
{
struct { int base, len; } best{-1}, cur{-1};
std::array<uint16_t, ipv6_bytes_length / sizeof(uint16_t)> words{};
/** Preprocess:
* Copy the input (bytewise) array into a wordwise array.
* Find the longest run of 0x00's in src[] for :: shorthanding. */
for (const auto i : ext::range(0, ipv6_bytes_length))
words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);
for (const auto i : ext::range(0, words.size()))
{
if (words[i] == 0) {
if (cur.base == -1)
cur.base = i, cur.len = 1;
else
cur.len++;
}
else
{
if (cur.base != -1)
{
if (best.base == -1 || cur.len > best.len)
best = cur;
cur.base = -1;
}
}
}
if (cur.base != -1)
{
if (best.base == -1 || cur.len > best.len)
best = cur;
}
if (best.base != -1 && best.len < 2)
best.base = -1;
/// Format the result.
for (const int i : ext::range(0, words.size()))
{
/// Are we inside the best run of 0x00's?
if (best.base != -1 && i >= best.base && i < (best.base + best.len))
{
if (i == best.base)
*dst++ = ':';
continue;
}
/// Are we following an initial run of 0x00s or any real hex?
if (i != 0)
*dst++ = ':';
/// Is this address an encapsulated IPv4?
if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
{
ipv4_format(src + 12, dst);
break;
}
print_integer<16>(dst, words[i]);
}
/// Was it a trailing run of 0x00's?
if (best.base != -1 && (best.base + best.len) == words.size())
*dst++ = ':';
*dst++ = '\0';
}
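/// For example, the 16 bytes 0x20 0x01 0x0d 0xb8 followed by eleven zero bytes and 0x01
/// come out as "2001:db8::1"; an IPv4-mapped address such as ::ffff:1.2.3.4
/// takes the ipv4_format branch above.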
void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
{
const auto & col_name_type = block.getByPosition(arguments[0]);
const ColumnPtr & column = col_name_type.column;
if (const auto col_in = typeid_cast<const ColumnFixedString *>(column.get()))
{
if (col_in->getN() != ipv6_bytes_length)
throw Exception("Illegal type " + col_name_type.type->getName() +
" of column " + col_in->getName() +
" argument of function " + getName() +
", expected FixedString(" + toString(ipv6_bytes_length) + ")",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto size = col_in->size();
const auto & vec_in = col_in->getChars();
auto col_res = new ColumnString;
block.getByPosition(result).column = col_res;
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(size * INET6_ADDRSTRLEN);
offsets_res.resize(size);
auto begin = reinterpret_cast<char *>(&vec_res[0]);
auto pos = begin;
for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_bytes_length, ++i)
{
ipv6_format(&vec_in[offset], pos);
offsets_res[i] = pos - begin;
}
vec_res.resize(pos - begin);
}
else if (const auto col_in = typeid_cast<const ColumnConst<String> *>(column.get()))
{
const auto data_type_fixed_string = typeid_cast<const DataTypeFixedString *>(col_in->getDataType().get());
if (!data_type_fixed_string || data_type_fixed_string->getN() != ipv6_bytes_length)
throw Exception("Illegal type " + col_name_type.type->getName() +
" of column " + col_in->getName() +
" argument of function " + getName() +
", expected FixedString(" + toString(ipv6_bytes_length) + ")",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto & data_in = col_in->getData();
char buf[INET6_ADDRSTRLEN];
char * dst = buf;
ipv6_format(reinterpret_cast<const unsigned char *>(data_in.data()), dst);
block.getByPosition(result).column = new ColumnConstString{col_in->size(), buf};
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
class FunctionIPv6StringToNum : public IFunction
{
public:
String getName() const { return "IPv6StringToNum"; }
DataTypePtr getReturnType(const DataTypes & arguments) const
{
if (arguments.size() != 1)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return new DataTypeFixedString{ipv6_bytes_length};
}
static bool isDigit(char c) { return c >= '0' && c <= '9'; }
static bool ipv4_scan(const char * src, unsigned char * dst)
{
constexpr auto size = sizeof(UInt32);
char bytes[size]{};
for (const auto i : ext::range(0, size))
{
UInt32 value = 0;
size_t len = 0;
while (isDigit(*src) && len <= 3)
{
value = value * 10 + (*src - '0');
++len;
++src;
}
if (len == 0 || value > 255 || (i < size - 1 && *src != '.'))
{
memset(dst, 0, size);
return false;
}
bytes[i] = value;
++src;
}
if (src[-1] != '\0')
{
memset(dst, 0, size);
return false;
}
memcpy(dst, bytes, sizeof(bytes));
return true;
}
/// slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
static void ipv6_scan(const char * src, unsigned char * dst)
{
const auto clear_dst = [dst] {
memset(dst, '\0', ipv6_bytes_length);
};
/// Leading :: requires some special handling.
if (*src == ':')
if (*++src != ':')
return clear_dst();
/// get the integer value of a hexadecimal char digit, or -1
const auto number_by_char = [] (const char ch) {
if ('A' <= ch && ch <= 'F')
return 10 + ch - 'A';
if ('a' <= ch && ch <= 'f')
return 10 + ch - 'a';
if ('0' <= ch && ch <= '9')
return ch - '0';
return -1;
};
unsigned char tmp[ipv6_bytes_length]{};
auto tp = tmp;
auto endp = tp + ipv6_bytes_length;
auto curtok = src;
auto saw_xdigit = false;
UInt32 val{}; /// wider than 16 bits, so the overflow check below can actually fire
unsigned char * colonp = nullptr;
while (const auto ch = *src++)
{
const auto num = number_by_char(ch);
if (num != -1)
{
val <<= 4;
val |= num;
if (val > 0xffffu)
return clear_dst();
saw_xdigit = true;
continue;
}
if (ch == ':')
{
curtok = src;
if (!saw_xdigit)
{
if (colonp)
return clear_dst();
colonp = tp;
continue;
}
if (tp + sizeof(uint16_t) > endp)
return clear_dst();
*tp++ = static_cast<unsigned char>((val >> 8) & 0xffu);
*tp++ = static_cast<unsigned char>(val & 0xffu);
saw_xdigit = false;
val = 0;
continue;
}
if (ch == '.' && (tp + ipv4_bytes_length) <= endp)
{
if (!ipv4_scan(curtok, tp))
return clear_dst();
tp += ipv4_bytes_length;
saw_xdigit = false;
break; /* '\0' was seen by ipv4_scan(). */
}
return clear_dst();
}
if (saw_xdigit)
{
if (tp + sizeof(uint16_t) > endp)
return clear_dst();
*tp++ = static_cast<unsigned char>((val >> 8) & 0xffu);
*tp++ = static_cast<unsigned char>(val & 0xffu);
}
if (colonp)
{
/*
* Since some memmove()'s erroneously fail to handle
* overlapping regions, we'll do the shift by hand.
*/
const auto n = tp - colonp;
for (int i = 1; i <= n; i++)
{
endp[- i] = colonp[n - i];
colonp[n - i] = 0;
}
tp = endp;
}
if (tp != endp)
return clear_dst();
memcpy(dst, tmp, sizeof(tmp));
}
void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
const ColumnPtr & column = block.getByPosition(arguments[0]).column;
if (const auto col_in = typeid_cast<const ColumnString *>(&*column))
{
const auto col_res = new ColumnFixedString{ipv6_bytes_length};
block.getByPosition(result).column = col_res;
auto & vec_res = col_res->getChars();
vec_res.resize(col_in->size() * ipv6_bytes_length);
const ColumnString::Chars_t & vec_src = col_in->getChars();
const ColumnString::Offsets_t & offsets_src = col_in->getOffsets();
size_t src_offset = 0;
for (size_t out_offset = 0, i = 0;
out_offset < vec_res.size();
out_offset += ipv6_bytes_length, ++i)
{
ipv6_scan(reinterpret_cast<const char* >(&vec_src[src_offset]), &vec_res[out_offset]);
src_offset = offsets_src[i];
}
}
else if (const auto col_in = typeid_cast<const ColumnConstString *>(&*column))
{
String out(ipv6_bytes_length, 0);
ipv6_scan(col_in->getData().data(), reinterpret_cast<unsigned char *>(&out[0]));
block.getByPosition(result).column = new ColumnConst<String>{
col_in->size(),
out,
new DataTypeFixedString{ipv6_bytes_length}
};
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
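
The two scanners above are strict: ipv4_scan accepts only a complete dotted quad terminated by NUL (1 to 3 digits per group, each at most 255), and ipv6_scan expands "::" by shifting everything written after it to the end of the 16-byte buffer, zero-filling the gap. A minimal standalone sketch of both rules; looksLikeDottedQuad is an illustrative name, not part of this commit:

#include <cassert>

/// Restatement of the ipv4_scan acceptance rules (illustrative only).
static bool looksLikeDottedQuad(const char * src)
{
	for (int group = 0; group < 4; ++group)
	{
		int value = 0;
		int len = 0;
		while (*src >= '0' && *src <= '9' && len <= 3)
		{
			value = value * 10 + (*src - '0');
			++len;
			++src;
		}
		if (len == 0 || value > 255 || (group < 3 && *src != '.'))
			return false;
		++src;	/// consume the '.' (or the byte after the last group)
	}
	return src[-1] == '\0';	/// the last group must end the string
}

int main()
{
	assert(looksLikeDottedQuad("1.2.3.4"));
	assert(!looksLikeDottedQuad("1.2.3.456"));	/// group out of range
	assert(!looksLikeDottedQuad("1.2.3.4x"));	/// trailing garbage

	/// The shift-by-hand that ipv6_scan performs for "::".
	/// Parsing "1::2" first packs the groups 0x0001, 0x0002 at the front:
	unsigned char tmp[16]{};
	unsigned char * tp = tmp;
	*tp++ = 0x00; *tp++ = 0x01;		/// group before "::"
	unsigned char * colonp = tp;	/// where "::" was seen
	*tp++ = 0x00; *tp++ = 0x02;		/// group after "::"

	/// Move everything after colonp to the end of the buffer, zero-filling the gap.
	unsigned char * endp = tmp + 16;
	const auto n = tp - colonp;
	for (int i = 1; i <= n; i++)
	{
		endp[-i] = colonp[n - i];
		colonp[n - i] = 0;
	}

	assert(tmp[1] == 0x01 && tmp[15] == 0x02);	/// 0001:0000:...:0002
	return 0;
}
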
class FunctionIPv4NumToString : public IFunction
{
public:
@@ -108,7 +532,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(vec_in.size() * 16); /// longest value: 255.255.255.255\0
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// longest value: 255.255.255.255\0
offsets_res.resize(vec_in.size());
char * begin = reinterpret_cast<char *>(&vec_res[0]);
char * pos = begin;
@@ -161,7 +585,7 @@ public:
return new DataTypeUInt32;
}
static inline bool isDigit(char c)
static bool isDigit(char c)
{
return c >= '0' && c <= '9';
}
@@ -453,7 +453,11 @@ public:
if (column_const->getData().size() > n)
throw Exception("String too long for type FixedString(" + toString(n) + ")",
ErrorCodes::TOO_LARGE_STRING_SIZE);
block.getByPosition(result).column = new ColumnConst<String>(column_const->size(), column_const->getData(), new DataTypeFixedString(n));
auto resized_string = column_const->getData();
resized_string.resize(n);
block.getByPosition(result).column = new ColumnConst<String>(column_const->size(), std::move(resized_string), new DataTypeFixedString(n));
}
else if(const ColumnString * column_string = typeid_cast<const ColumnString *>(&*column))
{
@@ -1,6 +1,7 @@
#pragma once
#include <openssl/md5.h>
#include <openssl/sha.h>
#include <city.h>
#include <Poco/ByteOrder.h>
@@ -64,6 +65,62 @@ struct HalfMD5Impl
}
};
struct MD5Impl
{
static constexpr auto name = "MD5";
static constexpr auto length = 16;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
MD5_CTX ctx;
MD5_Init(&ctx);
MD5_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
MD5_Final(out_char_data, &ctx);
}
};
struct SHA1Impl
{
static constexpr auto name = "SHA1";
static constexpr auto length = 20;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA_CTX ctx;
SHA1_Init(&ctx);
SHA1_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA1_Final(out_char_data, &ctx);
}
};
struct SHA224Impl
{
static constexpr auto name = "SHA224";
static constexpr auto length = 28;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA256_CTX ctx;
SHA224_Init(&ctx);
SHA224_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA224_Final(out_char_data, &ctx);
}
};
struct SHA256Impl
{
static constexpr auto name = "SHA256";
static constexpr auto length = 32;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA256_CTX ctx;
SHA256_Init(&ctx);
SHA256_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA256_Final(out_char_data, &ctx);
}
};
struct SipHash64Impl
{
static UInt64 apply(const char * begin, size_t size)
@@ -72,6 +129,17 @@ struct SipHash64Impl
}
};
struct SipHash128Impl
{
static constexpr auto name = "SipHash128";
static constexpr auto length = 16;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
sipHash128(begin, size, reinterpret_cast<char*>(out_char_data));
}
};
struct IntHash32Impl
{
typedef UInt32 ReturnType;
@@ -152,6 +220,72 @@ public:
};
template <typename Impl>
class FunctionStringHashFixedString : public IFunction
{
public:
/// Get the function name.
String getName() const
{
return Impl::name;
}
/// Get the result type given the argument types. If the function is not applicable to these arguments, throw an exception.
DataTypePtr getReturnType(const DataTypes & arguments) const
{
if (arguments.size() != 1)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return new DataTypeFixedString{Impl::length};
}
/// Execute the function over a block.
void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
if (const ColumnString * col_from = typeid_cast<const ColumnString *>(&*block.getByPosition(arguments[0]).column))
{
auto col_to = new ColumnFixedString{Impl::length};
block.getByPosition(result).column = col_to;
const typename ColumnString::Chars_t & data = col_from->getChars();
const typename ColumnString::Offsets_t & offsets = col_from->getOffsets();
auto & chars_to = col_to->getChars();
const auto size = offsets.size();
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
Impl::apply(
reinterpret_cast<const char *>(&data[i == 0 ? 0 : offsets[i - 1]]),
i == 0 ? offsets[i] - 1 : (offsets[i] - 1 - offsets[i - 1]),
&chars_to[i * Impl::length]);
}
else if (const ColumnConstString * col_from = typeid_cast<const ColumnConstString *>(&*block.getByPosition(arguments[0]).column))
{
const auto & data = col_from->getData();
String hash(Impl::length, 0);
Impl::apply(data.data(), data.size(), reinterpret_cast<unsigned char *>(&hash[0]));
block.getByPosition(result).column = new ColumnConst<String>{
col_from->size(),
hash,
new DataTypeFixedString{Impl::length}
};
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
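
Each Impl above is just a name, a fixed output length, and an apply(begin, size, out) that writes exactly length bytes; FunctionStringHashFixedString never looks inside. A standalone check of that contract against OpenSSL, using the same calls SHA256Impl::apply makes (build with -lcrypto):

#include <openssl/sha.h>
#include <cstdio>

int main()
{
	const char data[] = "hello";
	unsigned char out[32];	/// SHA256Impl::length == 32

	SHA256_CTX ctx;
	SHA256_Init(&ctx);
	SHA256_Update(&ctx, reinterpret_cast<const unsigned char *>(data), 5);
	SHA256_Final(out, &ctx);

	for (unsigned char byte : out)
		std::printf("%02x", byte);	/// 2cf24dba5fb0a30e...
	std::printf("\n");
	return 0;
}
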
template <typename Impl, typename Name>
class FunctionIntHash : public IFunction
{
@@ -465,6 +599,10 @@ typedef FunctionStringHash64<HalfMD5Impl, NameHalfMD5> FunctionHalfMD5;
typedef FunctionStringHash64<SipHash64Impl, NameSipHash64> FunctionSipHash64;
typedef FunctionIntHash<IntHash32Impl, NameIntHash32> FunctionIntHash32;
typedef FunctionIntHash<IntHash64Impl, NameIntHash64> FunctionIntHash64;
typedef FunctionStringHashFixedString<MD5Impl> FunctionMD5;
typedef FunctionStringHashFixedString<SHA1Impl> FunctionSHA1;
typedef FunctionStringHashFixedString<SHA224Impl> FunctionSHA224;
typedef FunctionStringHashFixedString<SHA256Impl> FunctionSHA256;
typedef FunctionStringHashFixedString<SipHash128Impl> FunctionSipHash128;
}
@@ -288,6 +288,95 @@ struct ArraySumImpl
}
};
struct ArrayFirstImpl
{
static bool needBoolean() { return false; }
static bool needExpression() { return true; }
static bool needOneArray() { return false; }
static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & array_element)
{
return array_element;
}
static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped)
{
auto column_filter = typeid_cast<ColumnVector<UInt8> *>(&*mapped);
if (!column_filter)
throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN);
const auto & filter = column_filter->getData();
const auto & offsets = array->getOffsets();
const auto & data = array->getData();
ColumnPtr out{data.cloneEmpty()};
size_t pos{};
for (size_t i = 0; i < offsets.size(); ++i)
{
auto exists = false;
for (; pos < offsets[i]; ++pos)
{
if (filter[pos])
{
out->insert(data[pos]);
exists = true;
pos = offsets[i];
break;
}
}
if (!exists)
out->insertDefault();
}
return out;
}
};
struct ArrayFirstIndexImpl
{
static bool needBoolean() { return false; }
static bool needExpression() { return true; }
static bool needOneArray() { return false; }
static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & array_element)
{
return new DataTypeUInt32;
}
static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped)
{
auto column_filter = typeid_cast<ColumnVector<UInt8> *>(&*mapped);
if (!column_filter)
throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN);
const auto & filter = column_filter->getData();
const auto & offsets = array->getOffsets();
auto out_column = new ColumnVector<UInt32>{offsets.size()};
ColumnPtr out_column_ptr{out_column};
auto & out_index = out_column->getData();
size_t pos{};
for (size_t i = 0; i < offsets.size(); ++i)
{
UInt32 index{};
for (size_t idx{1}; pos < offsets[i]; ++pos, ++idx)
{
if (filter[pos])
{
index = idx;
pos = offsets[i];
break;
}
}
out_index[i] = index;
}
return out_column_ptr;
}
};
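
Both impls above make a single pass over the flattened filter column, using the shared offsets array to delimit rows; after a hit, pos jumps straight to offsets[i] so the rest of the row is skipped. The same scan on plain vectors (illustrative sketch, not the IColumn API):

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
	/// Two array rows flattened: [10, 20, 30] and [5].
	const std::vector<unsigned> filter{0, 1, 1, 0};	/// result of a lambda like x -> x >= 20
	const std::vector<size_t> offsets{3, 4};		/// cumulative end positions of the rows

	std::vector<unsigned> first_index(offsets.size());
	size_t pos = 0;
	for (size_t i = 0; i < offsets.size(); ++i)
	{
		unsigned index = 0;	/// 0 means "no match", as in ArrayFirstIndexImpl
		for (size_t idx = 1; pos < offsets[i]; ++pos, ++idx)
			if (filter[pos])
			{
				index = idx;
				pos = offsets[i];	/// skip the rest of this row
				break;
			}
		first_index[i] = index;
	}

	assert(first_index[0] == 2 && first_index[1] == 0);
	return 0;
}
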
template <typename Impl, typename Name>
class FunctionArrayMapped : public IFunction
{
@@ -508,6 +597,8 @@ struct NameArrayCount { static const char * get() { return "arrayCount"; } };
struct NameArrayExists { static const char * get() { return "arrayExists"; } };
struct NameArrayAll { static const char * get() { return "arrayAll"; } };
struct NameArraySum { static const char * get() { return "arraySum"; } };
struct NameArrayFirst { static const char * get() { return "arrayFirst"; } };
struct NameArrayFirstIndex { static const char * get() { return "arrayFirstIndex"; } };
typedef FunctionArrayMapped<ArrayMapImpl, NameArrayMap> FunctionArrayMap;
typedef FunctionArrayMapped<ArrayFilterImpl, NameArrayFilter> FunctionArrayFilter;
@@ -515,5 +606,7 @@ typedef FunctionArrayMapped<ArrayCountImpl, NameArrayCount> FunctionArrayCount
typedef FunctionArrayMapped<ArrayExistsImpl, NameArrayExists> FunctionArrayExists;
typedef FunctionArrayMapped<ArrayAllImpl, NameArrayAll> FunctionArrayAll;
typedef FunctionArrayMapped<ArraySumImpl, NameArraySum> FunctionArraySum;
typedef FunctionArrayMapped<ArrayFirstImpl, NameArrayFirst> FunctionArrayFirst;
typedef FunctionArrayMapped<ArrayFirstIndexImpl, NameArrayFirstIndex> FunctionArrayFirstIndex;
}
@@ -20,6 +20,7 @@
#include <DB/Columns/ColumnTuple.h>
#include <DB/Columns/ColumnArray.h>
#include <DB/Columns/ColumnReplicated.h>
#include <DB/Common/UnicodeBar.h>
#include <DB/Functions/IFunction.h>
@@ -672,47 +673,6 @@ private:
return apply_visitor(FieldVisitorConvertToNumber<T>(), column[0]);
}
static constexpr size_t BAR_CHAR_SIZE = strlen("█");
template <typename T>
static Float64 barWidth(T x, Int64 min, Int64 max, Float64 max_width)
{
if (x <= min)
return 0;
if (x >= max)
return max_width;
return (x - min) * max_width / (max - min);
}
static size_t barWidthInBytes(Float64 width)
{
return ceil(width - 1.0 / 8) * BAR_CHAR_SIZE;
}
/// dst must have room for barWidthInBytes(width) characters and a terminating zero.
static void renderBar(Float64 width, char * dst)
{
size_t floor_width = floor(width);
for (size_t i = 0; i < floor_width; ++i)
{
memcpy(dst, "", BAR_CHAR_SIZE);
dst += BAR_CHAR_SIZE;
}
size_t remainder = floor((width - floor_width) * 8);
if (remainder)
{
memcpy(dst, &"▏▎▍▌▋▋▊▉"[(remainder - 1) * BAR_CHAR_SIZE], BAR_CHAR_SIZE);
dst += BAR_CHAR_SIZE;
}
*dst = 0;
}
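
A worked example of the rendering math being moved into UnicodeBar: a width of 3.5 produces three full blocks plus remainder = floor(0.5 * 8) = 4, i.e. the half block. A hedged standalone check, assuming UTF-8 output where each block glyph occupies 3 bytes:

#include <cassert>
#include <cmath>
#include <string>

int main()
{
	const double width = 3.5;
	const size_t floor_width = std::floor(width);					/// 3 full blocks
	const size_t remainder = std::floor((width - floor_width) * 8);	/// 4 -> the 4/8 block

	std::string bar;
	for (size_t i = 0; i < floor_width; ++i)
		bar += "█";
	if (remainder)
		bar += std::string("▏▎▍▌▋▊▉").substr((remainder - 1) * 3, 3);

	assert(bar == "███▌");
	assert(bar.size() == 4 * 3);	/// each glyph takes 3 bytes in UTF-8
	return 0;
}
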
template <typename T>
static void fill(const PODArray<T> & src, ColumnString::Chars_t & dst_chars, ColumnString::Offsets_t & dst_offsets,
Int64 min, Int64 max, Float64 max_width)
@@ -721,14 +681,14 @@ private:
size_t current_offset = 0;
dst_offsets.resize(size);
dst_chars.reserve(size * (barWidthInBytes(max_width) + 1)); /// strings are 0-terminated.
dst_chars.reserve(size * (UnicodeBar::getWidthInBytes(max_width) + 1)); /// strings are 0-terminated.
for (size_t i = 0; i < size; ++i)
{
Float64 width = barWidth(src[i], min, max, max_width);
size_t next_size = current_offset + barWidthInBytes(width) + 1;
Float64 width = UnicodeBar::getWidth(src[i], min, max, max_width);
size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1;
dst_chars.resize(next_size);
renderBar(width, reinterpret_cast<char *>(&dst_chars[current_offset]));
UnicodeBar::render(width, reinterpret_cast<char *>(&dst_chars[current_offset]));
current_offset = next_size;
dst_offsets[i] = current_offset;
}
@@ -738,9 +698,9 @@ private:
static void fill(T src, String & dst_chars,
Int64 min, Int64 max, Float64 max_width)
{
Float64 width = barWidth(src, min, max, max_width);
dst_chars.resize(barWidthInBytes(width));
renderBar(width, &dst_chars[0]);
Float64 width = UnicodeBar::getWidth(src, min, max, max_width);
dst_chars.resize(UnicodeBar::getWidthInBytes(width));
UnicodeBar::render(width, &dst_chars[0]);
}
template <typename T>
@@ -113,6 +113,94 @@ struct ExtractDomain
}
};
struct ExtractFirstSignificantSubdomain
{
static size_t getReserveLengthForElement() { return 10; }
static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr)
{
res_data = data;
res_size = 0;
Pos tmp;
size_t domain_length;
ExtractDomain<true>::execute(data, size, tmp, domain_length);
if (domain_length == 0)
return;
if (out_domain_end)
*out_domain_end = tmp + domain_length;
/// cut the trailing dot, if any
if (tmp[domain_length - 1] == '.')
--domain_length;
res_data = tmp;
res_size = domain_length;
auto begin = tmp;
auto end = begin + domain_length;
const char * last_3_periods[3]{};
auto pos = static_cast<const char *>(memchr(begin, '.', domain_length));
while (pos)
{
last_3_periods[2] = last_3_periods[1];
last_3_periods[1] = last_3_periods[0];
last_3_periods[0] = pos;
pos = static_cast<const char *>(memchr(pos + 1, '.', end - pos - 1));
}
if (!last_3_periods[0])
return;
if (!last_3_periods[1])
{
res_size = last_3_periods[0] - begin;
return;
}
if (!last_3_periods[2])
last_3_periods[2] = begin - 1;
if (!strncmp(last_3_periods[1] + 1, "com", 3) ||
!strncmp(last_3_periods[1] + 1, "net", 3) ||
!strncmp(last_3_periods[1] + 1, "org", 3) ||
!strncmp(last_3_periods[1] + 1, "co", 2))
{
res_data += last_3_periods[2] + 1 - begin;
res_size = last_3_periods[1] - last_3_periods[2] - 1;
return;
}
res_data += last_3_periods[1] + 1 - begin;
res_size = last_3_periods[0] - last_3_periods[1] - 1;
}
};
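
The heuristic above keeps the label just before the last period, unless the second-to-last label is a generic one (com, net, org, a "co"-prefixed label), in which case it backs up one more label; CutToFirstSignificantSubdomain below keeps everything from that label to the end of the domain, so "news.yandex.com.tr" yields "yandex.com.tr". A simplified restatement on a bare domain string (illustrative; the real code works on pointers into the URL and uses prefix matching via strncmp):

#include <cassert>
#include <string>

/// Simplified restatement of the label-picking rule (illustrative only).
static std::string firstSignificantLabel(const std::string & domain)
{
	long p[3] = {-1, -1, -1};	/// positions of the last three periods
	for (long i = 0; i < static_cast<long>(domain.size()); ++i)
		if (domain[i] == '.')
		{
			p[2] = p[1];
			p[1] = p[0];
			p[0] = i;
		}

	if (p[0] == -1)
		return domain;						/// no periods at all
	if (p[1] == -1)
		return domain.substr(0, p[0]);		/// "yandex.ru" -> "yandex"

	const std::string second = domain.substr(p[1] + 1, p[0] - p[1] - 1);
	if (second == "com" || second == "net" || second == "org" || second.compare(0, 2, "co") == 0)
		return domain.substr(p[2] + 1, p[1] - p[2] - 1);	/// back up one more label
	return domain.substr(p[1] + 1, p[0] - p[1] - 1);
}

int main()
{
	assert(firstSignificantLabel("news.yandex.ru") == "yandex");
	assert(firstSignificantLabel("news.yandex.com.tr") == "yandex");
	return 0;
}
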
struct CutToFirstSignificantSubdomain
{
static size_t getReserveLengthForElement() { return 15; }
static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size)
{
res_data = data;
res_size = 0;
Pos tmp_data;
size_t tmp_length;
Pos domain_end;
ExtractFirstSignificantSubdomain::execute(data, size, tmp_data, tmp_length, &domain_end);
if (tmp_length == 0)
return;
res_data = tmp_data;
res_size = domain_end - tmp_data;
}
};
struct ExtractTopLevelDomain
{
static size_t getReserveLengthForElement() { return 5; }
@@ -839,12 +927,15 @@ struct CutSubstringImpl
struct NameProtocol { static const char * get() { return "protocol"; } };
struct NameDomain { static const char * get() { return "domain"; } };
struct NameDomainWithoutWWW { static const char * get() { return "domainWithoutWWW"; } };
struct NameFirstSignificantSubdomain { static const char * get() { return "firstSignificantSubdomain"; } };
struct NameTopLevelDomain { static const char * get() { return "topLevelDomain"; } };
struct NamePath { static const char * get() { return "path"; } };
struct NameQueryString { static const char * get() { return "queryString"; } };
struct NameFragment { static const char * get() { return "fragment"; } };
struct NameQueryStringAndFragment { static const char * get() { return "queryStringAndFragment"; } };
struct NameCutToFirstSignificantSubdomain { static const char * get() { return "cutToFirstSignificantSubdomain"; } };
struct NameCutWWW { static const char * get() { return "cutWWW"; } };
struct NameCutQueryString { static const char * get() { return "cutQueryString"; } };
struct NameCutFragment { static const char * get() { return "cutFragment"; } };
@@ -856,12 +947,15 @@ struct NameCutURLParameter { static const char * get() { return "cutURLParam
typedef FunctionStringToString<ExtractSubstringImpl<ExtractProtocol>, NameProtocol> FunctionProtocol;
typedef FunctionStringToString<ExtractSubstringImpl<ExtractDomain<false> >, NameDomain> FunctionDomain;
typedef FunctionStringToString<ExtractSubstringImpl<ExtractDomain<true> >, NameDomainWithoutWWW> FunctionDomainWithoutWWW;
typedef FunctionStringToString<ExtractSubstringImpl<ExtractFirstSignificantSubdomain>, NameFirstSignificantSubdomain> FunctionFirstSignificantSubdomain;
typedef FunctionStringToString<ExtractSubstringImpl<ExtractTopLevelDomain>, NameTopLevelDomain> FunctionTopLevelDomain;
typedef FunctionStringToString<ExtractSubstringImpl<ExtractPath>, NamePath> FunctionPath;
typedef FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, NameQueryString> FunctionQueryString;
typedef FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, NameFragment> FunctionFragment;
typedef FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment> FunctionQueryStringAndFragment;
typedef FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain> FunctionCutToFirstSignificantSubdomain;
typedef FunctionStringToString<CutSubstringImpl<ExtractWWW>, NameCutWWW> FunctionCutWWW;
typedef FunctionStringToString<CutSubstringImpl<ExtractQueryString<false> >, NameCutQueryString> FunctionCutQueryString;
typedef FunctionStringToString<CutSubstringImpl<ExtractFragment<false> >, NameCutFragment> FunctionCutFragment;
@@ -22,6 +22,7 @@
#include <DB/Columns/ColumnString.h>
#include <DB/Columns/ColumnFixedString.h>
#include <DB/Columns/ColumnAggregateFunction.h>
#include <DB/Columns/ColumnVector.h>
@@ -53,25 +54,78 @@ typedef HashMap<UInt128, AggregateDataPtr, UInt128Hash> AggregatedDataWithKeys12
typedef HashMap<UInt128, std::pair<StringRef*, AggregateDataPtr>, UInt128TrivialHash> AggregatedDataHashed;
/// For the case when there is a single numeric key.
struct AggregationMethodKey64
/// Specializations for UInt8, UInt16.
struct TrivialHash
{
typedef AggregatedDataWithUInt64Key Data;
typedef Data::key_type Key;
typedef Data::mapped_type Mapped;
typedef Data::iterator iterator;
typedef Data::const_iterator const_iterator;
template <typename T>
size_t operator() (T key) const
{
return key;
}
};
/// Turns a hash table into something like a lookup table. One inefficiency remains: the cells still store the keys.
template <size_t key_bits>
struct HashTableFixedGrower
{
size_t bufSize() const { return 1 << key_bits; }
size_t place(size_t x) const { return x; }
/// One could write __builtin_unreachable() here, but the compiler does not optimize everything all the way, and the result is less efficient.
size_t next(size_t pos) const { return pos + 1; }
bool overflow(size_t elems) const { return false; }
void increaseSize() { __builtin_unreachable(); }
void set(size_t num_elems) {}
void setBufSize(size_t buf_size_) {}
};
typedef HashMap<UInt64, AggregateDataPtr, TrivialHash, HashTableFixedGrower<8>> AggregatedDataWithUInt8Key;
typedef HashMap<UInt64, AggregateDataPtr, TrivialHash, HashTableFixedGrower<16>> AggregatedDataWithUInt16Key;
template <typename FieldType>
struct AggregatedDataWithUIntKey
{
using Type = AggregatedDataWithUInt64Key;
static constexpr bool never_overflows = false;
};
template <>
struct AggregatedDataWithUIntKey<UInt8>
{
using Type = AggregatedDataWithUInt8Key;
static constexpr bool never_overflows = true; /// Indicates that the aggregation result cannot contain many entries.
};
template <>
struct AggregatedDataWithUIntKey<UInt16>
{
using Type = AggregatedDataWithUInt16Key;
static constexpr bool never_overflows = true;
};
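
With TrivialHash and HashTableFixedGrower<8>, the "hash table" for a UInt8 key degenerates into a directly indexed array of 256 cells: place(x) is the key itself, probing never happens, and overflow() never fires, which is why never_overflows can be set. The idea in miniature, as a sketch with a flat array rather than the HashMap API:

#include <cassert>
#include <cstdint>

int main()
{
	/// "Aggregation" by a UInt8 key: 256 slots, the key is the index.
	uint64_t counts[256]{};

	const uint8_t keys[] = {7, 7, 200, 7};
	for (uint8_t k : keys)
		++counts[k];	/// place(k) == k: no hashing, no probing, no resize

	assert(counts[7] == 3 && counts[200] == 1 && counts[0] == 0);
	return 0;
}
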
/// For the case when there is a single numeric key.
template <typename FieldType> /// UInt8/16/32/64 for any types of the corresponding width.
struct AggregationMethodOneNumber
{
typedef typename AggregatedDataWithUIntKey<FieldType>::Type Data;
typedef typename Data::key_type Key;
typedef typename Data::mapped_type Mapped;
typedef typename Data::iterator iterator;
typedef typename Data::const_iterator const_iterator;
static constexpr bool never_overflows = AggregatedDataWithUIntKey<FieldType>::never_overflows;
Data data;
const IColumn * column;
const FieldType * column;
/** Called at the start of processing each block.
* Sets up the variables needed by the other methods that are called in the inner loops.
*/
void init(ConstColumnPlainPtrs & key_columns)
{
column = key_columns[0];
column = &static_cast<const ColumnVector<FieldType> *>(key_columns[0])->getData()[0];
}
/// Extract, from the key columns, the key to insert into the hash table.
@@ -82,7 +136,7 @@ struct AggregationMethodKey64
const Sizes & key_sizes, /// If the keys are fixed-length: their lengths. Not used in aggregation methods over variable-length keys.
StringRefs & keys) const /// References to the key data in the columns may be written here. They can be used later.
{
return column->get64(i);
return get64(column[i]);
}
/// Get an AggregateDataPtr from a value in the hash table.
@@ -99,10 +153,43 @@ struct AggregationMethodKey64
*/
static void insertKeyIntoColumns(const_iterator & it, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{
key_columns[0]->insertData(reinterpret_cast<const char *>(&it->first), sizeof(it->first));
static_cast<ColumnVector<FieldType> *>(key_columns[0])->insertData(reinterpret_cast<const char *>(&it->first), sizeof(it->first));
}
private:
UInt64 get64(FieldType x) const
{
return x;
}
};
template <>
inline UInt64 AggregationMethodOneNumber<Float64>::get64(Float64 x) const
{
union
{
Float64 src;
UInt64 res;
};
src = x;
return res;
}
template <>
inline UInt64 AggregationMethodOneNumber<Float32>::get64(Float32 x) const
{
union
{
Float32 src;
UInt64 res;
};
res = 0;
src = x;
return res;
}
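
The get64 specializations reinterpret the float's bits as an integer key, so equal floats map to equal keys. The union-based punning works on the compilers this code targets; a memcpy expresses the same bit copy without relying on it (hedged sketch):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t bitsOf(double x)
{
	uint64_t res;
	std::memcpy(&res, &x, sizeof(x));	/// same bits, no union type punning
	return res;
}

int main()
{
	assert(bitsOf(1.5) == bitsOf(1.5));
	assert(bitsOf(1.5) != bitsOf(2.5));
	return 0;
}
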
/// For the case when there is a single string key.
struct AggregationMethodString
@@ -113,6 +200,8 @@ struct AggregationMethodString
typedef Data::iterator iterator;
typedef Data::const_iterator const_iterator;
static constexpr bool never_overflows = false;
Data data;
const ColumnString::Offsets_t * offsets;
@@ -160,6 +249,8 @@ struct AggregationMethodFixedString
typedef Data::iterator iterator;
typedef Data::const_iterator const_iterator;
static constexpr bool never_overflows = false;
Data data;
size_t n;
@@ -207,6 +298,8 @@ struct AggregationMethodKeys128
typedef Data::iterator iterator;
typedef Data::const_iterator const_iterator;
static constexpr bool never_overflows = false;
Data data;
void init(ConstColumnPlainPtrs & key_columns)
@@ -252,6 +345,8 @@ struct AggregationMethodHashed
typedef Data::iterator iterator;
typedef Data::const_iterator const_iterator;
static constexpr bool never_overflows = false;
Data data;
void init(ConstColumnPlainPtrs & key_columns)
@@ -317,7 +412,10 @@ struct AggregatedDataVariants : private boost::noncopyable
*/
AggregatedDataWithoutKey without_key = nullptr;
std::unique_ptr<AggregationMethodKey64> key64;
std::unique_ptr<AggregationMethodOneNumber<UInt8>> key8;
std::unique_ptr<AggregationMethodOneNumber<UInt16>> key16;
std::unique_ptr<AggregationMethodOneNumber<UInt32>> key32;
std::unique_ptr<AggregationMethodOneNumber<UInt64>> key64;
std::unique_ptr<AggregationMethodString> key_string;
std::unique_ptr<AggregationMethodFixedString> key_fixed_string;
std::unique_ptr<AggregationMethodKeys128> keys128;
@@ -326,12 +424,15 @@ struct AggregatedDataVariants : private boost::noncopyable
enum Type
{
EMPTY = 0,
WITHOUT_KEY = 1,
KEY_64 = 2,
KEY_STRING = 3,
KEY_FIXED_STRING = 4,
KEYS_128 = 5,
HASHED = 6,
WITHOUT_KEY,
KEY_8,
KEY_16,
KEY_32,
KEY_64,
KEY_STRING,
KEY_FIXED_STRING,
KEYS_128,
HASHED,
};
Type type = EMPTY;
@@ -348,11 +449,14 @@ struct AggregatedDataVariants : private boost::noncopyable
{
case EMPTY: break;
case WITHOUT_KEY: break;
case KEY_64: key64 .reset(new AggregationMethodKey64); break;
case KEY_STRING: key_string .reset(new AggregationMethodString); break;
case KEY_FIXED_STRING: key_fixed_string.reset(new AggregationMethodFixedString); break;
case KEYS_128: keys128 .reset(new AggregationMethodKeys128); break;
case HASHED: hashed .reset(new AggregationMethodHashed); break;
case KEY_8: key8 .reset(new decltype(key8)::element_type); break;
case KEY_16: key16 .reset(new decltype(key16)::element_type); break;
case KEY_32: key32 .reset(new decltype(key32)::element_type); break;
case KEY_64: key64 .reset(new decltype(key64)::element_type); break;
case KEY_STRING: key_string .reset(new decltype(key_string)::element_type); break;
case KEY_FIXED_STRING: key_fixed_string.reset(new decltype(key_fixed_string)::element_type); break;
case KEYS_128: keys128 .reset(new decltype(keys128)::element_type); break;
case HASHED: hashed .reset(new decltype(hashed)::element_type); break;
default:
throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
@@ -365,6 +469,9 @@ struct AggregatedDataVariants : private boost::noncopyable
{
case EMPTY: return 0;
case WITHOUT_KEY: return 1;
case KEY_8: return key8->data.size() + (without_key != nullptr);
case KEY_16: return key16->data.size() + (without_key != nullptr);
case KEY_32: return key32->data.size() + (without_key != nullptr);
case KEY_64: return key64->data.size() + (without_key != nullptr);
case KEY_STRING: return key_string->data.size() + (without_key != nullptr);
case KEY_FIXED_STRING: return key_fixed_string->data.size() + (without_key != nullptr);
@@ -382,6 +489,9 @@ struct AggregatedDataVariants : private boost::noncopyable
{
case EMPTY: return "EMPTY";
case WITHOUT_KEY: return "WITHOUT_KEY";
case KEY_8: return "KEY_8";
case KEY_16: return "KEY_16";
case KEY_32: return "KEY_32";
case KEY_64: return "KEY_64";
case KEY_STRING: return "KEY_STRING";
case KEY_FIXED_STRING: return "KEY_FIXED_STRING";
@@ -8,6 +8,7 @@
#include <Poco/Net/IPAddress.h>
#include <statdaemons/Stopwatch.h>
#include <DB/Core/Defines.h>
#include <DB/Core/Progress.h>
#include <DB/Core/Exception.h>
#include <DB/Core/ErrorCodes.h>
#include <DB/Common/MemoryTracker.h>
@@ -35,8 +36,7 @@ public:
Stopwatch watch;
volatile size_t rows_processed = 0;
volatile size_t bytes_processed = 0;
Progress progress;
MemoryTracker memory_tracker;
@@ -56,10 +56,9 @@ public:
current_memory_tracker = nullptr;
}
bool update(size_t rows, size_t bytes) volatile
bool update(const Progress & value)
{
__sync_add_and_fetch(&rows_processed, rows);
__sync_add_and_fetch(&bytes_processed, bytes);
progress.incrementPiecewiseAtomically(value);
return !is_cancelled;
}
};
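
incrementPiecewiseAtomically replaces the two separate __sync_add_and_fetch calls: each counter is bumped atomically on its own, so the totals stay monotone under concurrent updates even though the tuple (rows, bytes, total_rows) is never read as one snapshot. A sketch of the presumed shape; the Progress internals here are guessed from the fields this diff uses, not taken from the header:

#include <cstddef>

struct ProgressSketch
{
	volatile size_t rows = 0;
	volatile size_t bytes = 0;
	volatile size_t total_rows = 0;

	/// Each field is bumped atomically on its own; readers may observe a mix of
	/// old and new values across fields, which is fine for progress reporting.
	void incrementPiecewiseAtomically(const ProgressSketch & v)
	{
		__sync_add_and_fetch(&rows, v.rows);
		__sync_add_and_fetch(&bytes, v.bytes);
		__sync_add_and_fetch(&total_rows, v.total_rows);
	}
};

int main()
{
	ProgressSketch total;
	ProgressSketch chunk;
	chunk.rows = 100;
	chunk.bytes = 8192;
	total.incrementPiecewiseAtomically(chunk);
	return total.rows == 100 ? 0 : 1;
}
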

@@ -94,6 +94,9 @@ private:
void calculateHashesThread(Block & block, size_t begin, size_t end, ExceptionPtr & exception, MemoryTracker * memory_tracker);
void aggregateThread(Block & block, AggregatedDataVariants & result, size_t thread_no, ExceptionPtr & exception, MemoryTracker * memory_tracker);
void convertToBlockThread(AggregatedDataVariants & data_variant, Block & block, bool final, ExceptionPtr & exception, MemoryTracker * memory_tracker);
template <typename FieldType>
void aggregateOneNumber(AggregatedDataVariants & result, size_t thread_no, bool no_more_keys);
};
@@ -41,10 +41,10 @@ public:
/// (the storage is expected to return only the table's columns).
remove_prewhere_column = !pre_name_set.count(prewhere_column);
Names post_column_names;
for (size_t i = 0; i < column_names.size(); ++i)
for (const auto & name : column_names)
{
if (!pre_name_set.count(column_names[i]))
post_column_names.push_back(column_names[i]);
if (!pre_name_set.count(name))
post_column_names.push_back(name);
}
column_names = post_column_names;
}
@@ -68,8 +68,16 @@ public:
columns = owned_data_part->columns.addTypes(column_names);
}
/// Estimate the total number of rows, for the progress bar.
for (const auto & range : all_mark_ranges)
total_rows += range.end - range.begin;
total_rows *= storage.index_granularity;
LOG_TRACE(log, "Reading " << all_mark_ranges.size() << " ranges from part " << owned_data_part->name
<< ", up to " << (all_mark_ranges.back().end - all_mark_ranges.front().begin) * storage.index_granularity
<< ", approx. " << total_rows
<< (all_mark_ranges.size() > 1
? ", up to " + toString((all_mark_ranges.back().end - all_mark_ranges.front().begin) * storage.index_granularity)
: "")
<< " rows starting from " << all_mark_ranges.front().begin * storage.index_granularity);
}
@@ -97,7 +105,7 @@ public:
protected:
/// We will call progressImpl ourselves.
void progress(size_t rows, size_t bytes) {}
void progress(const Progress & value) override {}
Block readImpl()
{
@@ -108,6 +116,10 @@ protected:
if (!reader)
{
/// Report that we are going to read approximately that many rows.
/// NOTE: This could not be done in the constructor, because progress_callback is not yet set at that point.
progressImpl(Progress(0, 0, total_rows));
UncompressedCache * uncompressed_cache = use_uncompressed_cache ? storage.context.getUncompressedCache() : NULL;
reader.reset(new MergeTreeReader(path, owned_data_part->name, columns, uncompressed_cache, storage, all_mark_ranges));
if (prewhere_actions)
@@ -135,7 +147,7 @@ protected:
if (range.begin == range.end)
remaining_mark_ranges.pop_back();
}
progressImpl(res.rows(), res.bytes());
progressImpl(Progress(res.rows(), res.bytes()));
pre_reader->fillMissingColumns(res);
/// Evaluate the expression in PREWHERE.
@@ -164,7 +176,7 @@ protected:
reader->readRange(range.begin, range.end, res);
}
progressImpl(0, res.bytes() - pre_bytes);
progressImpl(Progress(0, res.bytes() - pre_bytes));
}
else if (ColumnUInt8 * column_vec = typeid_cast<ColumnUInt8 *>(&*column))
{
@@ -216,7 +228,7 @@ protected:
continue;
}
progressImpl(0, res.bytes() - pre_bytes);
progressImpl(Progress(0, res.bytes() - pre_bytes));
post_filter.resize(post_filter_pos);
@@ -259,7 +271,7 @@ protected:
remaining_mark_ranges.pop_back();
}
progressImpl(res.rows(), res.bytes());
progressImpl(Progress(res.rows(), res.bytes()));
reader->fillMissingColumns(res);
}
@@ -297,6 +309,7 @@ private:
ExpressionActionsPtr prewhere_actions;
String prewhere_column;
bool remove_prewhere_column;
size_t total_rows = 0; /// Approximate total number of rows, for the progress bar.
Logger * log;
};
@@ -0,0 +1,128 @@
#pragma once
#include <mutex>
#include <thread>
#include <DB/Core/NamesAndTypes.h>
#include <DB/Storages/IStorage.h>
#include <DB/DataStreams/IProfilingBlockInputStream.h>
#include <DB/DataStreams/IBlockOutputStream.h>
#include <DB/Interpreters/Context.h>
namespace DB
{
/** On insertion, buffers the data in memory until certain thresholds are exceeded.
* When the thresholds are exceeded, flushes the data to another table.
* On read, reads both from its own buffers and from the destination table.
*
* The buffer is a set of num_shards blocks.
* On write, a block number is chosen as the remainder of dividing ThreadNumber by num_shards (or one of the others),
* and the rows are appended to the corresponding block.
* While a block is in use, it is locked with a mutex. If, on write, the corresponding block is already taken,
* we try to lock the next block in round-robin order, at most num_shards times (after that we just block).
* The thresholds are checked on insertion and also periodically in a background thread (to implement the time thresholds).
* The thresholds apply independently to each shard. Each shard can be flushed independently of the others.
* If a block that by itself exceeds the max thresholds is inserted into the table, it is written straight to the destination table without buffering.
* The thresholds can be exceeded. For example, if max_rows = 1 000 000, the buffer already held 500 000 rows,
* and a chunk of 800 000 rows is added, the buffer ends up with 1 300 000 rows, and that block is then written to the destination table.
*
* When a table of type Buffer is dropped and when the server shuts down, all data is flushed.
* The data in the buffer is not replicated, not logged to disk, and not indexed. After an abrupt server restart the data is lost.
*/
class StorageBuffer : public IStorage
{
friend class BufferBlockInputStream;
friend class BufferBlockOutputStream;
public:
/// Thresholds.
struct Thresholds
{
time_t time; /// Number of seconds since the first row was inserted into the block.
size_t rows; /// Number of rows in the block.
size_t bytes; /// Number of (uncompressed) bytes in the block.
};
/** num_shards is the level of internal parallelism (the number of independent buffers).
* The buffer is flushed if all of the min thresholds are exceeded, or at least one of the max thresholds.
*/
static StoragePtr create(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_,
size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_,
const String & destination_database_, const String & destination_table_);
std::string getName() const override { return "Buffer"; }
std::string getTableName() const override { return name; }
const NamesAndTypesList & getColumnsList() const override { return *columns; }
BlockInputStreams read(
const Names & column_names,
ASTPtr query,
const Settings & settings,
QueryProcessingStage::Enum & processed_stage,
size_t max_block_size = DEFAULT_BLOCK_SIZE,
unsigned threads = 1) override;
BlockOutputStreamPtr write(ASTPtr query) override;
/// Flushes all buffers into the destination table.
void shutdown() override;
bool optimize() override;
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override { name = new_table_name; }
bool supportsSampling() const override { return true; }
bool supportsFinal() const override { return true; }
bool supportsPrewhere() const override { return true; }
/// The structure of the destination table is neither checked nor altered.
void alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) override;
private:
String name;
NamesAndTypesListPtr columns;
Context & context;
struct Buffer
{
time_t first_write_time = 0;
Block data;
std::mutex mutex;
};
/// There are num_shards independent buffers.
const size_t num_shards;
std::vector<Buffer> buffers;
const Thresholds min_thresholds;
const Thresholds max_thresholds;
const String destination_database;
const String destination_table;
bool no_destination; /// If set, do not write data from the buffer anywhere; just empty the buffer.
Logger * log;
/// Flushes the data when a timeout expires.
std::thread flush_thread;
StorageBuffer(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_,
size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_,
const String & destination_database_, const String & destination_table_);
/// Flush the buffer. If check_thresholds is set, flush only if the thresholds have been exceeded.
void flushBuffer(Buffer & buffer, bool check_thresholds);
bool checkThresholds(Buffer & buffer, time_t current_time, size_t additional_rows = 0, size_t additional_bytes = 0);
/// The table argument is passed because it is sometimes computed in advance. It must match the destination.
void writeBlockToDestination(const Block & block, StoragePtr table);
Poco::Event shutdown_event;
void flushThread();
};
}
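
Per the header comment, a shard is flushed when all of the min thresholds are exceeded or at least one of the max thresholds is. That predicate, stated directly; shouldFlush is an illustrative name, not the private checkThresholds of this class:

#include <cstddef>
#include <ctime>

struct Thresholds
{
	time_t time;	/// seconds since the first row was inserted into the block
	size_t rows;	/// rows in the block
	size_t bytes;	/// uncompressed bytes in the block
};

/// Flush if all min thresholds are exceeded, or at least one max threshold is.
static bool shouldFlush(const Thresholds & min, const Thresholds & max,
	time_t age, size_t rows, size_t bytes)
{
	if (age > max.time || rows > max.rows || bytes > max.bytes)
		return true;
	return age > min.time && rows > min.rows && bytes > min.bytes;
}

int main()
{
	const Thresholds min{10, 10000, 1000000};
	const Thresholds max{100, 1000000, 100000000};
	return shouldFlush(min, max, 5, 2000000, 500) ? 0 : 1;	/// max rows exceeded -> flush
}
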

@@ -24,8 +24,8 @@ public:
std::stringstream res;
res << "Memory(" << &*begin << ", " << &*end;
for (size_t i = 0; i < column_names.size(); ++i)
res << ", " << column_names[i];
for (const auto & name : column_names)
res << ", " << name;
res << ")";
return res.str();
@@ -7,7 +7,8 @@
namespace DB
{
class StorageView : public IStorage {
class StorageView : public IStorage
{
public:
static StoragePtr create(const String & table_name_, const String & database_name_,
@@ -16,7 +17,7 @@ public:
std::string getName() const override { return "View"; }
std::string getTableName() const override { return table_name; }
const NamesAndTypesList & getColumnsList() const override { return *columns; }
DB::ASTPtr getInnerQuery() const { return inner_query.clone(); };
ASTPtr getInnerQuery() const { return inner_query.clone(); };
/// Passed through into the query and resolved at its level.
bool supportsSampling() const override { return true; }
@@ -239,9 +239,8 @@ private:
Stopwatch watch;
RemoteBlockInputStream stream(connection, query, nullptr);
size_t read_rows = 0;
size_t read_bytes = 0;
stream.setProgressCallback([&](size_t rows_inc, size_t bytes_inc) { read_rows += rows_inc; read_bytes += bytes_inc; });
Progress progress;
stream.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); });
stream.readPrefix();
while (Block block = stream.read())
@ -253,8 +252,8 @@ private:
double seconds = watch.elapsedSeconds();
Poco::ScopedLock<Poco::FastMutex> lock(mutex);
info_per_interval.add(seconds, read_rows, read_bytes, info.rows, info.bytes);
info_total.add(seconds, read_rows, read_bytes, info.rows, info.bytes);
info_per_interval.add(seconds, progress.rows, progress.bytes, info.rows, info.bytes);
info_total.add(seconds, progress.rows, progress.bytes, info.rows, info.bytes);
}
@@ -48,6 +48,8 @@
#include "InterruptListener.h"
#include <DB/Common/ExternalTable.h>
#include <DB/Common/UnicodeBar.h>
#include <DB/Common/formatReadable.h>
/// http://en.wikipedia.org/wiki/ANSI_escape_code
@@ -86,6 +88,8 @@ private:
bool is_interactive = true; /// Use the readline interface, or batch mode.
bool stdin_is_not_tty = false; /// stdin is not a terminal.
winsize terminal_size {}; /// Terminal size, for rendering the progress bar.
SharedPtr<Connection> connection; /// Connection to the database.
String query; /// Current query.
@@ -121,8 +125,10 @@ private:
Stopwatch watch;
size_t rows_read_on_server = 0;
size_t bytes_read_on_server = 0;
/// The server periodically sends information about how much data has been read so far.
Progress progress;
bool show_progress_bar = false;
size_t written_progress_chars = 0;
bool written_first_block = false;
@@ -363,6 +369,9 @@ private:
try
{
/// Find out the terminal size.
ioctl(0, TIOCGWINSZ, &terminal_size);
if (!process(query))
break;
}
@@ -470,8 +479,8 @@ private:
return true;
processed_rows = 0;
rows_read_on_server = 0;
bytes_read_on_server = 0;
progress.reset();
show_progress_bar = false;
written_progress_chars = 0;
written_first_block = false;
@@ -511,7 +520,7 @@ private:
std::cout << std::endl
<< processed_rows << " rows in set. Elapsed: " << watch.elapsedSeconds() << " sec. ";
if (rows_read_on_server >= 1000)
if (progress.rows >= 1000)
writeFinalProgress();
std::cout << std::endl << std::endl;
@@ -809,11 +818,9 @@ private:
}
void onProgress(const Progress & progress)
void onProgress(const Progress & value)
{
rows_read_on_server += progress.rows;
bytes_read_on_server += progress.bytes;
progress.increment(value);
writeProgress();
}
@@ -851,31 +858,62 @@ private:
std::stringstream message;
message << indicators[increment % 8]
<< std::fixed << std::setprecision(3)
<< " Progress: " << rows_read_on_server << " rows, " << bytes_read_on_server / 1000000.0 << " MB";
<< " Progress: ";
message
<< formatReadableQuantity(progress.rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(progress.bytes);
size_t elapsed_ns = watch.elapsed();
if (elapsed_ns)
message << " ("
<< rows_read_on_server * 1000000000.0 / elapsed_ns << " rows/s., "
<< bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) ";
<< formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) ";
else
message << ". ";
written_progress_chars = message.str().size() - 13;
std::cerr << DISABLE_LINE_WRAPPING << message.rdbuf() << ENABLE_LINE_WRAPPING;
written_progress_chars = message.str().size() - (increment % 8 == 7 ? 10 : 13);
std::cerr << DISABLE_LINE_WRAPPING << message.rdbuf();
/** If the approximate total number of rows to process is known, we can display a progress bar.
* To avoid "flicker", display it only if at least half a second has passed since the query started,
* and only if, by that time, the query is less than half done.
*/
ssize_t width_of_progress_bar = static_cast<ssize_t>(terminal_size.ws_col) - written_progress_chars - strlen(" 99%");
if (show_progress_bar
|| (width_of_progress_bar > 0
&& progress.total_rows
&& elapsed_ns > 500000000
&& progress.rows * 2 < progress.total_rows))
{
show_progress_bar = true;
size_t total_rows_corrected = std::max(progress.rows, progress.total_rows);
std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, total_rows_corrected, width_of_progress_bar));
std::cerr << "\033[0;32m" << bar << "\033[0m";
if (width_of_progress_bar > static_cast<ssize_t>(bar.size() / UNICODE_BAR_CHAR_SIZE))
std::cerr << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' ');
std::cerr << ' ' << (99 * progress.rows / total_rows_corrected) << '%'; /// Slightly understate the percentage, so as never to show 100%.
}
std::cerr << ENABLE_LINE_WRAPPING;
++increment;
}
void writeFinalProgress()
{
std::cout << "Processed " << rows_read_on_server << " rows, " << bytes_read_on_server / 1000000.0 << " MB";
std::cout << "Processed "
<< formatReadableQuantity(progress.rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(progress.bytes);
size_t elapsed_ns = watch.elapsed();
if (elapsed_ns)
std::cout << " ("
<< rows_read_on_server * 1000000000.0 / elapsed_ns << " rows/s., "
<< bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) ";
<< formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) ";
else
std::cout << ". ";
}
@@ -470,7 +470,7 @@ Progress Connection::receiveProgress()
//LOG_TRACE(log_wrapper.get(), "Receiving progress (" << getServerAddress() << ")");
Progress progress;
progress.read(*in);
progress.read(*in, server_revision);
return progress;
}
@@ -1,30 +1,16 @@
#include <Yandex/likely.h>
#include <Yandex/logger_useful.h>
#include <DB/Core/Exception.h>
#include <DB/Common/formatReadable.h>
#include <DB/IO/WriteHelpers.h>
#include <iomanip>
#include <DB/Common/MemoryTracker.h>
static std::string formatReadableSize(double size)
{
const char* units[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
size_t i = 0;
while (i + 1 < sizeof(units) / sizeof(units[0]) &&
fabs(size) >= 1024)
{
size /= 1024;
++i;
}
std::stringstream ss;
ss << std::fixed << std::setprecision(i) << size << ' ' << units[i];
return ss.str();
}
MemoryTracker::~MemoryTracker()
{
LOG_DEBUG(&Logger::get("MemoryTracker"), "Peak memory usage for query: " << formatReadableSize(peak) << ".");
LOG_DEBUG(&Logger::get("MemoryTracker"), "Peak memory usage for query: " << formatReadableSizeWithBinarySuffix(peak) << ".");
}
void MemoryTracker::alloc(Int64 size)
@@ -34,9 +20,9 @@ void MemoryTracker::alloc(Int64 size)
if (unlikely(limit && will_be > limit))
{
free(size);
throw DB::Exception("Memory limit exceeded: would use " + formatReadableSize(will_be) + ""
throw DB::Exception("Memory limit exceeded: would use " + formatReadableSizeWithBinarySuffix(will_be) + ""
" (attempt to allocate chunk of " + DB::toString(size) + " bytes)"
", maximum: " + formatReadableSize(limit), DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED);
", maximum: " + formatReadableSizeWithBinarySuffix(limit), DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED);
}
if (will_be > peak)
@@ -0,0 +1,36 @@
#include <cmath>
#include <sstream>
#include <iomanip>
#include <DB/Common/formatReadable.h>
static std::string formatReadable(double size, int precision, const char ** units, size_t units_size, double delimiter)
{
size_t i = 0;
for (; i + 1 < units_size && fabs(size) >= delimiter; ++i)
size /= delimiter;
std::stringstream ss;
ss << std::fixed << std::setprecision(precision) << size << units[i];
return ss.str();
}
std::string formatReadableSizeWithBinarySuffix(double value, int precision)
{
const char * units[] = {" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB", " YiB"};
return formatReadable(value, precision, units, sizeof(units) / sizeof(units[0]), 1024);
}
std::string formatReadableSizeWithDecimalSuffix(double value, int precision)
{
const char * units[] = {" B", " KB", " MB", " GB", " TB", " PB", " EB", " ZB", " YB"};
return formatReadable(value, precision, units, sizeof(units) / sizeof(units[0]), 1000);
}
std::string formatReadableQuantity(double value, int precision)
{
const char * units[] = {"", " thousand", " million", " billion", " trillion", " quadrillion"};
return formatReadable(value, precision, units, sizeof(units) / sizeof(units[0]), 1000);
}
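
Usage of the three helpers, assuming the default precision declared in the header is two digits (the header itself is not part of this hunk):

#include <cstdio>
#include <string>
#include <DB/Common/formatReadable.h>

int main()
{
	std::printf("%s\n", formatReadableSizeWithBinarySuffix(123456789).c_str());		/// 117.74 MiB
	std::printf("%s\n", formatReadableSizeWithDecimalSuffix(123456789).c_str());	/// 123.46 MB
	std::printf("%s\n", formatReadableQuantity(123456789).c_str());					/// 123.46 million
	return 0;
}
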

@@ -166,7 +166,7 @@ Block IProfilingBlockInputStream::read()
cancel();
}
progress(res.rowsInFirstColumn(), res.bytes());
progress(Progress(res.rowsInFirstColumn(), res.bytes()));
return res;
}
@@ -295,36 +295,45 @@ void IProfilingBlockInputStream::checkQuota(Block & block)
}
void IProfilingBlockInputStream::progressImpl(size_t rows, size_t bytes)
void IProfilingBlockInputStream::progressImpl(const Progress & value)
{
/// Progress data is taken from the leaf sources.
if (children.empty())
{
if (progress_callback)
progress_callback(rows, bytes);
progress_callback(value);
if (process_list_elem)
{
if (!process_list_elem->update(rows, bytes))
if (!process_list_elem->update(value))
cancel();
/// The total amount of data processed in all leaf sources, possibly on remote servers.
/// The total amount of data processed, or expected to be processed, in all leaf sources, possibly on remote servers.
size_t total_rows = process_list_elem->rows_processed;
size_t total_bytes = process_list_elem->bytes_processed;
size_t rows_processed = process_list_elem->progress.rows;
size_t bytes_processed = process_list_elem->progress.bytes;
size_t total_rows_estimate = std::max(process_list_elem->progress.rows, process_list_elem->progress.total_rows);
/** Check the restrictions on the amount of data to read, the query execution speed, and the quota on the amount of data to read.
* NOTE: Maybe it makes sense to check them right in ProcessList?
*/
if (limits.mode == LIMITS_TOTAL
&& ((limits.max_rows_to_read && total_rows > limits.max_rows_to_read)
|| (limits.max_bytes_to_read && total_bytes > limits.max_bytes_to_read)))
&& ((limits.max_rows_to_read && total_rows_estimate > limits.max_rows_to_read)
|| (limits.max_bytes_to_read && bytes_processed > limits.max_bytes_to_read)))
{
if (limits.read_overflow_mode == OverflowMode::THROW)
throw Exception("Limit for rows to read exceeded: read " + toString(total_rows)
+ " rows, maximum: " + toString(limits.max_rows_to_read),
{
if (limits.max_rows_to_read && total_rows_estimate > limits.max_rows_to_read)
throw Exception("Limit for rows to read exceeded: " + toString(total_rows_estimate)
+ " rows read (or to read), maximum: " + toString(limits.max_rows_to_read),
ErrorCodes::TOO_MUCH_ROWS);
else
throw Exception("Limit for (uncompressed) bytes to read exceeded: " + toString(bytes_processed)
+ " bytes read, maximum: " + toString(limits.max_bytes_to_read),
ErrorCodes::TOO_MUCH_ROWS);
}
else if (limits.read_overflow_mode == OverflowMode::BREAK)
cancel();
else
@@ -336,9 +345,9 @@ void IProfilingBlockInputStream::progressImpl(size_t rows, size_t bytes)
double total_elapsed = info.total_stopwatch.elapsedSeconds();
if (total_elapsed > limits.timeout_before_checking_execution_speed.totalMicroseconds() / 1000000.0
&& total_rows / total_elapsed < limits.min_execution_speed)
&& rows_processed / total_elapsed < limits.min_execution_speed)
{
throw Exception("Query is executing too slow: " + toString(total_rows / total_elapsed)
throw Exception("Query is executing too slow: " + toString(rows_processed / total_elapsed)
+ " rows/sec., minimum: " + toString(limits.min_execution_speed),
ErrorCodes::TOO_SLOW);
}
@@ -346,7 +355,7 @@ void IProfilingBlockInputStream::progressImpl(size_t rows, size_t bytes)
if (quota != nullptr && limits.mode == LIMITS_TOTAL)
{
quota->checkAndAddReadRowsBytes(time(0), rows, bytes);
quota->checkAndAddReadRowsBytes(time(0), value.rows, value.bytes);
}
}
}
@@ -15,6 +15,20 @@ void registerFunctionsArray(FunctionFactory & factory)
factory.registerFunction("countEqual", F { return new FunctionCountEqual; });
factory.registerFunction("arrayEnumerate", F { return new FunctionArrayEnumerate; });
factory.registerFunction("arrayEnumerateUniq", F { return new FunctionArrayEnumerateUniq; });
factory.registerFunction("emptyArrayUInt8", F { return new FunctionEmptyArrayUInt8; });
factory.registerFunction("emptyArrayUInt16", F { return new FunctionEmptyArrayUInt16; });
factory.registerFunction("emptyArrayUInt32", F { return new FunctionEmptyArrayUInt32; });
factory.registerFunction("emptyArrayUInt64", F { return new FunctionEmptyArrayUInt64; });
factory.registerFunction("emptyArrayInt8", F { return new FunctionEmptyArrayInt8; });
factory.registerFunction("emptyArrayInt16", F { return new FunctionEmptyArrayInt16; });
factory.registerFunction("emptyArrayInt32", F { return new FunctionEmptyArrayInt32; });
factory.registerFunction("emptyArrayInt64", F { return new FunctionEmptyArrayInt64; });
factory.registerFunction("emptyArrayFloat32", F { return new FunctionEmptyArrayFloat32; });
factory.registerFunction("emptyArrayFloat64", F { return new FunctionEmptyArrayFloat64; });
factory.registerFunction("emptyArrayDate", F { return new FunctionEmptyArrayDate; });
factory.registerFunction("emptyArrayDateTime", F { return new FunctionEmptyArrayDateTime; });
factory.registerFunction("emptyArrayString", F { return new FunctionEmptyArrayString; });
#undef F
}
@@ -9,6 +9,8 @@ void registerFunctionsCoding(FunctionFactory & factory)
#define F [](const Context & context) -> IFunction *
factory.registerFunction("toStringCutToZero", F { return new FunctionToStringCutToZero; });
factory.registerFunction("IPv6NumToString", F { return new FunctionIPv6NumToString; });
factory.registerFunction("IPv6StringToNum", F { return new FunctionIPv6StringToNum; });
factory.registerFunction("IPv4NumToString", F { return new FunctionIPv4NumToString; });
factory.registerFunction("IPv4StringToNum", F { return new FunctionIPv4StringToNum; });
factory.registerFunction("hex", F { return new FunctionHex; });

View File

@@ -10,7 +10,12 @@ void registerFunctionsHashing(FunctionFactory & factory)
#define F [](const Context & context) -> IFunction *
factory.registerFunction("halfMD5", F { return new FunctionHalfMD5; });
factory.registerFunction("MD5", F { return new FunctionMD5; });
factory.registerFunction("SHA1", F { return new FunctionSHA1; });
factory.registerFunction("SHA224", F { return new FunctionSHA224; });
factory.registerFunction("SHA256", F { return new FunctionSHA256; });
factory.registerFunction("sipHash64", F { return new FunctionSipHash64; });
factory.registerFunction("sipHash128", F { return new FunctionSipHash128; });
factory.registerFunction("cityHash64", F { return new FunctionCityHash64; });
factory.registerFunction("intHash32", F { return new FunctionIntHash32; });
factory.registerFunction("intHash64", F { return new FunctionIntHash64; });

View File

@@ -14,6 +14,8 @@ void registerFunctionsHigherOrder(FunctionFactory & factory)
factory.registerFunction("arrayExists", F { return new FunctionArrayExists; });
factory.registerFunction("arrayAll", F { return new FunctionArrayAll; });
factory.registerFunction("arraySum", F { return new FunctionArraySum; });
factory.registerFunction("arrayFirst", F { return new FunctionArrayFirst; });
factory.registerFunction("arrayFirstIndex", F { return new FunctionArrayFirstIndex; });
#undef F
}
@@ -11,6 +11,7 @@ void registerFunctionsURL(FunctionFactory & factory)
factory.registerFunction("protocol", F { return new FunctionProtocol; });
factory.registerFunction("domain", F { return new FunctionDomain; });
factory.registerFunction("domainWithoutWWW", F { return new FunctionDomainWithoutWWW; });
factory.registerFunction("firstSignificantSubdomain", F { return new FunctionFirstSignificantSubdomain; });
factory.registerFunction("topLevelDomain", F { return new FunctionTopLevelDomain; });
factory.registerFunction("path", F { return new FunctionPath; });
factory.registerFunction("queryString", F { return new FunctionQueryString; });
@@ -21,6 +22,7 @@ void registerFunctionsURL(FunctionFactory & factory)
factory.registerFunction("extractURLParameterNames", F { return new FunctionExtractURLParameterNames; });
factory.registerFunction("URLHierarchy", F { return new FunctionURLHierarchy; });
factory.registerFunction("URLPathHierarchy", F { return new FunctionURLPathHierarchy; });
factory.registerFunction("cutToFirstSignificantSubdomain", F { return new FunctionCutToFirstSignificantSubdomain; });
factory.registerFunction("cutWWW", F { return new FunctionCutWWW; });
factory.registerFunction("cutQueryString", F { return new FunctionCutQueryString; });
factory.registerFunction("cutFragment", F { return new FunctionCutFragment; });

View File

@@ -125,7 +125,18 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
/// If there is a single numeric key that fits in 64 bits
if (keys_size == 1 && key_columns[0]->isNumeric())
{
size_t size_of_field = key_columns[0]->sizeOfField();
if (size_of_field == 1)
return AggregatedDataVariants::KEY_8;
if (size_of_field == 2)
return AggregatedDataVariants::KEY_16;
if (size_of_field == 4)
return AggregatedDataVariants::KEY_32;
if (size_of_field == 8)
return AggregatedDataVariants::KEY_64;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.", ErrorCodes::LOGICAL_ERROR);
}
/// If the keys fit into 128 bits, we will use a hash table over keys packed into 128 bits
if (keys_fit_128_bits)
@@ -167,8 +178,12 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const
}
/** Interesting: if noinline is removed, gcc for some reason inlines this function and performance degrades (~10%).
* (Possibly because after inlining it, more deeply nested functions are no longer inlined.)
* Inlining makes no sense anyway, since the inner loop is located entirely within this function.
*/
template <typename Method>
void Aggregator::executeImpl(
void NO_INLINE Aggregator::executeImpl(
Method & method,
Arena * aggregates_pool,
size_t rows,
@@ -191,7 +206,7 @@ void Aggregator::executeImpl(
/// Get the key to insert into the hash table.
typename Method::Key key = method.getKey(key_columns, keys_size, i, key_sizes, keys);
if (!no_more_keys) /// Insert.
if (Method::never_overflows || !no_more_keys) /// Insert.
method.data.emplace(key, it, inserted);
else
{
@@ -203,7 +218,7 @@
}
/// If the key did not fit, and the data does not need to be aggregated into a separate row, there is nothing to do.
if (overflow && !overflow_row)
if (!Method::never_overflows && overflow && !overflow_row)
continue;
/// If a new key was inserted, initialize the aggregate function states, and possibly something related to the key.
@@ -216,7 +231,7 @@
createAggregateStates(aggregate_data);
}
AggregateDataPtr value = !overflow ? Method::getAggregateData(it->second) : overflow_row;
AggregateDataPtr value = (Method::never_overflows || !overflow) ? Method::getAggregateData(it->second) : overflow_row;
/// Add the values to the aggregate functions.
for (size_t j = 0; j < aggregates_size; ++j)
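
Method::never_overflows is a static compile-time flag: the new fixed-width key variants can always accept a key, so for them the compiler folds the overflow branches away entirely. A toy restatement of the branch structure under that assumption (all names illustrative):

#include <cstdio>

struct FixedWidthMethod { static constexpr bool never_overflows = true; };
struct GenericMethod    { static constexpr bool never_overflows = false; };

/// Toy version of the branches above, assuming the key is not yet in the table.
template <typename Method>
const char * insertOrOverflow(bool no_more_keys, bool has_overflow_row)
{
    if (Method::never_overflows || !no_more_keys)
        return "insert key into hash table";
    if (!has_overflow_row)
        return "key did not fit, no overflow row: skip";
    return "aggregate into the overflow row";
}

int main()
{
    std::printf("%s\n", insertOrOverflow<FixedWidthMethod>(true, false)); /// always inserts
    std::printf("%s\n", insertOrOverflow<GenericMethod>(true, false));    /// skips the row
}
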
@ -226,7 +241,7 @@ void Aggregator::executeImpl(
template <typename Method>
void Aggregator::convertToBlockImpl(
void NO_INLINE Aggregator::convertToBlockImpl(
Method & method,
ColumnPlainPtrs & key_columns,
AggregateColumnsData & aggregate_columns,
@ -262,7 +277,7 @@ void Aggregator::convertToBlockImpl(
template <typename Method>
void Aggregator::mergeDataImpl(
void NO_INLINE Aggregator::mergeDataImpl(
Method & method_dst,
Method & method_src) const
{
@ -294,7 +309,7 @@ void Aggregator::mergeDataImpl(
template <typename Method>
void Aggregator::mergeStreamsImpl(
void NO_INLINE Aggregator::mergeStreamsImpl(
Method & method,
Arena * aggregates_pool,
size_t start_row,
@ -336,7 +351,7 @@ void Aggregator::mergeStreamsImpl(
template <typename Method>
void Aggregator::destroyImpl(
void NO_INLINE Aggregator::destroyImpl(
Method & method) const
{
for (typename Method::const_iterator it = method.data.begin(); it != method.data.end(); ++it)
@ -372,8 +387,14 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result,
/// Remember the columns we will work with
for (size_t i = 0; i < keys_size; ++i)
{
key_columns[i] = block.getByPosition(keys[i]).column;
if (key_columns[i]->isConst())
throw Exception("Constants are not allowed as GROUP BY keys"
" (but all of them must be eliminated in ExpressionAnalyzer)", ErrorCodes::ILLEGAL_COLUMN);
}
for (size_t i = 0; i < aggregates_size; ++i)
{
for (size_t j = 0; j < aggregate_columns[i].size(); ++j)
@ -434,7 +455,16 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result,
AggregateDataPtr overflow_row_ptr = overflow_row ? result.without_key : nullptr;
if (result.type == AggregatedDataVariants::KEY_64)
if (result.type == AggregatedDataVariants::KEY_8)
executeImpl(*result.key8, result.aggregates_pool, rows, key_columns, aggregate_columns,
result.key_sizes, key, no_more_keys, overflow_row_ptr);
else if (result.type == AggregatedDataVariants::KEY_16)
executeImpl(*result.key16, result.aggregates_pool, rows, key_columns, aggregate_columns,
result.key_sizes, key, no_more_keys, overflow_row_ptr);
else if (result.type == AggregatedDataVariants::KEY_32)
executeImpl(*result.key32, result.aggregates_pool, rows, key_columns, aggregate_columns,
result.key_sizes, key, no_more_keys, overflow_row_ptr);
else if (result.type == AggregatedDataVariants::KEY_64)
executeImpl(*result.key64, result.aggregates_pool, rows, key_columns, aggregate_columns,
result.key_sizes, key, no_more_keys, overflow_row_ptr);
else if (result.type == AggregatedDataVariants::KEY_STRING)
@ -590,7 +620,16 @@ Block Aggregator::convertToBlock(AggregatedDataVariants & data_variants, bool fi
size_t start_row = overflow_row ? 1 : 0;
if (data_variants.type == AggregatedDataVariants::KEY_64)
if (data_variants.type == AggregatedDataVariants::KEY_8)
convertToBlockImpl(*data_variants.key8, key_columns, aggregate_columns,
final_aggregate_columns, data_variants.key_sizes, start_row, final);
else if (data_variants.type == AggregatedDataVariants::KEY_16)
convertToBlockImpl(*data_variants.key16, key_columns, aggregate_columns,
final_aggregate_columns, data_variants.key_sizes, start_row, final);
else if (data_variants.type == AggregatedDataVariants::KEY_32)
convertToBlockImpl(*data_variants.key32, key_columns, aggregate_columns,
final_aggregate_columns, data_variants.key_sizes, start_row, final);
else if (data_variants.type == AggregatedDataVariants::KEY_64)
convertToBlockImpl(*data_variants.key64, key_columns, aggregate_columns,
final_aggregate_columns, data_variants.key_sizes, start_row, final);
else if (data_variants.type == AggregatedDataVariants::KEY_STRING)
@ -694,7 +733,13 @@ AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_va
current_data = nullptr;
}
if (res->type == AggregatedDataVariants::KEY_64)
if (res->type == AggregatedDataVariants::KEY_8)
mergeDataImpl(*res->key8, *current.key8);
else if (res->type == AggregatedDataVariants::KEY_16)
mergeDataImpl(*res->key16, *current.key16);
else if (res->type == AggregatedDataVariants::KEY_32)
mergeDataImpl(*res->key32, *current.key32);
else if (res->type == AggregatedDataVariants::KEY_64)
mergeDataImpl(*res->key64, *current.key64);
else if (res->type == AggregatedDataVariants::KEY_STRING)
mergeDataImpl(*res->key_string, *current.key_string);
@ -782,7 +827,13 @@ void Aggregator::merge(BlockInputStreamPtr stream, AggregatedDataVariants & resu
size_t start_row = overflow_row ? 1 : 0;
if (result.type == AggregatedDataVariants::KEY_64)
if (result.type == AggregatedDataVariants::KEY_8)
mergeStreamsImpl(*result.key8, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key);
else if (result.type == AggregatedDataVariants::KEY_16)
mergeStreamsImpl(*result.key16, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key);
else if (result.type == AggregatedDataVariants::KEY_32)
mergeStreamsImpl(*result.key32, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key);
else if (result.type == AggregatedDataVariants::KEY_64)
mergeStreamsImpl(*result.key64, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key);
else if (result.type == AggregatedDataVariants::KEY_STRING)
mergeStreamsImpl(*result.key_string, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key);
@ -818,7 +869,13 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result)
aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]);
}
if (result.type == AggregatedDataVariants::KEY_64)
if (result.type == AggregatedDataVariants::KEY_8)
destroyImpl(*result.key8);
else if (result.type == AggregatedDataVariants::KEY_16)
destroyImpl(*result.key16);
else if (result.type == AggregatedDataVariants::KEY_32)
destroyImpl(*result.key32);
else if (result.type == AggregatedDataVariants::KEY_64)
destroyImpl(*result.key64);
else if (result.type == AggregatedDataVariants::KEY_STRING)
destroyImpl(*result.key_string);

View File

@ -77,7 +77,7 @@ StoragePtr InterpreterCreateQuery::execute(bool assume_metadata_exists)
SharedPtr<InterpreterSelectQuery> interpreter_select;
Block select_sample;
/// For view-type tables, a sample block may be needed to obtain the columns.
/// For view-type tables, a sample_block may be needed to obtain the columns.
if (create.select && (!create.attach || (!create.columns && (create.is_view || create.is_materialized_view))))
{
interpreter_select = new InterpreterSelectQuery(create.select, context);

View File

@ -48,7 +48,10 @@ void SplittingAggregator::execute(BlockInputStreamPtr stream, ManyAggregatedData
method = chooseAggregationMethod(key_columns, key_sizes);
/// Prepare the arrays where the keys, or the hashes of the keys, will be stored.
if (method == AggregatedDataVariants::KEY_64)
if (method == AggregatedDataVariants::KEY_8 /// TODO Do not use SplittingAggregator for small keys.
|| method == AggregatedDataVariants::KEY_16
|| method == AggregatedDataVariants::KEY_32
|| method == AggregatedDataVariants::KEY_64)
{
keys64.resize(rows);
}
@ -96,7 +99,7 @@ void SplittingAggregator::execute(BlockInputStreamPtr stream, ManyAggregatedData
pool.wait();
rethrowFirstException(exceptions);
rethrowFirstException(exceptions); /// TODO Replace with future, packaged_task
/// Aggregate into independent hash tables in parallel
@ -150,14 +153,17 @@ void SplittingAggregator::calculateHashesThread(Block & block, size_t begin, siz
try
{
if (method == AggregatedDataVariants::KEY_64)
if (method == AggregatedDataVariants::KEY_8
|| method == AggregatedDataVariants::KEY_16
|| method == AggregatedDataVariants::KEY_32
|| method == AggregatedDataVariants::KEY_64)
{
const IColumn & column = *key_columns[0];
for (size_t i = begin; i < end; ++i)
{
keys64[i] = column.get64(i);
thread_nums[i] = intHash32<0xd1f93e3190506c7cULL>(keys64[i]) % threads;
keys64[i] = column.get64(i); /// TODO Get rid of the virtual call
thread_nums[i] = intHash32<0xd1f93e3190506c7cULL>(keys64[i]) % threads; /// TODO A more efficient hash function
}
}
else if (method == AggregatedDataVariants::KEY_STRING)
@ -216,24 +222,8 @@ void SplittingAggregator::calculateHashesThread(Block & block, size_t begin, siz
}
void SplittingAggregator::aggregateThread(
Block & block, AggregatedDataVariants & result, size_t thread_no, ExceptionPtr & exception, MemoryTracker * memory_tracker)
{
current_memory_tracker = memory_tracker;
try
{
result.aggregator = this;
/** Used if there is a limit on the maximum number of rows during aggregation,
 * and if group_by_overflow_mode == ANY.
 * In that case, new keys are not added to the set; aggregation is performed only
 * over keys that have already made it into the set.
 */
bool no_more_keys = max_rows_to_group_by && size_of_all_results > max_rows_to_group_by;
size_t old_result_size = result.size();
if (method == AggregatedDataVariants::KEY_64)
template <typename FieldType>
void SplittingAggregator::aggregateOneNumber(AggregatedDataVariants & result, size_t thread_no, bool no_more_keys)
{
AggregatedDataWithUInt64Key & res = result.key64->data;
@ -269,6 +259,33 @@ void SplittingAggregator::aggregateThread(
aggregate_functions[j]->add(it->second + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i);
}
}
void SplittingAggregator::aggregateThread(
Block & block, AggregatedDataVariants & result, size_t thread_no, ExceptionPtr & exception, MemoryTracker * memory_tracker)
{
current_memory_tracker = memory_tracker;
try
{
result.aggregator = this;
/** Used if there is a limit on the maximum number of rows during aggregation,
 * and if group_by_overflow_mode == ANY.
 * In that case, new keys are not added to the set; aggregation is performed only
 * over keys that have already made it into the set.
 */
bool no_more_keys = max_rows_to_group_by && size_of_all_results > max_rows_to_group_by;
size_t old_result_size = result.size();
if (method == AggregatedDataVariants::KEY_8)
aggregateOneNumber<UInt8>(result, thread_no, no_more_keys);
else if (method == AggregatedDataVariants::KEY_16)
aggregateOneNumber<UInt16>(result, thread_no, no_more_keys);
else if (method == AggregatedDataVariants::KEY_32)
aggregateOneNumber<UInt32>(result, thread_no, no_more_keys);
else if (method == AggregatedDataVariants::KEY_64)
aggregateOneNumber<UInt64>(result, thread_no, no_more_keys);
else if (method == AggregatedDataVariants::KEY_STRING)
{
AggregatedDataWithStringKey & res = result.key_string->data;

View File

@ -1,7 +1,5 @@
#include <iomanip>
#include <boost/bind.hpp>
#include <Poco/Net/NetException.h>
#include <Yandex/Revision.h>
@ -85,9 +83,7 @@ void TCPHandler::runImpl()
sendHello();
connection_context.setProgressCallback([this] (const size_t rows, const size_t bytes) {
return this->updateProgress(rows, bytes);
});
connection_context.setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); });
while (1)
{
@ -125,6 +121,7 @@ void TCPHandler::runImpl()
/// Clear it: while receiving data for external tables, we received an empty block,
/// which means the stream is marked as cancelled and cannot be read from.
state.block_in = nullptr;
state.maybe_compressed_in = nullptr; /// For more correct accounting by the MemoryTracker.
/// Process the Query
state.io = executeQuery(state.query, query_context, false, state.stage);
@ -286,7 +283,7 @@ void TCPHandler::processOrdinaryQuery()
}
else
{
if (state.rows_processed && after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay)
if (state.progress.rows && after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay)
{
/// Some time has passed and there is progress.
after_send_progress.restart();
@ -691,21 +688,17 @@ void TCPHandler::sendEndOfStream()
}
void TCPHandler::updateProgress(size_t rows, size_t bytes)
void TCPHandler::updateProgress(const Progress & value)
{
__sync_fetch_and_add(&state.rows_processed, rows);
__sync_fetch_and_add(&state.bytes_processed, bytes);
state.progress.incrementPiecewiseAtomically(value);
}
void TCPHandler::sendProgress()
{
size_t rows_processed = __sync_fetch_and_and(&state.rows_processed, 0);
size_t bytes_processed = __sync_fetch_and_and(&state.bytes_processed, 0);
writeVarUInt(Protocol::Server::Progress, *out);
Progress progress(rows_processed, bytes_processed);
progress.write(*out);
Progress increment = state.progress.fetchAndResetPiecewiseAtomically();
increment.write(*out, client_revision);
out->next();
}
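
The progress counters previously kept as raw fields in QueryState are now encapsulated in a Progress struct updated field by field. A minimal sketch of how the two piecewise-atomic methods could look, using the same __sync builtins the old code used (an assumption; the real struct also carries total_rows and revision-aware serialization):

#include <cstddef>

struct ProgressSketch
{
    size_t rows = 0;
    size_t bytes = 0;

    /// Each counter is updated atomically on its own; the pair as a whole is not,
    /// which is acceptable for monotonically growing progress counters.
    void incrementPiecewiseAtomically(const ProgressSketch & value)
    {
        __sync_fetch_and_add(&rows, value.rows);
        __sync_fetch_and_add(&bytes, value.bytes);
    }

    /// Atomically read out and zero each counter, again field by field.
    ProgressSketch fetchAndResetPiecewiseAtomically()
    {
        ProgressSketch res;
        res.rows = __sync_fetch_and_and(&rows, 0);
        res.bytes = __sync_fetch_and_and(&bytes, 0);
        return res;
    }
};
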

View File

@ -23,8 +23,8 @@ struct QueryState
/// Query identifier.
String query_id;
QueryProcessingStage::Enum stage;
Protocol::Compression::Enum compression;
QueryProcessingStage::Enum stage = QueryProcessingStage::Complete;
Protocol::Compression::Enum compression = Protocol::Compression::Disable;
/// Where to read the data for the INSERT from.
SharedPtr<ReadBuffer> maybe_compressed_in;
@ -40,24 +40,29 @@ struct QueryState
BlockIO io;
/// Whether the query was cancelled
bool is_cancelled;
bool is_cancelled = false;
/// Empty or not
bool is_empty;
bool is_empty = true;
/// Whether the data has been sent.
bool sent_all_data;
bool sent_all_data = false;
/// Whether this is an INSERT query.
bool is_insert;
bool is_insert = false;
/// For progress output - the increment since the previous progress send.
volatile size_t rows_processed;
volatile size_t bytes_processed;
Progress progress;
QueryState() : query_id(""), stage(QueryProcessingStage::Complete), compression(Protocol::Compression::Disable),
is_cancelled(false), is_empty(true), sent_all_data(false), is_insert(false), rows_processed(0), bytes_processed(0) {}
void reset()
{
/** process_list_entry also enables/disables memory accounting by the MemoryTracker.
 * The members maybe_compressed_in, block_in, maybe_compressed_out, block_out
 * could have been initialized before io, and the memory allocated in them might not have been accounted by the MemoryTracker.
 * If these members are destroyed earlier, freeing that memory will be accounted by the MemoryTracker,
 * and the computed memory consumption may turn out to be negative (that is not a problem, but it is ugly).
 * Therefore, destroy process_list_entry first.
 */
io.process_list_entry = nullptr;
*this = QueryState();
}
@ -133,7 +138,7 @@ private:
bool isQueryCancelled();
/// This function is called from different threads.
void updateProgress(size_t rows, size_t bytes);
void updateProgress(const Progress & value);
/// Log information about the execution speed of a SELECT query.
void logProfileInfo(Stopwatch & watch, IBlockInputStream & in);

View File

@ -350,10 +350,12 @@ MergeTreeData::DataPartPtr MergeTreeDataMerger::mergeParts(
auto input = stdext::make_unique<MergeTreeBlockInputStream>(
data.getFullPath() + parts[i]->name + '/', DEFAULT_MERGE_BLOCK_SIZE, union_column_names, data,
parts[i], ranges, false, nullptr, "");
input->setProgressCallback([&merge_entry, rows_total] (const std::size_t rows, const std::size_t bytes) {
const auto new_rows_read = __sync_add_and_fetch(&merge_entry->rows_read, rows);
input->setProgressCallback([&merge_entry, rows_total] (const Progress & value)
{
const auto new_rows_read = __sync_add_and_fetch(&merge_entry->rows_read, value.rows);
merge_entry->progress = static_cast<Float64>(new_rows_read) / rows_total;
__sync_add_and_fetch(&merge_entry->bytes_read_uncompressed, bytes);
__sync_add_and_fetch(&merge_entry->bytes_read_uncompressed, value.bytes);
});
src_streams.push_back(new ExpressionBlockInputStream(input.release(), data.getPrimaryExpression()));

View File

@ -0,0 +1,432 @@
#include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/InterpreterInsertQuery.h>
#include <DB/Interpreters/InterpreterAlterQuery.h>
#include <DB/Storages/StorageBuffer.h>
#include <DB/Parsers/ASTInsertQuery.h>
#include <Poco/Ext/ThreadNumber.h>
#include <statdaemons/ext/range.hpp>
namespace DB
{
StoragePtr StorageBuffer::create(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_,
size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_,
const String & destination_database_, const String & destination_table_)
{
return (new StorageBuffer{
name_, columns_, context_, num_shards_, min_thresholds_, max_thresholds_, destination_database_, destination_table_})->thisPtr();
}
StorageBuffer::StorageBuffer(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_,
size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_,
const String & destination_database_, const String & destination_table_)
: name(name_), columns(columns_), context(context_),
num_shards(num_shards_), buffers(num_shards_),
min_thresholds(min_thresholds_), max_thresholds(max_thresholds_),
destination_database(destination_database_), destination_table(destination_table_),
no_destination(destination_database.empty() && destination_table.empty()),
log(&Logger::get("StorageBuffer (" + name + ")")),
flush_thread([this] { flushThread(); })
{
}
/// Reads from a single buffer (from a single block) under its mutex.
class BufferBlockInputStream : public IProfilingBlockInputStream
{
public:
BufferBlockInputStream(const Names & column_names_, StorageBuffer::Buffer & buffer_)
: column_names(column_names_.begin(), column_names_.end()), buffer(buffer_) {}
String getName() const { return "BufferBlockInputStream"; }
String getID() const
{
std::stringstream res;
res << "Buffer(" << &buffer;
for (const auto & name : column_names)
res << ", " << name;
res << ")";
return res.str();
}
protected:
Block readImpl()
{
Block res;
if (has_been_read)
return res;
has_been_read = true;
std::lock_guard<std::mutex> lock(buffer.mutex);
if (!buffer.data)
return res;
for (size_t i = 0, size = buffer.data.columns(); i < size; ++i)
{
auto & col = buffer.data.unsafeGetByPosition(i);
if (column_names.count(col.name))
res.insert(col);
}
return res;
}
private:
NameSet column_names;
StorageBuffer::Buffer & buffer;
bool has_been_read = false;
};
BlockInputStreams StorageBuffer::read(
const Names & column_names,
ASTPtr query,
const Settings & settings,
QueryProcessingStage::Enum & processed_stage,
size_t max_block_size,
unsigned threads)
{
processed_stage = QueryProcessingStage::FetchColumns;
BlockInputStreams streams_from_dst;
if (!no_destination)
streams_from_dst = context.getTable(destination_database, destination_table)->read(
column_names, query, settings, processed_stage, max_block_size, threads);
BlockInputStreams streams_from_buffers;
streams_from_buffers.reserve(num_shards);
for (auto & buf : buffers)
streams_from_buffers.push_back(new BufferBlockInputStream(column_names, buf));
/** If the sources from the table were processed up to some non-initial stage of query execution,
 * then the sources from the buffers must also be wrapped into a processing pipeline up to the same stage.
 */
if (processed_stage > QueryProcessingStage::FetchColumns)
for (auto & stream : streams_from_buffers)
stream = InterpreterSelectQuery(query, context, processed_stage, 0, stream).execute();
streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end());
return streams_from_dst;
}
static void appendBlock(const Block & from, Block & to)
{
size_t rows = from.rows();
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
{
const IColumn & col_from = *from.getByPosition(column_no).column.get();
IColumn & col_to = *to.getByPosition(column_no).column.get();
if (col_from.getName() != col_to.getName())
throw Exception("Cannot append block to another: different types of columns at index " + toString(column_no)
+ ". Block 1: " + from.dumpStructure() + ". Block 2: " + to.dumpStructure(), ErrorCodes::BLOCKS_HAS_DIFFERENT_STRUCTURE);
for (size_t row_no = 0; row_no < rows; ++row_no)
col_to.insertFrom(col_from, row_no);
}
}
class BufferBlockOutputStream : public IBlockOutputStream
{
public:
BufferBlockOutputStream(StorageBuffer & storage_) : storage(storage_) {}
void write(const Block & block)
{
if (!block)
return;
size_t rows = block.rowsInFirstColumn();
if (!rows)
return;
StoragePtr destination;
if (!storage.no_destination)
{
destination = storage.context.tryGetTable(storage.destination_database, storage.destination_table);
/// Check the table structure.
try
{
destination->check(block, true);
}
catch (Exception & e)
{
e.addMessage("(when looking at destination table " + storage.destination_database + "." + storage.destination_table + ")");
throw;
}
}
size_t bytes = block.bytes();
/// If the block already exceeds the maximum limits, write it directly, bypassing the buffer.
if (rows > storage.max_thresholds.rows || bytes > storage.max_thresholds.bytes)
{
if (!storage.no_destination)
{
LOG_TRACE(storage.log, "Writing block with " << rows << " rows, " << bytes << " bytes directly.");
storage.writeBlockToDestination(block, destination);
}
return;
}
/// Distribute the load across the shards by thread number.
const auto start_shard_num = Poco::ThreadNumber::get() % storage.num_shards;
/// Iterate over the buffers in a circle, trying to lock the mutex. No more than one full circle.
auto shard_num = start_shard_num;
size_t try_no = 0;
for (; try_no != storage.num_shards; ++try_no)
{
std::unique_lock<std::mutex> lock(storage.buffers[shard_num].mutex, std::try_to_lock_t());
if (lock.owns_lock())
{
insertIntoBuffer(block, storage.buffers[shard_num], std::move(lock));
break;
}
++shard_num;
if (shard_num == storage.num_shards)
shard_num = 0;
}
/// If nothing could be locked right away, wait on the mutex.
if (try_no == storage.num_shards)
insertIntoBuffer(block, storage.buffers[start_shard_num], std::unique_lock<std::mutex>(storage.buffers[start_shard_num].mutex));
}
private:
StorageBuffer & storage;
void insertIntoBuffer(const Block & block, StorageBuffer::Buffer & buffer, std::unique_lock<std::mutex> && lock)
{
if (!buffer.data)
{
buffer.first_write_time = time(0);
buffer.data = block.cloneEmpty();
}
/// If the limits would be exceeded after inserting into the buffer, flush the buffer.
if (storage.checkThresholds(buffer, time(0), block.rowsInFirstColumn(), block.bytes()))
{
/// Take the block out of the buffer and replace the buffer with an empty one. After that, the mutex can be unlocked.
Block block_to_write;
buffer.data.swap(block_to_write);
buffer.first_write_time = 0;
lock.unlock();
if (!storage.no_destination)
{
appendBlock(block, block_to_write);
storage.writeBlockToDestination(block_to_write,
storage.context.tryGetTable(storage.destination_database, storage.destination_table));
}
}
else
appendBlock(block, buffer.data);
}
};
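/** Aside (not part of this file): write() above uses the classic try-lock
  * round-robin pattern - try every shard once without blocking, and only block
  * on the starting shard if every try_to_lock attempt failed. Condensed:
  *
  *   for (size_t i = 0; i != num_shards; ++i)
  *   {
  *       std::unique_lock<std::mutex> lock(buffers[(start + i) % num_shards].mutex, std::try_to_lock);
  *       if (lock.owns_lock())
  *           return insertIntoBuffer(block, buffers[(start + i) % num_shards], std::move(lock));
  *   }
  *   insertIntoBuffer(block, buffers[start], std::unique_lock<std::mutex>(buffers[start].mutex));
  */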
BlockOutputStreamPtr StorageBuffer::write(ASTPtr query)
{
return new BufferBlockOutputStream(*this);
}
void StorageBuffer::shutdown()
{
shutdown_event.set();
if (flush_thread.joinable())
flush_thread.join();
optimize();
}
bool StorageBuffer::optimize()
{
for (auto & buf : buffers)
flushBuffer(buf, false);
return true;
}
bool StorageBuffer::checkThresholds(Buffer & buffer, time_t current_time, size_t additional_rows, size_t additional_bytes)
{
time_t time_passed = 0;
if (buffer.first_write_time)
time_passed = current_time - buffer.first_write_time;
size_t rows = buffer.data.rowsInFirstColumn() + additional_rows;
size_t bytes = buffer.data.bytes() + additional_bytes;
bool res =
(time_passed > min_thresholds.time && rows > min_thresholds.rows && bytes > min_thresholds.bytes)
|| (time_passed > max_thresholds.time || rows > max_thresholds.rows || bytes > max_thresholds.bytes);
if (res)
LOG_TRACE(log, "Flushing buffer with " << rows << " rows, " << bytes << " bytes, age " << time_passed << " seconds.");
return res;
}
void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds)
{
Block block_to_write;
time_t current_time = check_thresholds ? time(0) : 0;
/** Quite a few problems stem from wanting to lock the buffer only for a short time.
 * Under the lock, we take the block out of the buffer and replace it with a new empty one.
 * Then we try to write the obtained block to the subordinate table.
 * If that fails, we put the data back into the buffer.
 * Note: perhaps it is worth getting rid of this complexity.
 */
{
std::lock_guard<std::mutex> lock(buffer.mutex);
if (check_thresholds && !checkThresholds(buffer, current_time))
return;
buffer.data.swap(block_to_write);
buffer.first_write_time = 0;
}
if (no_destination)
return;
try
{
writeBlockToDestination(block_to_write, context.tryGetTable(destination_database, destination_table));
}
catch (...)
{
/// Put the block back into the buffer.
std::lock_guard<std::mutex> lock(buffer.mutex);
if (buffer.data)
{
/** Since the table structure has not changed, the two blocks can be merged.
 * Note: a problem remains - because different blocks are inserted on different attempts,
 * the idempotency of insertion into ReplicatedMergeTree is lost.
 */
appendBlock(block_to_write, buffer.data);
buffer.data.swap(block_to_write);
}
if (!buffer.first_write_time)
buffer.first_write_time = current_time;
/// After some time, there will be another attempt to write.
throw;
}
}
void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr table)
{
if (no_destination || !block)
return;
if (!table)
{
LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table << " doesn't exist. Block of data is discarded.");
return;
}
ASTInsertQuery * insert = new ASTInsertQuery;
ASTPtr ast_ptr = insert;
insert->database = destination_database;
insert->table = destination_table;
/** We will insert the columns that form the intersection of the column sets of the buffer table and the subordinate table.
 * This supports some (but not all) of the cases where the table structures do not match.
 */
Block structure_of_destination_table = table->getSampleBlock();
Names columns_intersection;
columns_intersection.reserve(block.columns());
for (size_t i : ext::range(0, structure_of_destination_table.columns()))
{
auto dst_col = structure_of_destination_table.unsafeGetByPosition(i);
if (block.has(dst_col.name))
{
if (block.getByName(dst_col.name).type->getName() != dst_col.type->getName())
{
LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table
<< " has a different type of column " << dst_col.name << ". Block of data is discarded.");
return;
}
columns_intersection.push_back(dst_col.name);
}
}
if (columns_intersection.empty())
{
LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table << " has no common columns with the block in the buffer. Block of data is discarded.");
return;
}
if (columns_intersection.size() != block.columns())
LOG_WARNING(log, "Not all columns from the block in the buffer exist in the destination table "
<< destination_database << "." << destination_table << ". Some columns are discarded.");
ASTExpressionList * list_of_columns = new ASTExpressionList;
insert->columns = list_of_columns;
list_of_columns->children.reserve(columns_intersection.size());
for (const String & column : columns_intersection)
list_of_columns->children.push_back(new ASTIdentifier(StringRange(), column, ASTIdentifier::Column));
InterpreterInsertQuery interpreter{ast_ptr, context};
auto block_io = interpreter.execute();
block_io.out->writePrefix();
block_io.out->write(block);
block_io.out->writeSuffix();
}
void StorageBuffer::flushThread()
{
do
{
try
{
optimize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
} while (!shutdown_event.tryWait(1000));
}
void StorageBuffer::alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context)
{
auto lock = lockStructureForAlter();
/// So that no blocks of the old structure remain.
optimize();
params.apply(*columns);
InterpreterAlterQuery::updateMetadata(database_name, table_name, *columns, context);
}
}
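
The flush predicate in checkThresholds combines the two threshold triples: flush when all of the minimums are exceeded, or when any maximum is. A self-contained restatement (the Thresholds layout is assumed from the create() signature above):

#include <cstddef>
#include <cstdio>
#include <ctime>

struct Thresholds { time_t time; size_t rows; size_t bytes; };

/// Flush when ALL minimum thresholds are exceeded, or when ANY maximum is.
bool shouldFlush(const Thresholds & min, const Thresholds & max,
                 time_t time_passed, size_t rows, size_t bytes)
{
    return (time_passed > min.time && rows > min.rows && bytes > min.bytes)
        || (time_passed > max.time || rows > max.rows || bytes > max.bytes);
}

int main()
{
    Thresholds min{10, 10000, 10000000};
    Thresholds max{100, 1000000, 100000000};
    std::printf("%d\n", shouldFlush(min, max, 15, 20000, 20000000)); /// 1: all minimums exceeded
    std::printf("%d\n", shouldFlush(min, max, 5, 100, 200000000));   /// 1: max bytes exceeded
    std::printf("%d\n", shouldFlush(min, max, 5, 100, 100));         /// 0: nothing exceeded
}
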

View File

@ -11,6 +11,7 @@
#include <DB/Storages/StorageLog.h>
#include <DB/Storages/StorageTinyLog.h>
#include <DB/Storages/StorageMemory.h>
#include <DB/Storages/StorageBuffer.h>
#include <DB/Storages/StorageNull.h>
#include <DB/Storages/StorageMerge.h>
#include <DB/Storages/StorageMergeTree.h>
@ -190,6 +191,46 @@ StoragePtr StorageFactory::get(
return StorageDistributed::create(
table_name, columns, remote_database, remote_table, cluster_name, context, sharding_key, data_path);
}
else if (name == "Buffer")
{
/** Buffer(db, table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
*
* db, table - the table into which data from the buffer is flushed.
* num_buckets - the level of parallelism.
* min_time, max_time, min_rows, max_rows, min_bytes, max_bytes - the conditions for flushing the buffer.
*/
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
if (args_func.size() != 1)
throw Exception("Storage Buffer requires 9 parameters: "
" destination database, destination table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
if (args.size() != 9)
throw Exception("Storage Buffer requires 9 parameters: "
" destination_database, destination_table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
String destination_database = reinterpretAsIdentifier(args[0], local_context).name;
String destination_table = typeid_cast<ASTIdentifier &>(*args[1]).name;
size_t num_buckets = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[2]).value);
time_t min_time = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[3]).value);
time_t max_time = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[4]).value);
size_t min_rows = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[5]).value);
size_t max_rows = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[6]).value);
size_t min_bytes = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[7]).value);
size_t max_bytes = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[8]).value);
return StorageBuffer::create(
table_name, columns, context,
num_buckets, {min_time, min_rows, min_bytes}, {max_time, max_rows, max_bytes},
destination_database, destination_table);
}
else if (endsWith(name, "MergeTree"))
{
/** The [Replicated][Summing|Collapsing|Aggregating|]MergeTree engines (8 combinations)

View File

@ -43,8 +43,8 @@ String LogBlockInputStream::getID() const
std::stringstream res;
res << "Log(" << storage.getTableName() << ", " << &storage << ", " << mark_number << ", " << rows_limit;
for (size_t i = 0; i < column_names.size(); ++i)
res << ", " << column_names[i];
for (const auto & name : column_names)
res << ", " << name;
res << ")";
return res.str();

View File

@ -34,7 +34,7 @@ BlockInputStreams StorageSystemOne::read(
ColumnWithNameAndType col;
col.name = "dummy";
col.type = new DataTypeUInt8;
col.column = new ColumnConstUInt8(1, 0);
col.column = ColumnConstUInt8(1, 0).convertToFullColumn();
block.insert(col);
return BlockInputStreams(1, new OneBlockInputStream(block));

View File

@ -17,6 +17,7 @@ StorageSystemProcesses::StorageSystemProcesses(const std::string & name_, const
{ "elapsed", new DataTypeFloat64 },
{ "rows_read", new DataTypeUInt64 },
{ "bytes_read", new DataTypeUInt64 },
{ "total_rows_approx", new DataTypeUInt64 },
{ "memory_usage", new DataTypeUInt64 },
{ "query", new DataTypeString },
{ "query_id", new DataTypeString }
@ -42,20 +43,19 @@ BlockInputStreams StorageSystemProcesses::read(
ColumnWithNameAndType col_elapsed{new ColumnFloat64, new DataTypeFloat64, "elapsed"};
ColumnWithNameAndType col_rows_read{new ColumnUInt64, new DataTypeUInt64, "rows_read"};
ColumnWithNameAndType col_bytes_read{new ColumnUInt64, new DataTypeUInt64, "bytes_read"};
ColumnWithNameAndType col_total_rows_approx{new ColumnUInt64, new DataTypeUInt64, "total_rows_approx"};
ColumnWithNameAndType col_memory_usage{new ColumnUInt64, new DataTypeUInt64, "memory_usage"};
ColumnWithNameAndType col_query{new ColumnString, new DataTypeString, "query"};
ColumnWithNameAndType col_query_id{new ColumnString, new DataTypeString, "query_id"};
for (const auto & process : context.getProcessList().get())
{
const size_t rows_read = process.rows_processed;
const size_t bytes_read = process.bytes_processed;
col_user.column->insert(process.user);
col_address.column->insert(process.ip_address.toString());
col_elapsed.column->insert(process.watch.elapsedSeconds());
col_rows_read.column->insert(rows_read);
col_bytes_read.column->insert(bytes_read);
col_rows_read.column->insert(process.progress.rows);
col_bytes_read.column->insert(process.progress.bytes);
col_total_rows_approx.column->insert(process.progress.total_rows);
col_memory_usage.column->insert(static_cast<UInt64>(process.memory_tracker.get()));
col_query.column->insert(process.query);
col_query_id.column->insert(process.query_id);
@ -67,6 +67,7 @@ BlockInputStreams StorageSystemProcesses::read(
col_elapsed,
col_rows_read,
col_bytes_read,
col_total_rows_approx,
col_memory_usage,
col_query,
col_query_id

View File

@ -38,8 +38,8 @@ String TinyLogBlockInputStream::getID() const
std::stringstream res;
res << "TinyLog(" << storage.getTableName() << ", " << &storage;
for (size_t i = 0; i < column_names.size(); ++i)
res << ", " << column_names[i];
for (const auto & name : column_names)
res << ", " << name;
res << ")";
return res.str();

View File

@ -1 +1 @@
SELECT quantilesTiming(0.1, 0.5, 0.9)(materialize(dummy)) FROM remote('127.0.0.{1,2}', system, one) GROUP BY 1 WITH TOTALS
SELECT quantilesTiming(0.1, 0.5, 0.9)(dummy) FROM remote('127.0.0.{1,2}', system, one) GROUP BY 1 WITH TOTALS

View File

@ -0,0 +1,18 @@
1
1
1
1
1 1
1 1
1
1
1
1
1
1
1
1
1
1
1
1

View File

@ -0,0 +1,19 @@
select IPv4StringToNum('') == 0;
select IPv4StringToNum(materialize('')) == 0;
select IPv4StringToNum('not an ip string') == 0;
select IPv4StringToNum(materialize('not an ip string')) == 0;
select IPv4StringToNum('127.0.0.1' as p) == (0x7f000001 as n), IPv4NumToString(n) == p;
select IPv4StringToNum(materialize('127.0.0.1') as p) == (materialize(0x7f000001) as n), IPv4NumToString(n) == p;
select IPv4NumToString(toUInt32(0)) == '0.0.0.0';
select IPv4NumToString(materialize(toUInt32(0))) == materialize('0.0.0.0');
select IPv6NumToString(toFixedString('', 16)) == '::';
select IPv6NumToString(toFixedString(materialize(''), 16)) == materialize('::');
select IPv6NumToString(IPv6StringToNum('::ffff:127.0.0.1' as p) as n) == p;
select IPv6NumToString(IPv6StringToNum(materialize('::ffff:127.0.0.1') as p) as n) == p;
select IPv6NumToString(toFixedString(unhex('20010DB800000003000001FF0000002E'), 16)) == '2001:db8:0:3:0:1ff:0:2e';
select IPv6NumToString(toFixedString(unhex(materialize('20010DB800000003000001FF0000002E')), 16)) == materialize('2001:db8:0:3:0:1ff:0:2e');
select IPv6StringToNum('') == toFixedString(materialize(''), 16);
select IPv6StringToNum(materialize('')) == toFixedString(materialize(''), 16);
select IPv6StringToNum('not an ip string') == toFixedString(materialize(''), 16);
select IPv6StringToNum(materialize('not an ip string')) == toFixedString(materialize(''), 16);

View File

@ -4,6 +4,7 @@
#include <Yandex/DateLUT.h>
#include <mysqlxx/Date.h>
#include <iomanip>
namespace mysqlxx
@ -177,12 +178,15 @@ public:
inline std::ostream & operator<< (std::ostream & ostr, const DateTime & datetime)
{
return ostr << datetime.year()
<< '-' << (datetime.month() / 10) << (datetime.month() % 10)
ostr << std::setfill('0') << std::setw(4) << datetime.year();
ostr << '-' << (datetime.month() / 10) << (datetime.month() % 10)
<< '-' << (datetime.day() / 10) << (datetime.day() % 10)
<< ' ' << (datetime.hour() / 10) << (datetime.hour() % 10)
<< ':' << (datetime.minute() / 10) << (datetime.minute() % 10)
<< ':' << (datetime.second() / 10) << (datetime.second() % 10);
return ostr;
}
}
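
The fix above matters for years below 1000, which previously printed with fewer than four digits. A standalone demonstration of the std::setfill/std::setw combination, not taken from the library:

#include <iomanip>
#include <iostream>

int main()
{
    std::cout << 999 << '\n';                                      /// 999
    std::cout << std::setfill('0') << std::setw(4) << 999 << '\n'; /// 0999 - padded like the fixed operator<<
}
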

View File

@ -23,6 +23,11 @@ public:
const char * className() const throw() { return "zkutil::KeeperException"; }
KeeperException * clone() const { return new KeeperException(*this); }
/// On these errors, the session with ZooKeeper must be reinitialized
bool isUnrecoverable() const
{
return code == ZINVALIDSTATE || code == ZSESSIONEXPIRED;
}
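/** Typical use (illustrative; the surrounding retry helpers are assumptions):
  *
  *   catch (const zkutil::KeeperException & e)
  *   {
  *       if (e.isUnrecoverable())
  *           reinitializeSession();   /// ZINVALIDSTATE / ZSESSIONEXPIRED: the handle is dead
  *       else
  *           retryOnSameSession();    /// transient error, the session is still valid
  *   }
  */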
int32_t code;
private:
@ -30,6 +35,7 @@ private:
{
ProfileEvents::increment(ProfileEvents::ZooKeeperExceptions);
}
};
};

View File

@ -246,7 +246,7 @@ int32_t ZooKeeper::tryCreate(const std::string & path, const std::string & data,
int32_t ZooKeeper::tryCreateWithRetries(const std::string & path, const std::string & data, int32_t mode, std::string & pathCreated, size_t* attempt)
{
return retry([&path, &data, mode, &pathCreated, this] { return tryCreate(path, data, mode, pathCreated); });
return retry([&path, &data, mode, &pathCreated, this] { return tryCreate(path, data, mode, pathCreated); }, attempt);
}