2017-04-28 18:33:31 +00:00
|
|
|
#pragma once
|
|
|
|
#include <Columns/ColumnString.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <Columns/ColumnVector.h>
|
2017-04-28 18:33:31 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2019-01-23 14:48:50 +00:00
|
|
|
#include <DataStreams/IBlockInputStream.h>
|
2017-05-04 18:14:23 +00:00
|
|
|
#include <DataTypes/DataTypeDate.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <ext/range.h>
|
2018-11-28 11:37:12 +00:00
|
|
|
#include "DictionaryBlockInputStreamBase.h"
|
|
|
|
#include "DictionaryStructure.h"
|
|
|
|
#include "IDictionary.h"
|
|
|
|
#include "RangeHashedDictionary.h"
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
namespace DB
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
2017-05-26 16:08:56 +00:00
|
|
|
/*
|
|
|
|
 * BlockInputStream implementation for external dictionaries.
 * read() returns a single block consisting of the in-memory contents of the dictionary.
|
|
|
|
*/
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
2017-04-28 18:33:31 +00:00
|
|
|
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
|
|
|
{
|
|
|
|
public:
|
2018-08-24 05:21:53 +00:00
|
|
|
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
RangeDictionaryBlockInputStream(
|
2018-12-10 15:25:45 +00:00
|
|
|
DictionaryPtr dictionary,
|
2019-02-10 16:55:12 +00:00
|
|
|
UInt64 max_block_size,
|
2018-12-10 15:25:45 +00:00
|
|
|
const Names & column_names,
|
|
|
|
PaddedPODArray<Key> && ids_to_fill,
|
|
|
|
PaddedPODArray<RangeType> && start_dates,
|
|
|
|
PaddedPODArray<RangeType> && end_dates);
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
String getName() const override { return "RangeDictionary"; }
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
protected:
|
|
|
|
Block getBlock(size_t start, size_t length) const override;
|
|
|
|
|
2017-04-28 18:33:31 +00:00
|
|
|
private:
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename Type>
|
2018-12-10 15:25:45 +00:00
|
|
|
using DictionaryGetter = void (DictionaryType::*)(
|
|
|
|
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-10-08 19:45:17 +00:00
|
|
|
template <typename Type>
|
2018-12-10 15:25:45 +00:00
|
|
|
using DictionaryDecimalGetter = void (DictionaryType::*)(
|
|
|
|
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
|
2018-10-08 19:45:17 +00:00
|
|
|
|
|
|
|
template <typename AttributeType, typename Getter>
|
2018-12-10 15:25:45 +00:00
|
|
|
ColumnPtr getColumnFromAttribute(
|
|
|
|
Getter getter,
|
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
|
|
|
const PaddedPODArray<Int64> & dates,
|
|
|
|
const DictionaryAttribute & attribute,
|
|
|
|
const DictionaryType & concrete_dictionary) const;
|
|
|
|
ColumnPtr getColumnFromAttributeString(
|
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
|
|
|
const PaddedPODArray<Int64> & dates,
|
|
|
|
const DictionaryAttribute & attribute,
|
|
|
|
const DictionaryType & concrete_dictionary) const;
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename T>
|
2018-01-10 00:04:08 +00:00
|
|
|
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2018-09-13 13:33:44 +00:00
|
|
|
template <typename DictionarySpecialAttributeType, typename T>
|
2017-05-04 18:14:23 +00:00
|
|
|
void addSpecialColumn(
|
2018-12-10 15:25:45 +00:00
|
|
|
const std::optional<DictionarySpecialAttributeType> & attribute,
|
|
|
|
DataTypePtr type,
|
|
|
|
const std::string & default_name,
|
|
|
|
const std::unordered_set<std::string> & column_names_set,
|
|
|
|
const PaddedPODArray<T> & values,
|
|
|
|
ColumnsWithTypeAndName & columns) const;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
Block fillBlock(
|
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
|
|
|
const PaddedPODArray<RangeType> & block_start_dates,
|
|
|
|
const PaddedPODArray<RangeType> & block_end_dates) const;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
PaddedPODArray<Int64>
|
|
|
|
makeDateKey(const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const;
|
2017-12-25 19:00:48 +00:00
|
|
|
|
2018-08-24 05:21:53 +00:00
|
|
|
DictionaryPtr dictionary;
|
2017-05-26 16:08:56 +00:00
|
|
|
Names column_names;
|
|
|
|
PaddedPODArray<Key> ids;
|
2018-09-17 15:04:57 +00:00
|
|
|
PaddedPODArray<RangeType> start_dates;
|
|
|
|
PaddedPODArray<RangeType> end_dates;
|
2017-04-28 18:33:31 +00:00
|
|
|
};
|
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
|
|
|
RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::RangeDictionaryBlockInputStream(
|
2018-12-10 15:25:45 +00:00
|
|
|
DictionaryPtr dictionary,
|
|
|
|
size_t max_column_size,
|
|
|
|
const Names & column_names,
|
|
|
|
PaddedPODArray<Key> && ids,
|
|
|
|
PaddedPODArray<RangeType> && block_start_dates,
|
|
|
|
PaddedPODArray<RangeType> && block_end_dates)
|
|
|
|
: DictionaryBlockInputStreamBase(ids.size(), max_column_size)
|
|
|
|
, dictionary(dictionary)
|
|
|
|
, column_names(column_names)
|
|
|
|
, ids(std::move(ids))
|
|
|
|
, start_dates(std::move(block_start_dates))
|
|
|
|
, end_dates(std::move(block_end_dates))
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
|
|
|
Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(size_t start, size_t length) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
|
|
|
PaddedPODArray<Key> block_ids;
|
2018-09-17 15:04:57 +00:00
|
|
|
PaddedPODArray<RangeType> block_start_dates;
|
|
|
|
PaddedPODArray<RangeType> block_end_dates;
|
2017-05-26 16:08:56 +00:00
|
|
|
block_ids.reserve(length);
|
|
|
|
block_start_dates.reserve(length);
|
|
|
|
block_end_dates.reserve(length);
|
|
|
|
|
|
|
|
for (auto idx : ext::range(start, start + length))
|
|
|
|
{
|
|
|
|
block_ids.push_back(ids[idx]);
|
2017-12-25 19:00:48 +00:00
|
|
|
block_start_dates.push_back(start_dates[idx]);
|
|
|
|
block_end_dates.push_back(end_dates[idx]);
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
|
|
|
}
|
|
|
|
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
2018-10-08 19:45:17 +00:00
|
|
|
template <typename AttributeType, typename Getter>
|
2018-09-17 15:04:57 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
|
2018-12-10 15:25:45 +00:00
|
|
|
Getter getter,
|
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
|
|
|
const PaddedPODArray<Int64> & dates,
|
|
|
|
const DictionaryAttribute & attribute,
|
|
|
|
const DictionaryType & concrete_dictionary) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2018-10-08 19:45:17 +00:00
|
|
|
if constexpr (IsDecimalNumber<AttributeType>)
|
|
|
|
{
|
|
|
|
auto column = ColumnDecimal<AttributeType>::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused.
|
|
|
|
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData());
|
|
|
|
return column;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
|
|
|
|
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
|
|
|
|
return column_vector;
|
|
|
|
}
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
|
2018-12-10 15:25:45 +00:00
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
|
|
|
const PaddedPODArray<Int64> & dates,
|
|
|
|
const DictionaryAttribute & attribute,
|
|
|
|
const DictionaryType & concrete_dictionary) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2017-12-15 03:19:14 +00:00
|
|
|
auto column_string = ColumnString::create();
|
2018-08-27 17:18:14 +00:00
|
|
|
concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return column_string;
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename T>
|
2018-09-17 15:04:57 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2017-12-15 03:19:14 +00:00
|
|
|
auto column_vector = ColumnVector<T>::create();
|
2017-05-26 16:08:56 +00:00
|
|
|
column_vector->getData().reserve(array.size());
|
|
|
|
for (T value : array)
|
2018-10-22 08:54:54 +00:00
|
|
|
column_vector->insertValue(value);
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return column_vector;
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
2018-09-13 13:33:44 +00:00
|
|
|
template <typename DictionarySpecialAttributeType, typename T>
|
2018-09-17 15:04:57 +00:00
|
|
|
void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
|
2018-12-10 15:25:45 +00:00
|
|
|
const std::optional<DictionarySpecialAttributeType> & attribute,
|
|
|
|
DataTypePtr type,
|
|
|
|
const std::string & default_name,
|
|
|
|
const std::unordered_set<std::string> & column_names_set,
|
|
|
|
const PaddedPODArray<T> & values,
|
|
|
|
ColumnsWithTypeAndName & columns) const
|
2017-05-04 18:14:23 +00:00
|
|
|
{
|
|
|
|
std::string name = default_name;
|
2018-01-10 00:04:08 +00:00
|
|
|
if (attribute)
|
2017-05-26 16:08:56 +00:00
|
|
|
name = attribute->name;
|
2018-01-10 00:04:08 +00:00
|
|
|
|
2018-08-27 17:42:13 +00:00
|
|
|
if (column_names_set.find(name) != column_names_set.end())
|
2017-05-04 18:14:23 +00:00
|
|
|
columns.emplace_back(getColumnFromPODArray(values), type, name);
|
|
|
|
}
|
|
|
|
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
|
|
|
PaddedPODArray<Int64> RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::makeDateKey(
|
2018-12-10 15:25:45 +00:00
|
|
|
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const
|
2017-12-25 19:00:48 +00:00
|
|
|
{
|
2018-09-13 13:33:44 +00:00
|
|
|
PaddedPODArray<Int64> key(block_start_dates.size());
|
2017-12-25 19:00:48 +00:00
|
|
|
for (size_t i = 0; i < key.size(); ++i)
|
|
|
|
{
|
2018-08-27 17:42:13 +00:00
|
|
|
if (RangeHashedDictionary::Range::isCorrectDate(block_start_dates[i]))
|
|
|
|
key[i] = block_start_dates[i];
|
2017-12-25 19:00:48 +00:00
|
|
|
else
|
2018-08-27 17:42:13 +00:00
|
|
|
key[i] = block_end_dates[i];
|
2017-12-25 19:00:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return key;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-09-17 15:04:57 +00:00
|
|
|
template <typename DictionaryType, typename RangeType, typename Key>
|
|
|
|
Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock(
|
2018-08-24 05:45:03 +00:00
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
2018-12-10 15:25:45 +00:00
|
|
|
const PaddedPODArray<RangeType> & block_start_dates,
|
|
|
|
const PaddedPODArray<RangeType> & block_end_dates) const
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
|
|
|
ColumnsWithTypeAndName columns;
|
2018-08-24 05:25:00 +00:00
|
|
|
const DictionaryStructure & structure = dictionary->getStructure();
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-08-24 05:45:03 +00:00
|
|
|
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
|
2018-09-13 13:33:44 +00:00
|
|
|
addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
|
|
|
|
addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-08-27 17:42:13 +00:00
|
|
|
auto date_key = makeDateKey(block_start_dates, block_end_dates);
|
2017-12-25 19:00:48 +00:00
|
|
|
|
2017-04-28 18:33:31 +00:00
|
|
|
for (const auto idx : ext::range(0, structure.attributes.size()))
|
|
|
|
{
|
2018-08-24 05:20:18 +00:00
|
|
|
const DictionaryAttribute & attribute = structure.attributes[idx];
|
2017-05-26 16:08:56 +00:00
|
|
|
if (names.find(attribute.name) != names.end())
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
2017-05-04 18:14:23 +00:00
|
|
|
ColumnPtr column;
|
2018-12-10 15:25:45 +00:00
|
|
|
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
|
|
|
|
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
|
2017-05-04 18:14:23 +00:00
|
|
|
switch (attribute.underlying_type)
|
|
|
|
{
|
2018-12-10 15:25:45 +00:00
|
|
|
case AttributeUnderlyingType::UInt8:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt16:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt128:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int8:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int16:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Float32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Float64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Decimal32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Decimal32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Decimal64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Decimal64);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Decimal128:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Decimal128);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::String:
|
|
|
|
column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
|
|
|
|
break;
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
2018-10-08 19:45:17 +00:00
|
|
|
#undef GET_COLUMN_FORM_ATTRIBUTE
|
2017-05-04 18:14:23 +00:00
|
|
|
columns.emplace_back(column, attribute.type, attribute.name);
|
|
|
|
}
|
2017-04-28 18:33:31 +00:00
|
|
|
}
|
2017-05-26 16:08:56 +00:00
|
|
|
return Block(columns);
|
2017-04-28 18:33:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|