2017-04-28 18:33:31 +00:00
|
|
|
#pragma once
|
|
|
|
#include <Columns/ColumnVector.h>
|
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Columns/IColumn.h>
|
|
|
|
#include <DataStreams/IProfilingBlockInputStream.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2017-05-04 18:14:23 +00:00
|
|
|
#include <DataTypes/DataTypeDate.h>
|
2017-04-28 18:33:31 +00:00
|
|
|
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
|
|
|
#include <Dictionaries/DictionaryStructure.h>
|
|
|
|
#include <Dictionaries/IDictionary.h>
|
2017-12-25 19:00:48 +00:00
|
|
|
#include <Dictionaries/RangeHashedDictionary.h>
|
2017-06-08 13:35:35 +00:00
|
|
|
#include <ext/range.h>
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
namespace DB
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
/*
|
|
|
|
* BlockInputStream implementation for external dictionaries
|
2017-04-28 18:33:31 +00:00
|
|
|
* read() returns single block consisting of the in-memory contents of the dictionaries
|
|
|
|
*/
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-04-28 18:33:31 +00:00
|
|
|
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
|
|
|
{
|
|
|
|
public:
|
2018-08-24 05:21:53 +00:00
|
|
|
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
RangeDictionaryBlockInputStream(
|
2018-09-13 13:33:44 +00:00
|
|
|
DictionaryPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray<Key> && ids_to_fill,
|
|
|
|
PaddedPODArray<Int64> && start_dates, PaddedPODArray<Int64> && end_dates);
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-01-10 00:04:08 +00:00
|
|
|
String getName() const override
|
|
|
|
{
|
2018-02-21 20:23:27 +00:00
|
|
|
return "RangeDictionary";
|
2017-05-29 17:26:45 +00:00
|
|
|
}
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
protected:
|
|
|
|
Block getBlock(size_t start, size_t length) const override;
|
|
|
|
|
2017-04-28 18:33:31 +00:00
|
|
|
private:
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename Type>
|
2017-04-28 18:33:31 +00:00
|
|
|
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &,
|
2018-09-13 13:33:44 +00:00
|
|
|
const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename AttributeType>
|
2017-04-28 18:33:31 +00:00
|
|
|
ColumnPtr getColumnFromAttribute(DictionaryGetter<AttributeType> getter,
|
2018-09-13 13:33:44 +00:00
|
|
|
const PaddedPODArray<Key> & ids_to_fill, const PaddedPODArray<Int64> & dates,
|
2018-08-27 17:18:14 +00:00
|
|
|
const DictionaryAttribute & attribute, const DictionaryType & concrete_dictionary) const;
|
2018-09-13 13:33:44 +00:00
|
|
|
ColumnPtr getColumnFromAttributeString(const PaddedPODArray<Key> & ids_to_fill, const PaddedPODArray<Int64> & dates,
|
2018-08-27 17:18:14 +00:00
|
|
|
const DictionaryAttribute & attribute, const DictionaryType & concrete_dictionary) const;
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename T>
|
2018-01-10 00:04:08 +00:00
|
|
|
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2018-09-13 13:33:44 +00:00
|
|
|
template <typename DictionarySpecialAttributeType, typename T>
|
2017-05-04 18:14:23 +00:00
|
|
|
void addSpecialColumn(
|
2018-09-13 13:33:44 +00:00
|
|
|
const std::optional<DictionarySpecialAttributeType> & attribute, DataTypePtr type,
|
2018-08-27 17:42:13 +00:00
|
|
|
const std::string & default_name, const std::unordered_set<std::string> & column_names_set,
|
2018-08-24 05:25:00 +00:00
|
|
|
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2018-08-24 05:45:03 +00:00
|
|
|
Block fillBlock(const PaddedPODArray<Key> & ids_to_fill,
|
2018-09-13 13:33:44 +00:00
|
|
|
const PaddedPODArray<Int64> & block_start_dates, const PaddedPODArray<Int64> & block_end_dates) const;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2018-09-13 13:33:44 +00:00
|
|
|
PaddedPODArray<Int64> makeDateKey(
|
|
|
|
const PaddedPODArray<Int64> & block_start_dates, const PaddedPODArray<Int64> & block_end_dates) const;
|
2017-12-25 19:00:48 +00:00
|
|
|
|
2018-08-24 05:21:53 +00:00
|
|
|
DictionaryPtr dictionary;
|
2017-05-26 16:08:56 +00:00
|
|
|
Names column_names;
|
|
|
|
PaddedPODArray<Key> ids;
|
2018-09-13 13:33:44 +00:00
|
|
|
PaddedPODArray<Int64> start_dates;
|
|
|
|
PaddedPODArray<Int64> end_dates;
|
2017-04-28 18:33:31 +00:00
|
|
|
};
|
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-05-26 16:08:56 +00:00
|
|
|
RangeDictionaryBlockInputStream<DictionaryType, Key>::RangeDictionaryBlockInputStream(
|
2018-08-24 05:21:53 +00:00
|
|
|
DictionaryPtr dictionary, size_t max_column_size, const Names & column_names, PaddedPODArray<Key> && ids,
|
2018-09-13 13:33:44 +00:00
|
|
|
PaddedPODArray<Int64> && block_start_dates, PaddedPODArray<Int64> && block_end_dates)
|
2017-05-26 16:08:56 +00:00
|
|
|
: DictionaryBlockInputStreamBase(ids.size(), max_column_size),
|
2017-05-29 17:26:45 +00:00
|
|
|
dictionary(dictionary), column_names(column_names),
|
2018-09-13 13:33:44 +00:00
|
|
|
ids(std::move(ids)), start_dates(std::move(block_start_dates)), end_dates(std::move(block_end_dates))
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-05-26 16:08:56 +00:00
|
|
|
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
|
|
|
|
{
|
|
|
|
PaddedPODArray<Key> block_ids;
|
2018-09-13 13:33:44 +00:00
|
|
|
PaddedPODArray<Int64> block_start_dates;
|
|
|
|
PaddedPODArray<Int64> block_end_dates;
|
2017-05-26 16:08:56 +00:00
|
|
|
block_ids.reserve(length);
|
|
|
|
block_start_dates.reserve(length);
|
|
|
|
block_end_dates.reserve(length);
|
|
|
|
|
|
|
|
for (auto idx : ext::range(start, start + length))
|
|
|
|
{
|
|
|
|
block_ids.push_back(ids[idx]);
|
2017-12-25 19:00:48 +00:00
|
|
|
block_start_dates.push_back(start_dates[idx]);
|
|
|
|
block_end_dates.push_back(end_dates[idx]);
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
|
|
|
template <typename AttributeType>
|
2017-05-26 16:08:56 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
|
2018-08-24 05:45:03 +00:00
|
|
|
DictionaryGetter<AttributeType> getter, const PaddedPODArray<Key> & ids_to_fill,
|
2018-09-13 13:33:44 +00:00
|
|
|
const PaddedPODArray<Int64> & dates, const DictionaryAttribute & attribute, const DictionaryType & concrete_dictionary) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2018-08-24 05:45:03 +00:00
|
|
|
auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
|
2018-08-27 17:18:14 +00:00
|
|
|
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return column_vector;
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-05-26 16:08:56 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttributeString(
|
2018-09-13 13:33:44 +00:00
|
|
|
const PaddedPODArray<Key> & ids_to_fill, const PaddedPODArray<Int64> & dates,
|
2018-08-27 17:18:14 +00:00
|
|
|
const DictionaryAttribute & attribute, const DictionaryType & concrete_dictionary) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2017-12-15 03:19:14 +00:00
|
|
|
auto column_string = ColumnString::create();
|
2018-08-27 17:18:14 +00:00
|
|
|
concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return column_string;
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
|
|
|
template <typename T>
|
2018-01-10 00:04:08 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2017-12-15 03:19:14 +00:00
|
|
|
auto column_vector = ColumnVector<T>::create();
|
2017-05-26 16:08:56 +00:00
|
|
|
column_vector->getData().reserve(array.size());
|
|
|
|
for (T value : array)
|
|
|
|
column_vector->insert(value);
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return column_vector;
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2018-09-13 13:33:44 +00:00
|
|
|
template <typename DictionarySpecialAttributeType, typename T>
|
2017-05-04 18:14:23 +00:00
|
|
|
void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
|
2018-09-13 13:33:44 +00:00
|
|
|
const std::optional<DictionarySpecialAttributeType> & attribute, DataTypePtr type,
|
2018-08-27 17:42:13 +00:00
|
|
|
const std::string & default_name, const std::unordered_set<std::string> & column_names_set,
|
2017-05-26 16:08:56 +00:00
|
|
|
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const
|
2017-05-04 18:14:23 +00:00
|
|
|
{
|
|
|
|
std::string name = default_name;
|
2018-01-10 00:04:08 +00:00
|
|
|
if (attribute)
|
2017-05-26 16:08:56 +00:00
|
|
|
name = attribute->name;
|
2018-01-10 00:04:08 +00:00
|
|
|
|
2018-08-27 17:42:13 +00:00
|
|
|
if (column_names_set.find(name) != column_names_set.end())
|
2017-05-04 18:14:23 +00:00
|
|
|
columns.emplace_back(getColumnFromPODArray(values), type, name);
|
|
|
|
}
|
|
|
|
|
2017-12-25 19:00:48 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2018-09-13 13:33:44 +00:00
|
|
|
PaddedPODArray<Int64> RangeDictionaryBlockInputStream<DictionaryType, Key>::makeDateKey(
|
|
|
|
const PaddedPODArray<Int64> & block_start_dates, const PaddedPODArray<Int64> & block_end_dates) const
|
2017-12-25 19:00:48 +00:00
|
|
|
{
|
2018-09-13 13:33:44 +00:00
|
|
|
PaddedPODArray<Int64> key(block_start_dates.size());
|
2017-12-25 19:00:48 +00:00
|
|
|
for (size_t i = 0; i < key.size(); ++i)
|
|
|
|
{
|
2018-08-27 17:42:13 +00:00
|
|
|
if (RangeHashedDictionary::Range::isCorrectDate(block_start_dates[i]))
|
|
|
|
key[i] = block_start_dates[i];
|
2017-12-25 19:00:48 +00:00
|
|
|
else
|
2018-08-27 17:42:13 +00:00
|
|
|
key[i] = block_end_dates[i];
|
2017-12-25 19:00:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return key;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-05-26 16:08:56 +00:00
|
|
|
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
2018-08-24 05:45:03 +00:00
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
2018-09-13 13:33:44 +00:00
|
|
|
const PaddedPODArray<Int64> & block_start_dates, const PaddedPODArray<Int64> & block_end_dates) const
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
|
|
|
ColumnsWithTypeAndName columns;
|
2018-08-24 05:25:00 +00:00
|
|
|
const DictionaryStructure & structure = dictionary->getStructure();
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-08-24 05:45:03 +00:00
|
|
|
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
|
2018-09-13 13:33:44 +00:00
|
|
|
addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
|
|
|
|
addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-08-27 17:42:13 +00:00
|
|
|
auto date_key = makeDateKey(block_start_dates, block_end_dates);
|
2017-12-25 19:00:48 +00:00
|
|
|
|
2017-04-28 18:33:31 +00:00
|
|
|
for (const auto idx : ext::range(0, structure.attributes.size()))
|
|
|
|
{
|
2018-08-24 05:20:18 +00:00
|
|
|
const DictionaryAttribute & attribute = structure.attributes[idx];
|
2017-05-26 16:08:56 +00:00
|
|
|
if (names.find(attribute.name) != names.end())
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
2017-05-04 18:14:23 +00:00
|
|
|
ColumnPtr column;
|
2017-05-29 17:26:45 +00:00
|
|
|
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
|
2018-08-24 05:45:03 +00:00
|
|
|
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
|
2017-05-04 18:14:23 +00:00
|
|
|
switch (attribute.underlying_type)
|
|
|
|
{
|
2017-05-29 17:26:45 +00:00
|
|
|
case AttributeUnderlyingType::UInt8:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt16:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
|
|
|
break;
|
2017-11-14 00:08:54 +00:00
|
|
|
case AttributeUnderlyingType::UInt128:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
|
|
|
break;
|
2017-05-29 17:26:45 +00:00
|
|
|
case AttributeUnderlyingType::Int8:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int16:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Float32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Float64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::String:
|
2018-08-24 05:45:03 +00:00
|
|
|
column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
|
2017-05-29 17:26:45 +00:00
|
|
|
break;
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
columns.emplace_back(column, attribute.type, attribute.name);
|
|
|
|
}
|
2017-04-28 18:33:31 +00:00
|
|
|
}
|
2017-05-26 16:08:56 +00:00
|
|
|
return Block(columns);
|
2017-04-28 18:33:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|