2017-04-28 18:33:31 +00:00
|
|
|
#pragma once
|
|
|
|
#include <Columns/ColumnVector.h>
|
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Columns/IColumn.h>
|
|
|
|
#include <DataStreams/IProfilingBlockInputStream.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2017-05-04 18:14:23 +00:00
|
|
|
#include <DataTypes/DataTypeDate.h>
|
2017-04-28 18:33:31 +00:00
|
|
|
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
|
|
|
#include <Dictionaries/DictionaryStructure.h>
|
|
|
|
#include <Dictionaries/IDictionary.h>
|
2017-12-25 19:00:48 +00:00
|
|
|
#include <Dictionaries/RangeHashedDictionary.h>
|
2017-06-08 13:35:35 +00:00
|
|
|
#include <ext/range.h>
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
namespace DB
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
/*
|
|
|
|
* BlockInputStream implementation for external dictionaries
|
2017-04-28 18:33:31 +00:00
|
|
|
* read() returns single block consisting of the in-memory contents of the dictionaries
|
|
|
|
*/
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-04-28 18:33:31 +00:00
|
|
|
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
|
|
|
{
|
|
|
|
public:
|
2018-08-24 05:21:53 +00:00
|
|
|
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
RangeDictionaryBlockInputStream(
|
2018-08-24 05:21:53 +00:00
|
|
|
DictionaryPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray<Key> && ids,
|
2017-05-26 16:08:56 +00:00
|
|
|
PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates);
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-01-10 00:04:08 +00:00
|
|
|
String getName() const override
|
|
|
|
{
|
2018-02-21 20:23:27 +00:00
|
|
|
return "RangeDictionary";
|
2017-05-29 17:26:45 +00:00
|
|
|
}
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
protected:
|
|
|
|
Block getBlock(size_t start, size_t length) const override;
|
|
|
|
|
2017-04-28 18:33:31 +00:00
|
|
|
private:
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename Type>
|
2017-04-28 18:33:31 +00:00
|
|
|
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &,
|
2017-05-29 17:26:45 +00:00
|
|
|
const PaddedPODArray<UInt16> &, PaddedPODArray<Type> &) const;
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename AttributeType>
|
2017-04-28 18:33:31 +00:00
|
|
|
ColumnPtr getColumnFromAttribute(DictionaryGetter<AttributeType> getter,
|
2018-01-10 00:04:08 +00:00
|
|
|
const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
|
2018-08-24 05:25:00 +00:00
|
|
|
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
|
2018-01-10 00:04:08 +00:00
|
|
|
ColumnPtr getColumnFromAttributeString(const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
|
2018-08-24 05:25:00 +00:00
|
|
|
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename T>
|
2018-01-10 00:04:08 +00:00
|
|
|
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename T>
|
2017-05-04 18:14:23 +00:00
|
|
|
void addSpecialColumn(
|
2018-01-10 00:04:08 +00:00
|
|
|
const std::optional<DictionarySpecialAttribute> & attribute, DataTypePtr type,
|
2017-05-04 18:14:23 +00:00
|
|
|
const std::string & default_name, const std::unordered_set<std::string> & column_names,
|
2018-08-24 05:25:00 +00:00
|
|
|
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
|
|
|
Block fillBlock(const PaddedPODArray<Key> & ids,
|
|
|
|
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
|
|
|
|
2017-12-25 19:00:48 +00:00
|
|
|
PaddedPODArray<UInt16> makeDateKey(
|
|
|
|
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
|
|
|
|
2018-08-24 05:21:53 +00:00
|
|
|
DictionaryPtr dictionary;
|
2017-05-26 16:08:56 +00:00
|
|
|
Names column_names;
|
|
|
|
PaddedPODArray<Key> ids;
|
|
|
|
PaddedPODArray<UInt16> start_dates;
|
|
|
|
PaddedPODArray<UInt16> end_dates;
|
2017-04-28 18:33:31 +00:00
|
|
|
};
|
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-05-26 16:08:56 +00:00
|
|
|
RangeDictionaryBlockInputStream<DictionaryType, Key>::RangeDictionaryBlockInputStream(
|
2018-08-24 05:21:53 +00:00
|
|
|
DictionaryPtr dictionary, size_t max_column_size, const Names & column_names, PaddedPODArray<Key> && ids,
|
2017-05-26 16:08:56 +00:00
|
|
|
PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates)
|
|
|
|
: DictionaryBlockInputStreamBase(ids.size(), max_column_size),
|
2017-05-29 17:26:45 +00:00
|
|
|
dictionary(dictionary), column_names(column_names),
|
|
|
|
ids(std::move(ids)), start_dates(std::move(start_dates)), end_dates(std::move(end_dates))
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-05-26 16:08:56 +00:00
|
|
|
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
|
|
|
|
{
|
|
|
|
PaddedPODArray<Key> block_ids;
|
|
|
|
PaddedPODArray<UInt16> block_start_dates;
|
|
|
|
PaddedPODArray<UInt16> block_end_dates;
|
|
|
|
block_ids.reserve(length);
|
|
|
|
block_start_dates.reserve(length);
|
|
|
|
block_end_dates.reserve(length);
|
|
|
|
|
|
|
|
for (auto idx : ext::range(start, start + length))
|
|
|
|
{
|
|
|
|
block_ids.push_back(ids[idx]);
|
2017-12-25 19:00:48 +00:00
|
|
|
block_start_dates.push_back(start_dates[idx]);
|
|
|
|
block_end_dates.push_back(end_dates[idx]);
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
|
|
|
template <typename AttributeType>
|
2017-05-26 16:08:56 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
|
2018-01-10 00:04:08 +00:00
|
|
|
DictionaryGetter<AttributeType> getter, const PaddedPODArray<Key> & ids,
|
2018-08-24 05:25:00 +00:00
|
|
|
const PaddedPODArray<UInt16> & dates, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2017-12-15 03:19:14 +00:00
|
|
|
auto column_vector = ColumnVector<AttributeType>::create(ids.size());
|
2017-05-26 16:08:56 +00:00
|
|
|
(dictionary.*getter)(attribute.name, ids, dates, column_vector->getData());
|
2017-12-15 03:19:14 +00:00
|
|
|
return std::move(column_vector);
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-05-26 16:08:56 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttributeString(
|
2018-01-10 00:04:08 +00:00
|
|
|
const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
|
2018-08-24 05:25:00 +00:00
|
|
|
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2017-12-15 03:19:14 +00:00
|
|
|
auto column_string = ColumnString::create();
|
2017-05-26 16:08:56 +00:00
|
|
|
dictionary.getString(attribute.name, ids, dates, column_string.get());
|
2017-12-15 03:19:14 +00:00
|
|
|
return std::move(column_string);
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
|
|
|
template <typename T>
|
2018-01-10 00:04:08 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2017-12-15 03:19:14 +00:00
|
|
|
auto column_vector = ColumnVector<T>::create();
|
2017-05-26 16:08:56 +00:00
|
|
|
column_vector->getData().reserve(array.size());
|
|
|
|
for (T value : array)
|
|
|
|
column_vector->insert(value);
|
2017-12-15 03:19:14 +00:00
|
|
|
return std::move(column_vector);
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
|
|
|
template <typename T>
|
2017-05-04 18:14:23 +00:00
|
|
|
void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
|
2017-11-20 04:15:43 +00:00
|
|
|
const std::optional<DictionarySpecialAttribute> & attribute, DataTypePtr type,
|
2018-08-24 05:25:00 +00:00
|
|
|
const std::string & default_name, const std::unordered_set<std::string> & column_names,
|
2017-05-26 16:08:56 +00:00
|
|
|
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const
|
2017-05-04 18:14:23 +00:00
|
|
|
{
|
|
|
|
std::string name = default_name;
|
2018-01-10 00:04:08 +00:00
|
|
|
if (attribute)
|
2017-05-26 16:08:56 +00:00
|
|
|
name = attribute->name;
|
2018-01-10 00:04:08 +00:00
|
|
|
|
|
|
|
if (column_names.find(name) != column_names.end())
|
2017-05-04 18:14:23 +00:00
|
|
|
columns.emplace_back(getColumnFromPODArray(values), type, name);
|
|
|
|
}
|
|
|
|
|
2017-12-25 19:00:48 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
|
|
|
PaddedPODArray<UInt16> RangeDictionaryBlockInputStream<DictionaryType, Key>::makeDateKey(
|
|
|
|
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
|
|
|
|
{
|
|
|
|
PaddedPODArray<UInt16> key(start_dates.size());
|
|
|
|
for (size_t i = 0; i < key.size(); ++i)
|
|
|
|
{
|
|
|
|
if (RangeHashedDictionary::Range::isCorrectDate(start_dates[i]))
|
|
|
|
key[i] = start_dates[i];
|
|
|
|
else
|
|
|
|
key[i] = end_dates[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
return key;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename DictionaryType, typename Key>
|
2017-05-26 16:08:56 +00:00
|
|
|
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
2018-01-10 00:04:08 +00:00
|
|
|
const PaddedPODArray<Key> & ids,
|
2017-05-26 16:08:56 +00:00
|
|
|
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
|
|
|
ColumnsWithTypeAndName columns;
|
2018-08-24 05:25:00 +00:00
|
|
|
const DictionaryStructure & structure = dictionary->getStructure();
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids, columns);
|
|
|
|
addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
|
|
|
|
addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-12-25 19:00:48 +00:00
|
|
|
auto date_key = makeDateKey(start_dates, end_dates);
|
|
|
|
|
2017-04-28 18:33:31 +00:00
|
|
|
for (const auto idx : ext::range(0, structure.attributes.size()))
|
|
|
|
{
|
2018-08-24 05:20:18 +00:00
|
|
|
const DictionaryAttribute & attribute = structure.attributes[idx];
|
2017-05-26 16:08:56 +00:00
|
|
|
if (names.find(attribute.name) != names.end())
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
2017-05-04 18:14:23 +00:00
|
|
|
ColumnPtr column;
|
2017-05-29 17:26:45 +00:00
|
|
|
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
|
2017-12-25 19:00:48 +00:00
|
|
|
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, date_key, attribute, *dictionary)
|
2017-05-04 18:14:23 +00:00
|
|
|
switch (attribute.underlying_type)
|
|
|
|
{
|
2017-05-29 17:26:45 +00:00
|
|
|
case AttributeUnderlyingType::UInt8:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt16:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::UInt64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
|
|
|
break;
|
2017-11-14 00:08:54 +00:00
|
|
|
case AttributeUnderlyingType::UInt128:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
|
|
|
break;
|
2017-05-29 17:26:45 +00:00
|
|
|
case AttributeUnderlyingType::Int8:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int16:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Int64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Float32:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::Float64:
|
|
|
|
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
|
|
|
break;
|
|
|
|
case AttributeUnderlyingType::String:
|
2017-12-25 19:00:48 +00:00
|
|
|
column = getColumnFromAttributeString(ids, date_key, attribute, *dictionary);
|
2017-05-29 17:26:45 +00:00
|
|
|
break;
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
columns.emplace_back(column, attribute.type, attribute.name);
|
|
|
|
}
|
2017-04-28 18:33:31 +00:00
|
|
|
}
|
2017-05-26 16:08:56 +00:00
|
|
|
return Block(columns);
|
2017-04-28 18:33:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|