ClickHouse/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h

239 lines
9.5 KiB
C++
Raw Normal View History

2017-04-28 18:33:31 +00:00
#pragma once
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Columns/IColumn.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
2017-04-28 18:33:31 +00:00
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/RangeHashedDictionary.h>
#include <ext/range.h>
2017-04-28 18:33:31 +00:00
namespace DB
2017-04-28 18:33:31 +00:00
{
/*
 * BlockInputStream implementation for external dictionaries.
 * read() returns a single block consisting of the in-memory contents of the dictionary.
 */
2017-09-15 12:16:12 +00:00
template <typename DictionaryType, typename Key>
2017-04-28 18:33:31 +00:00
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
2018-08-24 05:21:53 +00:00
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
RangeDictionaryBlockInputStream(
2018-08-24 05:21:53 +00:00
DictionaryPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray<Key> && ids,
PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates);
2017-04-28 18:33:31 +00:00
2018-01-10 00:04:08 +00:00
String getName() const override
{
2018-02-21 20:23:27 +00:00
return "RangeDictionary";
}
2017-04-28 18:33:31 +00:00
protected:
Block getBlock(size_t start, size_t length) const override;
2017-04-28 18:33:31 +00:00
private:
2017-09-15 12:16:12 +00:00
template <typename Type>
2017-04-28 18:33:31 +00:00
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &,
const PaddedPODArray<UInt16> &, PaddedPODArray<Type> &) const;
2017-04-28 18:33:31 +00:00
2017-09-15 12:16:12 +00:00
template <typename AttributeType>
2017-04-28 18:33:31 +00:00
ColumnPtr getColumnFromAttribute(DictionaryGetter<AttributeType> getter,
2018-01-10 00:04:08 +00:00
const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
2018-08-24 05:25:00 +00:00
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
2018-01-10 00:04:08 +00:00
ColumnPtr getColumnFromAttributeString(const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
2018-08-24 05:25:00 +00:00
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
2017-09-15 12:16:12 +00:00
template <typename T>
2018-01-10 00:04:08 +00:00
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
2017-09-15 12:16:12 +00:00
template <typename T>
void addSpecialColumn(
2018-01-10 00:04:08 +00:00
const std::optional<DictionarySpecialAttribute> & attribute, DataTypePtr type,
const std::string & default_name, const std::unordered_set<std::string> & column_names,
2018-08-24 05:25:00 +00:00
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const;
Block fillBlock(const PaddedPODArray<Key> & ids,
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
PaddedPODArray<UInt16> makeDateKey(
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
2018-08-24 05:21:53 +00:00
DictionaryPtr dictionary;
Names column_names;
PaddedPODArray<Key> ids;
PaddedPODArray<UInt16> start_dates;
PaddedPODArray<UInt16> end_dates;
2017-04-28 18:33:31 +00:00
};
2017-09-15 12:16:12 +00:00
/// Constructs the stream.
///   dictionary     - shared pointer to the dictionary to read from; moved into the stream.
///   max_block_size - forwarded to DictionaryBlockInputStreamBase together with the row count.
///   column_names   - names of the columns to produce; copied.
///   ids, start_dates, end_dates - parallel per-row arrays; moved into the stream.
template <typename DictionaryType, typename Key>
RangeDictionaryBlockInputStream<DictionaryType, Key>::RangeDictionaryBlockInputStream(
    DictionaryPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray<Key> && ids,
    PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates)
    /// The base class is initialized first, so `ids.size()` is read before `ids` is moved from below.
    : DictionaryBlockInputStreamBase(ids.size(), max_block_size),
    /// `dictionary` is a by-value sink parameter: move it instead of copying the shared_ptr again.
    dictionary(std::move(dictionary)), column_names(column_names),
    ids(std::move(ids)), start_dates(std::move(start_dates)), end_dates(std::move(end_dates))
{
}
2017-09-15 12:16:12 +00:00
/// Copies rows [start, start + length) of the stored ids and date ranges into
/// temporary arrays and builds the resulting block from them via fillBlock().
template <typename DictionaryType, typename Key>
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
{
    PaddedPODArray<Key> slice_ids;
    slice_ids.reserve(length);

    PaddedPODArray<UInt16> slice_start_dates;
    slice_start_dates.reserve(length);

    PaddedPODArray<UInt16> slice_end_dates;
    slice_end_dates.reserve(length);

    const size_t stop = start + length;
    for (size_t row = start; row < stop; ++row)
    {
        slice_ids.push_back(ids[row]);
        slice_start_dates.push_back(start_dates[row]);
        slice_end_dates.push_back(end_dates[row]);
    }

    return fillBlock(slice_ids, slice_start_dates, slice_end_dates);
}
2017-09-15 12:16:12 +00:00
/// Creates a ColumnVector<AttributeType> with one element per id and lets the dictionary
/// fill it by invoking `getter` with the attribute name, the ids and the dates.
template <typename DictionaryType, typename Key>
template <typename AttributeType>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
    DictionaryGetter<AttributeType> getter, const PaddedPODArray<Key> & ids,
    const PaddedPODArray<UInt16> & dates, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
    const size_t rows = ids.size();
    auto result = ColumnVector<AttributeType>::create(rows);

    /// The getter writes directly into the column's underlying array.
    auto & out = result->getData();
    (dictionary.*getter)(attribute.name, ids, dates, out);

    return std::move(result);
}
2017-09-15 12:16:12 +00:00
/// Creates an empty ColumnString and lets the dictionary's getString() fill it
/// for the given attribute name, ids and dates.
template <typename DictionaryType, typename Key>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttributeString(
    const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
    const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
    auto result = ColumnString::create();

    /// getString() receives the raw pointer to the column as its output argument.
    auto * out = result.get();
    dictionary.getString(attribute.name, ids, dates, out);

    return std::move(result);
}
2017-09-15 12:16:12 +00:00
template <typename DictionaryType, typename Key>
template <typename T>
2018-01-10 00:04:08 +00:00
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
{
auto column_vector = ColumnVector<T>::create();
column_vector->getData().reserve(array.size());
for (T value : array)
column_vector->insert(value);
return std::move(column_vector);
}
2017-09-15 12:16:12 +00:00
/// Appends a column made from `values` to `columns` when its name is among `column_names`.
/// The column name is `attribute->name` if `attribute` holds a value, `default_name` otherwise.
template <typename DictionaryType, typename Key>
template <typename T>
void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
    const std::optional<DictionarySpecialAttribute> & attribute, DataTypePtr type,
    const std::string & default_name, const std::unordered_set<std::string> & column_names,
    const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const
{
    const std::string name = attribute ? attribute->name : default_name;

    /// Column was not requested: nothing to add.
    if (column_names.count(name) == 0)
        return;

    columns.emplace_back(getColumnFromPODArray(values), type, name);
}
/// Produces one date per row: the start date when it passes
/// RangeHashedDictionary::Range::isCorrectDate(), the end date otherwise.
template <typename DictionaryType, typename Key>
PaddedPODArray<UInt16> RangeDictionaryBlockInputStream<DictionaryType, Key>::makeDateKey(
    const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
{
    const size_t rows = start_dates.size();
    PaddedPODArray<UInt16> result(rows);

    for (size_t row = 0; row < rows; ++row)
    {
        const bool start_is_usable = RangeHashedDictionary::Range::isCorrectDate(start_dates[row]);
        result[row] = start_is_usable ? start_dates[row] : end_dates[row];
    }

    return result;
}
2017-09-15 12:16:12 +00:00
/// Assembles a Block for the given rows.
///
/// The block contains, in this order and only if their names are present in `column_names`:
///  - the id column (named by structure.id, or "ID" when it is not set);
///  - the range start column (structure.range_min, or "Range Start");
///  - the range end column (structure.range_max, or "Range End");
///  - one column per dictionary attribute, in the order of structure.attributes.
///
/// Attribute values are requested from the dictionary using the per-row date produced by makeDateKey().
template <typename DictionaryType, typename Key>
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
    const PaddedPODArray<Key> & ids,
    const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
{
    ColumnsWithTypeAndName columns;
    const DictionaryStructure & structure = dictionary->getStructure();

    /// Set of requested names for quick membership checks.
    std::unordered_set<std::string> names(column_names.begin(), column_names.end());

    addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids, columns);
    addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
    addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);

    auto date_key = makeDateKey(start_dates, end_dates);

    for (const DictionaryAttribute & attribute : structure.attributes)
    {
        /// Skip attributes that were not requested.
        if (names.find(attribute.name) == names.end())
            continue;

        ColumnPtr column;

/// Local helper: dispatches to the typed getter DictionaryType::get<TYPE>.
/// It is #undef'd right after the switch so that it does not leak out of this header.
#define GET_COLUMN_FROM_ATTRIBUTE(TYPE) \
        column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, date_key, attribute, *dictionary)

        switch (attribute.underlying_type)
        {
            case AttributeUnderlyingType::UInt8:
                GET_COLUMN_FROM_ATTRIBUTE(UInt8);
                break;
            case AttributeUnderlyingType::UInt16:
                GET_COLUMN_FROM_ATTRIBUTE(UInt16);
                break;
            case AttributeUnderlyingType::UInt32:
                GET_COLUMN_FROM_ATTRIBUTE(UInt32);
                break;
            case AttributeUnderlyingType::UInt64:
                GET_COLUMN_FROM_ATTRIBUTE(UInt64);
                break;
            case AttributeUnderlyingType::UInt128:
                GET_COLUMN_FROM_ATTRIBUTE(UInt128);
                break;
            case AttributeUnderlyingType::Int8:
                GET_COLUMN_FROM_ATTRIBUTE(Int8);
                break;
            case AttributeUnderlyingType::Int16:
                GET_COLUMN_FROM_ATTRIBUTE(Int16);
                break;
            case AttributeUnderlyingType::Int32:
                GET_COLUMN_FROM_ATTRIBUTE(Int32);
                break;
            case AttributeUnderlyingType::Int64:
                GET_COLUMN_FROM_ATTRIBUTE(Int64);
                break;
            case AttributeUnderlyingType::Float32:
                GET_COLUMN_FROM_ATTRIBUTE(Float32);
                break;
            case AttributeUnderlyingType::Float64:
                GET_COLUMN_FROM_ATTRIBUTE(Float64);
                break;
            case AttributeUnderlyingType::String:
                column = getColumnFromAttributeString(ids, date_key, attribute, *dictionary);
                break;
        }

#undef GET_COLUMN_FROM_ATTRIBUTE

        /// NOTE(review): there is no default case, so an unlisted underlying type leaves `column` empty here.
        columns.emplace_back(column, attribute.type, attribute.name);
    }

    return Block(columns);
}
}