ClickHouse/src/Dictionaries/RangeDictionaryBlockInputStream.h

174 lines
5.9 KiB
C++
Raw Normal View History

2017-04-28 18:33:31 +00:00
#pragma once
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
2017-04-28 18:33:31 +00:00
#include <Columns/IColumn.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypesNumber.h>
#include <ext/range.h>
#include "DictionaryBlockInputStreamBase.h"
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "RangeHashedDictionary.h"
2017-04-28 18:33:31 +00:00
namespace DB
2017-04-28 18:33:31 +00:00
{
/*
* BlockInputStream implementation for external dictionaries
2017-04-28 18:33:31 +00:00
* read() returns single block consisting of the in-memory contents of the dictionaries
*/
2021-01-31 15:14:26 +00:00
template <typename RangeType>
2017-04-28 18:33:31 +00:00
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
2021-01-31 15:14:26 +00:00
using Key = UInt64;
RangeDictionaryBlockInputStream(
std::shared_ptr<const IDictionary> dictionary,
size_t max_block_size,
const Names & column_names,
PaddedPODArray<Key> && ids_to_fill,
PaddedPODArray<RangeType> && start_dates,
PaddedPODArray<RangeType> && end_dates);
2017-04-28 18:33:31 +00:00
String getName() const override { return "RangeDictionary"; }
2017-04-28 18:33:31 +00:00
protected:
Block getBlock(size_t start, size_t length) const override;
2017-04-28 18:33:31 +00:00
private:
2017-09-15 12:16:12 +00:00
template <typename T>
2018-01-10 00:04:08 +00:00
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
Block fillBlock(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const;
2021-01-31 15:14:26 +00:00
PaddedPODArray<Int64> makeDateKey(
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const;
std::shared_ptr<const IDictionary> dictionary;
2021-01-31 15:14:26 +00:00
NameSet column_names;
PaddedPODArray<Key> ids;
PaddedPODArray<RangeType> start_dates;
PaddedPODArray<RangeType> end_dates;
2017-04-28 18:33:31 +00:00
};
2021-01-31 15:14:26 +00:00
/// Constructs the stream from a dictionary handle, the names of the columns to produce and
/// three parallel arrays (ids, range starts, range ends) whose ownership is taken over.
///
/// The base class receives the number of ids and the maximum block size.
/// The arrays are expected to have equal sizes: getBlock() indexes all three with the
/// same row numbers and performs no bounds checks of its own.
template <typename RangeType>
RangeDictionaryBlockInputStream<RangeType>::RangeDictionaryBlockInputStream(
    std::shared_ptr<const IDictionary> dictionary_,
    size_t max_block_size_,
    const Names & column_names_,
    PaddedPODArray<Key> && ids_,
    PaddedPODArray<RangeType> && block_start_dates,
    PaddedPODArray<RangeType> && block_end_dates)
    /// The base is initialised before any member, so ids_.size() is read before ids_ is moved from.
    : DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
    /// dictionary_ is a by-value sink parameter: move it instead of paying for another refcount bump.
    , dictionary(std::move(dictionary_))
    , column_names(column_names_.begin(), column_names_.end())
    , ids(std::move(ids_))
    , start_dates(std::move(block_start_dates))
    , end_dates(std::move(block_end_dates))
{
}
2021-01-31 15:14:26 +00:00
/// Produces the block for rows [start, start + length) of the stored arrays.
///
/// The three parallel arrays are sliced into temporary arrays which are then handed to fillBlock().
/// No bounds checking is done here: the caller must pass a range that lies inside the stored arrays.
template <typename RangeType>
Block RangeDictionaryBlockInputStream<RangeType>::getBlock(size_t start, size_t length) const
{
    const size_t stop = start + length;

    /// Bulk range insert instead of pushing the elements one by one.
    PaddedPODArray<Key> block_ids;
    block_ids.insert(ids.begin() + start, ids.begin() + stop);

    PaddedPODArray<RangeType> block_start_dates;
    block_start_dates.insert(start_dates.begin() + start, start_dates.begin() + stop);

    PaddedPODArray<RangeType> block_end_dates;
    block_end_dates.insert(end_dates.begin() + start, end_dates.begin() + stop);

    return fillBlock(block_ids, block_start_dates, block_end_dates);
}
2021-01-31 15:14:26 +00:00
template <typename RangeType>
2017-09-15 12:16:12 +00:00
template <typename T>
2021-01-31 15:14:26 +00:00
ColumnPtr RangeDictionaryBlockInputStream<RangeType>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
{
auto column_vector = ColumnVector<T>::create();
column_vector->getData().reserve(array.size());
2021-01-31 15:14:26 +00:00
column_vector->getData().insert(array.begin(), array.end());
2018-01-10 00:04:08 +00:00
2021-01-31 15:14:26 +00:00
return column_vector;
}
2021-01-31 15:14:26 +00:00
/// Builds one Int64 key per row: the range start when RangeHashedDictionary::Range::isCorrectDate()
/// accepts it, the range end otherwise. The result has as many rows as `block_start_dates`.
template <typename RangeType>
PaddedPODArray<Int64> RangeDictionaryBlockInputStream<RangeType>::makeDateKey(
    const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const
{
    const size_t rows = block_start_dates.size();
    PaddedPODArray<Int64> date_keys(rows);

    for (size_t row = 0; row != rows; ++row)
    {
        const auto & range_start = block_start_dates[row];
        date_keys[row] = RangeHashedDictionary::Range::isCorrectDate(range_start) ? range_start : block_end_dates[row];
    }

    return date_keys;
}
2021-01-31 15:14:26 +00:00
/// Assembles a Block for the given rows.
///
/// Columns are appended in this order, each only if its name is in `column_names`:
///   1. the id column (typed UInt64),
///   2. the range_min column built from `block_start_dates`,
///   3. the range_max column built from `block_end_dates`,
///   4. every dictionary attribute, fetched through IDictionary::getColumn() with
///      the (id, date key) pair as key columns.
template <typename RangeType>
Block RangeDictionaryBlockInputStream<RangeType>::fillBlock(
    const PaddedPODArray<Key> & ids_to_fill,
    const PaddedPODArray<RangeType> & block_start_dates,
    const PaddedPODArray<RangeType> & block_end_dates) const
{
    const DictionaryStructure & structure = dictionary->getStructure();

    const auto is_requested = [this](const std::string & name)
    {
        return column_names.find(name) != column_names.end();
    };

    ColumnsWithTypeAndName columns;

    /// The id column is always materialised: it is also a key column for the attribute lookups below.
    auto ids_column = getColumnFromPODArray(ids_to_fill);
    const std::string & id_column_name = structure.id->name;
    if (is_requested(id_column_name))
        columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), id_column_name);

    /// Second key column for the attribute lookups, see makeDateKey().
    auto date_key = makeDateKey(block_start_dates, block_end_dates);
    auto date_column = getColumnFromPODArray(date_key);

    const std::string & range_min_column_name = structure.range_min->name;
    if (is_requested(range_min_column_name))
    {
        auto range_min_column = getColumnFromPODArray(block_start_dates);
        /// Use the type declared for range_min (this used to take range_max's type by mistake).
        columns.emplace_back(range_min_column, structure.range_min->type, range_min_column_name);
    }

    const std::string & range_max_column_name = structure.range_max->name;
    if (is_requested(range_max_column_name))
    {
        auto range_max_column = getColumnFromPODArray(block_end_dates);
        columns.emplace_back(range_max_column, structure.range_max->type, range_max_column_name);
    }

    for (const DictionaryAttribute & attribute : structure.attributes)
    {
        if (!is_requested(attribute.name))
            continue;

        /// NOTE(review): the trailing nullptr is presumably the "default values" column - confirm against IDictionary::getColumn.
        ColumnPtr column = dictionary->getColumn(
            attribute.name,
            attribute.type,
            {ids_column, date_column},
            {std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
            nullptr);
        columns.emplace_back(column, attribute.type, attribute.name);
    }

    return Block(columns);
}
}