2017-04-28 18:33:31 +00:00
|
|
|
#pragma once
|
|
|
|
#include <Columns/ColumnString.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <Columns/ColumnVector.h>
|
2017-04-28 18:33:31 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2019-01-23 14:48:50 +00:00
|
|
|
#include <DataStreams/IBlockInputStream.h>
|
2017-05-04 18:14:23 +00:00
|
|
|
#include <DataTypes/DataTypeDate.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <ext/range.h>
|
2018-11-28 11:37:12 +00:00
|
|
|
#include "DictionaryBlockInputStreamBase.h"
|
|
|
|
#include "DictionaryStructure.h"
|
|
|
|
#include "IDictionary.h"
|
|
|
|
#include "RangeHashedDictionary.h"
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
namespace DB
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
2017-05-26 16:08:56 +00:00
|
|
|
/*
|
|
|
|
* BlockInputStream implementation for external dictionaries
|
2017-04-28 18:33:31 +00:00
|
|
|
* read() returns single block consisting of the in-memory contents of the dictionaries
|
|
|
|
*/
|
2021-01-31 15:14:26 +00:00
|
|
|
template <typename RangeType>
|
2017-04-28 18:33:31 +00:00
|
|
|
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
|
|
|
{
|
|
|
|
public:
|
2021-01-31 15:14:26 +00:00
|
|
|
using Key = UInt64;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
RangeDictionaryBlockInputStream(
|
2021-03-24 16:31:00 +00:00
|
|
|
std::shared_ptr<const IDictionary> dictionary,
|
2019-02-18 18:51:46 +00:00
|
|
|
size_t max_block_size,
|
2018-12-10 15:25:45 +00:00
|
|
|
const Names & column_names,
|
|
|
|
PaddedPODArray<Key> && ids_to_fill,
|
|
|
|
PaddedPODArray<RangeType> && start_dates,
|
|
|
|
PaddedPODArray<RangeType> && end_dates);
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
String getName() const override { return "RangeDictionary"; }
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
protected:
|
|
|
|
Block getBlock(size_t start, size_t length) const override;
|
|
|
|
|
2017-04-28 18:33:31 +00:00
|
|
|
private:
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename T>
|
2018-01-10 00:04:08 +00:00
|
|
|
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
Block fillBlock(
|
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
|
|
|
const PaddedPODArray<RangeType> & block_start_dates,
|
|
|
|
const PaddedPODArray<RangeType> & block_end_dates) const;
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
PaddedPODArray<Int64> makeDateKey(
|
|
|
|
const PaddedPODArray<RangeType> & block_start_dates,
|
|
|
|
const PaddedPODArray<RangeType> & block_end_dates) const;
|
2017-12-25 19:00:48 +00:00
|
|
|
|
2021-03-24 16:31:00 +00:00
|
|
|
std::shared_ptr<const IDictionary> dictionary;
|
2021-01-31 15:14:26 +00:00
|
|
|
NameSet column_names;
|
2017-05-26 16:08:56 +00:00
|
|
|
PaddedPODArray<Key> ids;
|
2018-09-17 15:04:57 +00:00
|
|
|
PaddedPODArray<RangeType> start_dates;
|
|
|
|
PaddedPODArray<RangeType> end_dates;
|
2017-04-28 18:33:31 +00:00
|
|
|
};
|
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
template <typename RangeType>
|
|
|
|
RangeDictionaryBlockInputStream<RangeType>::RangeDictionaryBlockInputStream(
|
2021-03-24 16:31:00 +00:00
|
|
|
std::shared_ptr<const IDictionary> dictionary_,
|
2019-08-03 11:02:40 +00:00
|
|
|
size_t max_block_size_,
|
|
|
|
const Names & column_names_,
|
|
|
|
PaddedPODArray<Key> && ids_,
|
2018-12-10 15:25:45 +00:00
|
|
|
PaddedPODArray<RangeType> && block_start_dates,
|
|
|
|
PaddedPODArray<RangeType> && block_end_dates)
|
2019-08-03 11:02:40 +00:00
|
|
|
: DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
|
|
|
|
, dictionary(dictionary_)
|
2021-01-31 15:14:26 +00:00
|
|
|
, column_names(column_names_.begin(), column_names_.end())
|
2019-08-03 11:02:40 +00:00
|
|
|
, ids(std::move(ids_))
|
2018-12-10 15:25:45 +00:00
|
|
|
, start_dates(std::move(block_start_dates))
|
|
|
|
, end_dates(std::move(block_end_dates))
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
template <typename RangeType>
|
|
|
|
Block RangeDictionaryBlockInputStream<RangeType>::getBlock(size_t start, size_t length) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
|
|
|
PaddedPODArray<Key> block_ids;
|
2018-09-17 15:04:57 +00:00
|
|
|
PaddedPODArray<RangeType> block_start_dates;
|
|
|
|
PaddedPODArray<RangeType> block_end_dates;
|
2017-05-26 16:08:56 +00:00
|
|
|
block_ids.reserve(length);
|
|
|
|
block_start_dates.reserve(length);
|
|
|
|
block_end_dates.reserve(length);
|
|
|
|
|
|
|
|
for (auto idx : ext::range(start, start + length))
|
|
|
|
{
|
|
|
|
block_ids.push_back(ids[idx]);
|
2017-12-25 19:00:48 +00:00
|
|
|
block_start_dates.push_back(start_dates[idx]);
|
|
|
|
block_end_dates.push_back(end_dates[idx]);
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
|
|
|
}
|
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
template <typename RangeType>
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename T>
|
2021-01-31 15:14:26 +00:00
|
|
|
ColumnPtr RangeDictionaryBlockInputStream<RangeType>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2017-12-15 03:19:14 +00:00
|
|
|
auto column_vector = ColumnVector<T>::create();
|
2017-05-26 16:08:56 +00:00
|
|
|
column_vector->getData().reserve(array.size());
|
2021-01-31 15:14:26 +00:00
|
|
|
column_vector->getData().insert(array.begin(), array.end());
|
2018-01-10 00:04:08 +00:00
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
return column_vector;
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
template <typename RangeType>
|
|
|
|
PaddedPODArray<Int64> RangeDictionaryBlockInputStream<RangeType>::makeDateKey(
|
2018-12-10 15:25:45 +00:00
|
|
|
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const
|
2017-12-25 19:00:48 +00:00
|
|
|
{
|
2018-09-13 13:33:44 +00:00
|
|
|
PaddedPODArray<Int64> key(block_start_dates.size());
|
2017-12-25 19:00:48 +00:00
|
|
|
for (size_t i = 0; i < key.size(); ++i)
|
|
|
|
{
|
2018-08-27 17:42:13 +00:00
|
|
|
if (RangeHashedDictionary::Range::isCorrectDate(block_start_dates[i]))
|
|
|
|
key[i] = block_start_dates[i];
|
2017-12-25 19:00:48 +00:00
|
|
|
else
|
2018-08-27 17:42:13 +00:00
|
|
|
key[i] = block_end_dates[i];
|
2017-12-25 19:00:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return key;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
template <typename RangeType>
|
|
|
|
Block RangeDictionaryBlockInputStream<RangeType>::fillBlock(
|
2018-08-24 05:45:03 +00:00
|
|
|
const PaddedPODArray<Key> & ids_to_fill,
|
2018-12-10 15:25:45 +00:00
|
|
|
const PaddedPODArray<RangeType> & block_start_dates,
|
|
|
|
const PaddedPODArray<RangeType> & block_end_dates) const
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
|
|
|
ColumnsWithTypeAndName columns;
|
2018-08-24 05:25:00 +00:00
|
|
|
const DictionaryStructure & structure = dictionary->getStructure();
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
auto ids_column = getColumnFromPODArray(ids_to_fill);
|
|
|
|
const std::string & id_column_name = structure.id->name;
|
|
|
|
if (column_names.find(id_column_name) != column_names.end())
|
|
|
|
columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), id_column_name);
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2018-08-27 17:42:13 +00:00
|
|
|
auto date_key = makeDateKey(block_start_dates, block_end_dates);
|
2020-12-20 20:11:28 +00:00
|
|
|
auto date_column = getColumnFromPODArray(date_key);
|
2017-12-25 19:00:48 +00:00
|
|
|
|
2021-01-31 15:14:26 +00:00
|
|
|
const std::string & range_min_column_name = structure.range_min->name;
|
|
|
|
if (column_names.find(range_min_column_name) != column_names.end())
|
|
|
|
{
|
|
|
|
auto range_min_column = getColumnFromPODArray(block_start_dates);
|
|
|
|
columns.emplace_back(range_min_column, structure.range_max->type, range_min_column_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
const std::string & range_max_column_name = structure.range_max->name;
|
|
|
|
if (column_names.find(range_max_column_name) != column_names.end())
|
|
|
|
{
|
|
|
|
auto range_max_column = getColumnFromPODArray(block_end_dates);
|
|
|
|
columns.emplace_back(range_max_column, structure.range_max->type, range_max_column_name);
|
|
|
|
}
|
|
|
|
|
2017-04-28 18:33:31 +00:00
|
|
|
for (const auto idx : ext::range(0, structure.attributes.size()))
|
|
|
|
{
|
2018-08-24 05:20:18 +00:00
|
|
|
const DictionaryAttribute & attribute = structure.attributes[idx];
|
2021-01-31 15:14:26 +00:00
|
|
|
if (column_names.find(attribute.name) != column_names.end())
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
2020-12-20 20:11:28 +00:00
|
|
|
ColumnPtr column = dictionary->getColumn(
|
|
|
|
attribute.name,
|
|
|
|
attribute.type,
|
|
|
|
{ids_column, date_column},
|
2021-01-02 15:03:28 +00:00
|
|
|
{std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
|
2020-12-20 20:11:28 +00:00
|
|
|
nullptr);
|
2017-05-04 18:14:23 +00:00
|
|
|
columns.emplace_back(column, attribute.type, attribute.name);
|
|
|
|
}
|
2017-04-28 18:33:31 +00:00
|
|
|
}
|
2017-05-26 16:08:56 +00:00
|
|
|
return Block(columns);
|
2017-04-28 18:33:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|