2017-04-27 17:16:24 +00:00
|
|
|
#pragma once
|
2018-08-24 05:20:18 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <memory>
|
2018-10-08 19:45:17 +00:00
|
|
|
#include <Columns/ColumnDecimal.h>
|
2017-04-27 17:16:24 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <Columns/ColumnVector.h>
|
2017-04-28 18:33:31 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <Core/Names.h>
|
2019-01-23 14:48:50 +00:00
|
|
|
#include <DataStreams/IBlockInputStream.h>
|
2017-04-28 18:33:31 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <common/logger_useful.h>
|
|
|
|
#include <ext/range.h>
|
2018-11-28 11:37:12 +00:00
|
|
|
#include "DictionaryBlockInputStreamBase.h"
|
|
|
|
#include "DictionaryStructure.h"
|
|
|
|
#include "IDictionary.h"
|
2017-04-27 17:16:24 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
namespace DB
|
2017-04-28 18:33:31 +00:00
|
|
|
{
|
2018-06-05 19:46:49 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int LOGICAL_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-08-24 05:37:06 +00:00
|
|
|
/* BlockInputStream implementation for external dictionaries
|
|
|
|
* read() returns blocks consisting of the in-memory contents of the dictionaries
|
2017-04-27 17:16:24 +00:00
|
|
|
*/
|
2020-12-19 13:24:51 +00:00
|
|
|
template <typename Key>
|
2017-04-28 18:33:31 +00:00
|
|
|
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
2017-04-27 17:16:24 +00:00
|
|
|
{
|
|
|
|
public:
|
2018-12-10 15:25:45 +00:00
|
|
|
DictionaryBlockInputStream(
|
2019-02-10 16:55:12 +00:00
|
|
|
std::shared_ptr<const IDictionaryBase> dictionary, UInt64 max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
DictionaryBlockInputStream(
|
|
|
|
std::shared_ptr<const IDictionaryBase> dictionary,
|
2019-02-10 16:55:12 +00:00
|
|
|
UInt64 max_block_size,
|
2018-12-10 15:25:45 +00:00
|
|
|
const std::vector<StringRef> & keys,
|
|
|
|
const Names & column_names);
|
2017-04-27 17:16:24 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
using GetColumnsFunction = std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute> & attributes)>;
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2017-06-05 09:02:05 +00:00
|
|
|
// Used to separate key columns format for storage and view.
|
2020-01-11 09:50:41 +00:00
|
|
|
// Calls get_key_columns_function to get key column for dictionary get function call
|
2017-06-05 09:02:05 +00:00
|
|
|
// and get_view_columns_function to get key representation.
|
|
|
|
// Now used in trie dictionary, where columns are stored as ip and mask, and are showed as string
|
2018-12-10 15:25:45 +00:00
|
|
|
DictionaryBlockInputStream(
|
|
|
|
std::shared_ptr<const IDictionaryBase> dictionary,
|
2019-02-10 16:55:12 +00:00
|
|
|
UInt64 max_block_size,
|
2018-12-10 15:25:45 +00:00
|
|
|
const Columns & data_columns,
|
|
|
|
const Names & column_names,
|
|
|
|
GetColumnsFunction && get_key_columns_function,
|
|
|
|
GetColumnsFunction && get_view_columns_function);
|
2017-06-05 09:02:05 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
String getName() const override { return "Dictionary"; }
|
2017-04-27 17:16:24 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
protected:
|
|
|
|
Block getBlock(size_t start, size_t size) const override;
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
private:
|
2018-12-10 15:25:45 +00:00
|
|
|
Block
|
|
|
|
fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2018-08-24 05:45:03 +00:00
|
|
|
ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
void fillKeyColumns(
|
|
|
|
const std::vector<StringRef> & keys,
|
|
|
|
size_t start,
|
|
|
|
size_t size,
|
|
|
|
const DictionaryStructure & dictionary_structure,
|
|
|
|
ColumnsWithTypeAndName & columns) const;
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2020-12-19 13:24:51 +00:00
|
|
|
std::shared_ptr<const IDictionaryBase> dictionary;
|
2017-05-26 16:08:56 +00:00
|
|
|
Names column_names;
|
|
|
|
PaddedPODArray<Key> ids;
|
|
|
|
ColumnsWithTypeAndName key_columns;
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2017-06-05 09:02:05 +00:00
|
|
|
Columns data_columns;
|
|
|
|
GetColumnsFunction get_key_columns_function;
|
|
|
|
GetColumnsFunction get_view_columns_function;
|
2018-03-07 14:29:00 +00:00
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
enum class DictionaryInputStreamKeyType
|
2018-03-07 14:29:00 +00:00
|
|
|
{
|
|
|
|
Id,
|
|
|
|
ComplexKey,
|
|
|
|
Callback
|
|
|
|
};
|
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
DictionaryInputStreamKeyType key_type;
|
2017-04-27 17:16:24 +00:00
|
|
|
};
|
|
|
|
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2020-12-19 13:24:51 +00:00
|
|
|
template <typename Key>
|
|
|
|
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
2019-08-03 11:02:40 +00:00
|
|
|
std::shared_ptr<const IDictionaryBase> dictionary_, UInt64 max_block_size_, PaddedPODArray<Key> && ids_, const Names & column_names_)
|
|
|
|
: DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
|
2020-12-19 13:24:51 +00:00
|
|
|
, dictionary(dictionary_)
|
2019-08-03 11:02:40 +00:00
|
|
|
, column_names(column_names_)
|
|
|
|
, ids(std::move(ids_))
|
2021-01-23 13:18:24 +00:00
|
|
|
, key_type(DictionaryInputStreamKeyType::Id)
|
2017-04-27 17:16:24 +00:00
|
|
|
{
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
|
|
|
|
2020-12-19 13:24:51 +00:00
|
|
|
template <typename Key>
|
|
|
|
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
2019-08-03 11:02:40 +00:00
|
|
|
std::shared_ptr<const IDictionaryBase> dictionary_,
|
|
|
|
UInt64 max_block_size_,
|
2018-12-10 15:25:45 +00:00
|
|
|
const std::vector<StringRef> & keys,
|
2019-08-03 11:02:40 +00:00
|
|
|
const Names & column_names_)
|
|
|
|
: DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
|
2020-12-19 13:24:51 +00:00
|
|
|
, dictionary(dictionary_)
|
2019-08-03 11:02:40 +00:00
|
|
|
, column_names(column_names_)
|
2021-01-23 13:18:24 +00:00
|
|
|
, key_type(DictionaryInputStreamKeyType::ComplexKey)
|
2017-05-04 18:14:23 +00:00
|
|
|
{
|
2020-12-18 21:43:08 +00:00
|
|
|
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
|
|
|
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
|
|
|
|
2020-12-19 13:24:51 +00:00
|
|
|
template <typename Key>
|
|
|
|
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
2019-08-03 11:02:40 +00:00
|
|
|
std::shared_ptr<const IDictionaryBase> dictionary_,
|
|
|
|
UInt64 max_block_size_,
|
|
|
|
const Columns & data_columns_,
|
|
|
|
const Names & column_names_,
|
|
|
|
GetColumnsFunction && get_key_columns_function_,
|
|
|
|
GetColumnsFunction && get_view_columns_function_)
|
|
|
|
: DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
|
2020-12-19 13:24:51 +00:00
|
|
|
, dictionary(dictionary_)
|
2019-08-03 11:02:40 +00:00
|
|
|
, column_names(column_names_)
|
|
|
|
, data_columns(data_columns_)
|
2020-12-19 13:24:51 +00:00
|
|
|
, get_key_columns_function(std::move(get_key_columns_function_))
|
|
|
|
, get_view_columns_function(std::move(get_view_columns_function_))
|
2021-01-23 13:18:24 +00:00
|
|
|
, key_type(DictionaryInputStreamKeyType::Callback)
|
2017-06-05 09:02:05 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2020-12-19 13:24:51 +00:00
|
|
|
template <typename Key>
|
|
|
|
Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) const
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2020-12-19 13:24:51 +00:00
|
|
|
/// TODO: Rewrite
|
2018-03-07 14:29:00 +00:00
|
|
|
switch (key_type)
|
2017-05-26 16:08:56 +00:00
|
|
|
{
|
2021-01-23 13:18:24 +00:00
|
|
|
case DictionaryInputStreamKeyType::ComplexKey:
|
2017-06-05 09:02:05 +00:00
|
|
|
{
|
2018-03-07 14:29:00 +00:00
|
|
|
Columns columns;
|
|
|
|
ColumnsWithTypeAndName view_columns;
|
|
|
|
columns.reserve(key_columns.size());
|
|
|
|
for (const auto & key_column : key_columns)
|
|
|
|
{
|
|
|
|
ColumnPtr column = key_column.column->cut(start, length);
|
|
|
|
columns.emplace_back(column);
|
|
|
|
view_columns.emplace_back(column, key_column.type, key_column.name);
|
|
|
|
}
|
2020-12-18 21:43:08 +00:00
|
|
|
return fillBlock({}, columns, {}, std::move(view_columns));
|
2017-06-05 09:02:05 +00:00
|
|
|
}
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
case DictionaryInputStreamKeyType::Id:
|
2018-03-07 14:29:00 +00:00
|
|
|
{
|
2018-08-24 05:45:03 +00:00
|
|
|
PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
|
2020-12-18 21:43:08 +00:00
|
|
|
return fillBlock(ids_to_fill, {}, {}, {});
|
2018-03-07 14:29:00 +00:00
|
|
|
}
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
case DictionaryInputStreamKeyType::Callback:
|
2017-06-05 09:02:05 +00:00
|
|
|
{
|
2018-03-07 14:29:00 +00:00
|
|
|
Columns columns;
|
|
|
|
columns.reserve(data_columns.size());
|
|
|
|
for (const auto & data_column : data_columns)
|
|
|
|
columns.push_back(data_column->cut(start, length));
|
|
|
|
const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
|
|
|
|
const auto & attributes = *dictionaty_structure.key;
|
|
|
|
ColumnsWithTypeAndName keys_with_type_and_name = get_key_columns_function(columns, attributes);
|
|
|
|
ColumnsWithTypeAndName view_with_type_and_name = get_view_columns_function(columns, attributes);
|
|
|
|
DataTypes types;
|
|
|
|
columns.clear();
|
|
|
|
for (const auto & key_column : keys_with_type_and_name)
|
|
|
|
{
|
|
|
|
columns.push_back(key_column.column);
|
|
|
|
types.push_back(key_column.type);
|
|
|
|
}
|
2020-12-18 21:43:08 +00:00
|
|
|
return fillBlock({}, columns, types, std::move(view_with_type_and_name));
|
2017-06-05 09:02:05 +00:00
|
|
|
}
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
throw Exception("Unexpected DictionaryInputStreamKeyType.", ErrorCodes::LOGICAL_ERROR);
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
|
|
|
|
2020-12-19 13:24:51 +00:00
|
|
|
template <typename Key>
|
|
|
|
Block DictionaryBlockInputStream<Key>::fillBlock(
|
2018-08-10 04:02:56 +00:00
|
|
|
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
|
2017-05-04 18:14:23 +00:00
|
|
|
{
|
|
|
|
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
|
|
|
|
2017-06-05 09:02:05 +00:00
|
|
|
DataTypes data_types = types;
|
|
|
|
ColumnsWithTypeAndName block_columns;
|
|
|
|
|
|
|
|
data_types.reserve(keys.size());
|
2017-12-15 02:52:38 +00:00
|
|
|
const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
|
2017-06-05 09:02:05 +00:00
|
|
|
if (data_types.empty() && dictionaty_structure.key)
|
2017-09-08 03:47:27 +00:00
|
|
|
for (const auto & key : *dictionaty_structure.key)
|
2017-06-05 09:02:05 +00:00
|
|
|
data_types.push_back(key.type);
|
|
|
|
|
|
|
|
for (const auto & column : view)
|
|
|
|
if (names.find(column.name) != names.end())
|
|
|
|
block_columns.push_back(column);
|
2017-05-04 18:14:23 +00:00
|
|
|
|
2017-12-15 02:52:38 +00:00
|
|
|
const DictionaryStructure & structure = dictionary->getStructure();
|
2021-01-02 15:03:28 +00:00
|
|
|
ColumnPtr ids_column = getColumnFromIds(ids_to_fill);
|
2017-04-27 17:16:24 +00:00
|
|
|
|
2017-05-04 18:14:23 +00:00
|
|
|
if (structure.id && names.find(structure.id->name) != names.end())
|
2020-12-18 21:43:08 +00:00
|
|
|
{
|
|
|
|
block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
|
|
|
|
}
|
2017-04-27 17:16:24 +00:00
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
auto dictionary_key_type = dictionary->getKeyType();
|
2020-12-19 13:24:51 +00:00
|
|
|
|
2017-04-27 17:16:24 +00:00
|
|
|
for (const auto idx : ext::range(0, structure.attributes.size()))
|
|
|
|
{
|
2018-08-24 05:20:18 +00:00
|
|
|
const DictionaryAttribute & attribute = structure.attributes[idx];
|
2017-05-26 16:08:56 +00:00
|
|
|
if (names.find(attribute.name) != names.end())
|
2017-04-27 17:16:24 +00:00
|
|
|
{
|
2017-05-04 18:14:23 +00:00
|
|
|
ColumnPtr column;
|
2020-12-18 21:43:08 +00:00
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
if (dictionary_key_type == DictionaryKeyType::simple)
|
2020-12-18 21:43:08 +00:00
|
|
|
{
|
2020-12-19 13:24:51 +00:00
|
|
|
column = dictionary->getColumn(
|
2021-01-23 13:18:24 +00:00
|
|
|
attribute.name,
|
|
|
|
attribute.type,
|
|
|
|
{ids_column},
|
|
|
|
{std::make_shared<DataTypeUInt64>()},
|
|
|
|
nullptr /* default_values_column */);
|
2020-12-18 21:43:08 +00:00
|
|
|
}
|
2021-01-02 22:08:54 +00:00
|
|
|
else
|
2020-12-18 21:43:08 +00:00
|
|
|
{
|
2020-12-19 14:27:39 +00:00
|
|
|
column = dictionary->getColumn(
|
2021-01-23 16:47:33 +00:00
|
|
|
attribute.name,
|
2021-01-23 13:18:24 +00:00
|
|
|
attribute.type,
|
|
|
|
keys,
|
|
|
|
data_types,
|
|
|
|
nullptr /* default_values_column*/);
|
2020-12-18 21:43:08 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 09:02:05 +00:00
|
|
|
block_columns.emplace_back(column, attribute.type, attribute.name);
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
2017-04-27 17:16:24 +00:00
|
|
|
}
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2020-12-18 21:43:08 +00:00
|
|
|
return Block(block_columns);
|
2017-04-27 17:16:24 +00:00
|
|
|
}
|
|
|
|
|
2020-12-19 13:24:51 +00:00
|
|
|
template <typename Key>
|
|
|
|
ColumnPtr DictionaryBlockInputStream<Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
|
2017-04-27 17:16:24 +00:00
|
|
|
{
|
2017-12-14 01:43:19 +00:00
|
|
|
auto column_vector = ColumnVector<UInt64>::create();
|
2018-08-10 04:02:56 +00:00
|
|
|
column_vector->getData().reserve(ids_to_fill.size());
|
|
|
|
for (UInt64 id : ids_to_fill)
|
2018-10-22 08:54:54 +00:00
|
|
|
column_vector->insertValue(id);
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return column_vector;
|
2017-04-27 17:16:24 +00:00
|
|
|
}
|
|
|
|
|
2018-08-24 05:37:06 +00:00
|
|
|
|
2020-12-19 13:24:51 +00:00
|
|
|
template <typename Key>
|
|
|
|
void DictionaryBlockInputStream<Key>::fillKeyColumns(
|
2018-12-10 15:25:45 +00:00
|
|
|
const std::vector<StringRef> & keys,
|
|
|
|
size_t start,
|
|
|
|
size_t size,
|
|
|
|
const DictionaryStructure & dictionary_structure,
|
|
|
|
ColumnsWithTypeAndName & res) const
|
2017-05-04 18:14:23 +00:00
|
|
|
{
|
2017-12-15 02:52:38 +00:00
|
|
|
MutableColumns columns;
|
|
|
|
columns.reserve(dictionary_structure.key->size());
|
|
|
|
|
2017-05-29 17:26:45 +00:00
|
|
|
for (const DictionaryAttribute & attribute : *dictionary_structure.key)
|
2017-12-15 02:52:38 +00:00
|
|
|
columns.emplace_back(attribute.type->createColumn());
|
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
for (auto idx : ext::range(start, size))
|
2017-05-04 18:14:23 +00:00
|
|
|
{
|
2017-05-26 16:08:56 +00:00
|
|
|
const auto & key = keys[idx];
|
2017-05-04 18:14:23 +00:00
|
|
|
auto ptr = key.data;
|
2017-12-15 02:52:38 +00:00
|
|
|
for (auto & column : columns)
|
|
|
|
ptr = column->deserializeAndInsertFromArena(ptr);
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
2017-12-15 02:52:38 +00:00
|
|
|
|
2018-06-03 22:11:50 +00:00
|
|
|
for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i)
|
2018-12-10 15:25:45 +00:00
|
|
|
res.emplace_back(
|
|
|
|
ColumnWithTypeAndName{std::move(columns[i]), (*dictionary_structure.key)[i].type, (*dictionary_structure.key)[i].name});
|
2017-05-04 18:14:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-26 16:08:56 +00:00
|
|
|
}
|