mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 07:01:59 +00:00
Added ComplexKeyRangeHashed dictionary
This commit is contained in:
parent
40f5e06a8d
commit
e12820ecb2
48
src/Dictionaries/DictionaryHelpers.cpp
Normal file
48
src/Dictionaries/DictionaryHelpers.cpp
Normal file
@ -0,0 +1,48 @@
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Rebuilds key columns from serialized keys.
/// One empty column is created per key attribute of `dictionary_structure`; then each key
/// with index in [start, end) is decoded attribute by attribute, appending one value to
/// every column. Returns the filled columns in key-attribute order.
MutableColumns deserializeColumnsFromKeys(
    const DictionaryStructure & dictionary_structure,
    const PaddedPODArray<StringRef> & keys,
    size_t start,
    size_t end)
{
    const auto & key_attributes = *dictionary_structure.key;

    MutableColumns result_columns;
    result_columns.reserve(key_attributes.size());

    for (const DictionaryAttribute & key_attribute : key_attributes)
        result_columns.emplace_back(key_attribute.type->createColumn());

    size_t row = start;
    while (row < end)
    {
        /// Each column consumes its own part of the serialized key and returns
        /// the position where the next column has to continue reading.
        const auto * position = keys[row].data;

        for (auto & column : result_columns)
            position = column->deserializeAndInsertFromArena(position);

        ++row;
    }

    return result_columns;
}
|
||||
|
||||
/// Same as deserializeColumnsFromKeys, but every resulting column is paired with the type
/// and the name of the key attribute it was built for.
/// Keys with index in [start, end) are deserialized; the result has one entry per key attribute.
ColumnsWithTypeAndName deserializeColumnsWithTypeAndNameFromKeys(
    const DictionaryStructure & dictionary_structure,
    const PaddedPODArray<StringRef> & keys,
    size_t start,
    size_t end)
{
    MutableColumns columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end);
    const size_t num_columns = columns.size();

    ColumnsWithTypeAndName result;
    /// The final size is known up front, so allocate once.
    result.reserve(num_columns);

    for (size_t i = 0; i < num_columns; ++i)
    {
        const auto & dictionary_attribute = (*dictionary_structure.key)[i];
        result.emplace_back(ColumnWithTypeAndName{std::move(columns[i]), dictionary_attribute.type, dictionary_attribute.name});
    }

    return result;
}
|
||||
|
||||
}
|
@ -497,6 +497,20 @@ private:
|
||||
Arena * complex_key_arena;
|
||||
};
|
||||
|
||||
/// Deserializes the keys with index in [start, end) into one column per key attribute of the dictionary structure.
|
||||
MutableColumns deserializeColumnsFromKeys(
|
||||
const DictionaryStructure & dictionary_structure,
|
||||
const PaddedPODArray<StringRef> & keys,
|
||||
size_t start,
|
||||
size_t end);
|
||||
|
||||
/// Same as deserializeColumnsFromKeys, but each column is returned together with the type and name of its key attribute.
|
||||
ColumnsWithTypeAndName deserializeColumnsWithTypeAndNameFromKeys(
|
||||
const DictionaryStructure & dictionary_structure,
|
||||
const PaddedPODArray<StringRef> & keys,
|
||||
size_t start,
|
||||
size_t end);
|
||||
|
||||
/** Merge block with blocks from stream. If there are duplicate keys in block they are filtered out.
|
||||
* In result block_to_update will be merged with blocks from stream.
|
||||
* Note: readPrefix readImpl readSuffix will be called on stream object during function execution.
|
||||
|
@ -29,7 +29,7 @@ DictionarySourceData::DictionarySourceData(
|
||||
, key_type(DictionaryInputStreamKeyType::ComplexKey)
|
||||
{
|
||||
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
||||
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
|
||||
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dictionary_structure, keys, 0, keys.size());
|
||||
}
|
||||
|
||||
DictionarySourceData::DictionarySourceData(
|
||||
@ -158,32 +158,4 @@ Block DictionarySourceData::fillBlock(
|
||||
return Block(block_columns);
|
||||
}
|
||||
|
||||
void DictionarySourceData::fillKeyColumns(
|
||||
const PaddedPODArray<StringRef> & keys,
|
||||
size_t start,
|
||||
size_t size,
|
||||
const DictionaryStructure & dictionary_structure,
|
||||
ColumnsWithTypeAndName & result)
|
||||
{
|
||||
MutableColumns columns;
|
||||
columns.reserve(dictionary_structure.key->size());
|
||||
|
||||
for (const DictionaryAttribute & attribute : *dictionary_structure.key)
|
||||
columns.emplace_back(attribute.type->createColumn());
|
||||
|
||||
for (size_t index = start; index < size; ++index)
|
||||
{
|
||||
const auto & key = keys[index];
|
||||
const auto *ptr = key.data;
|
||||
for (auto & column : columns)
|
||||
ptr = column->deserializeAndInsertFromArena(ptr);
|
||||
}
|
||||
|
||||
for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i)
|
||||
{
|
||||
const auto & dictionary_attribute = (*dictionary_structure.key)[i];
|
||||
result.emplace_back(ColumnWithTypeAndName{std::move(columns[i]), dictionary_attribute.type, dictionary_attribute.name});
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -51,13 +51,6 @@ private:
|
||||
const DataTypes & types,
|
||||
ColumnsWithTypeAndName && view) const;
|
||||
|
||||
static void fillKeyColumns(
|
||||
const PaddedPODArray<StringRef> & keys,
|
||||
size_t start,
|
||||
size_t size,
|
||||
const DictionaryStructure & dictionary_structure,
|
||||
ColumnsWithTypeAndName & result);
|
||||
|
||||
const size_t num_rows;
|
||||
std::shared_ptr<const IDictionary> dictionary;
|
||||
std::unordered_set<std::string> column_names;
|
||||
|
@ -134,42 +134,11 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
|
||||
if (id->name.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'id' cannot be empty");
|
||||
|
||||
const char * range_default_type = "Date";
|
||||
if (config.has(structure_prefix + ".range_min"))
|
||||
range_min.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_min", range_default_type));
|
||||
|
||||
if (config.has(structure_prefix + ".range_max"))
|
||||
range_max.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_max", range_default_type));
|
||||
|
||||
if (range_min.has_value() != range_max.has_value())
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Dictionary structure should have both 'range_min' and 'range_max' either specified or not.");
|
||||
}
|
||||
|
||||
if (range_min && range_max && !range_min->type->equals(*range_max->type))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Dictionary structure 'range_min' and 'range_max' should have same type, "
|
||||
"'range_min' type: {},"
|
||||
"'range_max' type: {}",
|
||||
range_min->type->getName(),
|
||||
range_max->type->getName());
|
||||
}
|
||||
|
||||
if (range_min)
|
||||
{
|
||||
if (!range_min->type->isValueRepresentedByInteger())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
|
||||
" Actual 'range_min' and 'range_max' type is {}",
|
||||
range_min->type->getName());
|
||||
}
|
||||
|
||||
if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))
|
||||
if (!id->expression.empty())
|
||||
has_expressions = true;
|
||||
}
|
||||
|
||||
parseRangeConfiguration(config, structure_prefix);
|
||||
attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false);
|
||||
|
||||
for (size_t i = 0; i < attributes.size(); ++i)
|
||||
@ -439,4 +408,42 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
return res_attributes;
|
||||
}
|
||||
|
||||
/// Reads the optional 'range_min' and 'range_max' special attributes found under
/// `structure_prefix` and validates them:
///  - either both or none of them must be present;
///  - both must have the same type;
///  - that type must be representable by an integer.
/// Sets `has_expressions` when any of the two attributes carries an expression.
/// Throws Exception with BAD_ARGUMENTS when a check fails.
void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix)
{
    /// Passed to makeDictionaryTypedSpecialAttribute as the default type for both bounds.
    const char * default_type_name = "Date";

    const std::string range_min_path = structure_prefix + ".range_min";
    const std::string range_max_path = structure_prefix + ".range_max";

    if (config.has(range_min_path))
        range_min.emplace(makeDictionaryTypedSpecialAttribute(config, range_min_path, default_type_name));

    if (config.has(range_max_path))
        range_max.emplace(makeDictionaryTypedSpecialAttribute(config, range_max_path, default_type_name));

    const bool has_min = range_min.has_value();
    const bool has_max = range_max.has_value();

    if (has_min != has_max)
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Dictionary structure should have both 'range_min' and 'range_max' either specified or not.");

    if (has_min && has_max && !range_min->type->equals(*range_max->type))
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Dictionary structure 'range_min' and 'range_max' should have same type, "
            "'range_min' type: {},"
            "'range_max' type: {}",
            range_min->type->getName(),
            range_max->type->getName());

    /// Checking 'range_min' alone is enough here: both types were verified to be equal above.
    if (has_min && !range_min->type->isValueRepresentedByInteger())
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
            " Actual 'range_min' and 'range_max' type is {}",
            range_min->type->getName());

    const bool min_has_expression = has_min && !range_min->expression.empty();
    const bool max_has_expression = has_max && !range_max->expression.empty();

    if (min_has_expression || max_has_expression)
        has_expressions = true;
}
|
||||
|
||||
}
|
||||
|
@ -67,8 +67,9 @@ using DictionaryLifetime = ExternalLoadableLifetime;
|
||||
* - null_value, used as a default value for non-existent entries in the dictionary,
|
||||
* decimal representation for numeric attributes;
|
||||
* - hierarchical, whether this attribute defines a hierarchy;
|
||||
* - injective, whether the mapping to parent is injective (can be used for optimization of GROUP BY?)
|
||||
* - is_object_id, used in mongo dictionary, converts string key to objectid
|
||||
* - injective, whether the mapping to parent is injective (can be used for optimization of GROUP BY?);
|
||||
* - is_object_id, used in mongo dictionary, converts string key to objectid;
|
||||
* - is_nullable, is attribute nullable;
|
||||
*/
|
||||
struct DictionaryAttribute final
|
||||
{
|
||||
@ -153,6 +154,10 @@ private:
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
bool complex_key_attributes);
|
||||
|
||||
/// Parses and validates the optional 'range_min' and 'range_max' attributes of the dictionary structure.
|
||||
void parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -133,6 +133,29 @@ void ExternalQueryBuilder::composeLoadAllQuery(WriteBuffer & out) const
|
||||
|
||||
writeQuoted(key.name, out);
|
||||
}
|
||||
|
||||
if (dict_struct.range_min && dict_struct.range_max)
|
||||
{
|
||||
writeString(", ", out);
|
||||
|
||||
if (!dict_struct.range_min->expression.empty())
|
||||
{
|
||||
writeParenthesisedString(dict_struct.range_min->expression, out);
|
||||
writeString(" AS ", out);
|
||||
}
|
||||
|
||||
writeQuoted(dict_struct.range_min->name, out);
|
||||
|
||||
writeString(", ", out);
|
||||
|
||||
if (!dict_struct.range_max->expression.empty())
|
||||
{
|
||||
writeParenthesisedString(dict_struct.range_max->expression, out);
|
||||
writeString(" AS ", out);
|
||||
}
|
||||
|
||||
writeQuoted(dict_struct.range_max->name, out);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & attr : dict_struct.attributes)
|
||||
|
@ -64,7 +64,7 @@ public:
|
||||
|
||||
bool isInjective(const std::string & attribute_name) const override
|
||||
{
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
return dict_struct.getAttribute(attribute_name).injective;
|
||||
}
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
@ -14,170 +14,213 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename RangeType>
|
||||
/// Selects the key representation of a range dictionary source.
enum class RangeDictionaryType
{
    /// Selected when the key type is UInt64.
    simple,
    /// Selected when the key type is StringRef.
    complex,
};
|
||||
|
||||
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||
class RangeDictionarySourceData
|
||||
{
|
||||
public:
|
||||
using Key = UInt64;
|
||||
|
||||
using KeyType = std::conditional_t<range_dictionary_type == RangeDictionaryType::simple, UInt64, StringRef>;
|
||||
|
||||
RangeDictionarySourceData(
|
||||
std::shared_ptr<const IDictionary> dictionary,
|
||||
const Names & column_names,
|
||||
PaddedPODArray<Key> && ids_to_fill,
|
||||
PaddedPODArray<KeyType> && keys,
|
||||
PaddedPODArray<RangeType> && start_dates,
|
||||
PaddedPODArray<RangeType> && end_dates);
|
||||
|
||||
Block getBlock(size_t start, size_t length) const;
|
||||
size_t getNumRows() const { return ids.size(); }
|
||||
size_t getNumRows() const { return keys.size(); }
|
||||
|
||||
private:
|
||||
|
||||
Block fillBlock(
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<KeyType> & keys_to_fill,
|
||||
const PaddedPODArray<RangeType> & block_start_dates,
|
||||
const PaddedPODArray<RangeType> & block_end_dates) const;
|
||||
const PaddedPODArray<RangeType> & block_end_dates,
|
||||
size_t start,
|
||||
size_t end) const;
|
||||
|
||||
PaddedPODArray<Int64> makeDateKey(
|
||||
PaddedPODArray<Int64> makeDateKeys(
|
||||
const PaddedPODArray<RangeType> & block_start_dates,
|
||||
const PaddedPODArray<RangeType> & block_end_dates) const;
|
||||
|
||||
std::shared_ptr<const IDictionary> dictionary;
|
||||
NameSet column_names;
|
||||
PaddedPODArray<Key> ids;
|
||||
PaddedPODArray<KeyType> keys;
|
||||
PaddedPODArray<RangeType> start_dates;
|
||||
PaddedPODArray<RangeType> end_dates;
|
||||
};
|
||||
|
||||
|
||||
template <typename RangeType>
|
||||
RangeDictionarySourceData<RangeType>::RangeDictionarySourceData(
|
||||
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||
RangeDictionarySourceData<range_dictionary_type, RangeType>::RangeDictionarySourceData(
|
||||
std::shared_ptr<const IDictionary> dictionary_,
|
||||
const Names & column_names_,
|
||||
PaddedPODArray<Key> && ids_,
|
||||
PaddedPODArray<KeyType> && keys,
|
||||
PaddedPODArray<RangeType> && block_start_dates,
|
||||
PaddedPODArray<RangeType> && block_end_dates)
|
||||
: dictionary(dictionary_)
|
||||
, column_names(column_names_.begin(), column_names_.end())
|
||||
, ids(std::move(ids_))
|
||||
, keys(std::move(keys))
|
||||
, start_dates(std::move(block_start_dates))
|
||||
, end_dates(std::move(block_end_dates))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename RangeType>
|
||||
Block RangeDictionarySourceData<RangeType>::getBlock(size_t start, size_t length) const
|
||||
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||
Block RangeDictionarySourceData<range_dictionary_type, RangeType>::getBlock(size_t start, size_t length) const
|
||||
{
|
||||
PaddedPODArray<Key> block_ids;
|
||||
PaddedPODArray<KeyType> block_keys;
|
||||
PaddedPODArray<RangeType> block_start_dates;
|
||||
PaddedPODArray<RangeType> block_end_dates;
|
||||
block_ids.reserve(length);
|
||||
block_keys.reserve(length);
|
||||
block_start_dates.reserve(length);
|
||||
block_end_dates.reserve(length);
|
||||
|
||||
for (auto idx : collections::range(start, start + length))
|
||||
for (size_t index = start; index < start + length; ++index )
|
||||
{
|
||||
block_ids.push_back(ids[idx]);
|
||||
block_start_dates.push_back(start_dates[idx]);
|
||||
block_end_dates.push_back(end_dates[idx]);
|
||||
block_keys.push_back(block_keys[index]);
|
||||
block_start_dates.push_back(start_dates[index]);
|
||||
block_end_dates.push_back(end_dates[index]);
|
||||
}
|
||||
|
||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
||||
return fillBlock(block_keys, block_start_dates, block_end_dates, start, start + length);
|
||||
}
|
||||
|
||||
template <typename RangeType>
|
||||
PaddedPODArray<Int64> RangeDictionarySourceData<RangeType>::makeDateKey(
|
||||
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const
|
||||
{
|
||||
PaddedPODArray<Int64> key(block_start_dates.size());
|
||||
for (size_t i = 0; i < key.size(); ++i)
|
||||
{
|
||||
if (RangeHashedDictionary::Range::isCorrectDate(block_start_dates[i]))
|
||||
key[i] = block_start_dates[i];
|
||||
else
|
||||
key[i] = block_end_dates[i];
|
||||
}
|
||||
|
||||
return key;
|
||||
}
|
||||
|
||||
|
||||
template <typename RangeType>
|
||||
Block RangeDictionarySourceData<RangeType>::fillBlock(
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||
PaddedPODArray<Int64> RangeDictionarySourceData<range_dictionary_type, RangeType>::makeDateKeys(
|
||||
const PaddedPODArray<RangeType> & block_start_dates,
|
||||
const PaddedPODArray<RangeType> & block_end_dates) const
|
||||
{
|
||||
PaddedPODArray<Int64> keys(block_start_dates.size());
|
||||
|
||||
for (size_t i = 0; i < keys.size(); ++i)
|
||||
{
|
||||
if (Range::isCorrectDate(block_start_dates[i]))
|
||||
keys[i] = block_start_dates[i];
|
||||
else
|
||||
keys[i] = block_end_dates[i];
|
||||
}
|
||||
|
||||
return keys;
|
||||
}
|
||||
|
||||
|
||||
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||
Block RangeDictionarySourceData<range_dictionary_type, RangeType>::fillBlock(
|
||||
const PaddedPODArray<KeyType> & keys_to_fill,
|
||||
const PaddedPODArray<RangeType> & block_start_dates,
|
||||
const PaddedPODArray<RangeType> & block_end_dates,
|
||||
size_t start,
|
||||
size_t end) const
|
||||
{
|
||||
ColumnsWithTypeAndName columns;
|
||||
const DictionaryStructure & structure = dictionary->getStructure();
|
||||
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
||||
|
||||
auto ids_column = getColumnFromPODArray(ids_to_fill);
|
||||
const std::string & id_column_name = structure.id->name;
|
||||
if (column_names.find(id_column_name) != column_names.end())
|
||||
columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), id_column_name);
|
||||
DataTypes keys_types;
|
||||
Columns keys_columns;
|
||||
Strings keys_names = dictionary_structure.getKeysNames();
|
||||
|
||||
auto date_key = makeDateKey(block_start_dates, block_end_dates);
|
||||
if constexpr (range_dictionary_type == RangeDictionaryType::simple)
|
||||
{
|
||||
keys_columns = {getColumnFromPODArray(keys_to_fill)};
|
||||
keys_types = {std::make_shared<DataTypeUInt64>()};
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto & attribute : *dictionary_structure.key)
|
||||
keys_types.emplace_back(attribute.type);
|
||||
|
||||
auto deserialized_columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end);
|
||||
for (auto & deserialized_column : deserialized_columns)
|
||||
keys_columns.emplace_back(std::move(deserialized_column));
|
||||
}
|
||||
|
||||
size_t keys_size = keys_names.size();
|
||||
|
||||
std::cerr << "Keys size " << keys_size << " key columns size " << keys_columns.size();
|
||||
std::cerr << " keys types size " << keys_types.size() << std::endl;
|
||||
|
||||
assert(keys_columns.size() == keys_size);
|
||||
assert(keys_types.size() == keys_size);
|
||||
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
{
|
||||
auto & key_name = keys_names[i];
|
||||
|
||||
if (column_names.find(key_name) != column_names.end())
|
||||
columns.emplace_back(keys_columns[i], keys_types[i], key_name);
|
||||
}
|
||||
|
||||
auto date_key = makeDateKeys(block_start_dates, block_end_dates);
|
||||
auto date_column = getColumnFromPODArray(date_key);
|
||||
|
||||
const std::string & range_min_column_name = structure.range_min->name;
|
||||
keys_columns.emplace_back(std::move(date_column));
|
||||
keys_types.emplace_back(std::make_shared<DataTypeInt64>());
|
||||
|
||||
const auto & range_min_column_name = dictionary_structure.range_min->name;
|
||||
if (column_names.find(range_min_column_name) != column_names.end())
|
||||
{
|
||||
auto range_min_column = getColumnFromPODArray(block_start_dates);
|
||||
columns.emplace_back(range_min_column, structure.range_max->type, range_min_column_name);
|
||||
columns.emplace_back(range_min_column, dictionary_structure.range_max->type, range_min_column_name);
|
||||
}
|
||||
|
||||
const std::string & range_max_column_name = structure.range_max->name;
|
||||
const auto & range_max_column_name = dictionary_structure.range_max->name;
|
||||
if (column_names.find(range_max_column_name) != column_names.end())
|
||||
{
|
||||
auto range_max_column = getColumnFromPODArray(block_end_dates);
|
||||
columns.emplace_back(range_max_column, structure.range_max->type, range_max_column_name);
|
||||
columns.emplace_back(range_max_column, dictionary_structure.range_max->type, range_max_column_name);
|
||||
}
|
||||
|
||||
for (const auto idx : collections::range(0, structure.attributes.size()))
|
||||
size_t attributes_size = dictionary_structure.attributes.size();
|
||||
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
|
||||
{
|
||||
const DictionaryAttribute & attribute = structure.attributes[idx];
|
||||
if (column_names.find(attribute.name) != column_names.end())
|
||||
{
|
||||
ColumnPtr column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
{ids_column, date_column},
|
||||
{std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
|
||||
nullptr);
|
||||
columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
const auto & attribute = dictionary_structure.attributes[attribute_index];
|
||||
if (column_names.find(attribute.name) == column_names.end())
|
||||
continue;
|
||||
|
||||
auto column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
keys_columns,
|
||||
keys_types,
|
||||
nullptr /* default_values_column*/);
|
||||
|
||||
columns.emplace_back(std::move(column), attribute.type, attribute.name);
|
||||
}
|
||||
return Block(columns);
|
||||
}
|
||||
|
||||
/*
|
||||
* BlockInputStream implementation for external dictionaries
|
||||
* read() returns single block consisting of the in-memory contents of the dictionaries
|
||||
*/
|
||||
template <typename RangeType>
|
||||
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||
class RangeDictionarySource : public DictionarySourceBase
|
||||
{
|
||||
public:
|
||||
using Key = UInt64;
|
||||
|
||||
RangeDictionarySource(RangeDictionarySourceData<RangeType> data_, size_t max_block_size);
|
||||
RangeDictionarySource(RangeDictionarySourceData<range_dictionary_type, RangeType> data_, size_t max_block_size);
|
||||
|
||||
String getName() const override { return "RangeDictionarySource"; }
|
||||
|
||||
protected:
|
||||
Block getBlock(size_t start, size_t length) const override;
|
||||
|
||||
RangeDictionarySourceData<RangeType> data;
|
||||
RangeDictionarySourceData<range_dictionary_type, RangeType> data;
|
||||
};
|
||||
|
||||
template <typename RangeType>
|
||||
RangeDictionarySource<RangeType>::RangeDictionarySource(RangeDictionarySourceData<RangeType> data_, size_t max_block_size)
|
||||
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||
RangeDictionarySource<range_dictionary_type, RangeType>::RangeDictionarySource(RangeDictionarySourceData<range_dictionary_type, RangeType> data_, size_t max_block_size)
|
||||
: DictionarySourceBase(data_.getBlock(0, 0), data_.getNumRows(), max_block_size)
|
||||
, data(std::move(data_))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename RangeType>
|
||||
Block RangeDictionarySource<RangeType>::getBlock(size_t start, size_t length) const
|
||||
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||
Block RangeDictionarySource<range_dictionary_type, RangeType>::getBlock(size_t start, size_t length) const
|
||||
{
|
||||
return data.getBlock(start, length);
|
||||
}
|
||||
|
@ -10,7 +10,8 @@
|
||||
|
||||
namespace
|
||||
{
|
||||
using RangeStorageType = DB::RangeHashedDictionary::RangeStorageType;
|
||||
|
||||
using RangeStorageType = DB::RangeStorageType;
|
||||
|
||||
// Null values mean that specified boundary, either min or max is not set on range.
|
||||
// To simplify comparison, null value of min bound should be bigger than any other value,
|
||||
@ -25,7 +26,7 @@ RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t i
|
||||
return default_value;
|
||||
|
||||
const RangeStorageType result = static_cast<RangeStorageType>(column.getInt(index));
|
||||
if (isDate && !DB::RangeHashedDictionary::Range::isCorrectDate(result))
|
||||
if (isDate && !DB::Range::isCorrectDate(result))
|
||||
return default_value;
|
||||
|
||||
return result;
|
||||
@ -54,23 +55,23 @@ namespace ErrorCodes
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
}
|
||||
|
||||
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
|
||||
bool Range::isCorrectDate(const RangeStorageType & date)
|
||||
{
|
||||
return 0 < date && date <= DATE_LUT_MAX_DAY_NUM;
|
||||
}
|
||||
|
||||
bool RangeHashedDictionary::Range::contains(const RangeStorageType & value) const
|
||||
bool Range::contains(const RangeStorageType & value) const
|
||||
{
|
||||
return left <= value && value <= right;
|
||||
}
|
||||
|
||||
static bool operator<(const RangeHashedDictionary::Range & left, const RangeHashedDictionary::Range & right)
|
||||
static bool operator<(const Range & left, const Range & right)
|
||||
{
|
||||
return std::tie(left.left, left.right) < std::tie(right.left, right.right);
|
||||
}
|
||||
|
||||
|
||||
RangeHashedDictionary::RangeHashedDictionary(
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
RangeHashedDictionary<dictionary_key_type>::RangeHashedDictionary(
|
||||
const StorageID & dict_id_,
|
||||
const DictionaryStructure & dict_struct_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
@ -87,7 +88,8 @@ RangeHashedDictionary::RangeHashedDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
ColumnPtr RangeHashedDictionary::getColumn(
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
ColumnPtr RangeHashedDictionary<dictionary_key_type>::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
@ -96,20 +98,18 @@ ColumnPtr RangeHashedDictionary::getColumn(
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
auto keys_size = key_columns.front()->size();
|
||||
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
|
||||
const auto & attribute = attributes[attribute_index];
|
||||
|
||||
/// Cast second column to storage type
|
||||
Columns modified_key_columns = key_columns;
|
||||
|
||||
auto range_storage_column = key_columns[1];
|
||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
|
||||
|
||||
auto range_storage_column = key_columns.back();
|
||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""};
|
||||
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
|
||||
modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type);
|
||||
modified_key_columns.back() = castColumnAccurate(column_to_cast, range_column_storage_type);
|
||||
|
||||
size_t keys_size = key_columns.front()->size();
|
||||
bool is_attribute_nullable = attribute.is_nullable;
|
||||
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
@ -204,24 +204,26 @@ ColumnPtr RangeHashedDictionary::getColumn(
|
||||
return result;
|
||||
}
|
||||
|
||||
ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
ColumnUInt8::Ptr RangeHashedDictionary<dictionary_key_type>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
auto range_storage_column = key_columns[1];
|
||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
|
||||
|
||||
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
|
||||
auto range_storage_column = key_columns.back();
|
||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
|
||||
auto range_column_updated = castColumnAccurate(column_to_cast, range_column_storage_type);
|
||||
|
||||
PaddedPODArray<UInt64> key_backup_storage;
|
||||
PaddedPODArray<RangeStorageType> range_backup_storage;
|
||||
|
||||
const PaddedPODArray<UInt64> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
|
||||
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, range_column_updated, range_backup_storage);
|
||||
|
||||
auto key_columns_copy = key_columns;
|
||||
key_columns_copy.pop_back();
|
||||
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
|
||||
const size_t keys_size = keys_extractor.getKeysSize();
|
||||
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
ColumnUInt8::Ptr result;
|
||||
|
||||
auto result = ColumnUInt8::create(keys_size);
|
||||
auto & out = result->getData();
|
||||
size_t keys_found = 0;
|
||||
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
@ -229,58 +231,48 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, con
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
result = hasKeysImpl<ValueType>(attribute, ids, dates, keys_found);
|
||||
|
||||
const auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
const auto key = keys_extractor.extractCurrentKey();
|
||||
const auto it = collection.find(key);
|
||||
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[key_index];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it = std::find_if(
|
||||
std::begin(ranges_and_values),
|
||||
std::end(ranges_and_values),
|
||||
[date](const Value<ValueType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
|
||||
out[key_index] = val_it != std::end(ranges_and_values);
|
||||
keys_found += out[key_index];
|
||||
}
|
||||
else
|
||||
{
|
||||
out[key_index] = false;
|
||||
}
|
||||
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
|
||||
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename AttributeType>
|
||||
ColumnUInt8::Ptr RangeHashedDictionary::hasKeysImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<UInt64> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
size_t & keys_found) const
|
||||
{
|
||||
auto result = ColumnUInt8::create(ids.size());
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
|
||||
keys_found = 0;
|
||||
|
||||
for (const auto row : collections::range(0, ids.size()))
|
||||
{
|
||||
const auto it = attr.find(ids[row]);
|
||||
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[row];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it = std::find_if(
|
||||
std::begin(ranges_and_values),
|
||||
std::end(ranges_and_values),
|
||||
[date](const Value<AttributeType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
|
||||
out[row] = val_it != std::end(ranges_and_values);
|
||||
keys_found += out[row];
|
||||
}
|
||||
else
|
||||
out[row] = false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::createAttributes()
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void RangeHashedDictionary<dictionary_key_type>::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
@ -296,7 +288,8 @@ void RangeHashedDictionary::createAttributes()
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads every block produced by source_ptr->loadAll() and stores its rows into the per-attribute collections.
/// In the templated version each block is laid out as: dict_struct.getKeysSize() key columns,
/// then the range minimum column, then the range maximum column, then one column per attribute.
/// For every (attribute, row) pair the key, Range{lower_bound, upper_bound} and the attribute value
/// are handed to setAttributeValue.
void RangeHashedDictionary::loadData()
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void RangeHashedDictionary<dictionary_key_type>::loadData()
|
||||
{
|
||||
QueryPipeline pipeline;
|
||||
pipeline.init(source_ptr->loadAll());
|
||||
@ -305,38 +298,57 @@ void RangeHashedDictionary::loadData()
|
||||
Block block;
|
||||
while (executor.pull(block))
|
||||
{
|
||||
const auto & id_column = *block.safeGetByPosition(0).column;
|
||||
size_t skip_keys_size_offset = dict_struct.getKeysSize();
|
||||
|
||||
Columns key_columns;
|
||||
key_columns.reserve(skip_keys_size_offset);
|
||||
|
||||
/// Split into keys columns and attribute columns
|
||||
for (size_t i = 0; i < skip_keys_size_offset; ++i)
|
||||
key_columns.emplace_back(block.safeGetByPosition(i).column);
|
||||
|
||||
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns, arena_holder.getComplexKeyArena());
|
||||
const size_t keys_size = keys_extractor.getKeysSize();
|
||||
|
||||
element_count += keys_size;
|
||||
|
||||
// Support old behaviour, where invalid date means 'open range'.
|
||||
const bool is_date = isDate(dict_struct.range_min->type);
|
||||
|
||||
const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(1).column);
|
||||
const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(2).column);
|
||||
const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column);
|
||||
const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column);
|
||||
|
||||
element_count += id_column.size();
|
||||
skip_keys_size_offset += 2;
|
||||
|
||||
for (const auto attribute_idx : collections::range(0, attributes.size()))
|
||||
/// NOTE(review): keys_extractor is constructed once per block, but the key loop below runs once
|
||||
/// per attribute and nothing visible here rewinds the extractor between attributes.
|
||||
/// extractCurrentKey() takes no index, so it presumably advances an internal cursor; if so, every
|
||||
/// attribute after the first would read past the last key. Confirm against DictionaryKeysExtractor
|
||||
/// and rewind it per attribute (or make the key loop the outer one) if that is the case.
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
|
||||
{
|
||||
const auto & attribute_column = *block.safeGetByPosition(attribute_idx + 3).column;
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column;
|
||||
auto & attribute = attributes[attribute_index];
|
||||
|
||||
for (const auto row_idx : collections::range(0, id_column.size()))
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
auto key = keys_extractor.extractCurrentKey();
|
||||
|
||||
RangeStorageType lower_bound;
|
||||
RangeStorageType upper_bound;
|
||||
|
||||
if (is_date)
|
||||
{
|
||||
lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, 0);
|
||||
upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, DATE_LUT_MAX_DAY_NUM + 1);
|
||||
lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0);
|
||||
upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, RANGE_MIN_NULL_VALUE);
|
||||
upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, RANGE_MAX_NULL_VALUE);
|
||||
lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE);
|
||||
upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE);
|
||||
}
|
||||
|
||||
setAttributeValue(attribute, id_column.getUInt(row_idx), Range{lower_bound, upper_bound}, attribute_column[row_idx]);
|
||||
/// A StringRef key is copied into the dictionary's own arena before it is stored;
|
||||
/// presumably because the extractor's arena does not outlive this block — confirm.
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
key = copyKeyInArena(key);
|
||||
|
||||
setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]);
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -346,22 +358,8 @@ void RangeHashedDictionary::loadData()
|
||||
"{}: dictionary source is empty and 'require_nonempty' property is set.");
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
{
|
||||
const auto & map_ref = std::get<Ptr<T>>(attribute.maps);
|
||||
bytes_allocated += sizeof(Collection<T>) + map_ref->getBufferSizeInBytes();
|
||||
bucket_count = map_ref->getBufferSizeInCells();
|
||||
}
|
||||
|
||||
template <>
|
||||
void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::calculateBytesAllocated()
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void RangeHashedDictionary<dictionary_key_type>::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
@ -371,14 +369,25 @@ void RangeHashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
const auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
|
||||
bytes_allocated += sizeof(CollectionType<ValueType>) + collection.getBufferSizeInBytes();
|
||||
bucket_count = collection.getBufferSizeInCells();
|
||||
|
||||
if constexpr (std::is_same_v<ValueType, StringRef>)
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
|
||||
bytes_allocated += complex_key_arena.size();
|
||||
}
|
||||
|
||||
RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute)
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
typename RangeHashedDictionary<dictionary_key_type>::Attribute RangeHashedDictionary<dictionary_key_type>::createAttribute(const DictionaryAttribute & dictionary_attribute)
|
||||
{
|
||||
Attribute attribute{dictionary_attribute.underlying_type, dictionary_attribute.is_nullable, {}, {}};
|
||||
|
||||
@ -391,7 +400,7 @@ RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const Di
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
|
||||
attribute.maps = std::make_unique<Collection<ValueType>>();
|
||||
attribute.maps = CollectionType<ValueType>();
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
|
||||
@ -399,29 +408,35 @@ RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const Di
|
||||
return attribute;
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void RangeHashedDictionary::getItemsImpl(
|
||||
void RangeHashedDictionary<dictionary_key_type>::getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
PaddedPODArray<UInt64> key_backup_storage;
|
||||
PaddedPODArray<RangeStorageType> range_backup_storage;
|
||||
|
||||
const PaddedPODArray<UInt64> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
|
||||
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, key_columns[1], range_backup_storage);
|
||||
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
const auto & collection = std::get<CollectionType<AttributeType>>(attribute.maps);
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
for (const auto row : collections::range(0, ids.size()))
|
||||
PaddedPODArray<RangeStorageType> range_backup_storage;
|
||||
const auto & dates = getColumnVectorData(this, key_columns.back(), range_backup_storage);
|
||||
|
||||
auto key_columns_copy = key_columns;
|
||||
key_columns_copy.pop_back();
|
||||
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
|
||||
const size_t keys_size = keys_extractor.getKeysSize();
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
const auto it = attr.find(ids[row]);
|
||||
auto key = keys_extractor.extractCurrentKey();
|
||||
const auto it = collection.find(key);
|
||||
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[row];
|
||||
const auto date = dates[key_index];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it = std::find_if(
|
||||
std::begin(ranges_and_values),
|
||||
@ -439,35 +454,38 @@ void RangeHashedDictionary::getItemsImpl(
|
||||
if constexpr (is_nullable)
|
||||
{
|
||||
if (value.has_value())
|
||||
set_value(row, *value, false);
|
||||
set_value(key_index, *value, false);
|
||||
else
|
||||
set_value(row, default_value_extractor[row], true);
|
||||
set_value(key_index, default_value_extractor[key_index], true);
|
||||
}
|
||||
else
|
||||
{
|
||||
set_value(row, *value, false);
|
||||
set_value(key_index, *value, false);
|
||||
}
|
||||
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (is_nullable)
|
||||
set_value(row, default_value_extractor[row], default_value_extractor.isNullAt(row));
|
||||
set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
|
||||
else
|
||||
set_value(row, default_value_extractor[row], false);
|
||||
set_value(key_index, default_value_extractor[key_index], false);
|
||||
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
|
||||
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
|
||||
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
template <typename T>
|
||||
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const UInt64 id, const Range & range, const Field & value)
|
||||
void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value)
|
||||
{
|
||||
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
|
||||
auto & map = *std::get<Ptr<ValueType>>(attribute.maps);
|
||||
auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
|
||||
|
||||
Value<ValueType> value_to_insert;
|
||||
|
||||
@ -490,61 +508,47 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const U
|
||||
}
|
||||
}
|
||||
|
||||
const auto it = map.find(id);
|
||||
const auto it = collection.find(key);
|
||||
|
||||
if (it)
|
||||
{
|
||||
auto & values = it->getMapped();
|
||||
|
||||
const auto insert_it
|
||||
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<ValueType> & lhs, const Range & rhs_range)
|
||||
{
|
||||
return lhs.range < rhs_range;
|
||||
});
|
||||
const auto insert_it = std::lower_bound(
|
||||
std::begin(values),
|
||||
std::end(values),
|
||||
range,
|
||||
[](const Value<ValueType> & lhs, const Range & rhs_range)
|
||||
{
|
||||
return lhs.range < rhs_range;
|
||||
});
|
||||
|
||||
values.insert(insert_it, std::move(value_to_insert));
|
||||
}
|
||||
else
|
||||
map.insert({id, Values<ValueType>{std::move(value_to_insert)}});
|
||||
{
|
||||
collection.insert({key, Values<ValueType>{std::move(value_to_insert)}});
|
||||
}
|
||||
}
|
||||
|
||||
/// Type-dispatching entry point for storing one value of `attribute`.
/// Passes attribute.type and a generic callback to callOnDictionaryAttributeType; the callback
/// forwards (attribute, key/id, range, value) to setAttributeValueImpl<AttributeType>.
void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const UInt64 id, const Range & range, const Field & value)
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void RangeHashedDictionary<dictionary_key_type>::setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value)
|
||||
{
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
/// AttributeType is read from the type tag handed to the callback; presumably
|
||||
/// callOnDictionaryAttributeType selects that tag from attribute.type (helper not visible here).
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
setAttributeValueImpl<AttributeType>(attribute, id, range, value);
|
||||
setAttributeValueImpl<AttributeType>(attribute, key, range, value);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
/// Returns the attribute registered under `attribute_name`.
/// Throws Exception with ErrorCodes::BAD_ARGUMENTS when the name is absent from
/// `attribute_index_by_name`.
const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const
{
    if (const auto found = attribute_index_by_name.find(attribute_name); found != attribute_index_by_name.end())
        return attributes[found->second];

    throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: no such attribute '{}'", full_name, attribute_name);
}
|
||||
|
||||
/// Same lookup as getAttribute, with an additional check that the stored underlying type
/// equals `type`. Throws Exception with ErrorCodes::TYPE_MISMATCH when it does not.
const RangeHashedDictionary::Attribute &
RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name, const AttributeUnderlyingType type) const
{
    const Attribute & found = getAttribute(attribute_name);

    const bool type_matches = (found.type == type);
    if (type_matches)
        return found;

    throw Exception(ErrorCodes::TYPE_MISMATCH, "attribute {} has type {}",
        attribute_name,
        toString(found.type));
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
template <typename RangeType>
|
||||
void RangeHashedDictionary::getIdsAndDates(
|
||||
PaddedPODArray<UInt64> & ids,
|
||||
void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
|
||||
PaddedPODArray<KeyType> & keys,
|
||||
PaddedPODArray<RangeType> & start_dates,
|
||||
PaddedPODArray<RangeType> & end_dates) const
|
||||
{
|
||||
@ -556,32 +560,33 @@ void RangeHashedDictionary::getIdsAndDates(
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
getIdsAndDates<ValueType>(attribute, ids, start_dates, end_dates);
|
||||
getKeysAndDates<ValueType>(attribute, keys, start_dates, end_dates);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
template <typename T, typename RangeType>
|
||||
void RangeHashedDictionary::getIdsAndDates(
|
||||
void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
|
||||
const Attribute & attribute,
|
||||
PaddedPODArray<UInt64> & ids,
|
||||
PaddedPODArray<KeyType> & keys,
|
||||
PaddedPODArray<RangeType> & start_dates,
|
||||
PaddedPODArray<RangeType> & end_dates) const
|
||||
{
|
||||
const HashMap<UInt64, Values<T>> & attr = *std::get<Ptr<T>>(attribute.maps);
|
||||
const auto & collection = std::get<CollectionType<T>>(attribute.maps);
|
||||
|
||||
ids.reserve(attr.size());
|
||||
start_dates.reserve(attr.size());
|
||||
end_dates.reserve(attr.size());
|
||||
keys.reserve(collection.size());
|
||||
start_dates.reserve(collection.size());
|
||||
end_dates.reserve(collection.size());
|
||||
|
||||
const bool is_date = isDate(dict_struct.range_min->type);
|
||||
|
||||
for (const auto & key : attr)
|
||||
for (const auto & key : collection)
|
||||
{
|
||||
for (const auto & value : key.getMapped())
|
||||
{
|
||||
ids.push_back(key.getKey());
|
||||
keys.push_back(key.getKey());
|
||||
start_dates.push_back(value.range.left);
|
||||
end_dates.push_back(value.range.right);
|
||||
|
||||
@ -592,22 +597,23 @@ void RangeHashedDictionary::getIdsAndDates(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
template <typename RangeType>
|
||||
Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_block_size) const
|
||||
Pipe RangeHashedDictionary<dictionary_key_type>::readImpl(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
PaddedPODArray<UInt64> ids;
|
||||
PaddedPODArray<KeyType> keys;
|
||||
PaddedPODArray<RangeType> start_dates;
|
||||
PaddedPODArray<RangeType> end_dates;
|
||||
getIdsAndDates(ids, start_dates, end_dates);
|
||||
getKeysAndDates(keys, start_dates, end_dates);
|
||||
|
||||
using RangeDictionarySourceType = RangeDictionarySource<RangeType>;
|
||||
static constexpr RangeDictionaryType range_dictionary_type = (dictionary_key_type == DictionaryKeyType::simple) ? RangeDictionaryType::simple : RangeDictionaryType::complex;
|
||||
using RangeDictionarySourceType = RangeDictionarySource<range_dictionary_type, RangeType>;
|
||||
|
||||
auto source = std::make_shared<RangeDictionarySourceType>(
|
||||
RangeDictionarySourceData<RangeType>(
|
||||
RangeDictionarySourceData<range_dictionary_type, RangeType>(
|
||||
shared_from_this(),
|
||||
column_names,
|
||||
std::move(ids),
|
||||
std::move(keys),
|
||||
std::move(start_dates),
|
||||
std::move(end_dates)),
|
||||
max_block_size);
|
||||
@ -615,10 +621,21 @@ Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_bloc
|
||||
return Pipe(source);
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
StringRef RangeHashedDictionary<dictionary_key_type>::copyKeyInArena(StringRef key)
|
||||
{
|
||||
size_t key_size = key.size;
|
||||
char * place_for_key = complex_key_arena.alloc(key_size);
|
||||
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(key.data), key_size);
|
||||
StringRef updated_key{place_for_key, key_size};
|
||||
return updated_key;
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
struct RangeHashedDictionaryCallGetSourceImpl
|
||||
{
|
||||
Pipe pipe;
|
||||
const RangeHashedDictionary * dict;
|
||||
const RangeHashedDictionary<dictionary_key_type> * dict;
|
||||
const Names * column_names;
|
||||
size_t max_block_size;
|
||||
|
||||
@ -627,15 +644,16 @@ struct RangeHashedDictionaryCallGetSourceImpl
|
||||
{
|
||||
const auto & type = dict->dict_struct.range_min->type;
|
||||
if (pipe.empty() && dynamic_cast<const DataTypeNumberBase<RangeType> *>(type.get()))
|
||||
pipe = dict->readImpl<RangeType>(*column_names, max_block_size);
|
||||
pipe = dict->template readImpl<RangeType>(*column_names, max_block_size);
|
||||
}
|
||||
};
|
||||
|
||||
Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_size) const
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
Pipe RangeHashedDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using ListType = TypeList<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Int128, Float32, Float64>;
|
||||
|
||||
RangeHashedDictionaryCallGetSourceImpl callable;
|
||||
RangeHashedDictionaryCallGetSourceImpl<dictionary_key_type> callable;
|
||||
callable.dict = this;
|
||||
callable.column_names = &column_names;
|
||||
callable.max_block_size = max_block_size;
|
||||
@ -653,7 +671,7 @@ Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_si
|
||||
|
||||
void registerDictionaryRangeHashed(DictionaryFactory & factory)
|
||||
{
|
||||
auto create_layout = [=](const std::string & full_name,
|
||||
auto create_layout_simple = [=](const std::string & full_name,
|
||||
const DictionaryStructure & dict_struct,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
@ -672,9 +690,32 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory)
|
||||
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
||||
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
||||
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
||||
return std::make_unique<RangeHashedDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
||||
return std::make_unique<RangeHashedDictionary<DictionaryKeyType::simple>>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
||||
};
|
||||
factory.registerLayout("range_hashed", create_layout, false);
|
||||
factory.registerLayout("range_hashed", create_layout_simple, false);
|
||||
|
||||
auto create_layout_complex = [=](const std::string & full_name,
|
||||
const DictionaryStructure & dict_struct,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
DictionarySourcePtr source_ptr,
|
||||
ContextPtr /* context */,
|
||||
bool /*created_from_ddl*/) -> DictionaryPtr
|
||||
{
|
||||
if (dict_struct.id)
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for dictionary of layout 'complex_key_range_hashed'");
|
||||
|
||||
if (!dict_struct.range_min || !dict_struct.range_max)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"{}: dictionary of layout 'complex_key_range_hashed' requires .structure.range_min and .structure.range_max",
|
||||
full_name);
|
||||
|
||||
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
||||
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
||||
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
||||
return std::make_unique<RangeHashedDictionary<DictionaryKeyType::complex>>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
||||
};
|
||||
factory.registerLayout("complex_key_range_hashed", create_layout_complex, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -16,9 +16,25 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using RangeStorageType = Int64;
|
||||
|
||||
/// Pair of range bounds stored next to each dictionary value.
/// Member order matters: callers build it positionally as Range{lower_bound, upper_bound}.
struct Range
|
||||
{
|
||||
/// Start of the range (what the dictionary reports as the start date when it is read back).
RangeStorageType left;
|
||||
/// End of the range (reported as the end date when read back).
RangeStorageType right;
|
||||
|
||||
/// Defined elsewhere; presumably tells whether `date` is a representable date value — confirm.
static bool isCorrectDate(const RangeStorageType & date);
|
||||
/// Defined elsewhere; used by lookups to test whether `value` falls in this range.
|
||||
/// NOTE(review): whether the bounds are inclusive is not visible here — confirm.
bool contains(const RangeStorageType & value) const;
|
||||
};
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
class RangeHashedDictionary final : public IDictionary
|
||||
{
|
||||
public:
|
||||
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::simple, UInt64, StringRef>;
|
||||
static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by hashed dictionary");
|
||||
|
||||
RangeHashedDictionary(
|
||||
const StorageID & dict_id_,
|
||||
const DictionaryStructure & dict_struct_,
|
||||
@ -59,7 +75,7 @@ public:
|
||||
|
||||
bool isInjective(const std::string & attribute_name) const override
|
||||
{
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
return dict_struct.getAttribute(attribute_name).injective;
|
||||
}
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
|
||||
@ -73,19 +89,8 @@ public:
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
using RangeStorageType = Int64;
|
||||
|
||||
Pipe read(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
struct Range
|
||||
{
|
||||
RangeStorageType left;
|
||||
RangeStorageType right;
|
||||
|
||||
static bool isCorrectDate(const RangeStorageType & date);
|
||||
bool contains(const RangeStorageType & value) const;
|
||||
};
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
struct Value final
|
||||
@ -96,10 +101,12 @@ private:
|
||||
|
||||
template <typename T>
|
||||
using Values = std::vector<Value<T>>;
|
||||
template <typename T>
|
||||
using Collection = HashMap<UInt64, Values<T>>;
|
||||
template <typename T>
|
||||
using Ptr = std::unique_ptr<Collection<T>>;
|
||||
|
||||
template <typename Value>
|
||||
using CollectionType = std::conditional_t<
|
||||
dictionary_key_type == DictionaryKeyType::simple,
|
||||
HashMap<UInt64, Values<Value>>,
|
||||
HashMapWithSavedHash<StringRef, Values<Value>, DefaultHash<StringRef>>>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
@ -108,27 +115,27 @@ private:
|
||||
bool is_nullable;
|
||||
|
||||
std::variant<
|
||||
Ptr<UInt8>,
|
||||
Ptr<UInt16>,
|
||||
Ptr<UInt32>,
|
||||
Ptr<UInt64>,
|
||||
Ptr<UInt128>,
|
||||
Ptr<UInt256>,
|
||||
Ptr<Int8>,
|
||||
Ptr<Int16>,
|
||||
Ptr<Int32>,
|
||||
Ptr<Int64>,
|
||||
Ptr<Int128>,
|
||||
Ptr<Int256>,
|
||||
Ptr<Decimal32>,
|
||||
Ptr<Decimal64>,
|
||||
Ptr<Decimal128>,
|
||||
Ptr<Decimal256>,
|
||||
Ptr<Float32>,
|
||||
Ptr<Float64>,
|
||||
Ptr<UUID>,
|
||||
Ptr<StringRef>,
|
||||
Ptr<Array>>
|
||||
CollectionType<UInt8>,
|
||||
CollectionType<UInt16>,
|
||||
CollectionType<UInt32>,
|
||||
CollectionType<UInt64>,
|
||||
CollectionType<UInt128>,
|
||||
CollectionType<UInt256>,
|
||||
CollectionType<Int8>,
|
||||
CollectionType<Int16>,
|
||||
CollectionType<Int32>,
|
||||
CollectionType<Int64>,
|
||||
CollectionType<Int128>,
|
||||
CollectionType<Int256>,
|
||||
CollectionType<Decimal32>,
|
||||
CollectionType<Decimal64>,
|
||||
CollectionType<Decimal128>,
|
||||
CollectionType<Decimal256>,
|
||||
CollectionType<Float32>,
|
||||
CollectionType<Float64>,
|
||||
CollectionType<UUID>,
|
||||
CollectionType<StringRef>,
|
||||
CollectionType<Array>>
|
||||
maps;
|
||||
std::unique_ptr<Arena> string_arena;
|
||||
};
|
||||
@ -137,9 +144,6 @@ private:
|
||||
|
||||
void loadData();
|
||||
|
||||
template <typename T>
|
||||
void addAttributeSize(const Attribute & attribute);
|
||||
|
||||
void calculateBytesAllocated();
|
||||
|
||||
static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute);
|
||||
@ -151,35 +155,30 @@ private:
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename AttributeType>
|
||||
ColumnUInt8::Ptr hasKeysImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<UInt64> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
size_t & keys_found) const;
|
||||
|
||||
template <typename T>
|
||||
static void setAttributeValueImpl(Attribute & attribute, const UInt64 id, const Range & range, const Field & value);
|
||||
static void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value);
|
||||
|
||||
static void setAttributeValue(Attribute & attribute, const UInt64 id, const Range & range, const Field & value);
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const;
|
||||
static void setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value);
|
||||
|
||||
template <typename RangeType>
|
||||
void getIdsAndDates(PaddedPODArray<UInt64> & ids, PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const;
|
||||
void getKeysAndDates(
|
||||
PaddedPODArray<KeyType> & keys,
|
||||
PaddedPODArray<RangeType> & start_dates,
|
||||
PaddedPODArray<RangeType> & end_dates) const;
|
||||
|
||||
template <typename T, typename RangeType>
|
||||
void getIdsAndDates(
|
||||
void getKeysAndDates(
|
||||
const Attribute & attribute,
|
||||
PaddedPODArray<UInt64> & ids,
|
||||
PaddedPODArray<KeyType> & keys,
|
||||
PaddedPODArray<RangeType> & start_dates,
|
||||
PaddedPODArray<RangeType> & end_dates) const;
|
||||
|
||||
template <typename RangeType>
|
||||
Pipe readImpl(const Names & column_names, size_t max_block_size) const;
|
||||
|
||||
StringRef copyKeyInArena(StringRef key);
|
||||
|
||||
template <DictionaryKeyType>
|
||||
friend struct RangeHashedDictionaryCallGetSourceImpl;
|
||||
|
||||
const DictionaryStructure dict_struct;
|
||||
@ -189,6 +188,7 @@ private:
|
||||
|
||||
std::map<std::string, size_t> attribute_index_by_name;
|
||||
std::vector<Attribute> attributes;
|
||||
Arena complex_key_arena;
|
||||
|
||||
size_t bytes_allocated = 0;
|
||||
size_t element_count = 0;
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using DictionaryConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
|
||||
|
||||
/// Convert dictionary AST to Poco::AbstractConfiguration
|
||||
@ -13,4 +14,5 @@ using DictionaryConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfigurati
|
||||
/// Can throw exception if query is ill-formed
|
||||
DictionaryConfigurationPtr
|
||||
getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr context, const std::string & database_ = "");
|
||||
|
||||
}
|
||||
|
@ -250,12 +250,23 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!WhichDataType(key_column_type).isUInt64())
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Second argument of function {} must be UInt64 when dictionary is range. Actual type {}.",
|
||||
getName(),
|
||||
key_column_with_type.type->getName());
|
||||
/// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
|
||||
ColumnPtr key_column = key_column_with_type.column->convertToFullColumnIfConst();
|
||||
DataTypePtr key_column_type = key_column_with_type.type;
|
||||
|
||||
Columns key_columns;
|
||||
DataTypes key_types;
|
||||
|
||||
if (isTuple(key_column_type))
|
||||
{
|
||||
key_columns = assert_cast<const ColumnTuple &>(*key_column).getColumnsCopy();
|
||||
key_types = assert_cast<const DataTypeTuple &>(*key_column_type).getElements();
|
||||
}
|
||||
else
|
||||
{
|
||||
key_columns = {key_column, range_col};
|
||||
key_types = {std::make_shared<DataTypeUInt64>(), range_col_type};
|
||||
}
|
||||
|
||||
return dictionary->hasKeys({key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type});
|
||||
}
|
||||
@ -487,18 +498,29 @@ public:
|
||||
}
|
||||
else if (dictionary_key_type == DictionaryKeyType::range)
|
||||
{
|
||||
if (!WhichDataType(key_col_with_type.type).isUInt64())
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Third argument of function {} must be UInt64 when dictionary is range. Actual type {}.",
|
||||
getName(),
|
||||
key_col_with_type.type->getName());
|
||||
/// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
|
||||
ColumnPtr key_column = key_col_with_type.column->convertToFullColumnIfConst();
|
||||
DataTypePtr key_column_type = key_col_with_type.type;
|
||||
|
||||
Columns key_columns;
|
||||
DataTypes key_types;
|
||||
|
||||
if (isTuple(key_column_type))
|
||||
{
|
||||
key_columns = assert_cast<const ColumnTuple &>(*key_column).getColumnsCopy();
|
||||
key_types = assert_cast<const DataTypeTuple &>(*key_column_type).getElements();
|
||||
}
|
||||
else
|
||||
{
|
||||
key_columns = {key_column, range_col};
|
||||
key_types = {std::make_shared<DataTypeUInt64>(), range_col_type};
|
||||
}
|
||||
|
||||
result = executeDictionaryRequest(
|
||||
dictionary,
|
||||
attribute_names,
|
||||
{key_column, range_col},
|
||||
{std::make_shared<DataTypeUInt64>(), range_col_type},
|
||||
key_columns,
|
||||
key_types,
|
||||
result_type,
|
||||
default_cols);
|
||||
}
|
||||
|
@ -0,0 +1,58 @@
|
||||
Dictionary not nullable
|
||||
dictGet
|
||||
0.2
|
||||
0.2
|
||||
0.2
|
||||
0.2
|
||||
0.4
|
||||
dictHas
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
select columns from dictionary
|
||||
allColumns
|
||||
2019-05-05 2019-05-20 1 1 0.33
|
||||
2019-05-21 2019-05-30 1 1 0.42
|
||||
2019-05-21 2019-05-30 2 2 0.46
|
||||
noColumns
|
||||
1
|
||||
1
|
||||
1
|
||||
onlySpecificColumns
|
||||
1 2019-05-05 0.33
|
||||
1 2019-05-21 0.42
|
||||
2 2019-05-21 0.46
|
||||
onlySpecificColumn
|
||||
0.33
|
||||
0.42
|
||||
0.46
|
||||
Dictionary nullable
|
||||
dictGet
|
||||
0.2
|
||||
0.2
|
||||
0.2
|
||||
0.2
|
||||
0.4
|
||||
dictHas
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
select columns from dictionary
|
||||
allColumns
|
||||
2019-05-05 2019-05-20 1 1 0.33
|
||||
2019-05-21 2019-05-30 1 1 0.42
|
||||
2019-05-21 2019-05-30 2 2 \N
|
||||
noColumns
|
||||
1
|
||||
1
|
||||
1
|
||||
onlySpecificColumns
|
||||
1 2019-05-05 0.33
|
||||
1 2019-05-21 0.42
|
||||
2 2019-05-21 \N
|
||||
onlySpecificColumn
|
||||
0.33
|
||||
0.42
|
||||
\N
|
@ -0,0 +1,109 @@
|
||||
-- Source table backing the dictionary under test. Each row carries a Tax value
-- for a (CountryID, CountryKey) pair together with a StartDate/EndDate pair.
-- The IF EXISTS guard lets the script start cleanly if an earlier run left the table behind.
DROP TABLE IF EXISTS date_table;
|
||||
CREATE TABLE date_table
|
||||
(
|
||||
CountryID UInt64,
|
||||
CountryKey String,
|
||||
StartDate Date,
|
||||
EndDate Date,
|
||||
Tax Float64
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY CountryID;
|
||||
|
||||
-- Key (1, '1') gets two date ranges (2019-05-05..2019-05-20 and 2019-05-21..2019-05-30);
-- key (2, '2') gets a single range (2019-05-21..2019-05-30).
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
|
||||
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
|
||||
INSERT INTO date_table VALUES(2, '2', toDate('2019-05-21'), toDate('2019-05-30'), 0.46);
|
||||
|
||||
-- Dictionary under test: COMPLEX_KEY_RANGE_HASHED layout with a two-column primary key
-- (CountryID, CountryKey) and a range taken from StartDate/EndDate.
-- It is loaded from date_table; Tax declares 0.2 as its DEFAULT.
DROP DICTIONARY IF EXISTS range_dictionary;
|
||||
CREATE DICTIONARY range_dictionary
|
||||
(
|
||||
CountryID UInt64,
|
||||
CountryKey String,
|
||||
StartDate Date,
|
||||
EndDate Date,
|
||||
Tax Float64 DEFAULT 0.2
|
||||
)
|
||||
PRIMARY KEY CountryID, CountryKey
|
||||
SOURCE(CLICKHOUSE(TABLE 'date_table'))
|
||||
LIFETIME(MIN 1 MAX 1000)
|
||||
LAYOUT(COMPLEX_KEY_RANGE_HASHED())
|
||||
RANGE(MIN StartDate MAX EndDate);
|
||||
|
||||
-- Queries against range_dictionary. The string-literal SELECTs print section
-- headers so the output can be matched against the reference file.
SELECT 'Dictionary not nullable';
|
||||
SELECT 'dictGet';
|
||||
-- The key is passed as a tuple (CountryID, CountryKey); the date is the range argument.
-- NOTE(review): the first three calls use keys/dates that fall between the StartDate and
-- EndDate of inserted rows (Tax 0.33, 0.42, 0.46), yet the reference output recorded with
-- this test lists 0.2 (the DEFAULT) for them -- confirm whether that is intended.
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(1), '1'), toDate('2019-05-15'));
|
||||
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(1), '1'), toDate('2019-05-29'));
|
||||
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||
-- 2019-05-31 is later than the only EndDate inserted for key (2, '2').
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||
-- Same lookup as above, but with an explicit fallback value of 0.4.
SELECT dictGetOrDefault('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'), 0.4);
|
||||
SELECT 'dictHas';
|
||||
-- Same four key/date combinations as the dictGet calls; only the last date lies
-- outside every inserted range.
SELECT dictHas('range_dictionary', (toUInt64(1), '1'), toDate('2019-05-15'));
|
||||
SELECT dictHas('range_dictionary', (toUInt64(1), '1'), toDate('2019-05-29'));
|
||||
SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||
SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||
-- Read the dictionary as a table: all columns, no columns, a subset, and a single attribute.
SELECT 'select columns from dictionary';
|
||||
SELECT 'allColumns';
|
||||
SELECT * FROM range_dictionary;
|
||||
SELECT 'noColumns';
|
||||
SELECT 1 FROM range_dictionary;
|
||||
SELECT 'onlySpecificColumns';
|
||||
SELECT CountryID, StartDate, Tax FROM range_dictionary;
|
||||
SELECT 'onlySpecificColumn';
|
||||
SELECT Tax FROM range_dictionary;
|
||||
|
||||
-- Teardown: date_table is dropped here so the next section can re-create it
-- with a Nullable Tax column.
DROP TABLE date_table;
|
||||
DROP DICTIONARY range_dictionary;
|
||||
|
||||
-- Second variant of the source table: same columns as before, but Tax is Nullable(Float64).
-- The previous date_table was dropped earlier in this script, so no IF EXISTS guard is used here.
CREATE TABLE date_table
|
||||
(
|
||||
CountryID UInt64,
|
||||
CountryKey String,
|
||||
StartDate Date,
|
||||
EndDate Date,
|
||||
Tax Nullable(Float64)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY CountryID;
|
||||
|
||||
-- Same keys and ranges as the non-nullable section, except the row for key (2, '2')
-- stores NULL in Tax.
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
|
||||
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
|
||||
INSERT INTO date_table VALUES(2, '2', toDate('2019-05-21'), toDate('2019-05-30'), NULL);
|
||||
|
||||
-- Nullable variant of the dictionary under test: COMPLEX_KEY_RANGE_HASHED layout with a
-- two-column primary key (CountryID, CountryKey), a StartDate/EndDate range, and a
-- Nullable(Float64) Tax attribute whose DEFAULT is 0.2.
-- Drop any dictionary left over from an earlier (aborted) run first, so the CREATE below
-- cannot fail on a name clash; this mirrors the guard used for range_dictionary.
DROP DICTIONARY IF EXISTS range_dictionary_nullable;
|
||||
CREATE DICTIONARY range_dictionary_nullable
|
||||
(
|
||||
CountryID UInt64,
|
||||
CountryKey String,
|
||||
StartDate Date,
|
||||
EndDate Date,
|
||||
Tax Nullable(Float64) DEFAULT 0.2
|
||||
)
|
||||
PRIMARY KEY CountryID, CountryKey
|
||||
SOURCE(CLICKHOUSE(TABLE 'date_table'))
|
||||
LIFETIME(MIN 1 MAX 1000)
|
||||
LAYOUT(COMPLEX_KEY_RANGE_HASHED())
|
||||
RANGE(MIN StartDate MAX EndDate);
|
||||
|
||||
-- Queries against range_dictionary_nullable. They repeat the non-nullable section
-- statement for statement; the string-literal SELECTs print section headers for
-- matching against the reference file.
SELECT 'Dictionary nullable';
|
||||
SELECT 'dictGet';
|
||||
-- The key is passed as a tuple (CountryID, CountryKey); the date is the range argument.
-- NOTE(review): the first three calls use keys/dates that fall between the StartDate and
-- EndDate of inserted rows (Tax 0.33, 0.42, NULL), yet the reference output recorded with
-- this test lists 0.2 (the DEFAULT) for them -- confirm whether that is intended.
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(1), '1'), toDate('2019-05-15'));
|
||||
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(1), '1'), toDate('2019-05-29'));
|
||||
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||
-- 2019-05-31 is later than the only EndDate inserted for key (2, '2').
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||
-- Same lookup as above, but with an explicit fallback value of 0.4.
SELECT dictGetOrDefault('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'), 0.4);
|
||||
SELECT 'dictHas';
|
||||
-- Same four key/date combinations as the dictGet calls; only the last date lies
-- outside every inserted range.
SELECT dictHas('range_dictionary_nullable', (toUInt64(1), '1'), toDate('2019-05-15'));
|
||||
SELECT dictHas('range_dictionary_nullable', (toUInt64(1), '1'), toDate('2019-05-29'));
|
||||
SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||
SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||
-- Read the dictionary as a table: all columns, no columns, a subset, and a single attribute.
SELECT 'select columns from dictionary';
|
||||
SELECT 'allColumns';
|
||||
SELECT * FROM range_dictionary_nullable;
|
||||
SELECT 'noColumns';
|
||||
SELECT 1 FROM range_dictionary_nullable;
|
||||
SELECT 'onlySpecificColumns';
|
||||
SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable;
|
||||
SELECT 'onlySpecificColumn';
|
||||
SELECT Tax FROM range_dictionary_nullable;
|
||||
|
||||
-- Teardown of the objects created for the nullable section.
DROP TABLE date_table;
|
||||
DROP DICTIONARY range_dictionary_nullable;
|
Loading…
Reference in New Issue
Block a user