mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-17 20:02:05 +00:00
Added ComplexKeyRangeHashed dictionary
This commit is contained in:
parent
40f5e06a8d
commit
e12820ecb2
48
src/Dictionaries/DictionaryHelpers.cpp
Normal file
48
src/Dictionaries/DictionaryHelpers.cpp
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#include "DictionaryHelpers.h"
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
/// Rebuilds key columns from serialized complex keys.
/// One empty column is created for every attribute listed in `dictionary_structure.key`.
/// Then, for each key with index in [start, end), the key bytes are handed to the columns
/// in attribute order: every column deserializes its value and returns the position
/// from which the next column continues.
MutableColumns deserializeColumnsFromKeys(
    const DictionaryStructure & dictionary_structure,
    const PaddedPODArray<StringRef> & keys,
    size_t start,
    size_t end)
{
    const auto & key_attributes = *dictionary_structure.key;

    MutableColumns result_columns;
    result_columns.reserve(key_attributes.size());

    for (const DictionaryAttribute & key_attribute : key_attributes)
        result_columns.emplace_back(key_attribute.type->createColumn());

    for (size_t key_index = start; key_index < end; ++key_index)
    {
        const auto * position = keys[key_index].data;

        /// Columns consume the serialized key one after another.
        for (size_t column_index = 0; column_index < result_columns.size(); ++column_index)
            position = result_columns[column_index]->deserializeAndInsertFromArena(position);
    }

    return result_columns;
}
|
||||||
|
|
||||||
|
/// Deserializes key columns for keys with index in [start, end) (see deserializeColumnsFromKeys)
/// and pairs every resulting column with the type and name of the corresponding
/// key attribute from `dictionary_structure.key`.
ColumnsWithTypeAndName deserializeColumnsWithTypeAndNameFromKeys(
    const DictionaryStructure & dictionary_structure,
    const PaddedPODArray<StringRef> & keys,
    size_t start,
    size_t end)
{
    MutableColumns columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end);
    const auto & key_attributes = *dictionary_structure.key;

    ColumnsWithTypeAndName result;
    /// The final size is known up front, so allocate once.
    result.reserve(columns.size());

    for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i)
    {
        const auto & key_attribute = key_attributes[i];
        result.emplace_back(ColumnWithTypeAndName{std::move(columns[i]), key_attribute.type, key_attribute.name});
    }

    return result;
}
|
||||||
|
|
||||||
|
}
|
@ -497,6 +497,20 @@ private:
|
|||||||
Arena * complex_key_arena;
|
Arena * complex_key_arena;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Deserialize the key columns described by the dictionary structure from the serialized keys with indexes in [start, end).
|
||||||
|
MutableColumns deserializeColumnsFromKeys(
|
||||||
|
const DictionaryStructure & dictionary_structure,
|
||||||
|
const PaddedPODArray<StringRef> & keys,
|
||||||
|
size_t start,
|
||||||
|
size_t end);
|
||||||
|
|
||||||
|
/// Deserialize the key columns for keys with indexes in [start, end) and return each column together with the type and name of its key attribute.
|
||||||
|
ColumnsWithTypeAndName deserializeColumnsWithTypeAndNameFromKeys(
|
||||||
|
const DictionaryStructure & dictionary_structure,
|
||||||
|
const PaddedPODArray<StringRef> & keys,
|
||||||
|
size_t start,
|
||||||
|
size_t end);
|
||||||
|
|
||||||
/** Merge block with blocks from stream. If there are duplicate keys in block they are filtered out.
|
/** Merge block with blocks from stream. If there are duplicate keys in block they are filtered out.
|
||||||
* In result block_to_update will be merged with blocks from stream.
|
* In result block_to_update will be merged with blocks from stream.
|
||||||
* Note: readPrefix readImpl readSuffix will be called on stream object during function execution.
|
* Note: readPrefix readImpl readSuffix will be called on stream object during function execution.
|
||||||
|
@ -29,7 +29,7 @@ DictionarySourceData::DictionarySourceData(
|
|||||||
, key_type(DictionaryInputStreamKeyType::ComplexKey)
|
, key_type(DictionaryInputStreamKeyType::ComplexKey)
|
||||||
{
|
{
|
||||||
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
||||||
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
|
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dictionary_structure, keys, 0, keys.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
DictionarySourceData::DictionarySourceData(
|
DictionarySourceData::DictionarySourceData(
|
||||||
@ -158,32 +158,4 @@ Block DictionarySourceData::fillBlock(
|
|||||||
return Block(block_columns);
|
return Block(block_columns);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DictionarySourceData::fillKeyColumns(
|
|
||||||
const PaddedPODArray<StringRef> & keys,
|
|
||||||
size_t start,
|
|
||||||
size_t size,
|
|
||||||
const DictionaryStructure & dictionary_structure,
|
|
||||||
ColumnsWithTypeAndName & result)
|
|
||||||
{
|
|
||||||
MutableColumns columns;
|
|
||||||
columns.reserve(dictionary_structure.key->size());
|
|
||||||
|
|
||||||
for (const DictionaryAttribute & attribute : *dictionary_structure.key)
|
|
||||||
columns.emplace_back(attribute.type->createColumn());
|
|
||||||
|
|
||||||
for (size_t index = start; index < size; ++index)
|
|
||||||
{
|
|
||||||
const auto & key = keys[index];
|
|
||||||
const auto *ptr = key.data;
|
|
||||||
for (auto & column : columns)
|
|
||||||
ptr = column->deserializeAndInsertFromArena(ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i)
|
|
||||||
{
|
|
||||||
const auto & dictionary_attribute = (*dictionary_structure.key)[i];
|
|
||||||
result.emplace_back(ColumnWithTypeAndName{std::move(columns[i]), dictionary_attribute.type, dictionary_attribute.name});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -51,13 +51,6 @@ private:
|
|||||||
const DataTypes & types,
|
const DataTypes & types,
|
||||||
ColumnsWithTypeAndName && view) const;
|
ColumnsWithTypeAndName && view) const;
|
||||||
|
|
||||||
static void fillKeyColumns(
|
|
||||||
const PaddedPODArray<StringRef> & keys,
|
|
||||||
size_t start,
|
|
||||||
size_t size,
|
|
||||||
const DictionaryStructure & dictionary_structure,
|
|
||||||
ColumnsWithTypeAndName & result);
|
|
||||||
|
|
||||||
const size_t num_rows;
|
const size_t num_rows;
|
||||||
std::shared_ptr<const IDictionary> dictionary;
|
std::shared_ptr<const IDictionary> dictionary;
|
||||||
std::unordered_set<std::string> column_names;
|
std::unordered_set<std::string> column_names;
|
||||||
|
@ -134,42 +134,11 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
|
|||||||
if (id->name.empty())
|
if (id->name.empty())
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'id' cannot be empty");
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'id' cannot be empty");
|
||||||
|
|
||||||
const char * range_default_type = "Date";
|
if (!id->expression.empty())
|
||||||
if (config.has(structure_prefix + ".range_min"))
|
|
||||||
range_min.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_min", range_default_type));
|
|
||||||
|
|
||||||
if (config.has(structure_prefix + ".range_max"))
|
|
||||||
range_max.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_max", range_default_type));
|
|
||||||
|
|
||||||
if (range_min.has_value() != range_max.has_value())
|
|
||||||
{
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
||||||
"Dictionary structure should have both 'range_min' and 'range_max' either specified or not.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (range_min && range_max && !range_min->type->equals(*range_max->type))
|
|
||||||
{
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
||||||
"Dictionary structure 'range_min' and 'range_max' should have same type, "
|
|
||||||
"'range_min' type: {},"
|
|
||||||
"'range_max' type: {}",
|
|
||||||
range_min->type->getName(),
|
|
||||||
range_max->type->getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (range_min)
|
|
||||||
{
|
|
||||||
if (!range_min->type->isValueRepresentedByInteger())
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
||||||
"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
|
|
||||||
" Actual 'range_min' and 'range_max' type is {}",
|
|
||||||
range_min->type->getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))
|
|
||||||
has_expressions = true;
|
has_expressions = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parseRangeConfiguration(config, structure_prefix);
|
||||||
attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false);
|
attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false);
|
||||||
|
|
||||||
for (size_t i = 0; i < attributes.size(); ++i)
|
for (size_t i = 0; i < attributes.size(); ++i)
|
||||||
@ -439,4 +408,42 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
|||||||
return res_attributes;
|
return res_attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads the optional 'range_min' and 'range_max' special attributes from the configuration
/// (type "Date" is used when none is specified) and validates them:
///  - either both or none of them must be present;
///  - both must have the same type;
///  - the type must be representable by an integer.
/// Sets has_expressions if any of the two attributes has a non-empty expression.
/// Throws Exception with BAD_ARGUMENTS code when validation fails.
void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix)
{
    const char * default_range_type = "Date";
    const std::string range_min_key = structure_prefix + ".range_min";
    const std::string range_max_key = structure_prefix + ".range_max";

    if (config.has(range_min_key))
        range_min.emplace(makeDictionaryTypedSpecialAttribute(config, range_min_key, default_range_type));

    if (config.has(range_max_key))
        range_max.emplace(makeDictionaryTypedSpecialAttribute(config, range_max_key, default_range_type));

    if (range_min.has_value() != range_max.has_value())
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Dictionary structure should have both 'range_min' and 'range_max' either specified or not.");

    /// After the check above both bounds are either present or absent; nothing more to do without them.
    if (!range_min)
        return;

    if (!range_min->type->equals(*range_max->type))
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Dictionary structure 'range_min' and 'range_max' should have same type, "
            "'range_min' type: {},"
            "'range_max' type: {}",
            range_min->type->getName(),
            range_max->type->getName());

    if (!range_min->type->isValueRepresentedByInteger())
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
            " Actual 'range_min' and 'range_max' type is {}",
            range_min->type->getName());

    const bool range_has_expression = !range_min->expression.empty() || !range_max->expression.empty();
    if (range_has_expression)
        has_expressions = true;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -67,8 +67,9 @@ using DictionaryLifetime = ExternalLoadableLifetime;
|
|||||||
* - null_value, used as a default value for non-existent entries in the dictionary,
|
* - null_value, used as a default value for non-existent entries in the dictionary,
|
||||||
* decimal representation for numeric attributes;
|
* decimal representation for numeric attributes;
|
||||||
* - hierarchical, whether this attribute defines a hierarchy;
|
* - hierarchical, whether this attribute defines a hierarchy;
|
||||||
* - injective, whether the mapping to parent is injective (can be used for optimization of GROUP BY?)
|
* - injective, whether the mapping to parent is injective (can be used for optimization of GROUP BY?);
|
||||||
* - is_object_id, used in mongo dictionary, converts string key to objectid
|
* - is_object_id, used in mongo dictionary, converts string key to objectid;
|
||||||
|
* - is_nullable, is attribute nullable;
|
||||||
*/
|
*/
|
||||||
struct DictionaryAttribute final
|
struct DictionaryAttribute final
|
||||||
{
|
{
|
||||||
@ -153,6 +154,10 @@ private:
|
|||||||
const Poco::Util::AbstractConfiguration & config,
|
const Poco::Util::AbstractConfiguration & config,
|
||||||
const std::string & config_prefix,
|
const std::string & config_prefix,
|
||||||
bool complex_key_attributes);
|
bool complex_key_attributes);
|
||||||
|
|
||||||
|
/// Parse and validate the 'range_min' and 'range_max' special attributes of the structure.
|
||||||
|
void parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -133,6 +133,29 @@ void ExternalQueryBuilder::composeLoadAllQuery(WriteBuffer & out) const
|
|||||||
|
|
||||||
writeQuoted(key.name, out);
|
writeQuoted(key.name, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dict_struct.range_min && dict_struct.range_max)
|
||||||
|
{
|
||||||
|
writeString(", ", out);
|
||||||
|
|
||||||
|
if (!dict_struct.range_min->expression.empty())
|
||||||
|
{
|
||||||
|
writeParenthesisedString(dict_struct.range_min->expression, out);
|
||||||
|
writeString(" AS ", out);
|
||||||
|
}
|
||||||
|
|
||||||
|
writeQuoted(dict_struct.range_min->name, out);
|
||||||
|
|
||||||
|
writeString(", ", out);
|
||||||
|
|
||||||
|
if (!dict_struct.range_max->expression.empty())
|
||||||
|
{
|
||||||
|
writeParenthesisedString(dict_struct.range_max->expression, out);
|
||||||
|
writeString(" AS ", out);
|
||||||
|
}
|
||||||
|
|
||||||
|
writeQuoted(dict_struct.range_max->name, out);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto & attr : dict_struct.attributes)
|
for (const auto & attr : dict_struct.attributes)
|
||||||
|
@ -64,7 +64,7 @@ public:
|
|||||||
|
|
||||||
bool isInjective(const std::string & attribute_name) const override
|
bool isInjective(const std::string & attribute_name) const override
|
||||||
{
|
{
|
||||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
return dict_struct.getAttribute(attribute_name).injective;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This dictionary always reports the complex key type.
DictionaryKeyType getKeyType() const override
{
    return DictionaryKeyType::complex;
}
|
||||||
|
@ -14,170 +14,213 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
template <typename RangeType>
|
/// Kind of key of a range dictionary; selects the key representation used by the range dictionary source.
enum class RangeDictionaryType
{
    /// Keys are stored as UInt64.
    simple,
    /// Keys are stored as StringRef.
    complex,
};
|
||||||
|
|
||||||
|
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||||
class RangeDictionarySourceData
|
class RangeDictionarySourceData
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using Key = UInt64;
|
|
||||||
|
using KeyType = std::conditional_t<range_dictionary_type == RangeDictionaryType::simple, UInt64, StringRef>;
|
||||||
|
|
||||||
RangeDictionarySourceData(
|
RangeDictionarySourceData(
|
||||||
std::shared_ptr<const IDictionary> dictionary,
|
std::shared_ptr<const IDictionary> dictionary,
|
||||||
const Names & column_names,
|
const Names & column_names,
|
||||||
PaddedPODArray<Key> && ids_to_fill,
|
PaddedPODArray<KeyType> && keys,
|
||||||
PaddedPODArray<RangeType> && start_dates,
|
PaddedPODArray<RangeType> && start_dates,
|
||||||
PaddedPODArray<RangeType> && end_dates);
|
PaddedPODArray<RangeType> && end_dates);
|
||||||
|
|
||||||
Block getBlock(size_t start, size_t length) const;
|
Block getBlock(size_t start, size_t length) const;
|
||||||
size_t getNumRows() const { return ids.size(); }
|
size_t getNumRows() const { return keys.size(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
Block fillBlock(
|
Block fillBlock(
|
||||||
const PaddedPODArray<Key> & ids_to_fill,
|
const PaddedPODArray<KeyType> & keys_to_fill,
|
||||||
const PaddedPODArray<RangeType> & block_start_dates,
|
const PaddedPODArray<RangeType> & block_start_dates,
|
||||||
const PaddedPODArray<RangeType> & block_end_dates) const;
|
const PaddedPODArray<RangeType> & block_end_dates,
|
||||||
|
size_t start,
|
||||||
|
size_t end) const;
|
||||||
|
|
||||||
PaddedPODArray<Int64> makeDateKey(
|
PaddedPODArray<Int64> makeDateKeys(
|
||||||
const PaddedPODArray<RangeType> & block_start_dates,
|
const PaddedPODArray<RangeType> & block_start_dates,
|
||||||
const PaddedPODArray<RangeType> & block_end_dates) const;
|
const PaddedPODArray<RangeType> & block_end_dates) const;
|
||||||
|
|
||||||
std::shared_ptr<const IDictionary> dictionary;
|
std::shared_ptr<const IDictionary> dictionary;
|
||||||
NameSet column_names;
|
NameSet column_names;
|
||||||
PaddedPODArray<Key> ids;
|
PaddedPODArray<KeyType> keys;
|
||||||
PaddedPODArray<RangeType> start_dates;
|
PaddedPODArray<RangeType> start_dates;
|
||||||
PaddedPODArray<RangeType> end_dates;
|
PaddedPODArray<RangeType> end_dates;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename RangeType>
|
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||||
RangeDictionarySourceData<RangeType>::RangeDictionarySourceData(
|
RangeDictionarySourceData<range_dictionary_type, RangeType>::RangeDictionarySourceData(
|
||||||
std::shared_ptr<const IDictionary> dictionary_,
|
std::shared_ptr<const IDictionary> dictionary_,
|
||||||
const Names & column_names_,
|
const Names & column_names_,
|
||||||
PaddedPODArray<Key> && ids_,
|
PaddedPODArray<KeyType> && keys,
|
||||||
PaddedPODArray<RangeType> && block_start_dates,
|
PaddedPODArray<RangeType> && block_start_dates,
|
||||||
PaddedPODArray<RangeType> && block_end_dates)
|
PaddedPODArray<RangeType> && block_end_dates)
|
||||||
: dictionary(dictionary_)
|
: dictionary(dictionary_)
|
||||||
, column_names(column_names_.begin(), column_names_.end())
|
, column_names(column_names_.begin(), column_names_.end())
|
||||||
, ids(std::move(ids_))
|
, keys(std::move(keys))
|
||||||
, start_dates(std::move(block_start_dates))
|
, start_dates(std::move(block_start_dates))
|
||||||
, end_dates(std::move(block_end_dates))
|
, end_dates(std::move(block_end_dates))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename RangeType>
|
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||||
Block RangeDictionarySourceData<RangeType>::getBlock(size_t start, size_t length) const
|
Block RangeDictionarySourceData<range_dictionary_type, RangeType>::getBlock(size_t start, size_t length) const
|
||||||
{
|
{
|
||||||
PaddedPODArray<Key> block_ids;
|
PaddedPODArray<KeyType> block_keys;
|
||||||
PaddedPODArray<RangeType> block_start_dates;
|
PaddedPODArray<RangeType> block_start_dates;
|
||||||
PaddedPODArray<RangeType> block_end_dates;
|
PaddedPODArray<RangeType> block_end_dates;
|
||||||
block_ids.reserve(length);
|
block_keys.reserve(length);
|
||||||
block_start_dates.reserve(length);
|
block_start_dates.reserve(length);
|
||||||
block_end_dates.reserve(length);
|
block_end_dates.reserve(length);
|
||||||
|
|
||||||
for (auto idx : collections::range(start, start + length))
|
for (size_t index = start; index < start + length; ++index )
|
||||||
{
|
{
|
||||||
block_ids.push_back(ids[idx]);
|
block_keys.push_back(block_keys[index]);
|
||||||
block_start_dates.push_back(start_dates[idx]);
|
block_start_dates.push_back(start_dates[index]);
|
||||||
block_end_dates.push_back(end_dates[idx]);
|
block_end_dates.push_back(end_dates[index]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
return fillBlock(block_keys, block_start_dates, block_end_dates, start, start + length);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename RangeType>
|
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||||
PaddedPODArray<Int64> RangeDictionarySourceData<RangeType>::makeDateKey(
|
PaddedPODArray<Int64> RangeDictionarySourceData<range_dictionary_type, RangeType>::makeDateKeys(
|
||||||
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const
|
|
||||||
{
|
|
||||||
PaddedPODArray<Int64> key(block_start_dates.size());
|
|
||||||
for (size_t i = 0; i < key.size(); ++i)
|
|
||||||
{
|
|
||||||
if (RangeHashedDictionary::Range::isCorrectDate(block_start_dates[i]))
|
|
||||||
key[i] = block_start_dates[i];
|
|
||||||
else
|
|
||||||
key[i] = block_end_dates[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
return key;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename RangeType>
|
|
||||||
Block RangeDictionarySourceData<RangeType>::fillBlock(
|
|
||||||
const PaddedPODArray<Key> & ids_to_fill,
|
|
||||||
const PaddedPODArray<RangeType> & block_start_dates,
|
const PaddedPODArray<RangeType> & block_start_dates,
|
||||||
const PaddedPODArray<RangeType> & block_end_dates) const
|
const PaddedPODArray<RangeType> & block_end_dates) const
|
||||||
|
{
|
||||||
|
PaddedPODArray<Int64> keys(block_start_dates.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < keys.size(); ++i)
|
||||||
|
{
|
||||||
|
if (Range::isCorrectDate(block_start_dates[i]))
|
||||||
|
keys[i] = block_start_dates[i];
|
||||||
|
else
|
||||||
|
keys[i] = block_end_dates[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return keys;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||||
|
Block RangeDictionarySourceData<range_dictionary_type, RangeType>::fillBlock(
|
||||||
|
const PaddedPODArray<KeyType> & keys_to_fill,
|
||||||
|
const PaddedPODArray<RangeType> & block_start_dates,
|
||||||
|
const PaddedPODArray<RangeType> & block_end_dates,
|
||||||
|
size_t start,
|
||||||
|
size_t end) const
|
||||||
{
|
{
|
||||||
ColumnsWithTypeAndName columns;
|
ColumnsWithTypeAndName columns;
|
||||||
const DictionaryStructure & structure = dictionary->getStructure();
|
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
||||||
|
|
||||||
auto ids_column = getColumnFromPODArray(ids_to_fill);
|
DataTypes keys_types;
|
||||||
const std::string & id_column_name = structure.id->name;
|
Columns keys_columns;
|
||||||
if (column_names.find(id_column_name) != column_names.end())
|
Strings keys_names = dictionary_structure.getKeysNames();
|
||||||
columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), id_column_name);
|
|
||||||
|
|
||||||
auto date_key = makeDateKey(block_start_dates, block_end_dates);
|
if constexpr (range_dictionary_type == RangeDictionaryType::simple)
|
||||||
|
{
|
||||||
|
keys_columns = {getColumnFromPODArray(keys_to_fill)};
|
||||||
|
keys_types = {std::make_shared<DataTypeUInt64>()};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (const auto & attribute : *dictionary_structure.key)
|
||||||
|
keys_types.emplace_back(attribute.type);
|
||||||
|
|
||||||
|
auto deserialized_columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end);
|
||||||
|
for (auto & deserialized_column : deserialized_columns)
|
||||||
|
keys_columns.emplace_back(std::move(deserialized_column));
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t keys_size = keys_names.size();
|
||||||
|
|
||||||
|
std::cerr << "Keys size " << keys_size << " key columns size " << keys_columns.size();
|
||||||
|
std::cerr << " keys types size " << keys_types.size() << std::endl;
|
||||||
|
|
||||||
|
assert(keys_columns.size() == keys_size);
|
||||||
|
assert(keys_types.size() == keys_size);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < keys_size; ++i)
|
||||||
|
{
|
||||||
|
auto & key_name = keys_names[i];
|
||||||
|
|
||||||
|
if (column_names.find(key_name) != column_names.end())
|
||||||
|
columns.emplace_back(keys_columns[i], keys_types[i], key_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto date_key = makeDateKeys(block_start_dates, block_end_dates);
|
||||||
auto date_column = getColumnFromPODArray(date_key);
|
auto date_column = getColumnFromPODArray(date_key);
|
||||||
|
|
||||||
const std::string & range_min_column_name = structure.range_min->name;
|
keys_columns.emplace_back(std::move(date_column));
|
||||||
|
keys_types.emplace_back(std::make_shared<DataTypeInt64>());
|
||||||
|
|
||||||
|
const auto & range_min_column_name = dictionary_structure.range_min->name;
|
||||||
if (column_names.find(range_min_column_name) != column_names.end())
|
if (column_names.find(range_min_column_name) != column_names.end())
|
||||||
{
|
{
|
||||||
auto range_min_column = getColumnFromPODArray(block_start_dates);
|
auto range_min_column = getColumnFromPODArray(block_start_dates);
|
||||||
columns.emplace_back(range_min_column, structure.range_max->type, range_min_column_name);
|
columns.emplace_back(range_min_column, dictionary_structure.range_max->type, range_min_column_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string & range_max_column_name = structure.range_max->name;
|
const auto & range_max_column_name = dictionary_structure.range_max->name;
|
||||||
if (column_names.find(range_max_column_name) != column_names.end())
|
if (column_names.find(range_max_column_name) != column_names.end())
|
||||||
{
|
{
|
||||||
auto range_max_column = getColumnFromPODArray(block_end_dates);
|
auto range_max_column = getColumnFromPODArray(block_end_dates);
|
||||||
columns.emplace_back(range_max_column, structure.range_max->type, range_max_column_name);
|
columns.emplace_back(range_max_column, dictionary_structure.range_max->type, range_max_column_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto idx : collections::range(0, structure.attributes.size()))
|
size_t attributes_size = dictionary_structure.attributes.size();
|
||||||
|
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
|
||||||
{
|
{
|
||||||
const DictionaryAttribute & attribute = structure.attributes[idx];
|
const auto & attribute = dictionary_structure.attributes[attribute_index];
|
||||||
if (column_names.find(attribute.name) != column_names.end())
|
if (column_names.find(attribute.name) == column_names.end())
|
||||||
{
|
continue;
|
||||||
ColumnPtr column = dictionary->getColumn(
|
|
||||||
attribute.name,
|
auto column = dictionary->getColumn(
|
||||||
attribute.type,
|
attribute.name,
|
||||||
{ids_column, date_column},
|
attribute.type,
|
||||||
{std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
|
keys_columns,
|
||||||
nullptr);
|
keys_types,
|
||||||
columns.emplace_back(column, attribute.type, attribute.name);
|
nullptr /* default_values_column*/);
|
||||||
}
|
|
||||||
|
columns.emplace_back(std::move(column), attribute.type, attribute.name);
|
||||||
}
|
}
|
||||||
return Block(columns);
|
return Block(columns);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||||
* BlockInputStream implementation for external dictionaries
|
|
||||||
* read() returns single block consisting of the in-memory contents of the dictionaries
|
|
||||||
*/
|
|
||||||
template <typename RangeType>
|
|
||||||
class RangeDictionarySource : public DictionarySourceBase
|
class RangeDictionarySource : public DictionarySourceBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using Key = UInt64;
|
|
||||||
|
|
||||||
RangeDictionarySource(RangeDictionarySourceData<RangeType> data_, size_t max_block_size);
|
RangeDictionarySource(RangeDictionarySourceData<range_dictionary_type, RangeType> data_, size_t max_block_size);
|
||||||
|
|
||||||
String getName() const override { return "RangeDictionarySource"; }
|
String getName() const override { return "RangeDictionarySource"; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Block getBlock(size_t start, size_t length) const override;
|
Block getBlock(size_t start, size_t length) const override;
|
||||||
|
|
||||||
RangeDictionarySourceData<RangeType> data;
|
RangeDictionarySourceData<range_dictionary_type, RangeType> data;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename RangeType>
|
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||||
RangeDictionarySource<RangeType>::RangeDictionarySource(RangeDictionarySourceData<RangeType> data_, size_t max_block_size)
|
RangeDictionarySource<range_dictionary_type, RangeType>::RangeDictionarySource(RangeDictionarySourceData<range_dictionary_type, RangeType> data_, size_t max_block_size)
|
||||||
: DictionarySourceBase(data_.getBlock(0, 0), data_.getNumRows(), max_block_size)
|
: DictionarySourceBase(data_.getBlock(0, 0), data_.getNumRows(), max_block_size)
|
||||||
, data(std::move(data_))
|
, data(std::move(data_))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename RangeType>
|
template <RangeDictionaryType range_dictionary_type, typename RangeType>
|
||||||
Block RangeDictionarySource<RangeType>::getBlock(size_t start, size_t length) const
|
Block RangeDictionarySource<range_dictionary_type, RangeType>::getBlock(size_t start, size_t length) const
|
||||||
{
|
{
|
||||||
return data.getBlock(start, length);
|
return data.getBlock(start, length);
|
||||||
}
|
}
|
||||||
|
@ -10,7 +10,8 @@
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
using RangeStorageType = DB::RangeHashedDictionary::RangeStorageType;
|
|
||||||
|
using RangeStorageType = DB::RangeStorageType;
|
||||||
|
|
||||||
// Null values mean that specified boundary, either min or max is not set on range.
|
// Null values mean that specified boundary, either min or max is not set on range.
|
||||||
// To simplify comparison, null value of min bound should be bigger than any other value,
|
// To simplify comparison, null value of min bound should be bigger than any other value,
|
||||||
@ -25,7 +26,7 @@ RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t i
|
|||||||
return default_value;
|
return default_value;
|
||||||
|
|
||||||
const RangeStorageType result = static_cast<RangeStorageType>(column.getInt(index));
|
const RangeStorageType result = static_cast<RangeStorageType>(column.getInt(index));
|
||||||
if (isDate && !DB::RangeHashedDictionary::Range::isCorrectDate(result))
|
if (isDate && !DB::Range::isCorrectDate(result))
|
||||||
return default_value;
|
return default_value;
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@ -54,23 +55,23 @@ namespace ErrorCodes
|
|||||||
extern const int UNSUPPORTED_METHOD;
|
extern const int UNSUPPORTED_METHOD;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
|
bool Range::isCorrectDate(const RangeStorageType & date)
|
||||||
{
|
{
|
||||||
return 0 < date && date <= DATE_LUT_MAX_DAY_NUM;
|
return 0 < date && date <= DATE_LUT_MAX_DAY_NUM;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RangeHashedDictionary::Range::contains(const RangeStorageType & value) const
|
bool Range::contains(const RangeStorageType & value) const
|
||||||
{
|
{
|
||||||
return left <= value && value <= right;
|
return left <= value && value <= right;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool operator<(const RangeHashedDictionary::Range & left, const RangeHashedDictionary::Range & right)
|
static bool operator<(const Range & left, const Range & right)
|
||||||
{
|
{
|
||||||
return std::tie(left.left, left.right) < std::tie(right.left, right.right);
|
return std::tie(left.left, left.right) < std::tie(right.left, right.right);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
RangeHashedDictionary::RangeHashedDictionary(
|
RangeHashedDictionary<dictionary_key_type>::RangeHashedDictionary(
|
||||||
const StorageID & dict_id_,
|
const StorageID & dict_id_,
|
||||||
const DictionaryStructure & dict_struct_,
|
const DictionaryStructure & dict_struct_,
|
||||||
DictionarySourcePtr source_ptr_,
|
DictionarySourcePtr source_ptr_,
|
||||||
@ -87,7 +88,8 @@ RangeHashedDictionary::RangeHashedDictionary(
|
|||||||
calculateBytesAllocated();
|
calculateBytesAllocated();
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr RangeHashedDictionary::getColumn(
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
|
ColumnPtr RangeHashedDictionary<dictionary_key_type>::getColumn(
|
||||||
const std::string & attribute_name,
|
const std::string & attribute_name,
|
||||||
const DataTypePtr & result_type,
|
const DataTypePtr & result_type,
|
||||||
const Columns & key_columns,
|
const Columns & key_columns,
|
||||||
@ -96,20 +98,18 @@ ColumnPtr RangeHashedDictionary::getColumn(
|
|||||||
{
|
{
|
||||||
ColumnPtr result;
|
ColumnPtr result;
|
||||||
|
|
||||||
const auto & attribute = getAttribute(attribute_name);
|
|
||||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||||
|
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
|
||||||
auto keys_size = key_columns.front()->size();
|
const auto & attribute = attributes[attribute_index];
|
||||||
|
|
||||||
/// Cast second column to storage type
|
/// Cast second column to storage type
|
||||||
Columns modified_key_columns = key_columns;
|
Columns modified_key_columns = key_columns;
|
||||||
|
auto range_storage_column = key_columns.back();
|
||||||
auto range_storage_column = key_columns[1];
|
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""};
|
||||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
|
|
||||||
|
|
||||||
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
|
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
|
||||||
modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type);
|
modified_key_columns.back() = castColumnAccurate(column_to_cast, range_column_storage_type);
|
||||||
|
|
||||||
|
size_t keys_size = key_columns.front()->size();
|
||||||
bool is_attribute_nullable = attribute.is_nullable;
|
bool is_attribute_nullable = attribute.is_nullable;
|
||||||
|
|
||||||
ColumnUInt8::MutablePtr col_null_map_to;
|
ColumnUInt8::MutablePtr col_null_map_to;
|
||||||
@ -204,24 +204,26 @@ ColumnPtr RangeHashedDictionary::getColumn(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
|
ColumnUInt8::Ptr RangeHashedDictionary<dictionary_key_type>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||||
{
|
{
|
||||||
auto range_storage_column = key_columns[1];
|
|
||||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
|
|
||||||
|
|
||||||
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
|
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
|
||||||
|
auto range_storage_column = key_columns.back();
|
||||||
|
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
|
||||||
auto range_column_updated = castColumnAccurate(column_to_cast, range_column_storage_type);
|
auto range_column_updated = castColumnAccurate(column_to_cast, range_column_storage_type);
|
||||||
|
|
||||||
PaddedPODArray<UInt64> key_backup_storage;
|
|
||||||
PaddedPODArray<RangeStorageType> range_backup_storage;
|
PaddedPODArray<RangeStorageType> range_backup_storage;
|
||||||
|
|
||||||
const PaddedPODArray<UInt64> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
|
|
||||||
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, range_column_updated, range_backup_storage);
|
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, range_column_updated, range_backup_storage);
|
||||||
|
|
||||||
|
auto key_columns_copy = key_columns;
|
||||||
|
key_columns_copy.pop_back();
|
||||||
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||||
|
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
|
||||||
|
const size_t keys_size = keys_extractor.getKeysSize();
|
||||||
|
|
||||||
const auto & attribute = attributes.front();
|
const auto & attribute = attributes.front();
|
||||||
|
|
||||||
ColumnUInt8::Ptr result;
|
auto result = ColumnUInt8::create(keys_size);
|
||||||
|
auto & out = result->getData();
|
||||||
size_t keys_found = 0;
|
size_t keys_found = 0;
|
||||||
|
|
||||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||||
@ -229,58 +231,48 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, con
|
|||||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||||
using AttributeType = typename Type::AttributeType;
|
using AttributeType = typename Type::AttributeType;
|
||||||
using ValueType = DictionaryValueType<AttributeType>;
|
using ValueType = DictionaryValueType<AttributeType>;
|
||||||
result = hasKeysImpl<ValueType>(attribute, ids, dates, keys_found);
|
|
||||||
|
const auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
|
||||||
|
|
||||||
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||||
|
{
|
||||||
|
const auto key = keys_extractor.extractCurrentKey();
|
||||||
|
const auto it = collection.find(key);
|
||||||
|
|
||||||
|
if (it)
|
||||||
|
{
|
||||||
|
const auto date = dates[key_index];
|
||||||
|
const auto & ranges_and_values = it->getMapped();
|
||||||
|
const auto val_it = std::find_if(
|
||||||
|
std::begin(ranges_and_values),
|
||||||
|
std::end(ranges_and_values),
|
||||||
|
[date](const Value<ValueType> & v)
|
||||||
|
{
|
||||||
|
return v.range.contains(date);
|
||||||
|
});
|
||||||
|
|
||||||
|
out[key_index] = val_it != std::end(ranges_and_values);
|
||||||
|
keys_found += out[key_index];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
out[key_index] = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
keys_extractor.rollbackCurrentKey();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||||
|
|
||||||
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
|
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename AttributeType>
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
ColumnUInt8::Ptr RangeHashedDictionary::hasKeysImpl(
|
void RangeHashedDictionary<dictionary_key_type>::createAttributes()
|
||||||
const Attribute & attribute,
|
|
||||||
const PaddedPODArray<UInt64> & ids,
|
|
||||||
const PaddedPODArray<RangeStorageType> & dates,
|
|
||||||
size_t & keys_found) const
|
|
||||||
{
|
|
||||||
auto result = ColumnUInt8::create(ids.size());
|
|
||||||
auto& out = result->getData();
|
|
||||||
|
|
||||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
|
||||||
|
|
||||||
keys_found = 0;
|
|
||||||
|
|
||||||
for (const auto row : collections::range(0, ids.size()))
|
|
||||||
{
|
|
||||||
const auto it = attr.find(ids[row]);
|
|
||||||
|
|
||||||
if (it)
|
|
||||||
{
|
|
||||||
const auto date = dates[row];
|
|
||||||
const auto & ranges_and_values = it->getMapped();
|
|
||||||
const auto val_it = std::find_if(
|
|
||||||
std::begin(ranges_and_values),
|
|
||||||
std::end(ranges_and_values),
|
|
||||||
[date](const Value<AttributeType> & v)
|
|
||||||
{
|
|
||||||
return v.range.contains(date);
|
|
||||||
});
|
|
||||||
|
|
||||||
out[row] = val_it != std::end(ranges_and_values);
|
|
||||||
keys_found += out[row];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
out[row] = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void RangeHashedDictionary::createAttributes()
|
|
||||||
{
|
{
|
||||||
const auto size = dict_struct.attributes.size();
|
const auto size = dict_struct.attributes.size();
|
||||||
attributes.reserve(size);
|
attributes.reserve(size);
|
||||||
@ -296,7 +288,8 @@ void RangeHashedDictionary::createAttributes()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RangeHashedDictionary::loadData()
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
|
void RangeHashedDictionary<dictionary_key_type>::loadData()
|
||||||
{
|
{
|
||||||
QueryPipeline pipeline;
|
QueryPipeline pipeline;
|
||||||
pipeline.init(source_ptr->loadAll());
|
pipeline.init(source_ptr->loadAll());
|
||||||
@ -305,38 +298,57 @@ void RangeHashedDictionary::loadData()
|
|||||||
Block block;
|
Block block;
|
||||||
while (executor.pull(block))
|
while (executor.pull(block))
|
||||||
{
|
{
|
||||||
const auto & id_column = *block.safeGetByPosition(0).column;
|
size_t skip_keys_size_offset = dict_struct.getKeysSize();
|
||||||
|
|
||||||
|
Columns key_columns;
|
||||||
|
key_columns.reserve(skip_keys_size_offset);
|
||||||
|
|
||||||
|
/// Split into keys columns and attribute columns
|
||||||
|
for (size_t i = 0; i < skip_keys_size_offset; ++i)
|
||||||
|
key_columns.emplace_back(block.safeGetByPosition(i).column);
|
||||||
|
|
||||||
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||||
|
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns, arena_holder.getComplexKeyArena());
|
||||||
|
const size_t keys_size = keys_extractor.getKeysSize();
|
||||||
|
|
||||||
|
element_count += keys_size;
|
||||||
|
|
||||||
// Support old behaviour, where invalid date means 'open range'.
|
// Support old behaviour, where invalid date means 'open range'.
|
||||||
const bool is_date = isDate(dict_struct.range_min->type);
|
const bool is_date = isDate(dict_struct.range_min->type);
|
||||||
|
|
||||||
const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(1).column);
|
const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column);
|
||||||
const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(2).column);
|
const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column);
|
||||||
|
|
||||||
element_count += id_column.size();
|
skip_keys_size_offset += 2;
|
||||||
|
|
||||||
for (const auto attribute_idx : collections::range(0, attributes.size()))
|
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
|
||||||
{
|
{
|
||||||
const auto & attribute_column = *block.safeGetByPosition(attribute_idx + 3).column;
|
const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column;
|
||||||
auto & attribute = attributes[attribute_idx];
|
auto & attribute = attributes[attribute_index];
|
||||||
|
|
||||||
for (const auto row_idx : collections::range(0, id_column.size()))
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||||
{
|
{
|
||||||
|
auto key = keys_extractor.extractCurrentKey();
|
||||||
|
|
||||||
RangeStorageType lower_bound;
|
RangeStorageType lower_bound;
|
||||||
RangeStorageType upper_bound;
|
RangeStorageType upper_bound;
|
||||||
|
|
||||||
if (is_date)
|
if (is_date)
|
||||||
{
|
{
|
||||||
lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, 0);
|
lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0);
|
||||||
upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, DATE_LUT_MAX_DAY_NUM + 1);
|
upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, RANGE_MIN_NULL_VALUE);
|
lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE);
|
||||||
upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, RANGE_MAX_NULL_VALUE);
|
upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
setAttributeValue(attribute, id_column.getUInt(row_idx), Range{lower_bound, upper_bound}, attribute_column[row_idx]);
|
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||||
|
key = copyKeyInArena(key);
|
||||||
|
|
||||||
|
setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]);
|
||||||
|
keys_extractor.rollbackCurrentKey();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -346,22 +358,8 @@ void RangeHashedDictionary::loadData()
|
|||||||
"{}: dictionary source is empty and 'require_nonempty' property is set.");
|
"{}: dictionary source is empty and 'require_nonempty' property is set.");
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
|
void RangeHashedDictionary<dictionary_key_type>::calculateBytesAllocated()
|
||||||
{
|
|
||||||
const auto & map_ref = std::get<Ptr<T>>(attribute.maps);
|
|
||||||
bytes_allocated += sizeof(Collection<T>) + map_ref->getBufferSizeInBytes();
|
|
||||||
bucket_count = map_ref->getBufferSizeInCells();
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
|
||||||
{
|
|
||||||
addAttributeSize<StringRef>(attribute);
|
|
||||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
|
||||||
}
|
|
||||||
|
|
||||||
void RangeHashedDictionary::calculateBytesAllocated()
|
|
||||||
{
|
{
|
||||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||||
|
|
||||||
@ -371,14 +369,25 @@ void RangeHashedDictionary::calculateBytesAllocated()
|
|||||||
{
|
{
|
||||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||||
using AttributeType = typename Type::AttributeType;
|
using AttributeType = typename Type::AttributeType;
|
||||||
addAttributeSize<AttributeType>(attribute);
|
using ValueType = DictionaryValueType<AttributeType>;
|
||||||
|
|
||||||
|
const auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
|
||||||
|
bytes_allocated += sizeof(CollectionType<ValueType>) + collection.getBufferSizeInBytes();
|
||||||
|
bucket_count = collection.getBufferSizeInCells();
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<ValueType, StringRef>)
|
||||||
|
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||||
};
|
};
|
||||||
|
|
||||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
|
||||||
|
bytes_allocated += complex_key_arena.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute)
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
|
typename RangeHashedDictionary<dictionary_key_type>::Attribute RangeHashedDictionary<dictionary_key_type>::createAttribute(const DictionaryAttribute & dictionary_attribute)
|
||||||
{
|
{
|
||||||
Attribute attribute{dictionary_attribute.underlying_type, dictionary_attribute.is_nullable, {}, {}};
|
Attribute attribute{dictionary_attribute.underlying_type, dictionary_attribute.is_nullable, {}, {}};
|
||||||
|
|
||||||
@ -391,7 +400,7 @@ RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const Di
|
|||||||
if constexpr (std::is_same_v<AttributeType, String>)
|
if constexpr (std::is_same_v<AttributeType, String>)
|
||||||
attribute.string_arena = std::make_unique<Arena>();
|
attribute.string_arena = std::make_unique<Arena>();
|
||||||
|
|
||||||
attribute.maps = std::make_unique<Collection<ValueType>>();
|
attribute.maps = CollectionType<ValueType>();
|
||||||
};
|
};
|
||||||
|
|
||||||
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
|
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
|
||||||
@ -399,29 +408,35 @@ RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const Di
|
|||||||
return attribute;
|
return attribute;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
|
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
|
||||||
void RangeHashedDictionary::getItemsImpl(
|
void RangeHashedDictionary<dictionary_key_type>::getItemsImpl(
|
||||||
const Attribute & attribute,
|
const Attribute & attribute,
|
||||||
const Columns & key_columns,
|
const Columns & key_columns,
|
||||||
ValueSetter && set_value,
|
ValueSetter && set_value,
|
||||||
DefaultValueExtractor & default_value_extractor) const
|
DefaultValueExtractor & default_value_extractor) const
|
||||||
{
|
{
|
||||||
PaddedPODArray<UInt64> key_backup_storage;
|
const auto & collection = std::get<CollectionType<AttributeType>>(attribute.maps);
|
||||||
PaddedPODArray<RangeStorageType> range_backup_storage;
|
|
||||||
|
|
||||||
const PaddedPODArray<UInt64> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
|
|
||||||
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, key_columns[1], range_backup_storage);
|
|
||||||
|
|
||||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
|
||||||
|
|
||||||
size_t keys_found = 0;
|
size_t keys_found = 0;
|
||||||
|
|
||||||
for (const auto row : collections::range(0, ids.size()))
|
PaddedPODArray<RangeStorageType> range_backup_storage;
|
||||||
|
const auto & dates = getColumnVectorData(this, key_columns.back(), range_backup_storage);
|
||||||
|
|
||||||
|
auto key_columns_copy = key_columns;
|
||||||
|
key_columns_copy.pop_back();
|
||||||
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||||
|
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
|
||||||
|
const size_t keys_size = keys_extractor.getKeysSize();
|
||||||
|
|
||||||
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||||
{
|
{
|
||||||
const auto it = attr.find(ids[row]);
|
auto key = keys_extractor.extractCurrentKey();
|
||||||
|
const auto it = collection.find(key);
|
||||||
|
|
||||||
if (it)
|
if (it)
|
||||||
{
|
{
|
||||||
const auto date = dates[row];
|
const auto date = dates[key_index];
|
||||||
const auto & ranges_and_values = it->getMapped();
|
const auto & ranges_and_values = it->getMapped();
|
||||||
const auto val_it = std::find_if(
|
const auto val_it = std::find_if(
|
||||||
std::begin(ranges_and_values),
|
std::begin(ranges_and_values),
|
||||||
@ -439,35 +454,38 @@ void RangeHashedDictionary::getItemsImpl(
|
|||||||
if constexpr (is_nullable)
|
if constexpr (is_nullable)
|
||||||
{
|
{
|
||||||
if (value.has_value())
|
if (value.has_value())
|
||||||
set_value(row, *value, false);
|
set_value(key_index, *value, false);
|
||||||
else
|
else
|
||||||
set_value(row, default_value_extractor[row], true);
|
set_value(key_index, default_value_extractor[key_index], true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
set_value(row, *value, false);
|
set_value(key_index, *value, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
keys_extractor.rollbackCurrentKey();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr (is_nullable)
|
if constexpr (is_nullable)
|
||||||
set_value(row, default_value_extractor[row], default_value_extractor.isNullAt(row));
|
set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
|
||||||
else
|
else
|
||||||
set_value(row, default_value_extractor[row], false);
|
set_value(key_index, default_value_extractor[key_index], false);
|
||||||
|
|
||||||
|
keys_extractor.rollbackCurrentKey();
|
||||||
}
|
}
|
||||||
|
|
||||||
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
|
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const UInt64 id, const Range & range, const Field & value)
|
void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value)
|
||||||
{
|
{
|
||||||
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
|
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
|
||||||
auto & map = *std::get<Ptr<ValueType>>(attribute.maps);
|
auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
|
||||||
|
|
||||||
Value<ValueType> value_to_insert;
|
Value<ValueType> value_to_insert;
|
||||||
|
|
||||||
@ -490,61 +508,47 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const U
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto it = map.find(id);
|
const auto it = collection.find(key);
|
||||||
|
|
||||||
if (it)
|
if (it)
|
||||||
{
|
{
|
||||||
auto & values = it->getMapped();
|
auto & values = it->getMapped();
|
||||||
|
|
||||||
const auto insert_it
|
const auto insert_it = std::lower_bound(
|
||||||
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<ValueType> & lhs, const Range & rhs_range)
|
std::begin(values),
|
||||||
{
|
std::end(values),
|
||||||
return lhs.range < rhs_range;
|
range,
|
||||||
});
|
[](const Value<ValueType> & lhs, const Range & rhs_range)
|
||||||
|
{
|
||||||
|
return lhs.range < rhs_range;
|
||||||
|
});
|
||||||
|
|
||||||
values.insert(insert_it, std::move(value_to_insert));
|
values.insert(insert_it, std::move(value_to_insert));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
map.insert({id, Values<ValueType>{std::move(value_to_insert)}});
|
{
|
||||||
|
collection.insert({key, Values<ValueType>{std::move(value_to_insert)}});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const UInt64 id, const Range & range, const Field & value)
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
|
void RangeHashedDictionary<dictionary_key_type>::setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value)
|
||||||
{
|
{
|
||||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||||
{
|
{
|
||||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||||
using AttributeType = typename Type::AttributeType;
|
using AttributeType = typename Type::AttributeType;
|
||||||
|
|
||||||
setAttributeValueImpl<AttributeType>(attribute, id, range, value);
|
setAttributeValueImpl<AttributeType>(attribute, key, range, value);
|
||||||
};
|
};
|
||||||
|
|
||||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||||
}
|
}
|
||||||
|
|
||||||
const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
{
|
|
||||||
const auto it = attribute_index_by_name.find(attribute_name);
|
|
||||||
if (it == std::end(attribute_index_by_name))
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: no such attribute '{}'", full_name, attribute_name);
|
|
||||||
|
|
||||||
return attributes[it->second];
|
|
||||||
}
|
|
||||||
|
|
||||||
const RangeHashedDictionary::Attribute &
|
|
||||||
RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name, const AttributeUnderlyingType type) const
|
|
||||||
{
|
|
||||||
const auto & attribute = getAttribute(attribute_name);
|
|
||||||
if (attribute.type != type)
|
|
||||||
throw Exception(ErrorCodes::TYPE_MISMATCH, "attribute {} has type {}",
|
|
||||||
attribute_name,
|
|
||||||
toString(attribute.type));
|
|
||||||
|
|
||||||
return attribute;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename RangeType>
|
template <typename RangeType>
|
||||||
void RangeHashedDictionary::getIdsAndDates(
|
void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
|
||||||
PaddedPODArray<UInt64> & ids,
|
PaddedPODArray<KeyType> & keys,
|
||||||
PaddedPODArray<RangeType> & start_dates,
|
PaddedPODArray<RangeType> & start_dates,
|
||||||
PaddedPODArray<RangeType> & end_dates) const
|
PaddedPODArray<RangeType> & end_dates) const
|
||||||
{
|
{
|
||||||
@ -556,32 +560,33 @@ void RangeHashedDictionary::getIdsAndDates(
|
|||||||
using AttributeType = typename Type::AttributeType;
|
using AttributeType = typename Type::AttributeType;
|
||||||
using ValueType = DictionaryValueType<AttributeType>;
|
using ValueType = DictionaryValueType<AttributeType>;
|
||||||
|
|
||||||
getIdsAndDates<ValueType>(attribute, ids, start_dates, end_dates);
|
getKeysAndDates<ValueType>(attribute, keys, start_dates, end_dates);
|
||||||
};
|
};
|
||||||
|
|
||||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
template <typename T, typename RangeType>
|
template <typename T, typename RangeType>
|
||||||
void RangeHashedDictionary::getIdsAndDates(
|
void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
|
||||||
const Attribute & attribute,
|
const Attribute & attribute,
|
||||||
PaddedPODArray<UInt64> & ids,
|
PaddedPODArray<KeyType> & keys,
|
||||||
PaddedPODArray<RangeType> & start_dates,
|
PaddedPODArray<RangeType> & start_dates,
|
||||||
PaddedPODArray<RangeType> & end_dates) const
|
PaddedPODArray<RangeType> & end_dates) const
|
||||||
{
|
{
|
||||||
const HashMap<UInt64, Values<T>> & attr = *std::get<Ptr<T>>(attribute.maps);
|
const auto & collection = std::get<CollectionType<T>>(attribute.maps);
|
||||||
|
|
||||||
ids.reserve(attr.size());
|
keys.reserve(collection.size());
|
||||||
start_dates.reserve(attr.size());
|
start_dates.reserve(collection.size());
|
||||||
end_dates.reserve(attr.size());
|
end_dates.reserve(collection.size());
|
||||||
|
|
||||||
const bool is_date = isDate(dict_struct.range_min->type);
|
const bool is_date = isDate(dict_struct.range_min->type);
|
||||||
|
|
||||||
for (const auto & key : attr)
|
for (const auto & key : collection)
|
||||||
{
|
{
|
||||||
for (const auto & value : key.getMapped())
|
for (const auto & value : key.getMapped())
|
||||||
{
|
{
|
||||||
ids.push_back(key.getKey());
|
keys.push_back(key.getKey());
|
||||||
start_dates.push_back(value.range.left);
|
start_dates.push_back(value.range.left);
|
||||||
end_dates.push_back(value.range.right);
|
end_dates.push_back(value.range.right);
|
||||||
|
|
||||||
@ -592,22 +597,23 @@ void RangeHashedDictionary::getIdsAndDates(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
template <typename RangeType>
|
template <typename RangeType>
|
||||||
Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_block_size) const
|
Pipe RangeHashedDictionary<dictionary_key_type>::readImpl(const Names & column_names, size_t max_block_size) const
|
||||||
{
|
{
|
||||||
PaddedPODArray<UInt64> ids;
|
PaddedPODArray<KeyType> keys;
|
||||||
PaddedPODArray<RangeType> start_dates;
|
PaddedPODArray<RangeType> start_dates;
|
||||||
PaddedPODArray<RangeType> end_dates;
|
PaddedPODArray<RangeType> end_dates;
|
||||||
getIdsAndDates(ids, start_dates, end_dates);
|
getKeysAndDates(keys, start_dates, end_dates);
|
||||||
|
|
||||||
using RangeDictionarySourceType = RangeDictionarySource<RangeType>;
|
static constexpr RangeDictionaryType range_dictionary_type = (dictionary_key_type == DictionaryKeyType::simple) ? RangeDictionaryType::simple : RangeDictionaryType::complex;
|
||||||
|
using RangeDictionarySourceType = RangeDictionarySource<range_dictionary_type, RangeType>;
|
||||||
|
|
||||||
auto source = std::make_shared<RangeDictionarySourceType>(
|
auto source = std::make_shared<RangeDictionarySourceType>(
|
||||||
RangeDictionarySourceData<RangeType>(
|
RangeDictionarySourceData<range_dictionary_type, RangeType>(
|
||||||
shared_from_this(),
|
shared_from_this(),
|
||||||
column_names,
|
column_names,
|
||||||
std::move(ids),
|
std::move(keys),
|
||||||
std::move(start_dates),
|
std::move(start_dates),
|
||||||
std::move(end_dates)),
|
std::move(end_dates)),
|
||||||
max_block_size);
|
max_block_size);
|
||||||
@ -615,10 +621,21 @@ Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_bloc
|
|||||||
return Pipe(source);
|
return Pipe(source);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
|
StringRef RangeHashedDictionary<dictionary_key_type>::copyKeyInArena(StringRef key)
|
||||||
|
{
|
||||||
|
size_t key_size = key.size;
|
||||||
|
char * place_for_key = complex_key_arena.alloc(key_size);
|
||||||
|
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(key.data), key_size);
|
||||||
|
StringRef updated_key{place_for_key, key_size};
|
||||||
|
return updated_key;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
struct RangeHashedDictionaryCallGetSourceImpl
|
struct RangeHashedDictionaryCallGetSourceImpl
|
||||||
{
|
{
|
||||||
Pipe pipe;
|
Pipe pipe;
|
||||||
const RangeHashedDictionary * dict;
|
const RangeHashedDictionary<dictionary_key_type> * dict;
|
||||||
const Names * column_names;
|
const Names * column_names;
|
||||||
size_t max_block_size;
|
size_t max_block_size;
|
||||||
|
|
||||||
@ -627,15 +644,16 @@ struct RangeHashedDictionaryCallGetSourceImpl
|
|||||||
{
|
{
|
||||||
const auto & type = dict->dict_struct.range_min->type;
|
const auto & type = dict->dict_struct.range_min->type;
|
||||||
if (pipe.empty() && dynamic_cast<const DataTypeNumberBase<RangeType> *>(type.get()))
|
if (pipe.empty() && dynamic_cast<const DataTypeNumberBase<RangeType> *>(type.get()))
|
||||||
pipe = dict->readImpl<RangeType>(*column_names, max_block_size);
|
pipe = dict->template readImpl<RangeType>(*column_names, max_block_size);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_size) const
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
|
Pipe RangeHashedDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size) const
|
||||||
{
|
{
|
||||||
using ListType = TypeList<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Int128, Float32, Float64>;
|
using ListType = TypeList<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Int128, Float32, Float64>;
|
||||||
|
|
||||||
RangeHashedDictionaryCallGetSourceImpl callable;
|
RangeHashedDictionaryCallGetSourceImpl<dictionary_key_type> callable;
|
||||||
callable.dict = this;
|
callable.dict = this;
|
||||||
callable.column_names = &column_names;
|
callable.column_names = &column_names;
|
||||||
callable.max_block_size = max_block_size;
|
callable.max_block_size = max_block_size;
|
||||||
@ -653,7 +671,7 @@ Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_si
|
|||||||
|
|
||||||
void registerDictionaryRangeHashed(DictionaryFactory & factory)
|
void registerDictionaryRangeHashed(DictionaryFactory & factory)
|
||||||
{
|
{
|
||||||
auto create_layout = [=](const std::string & full_name,
|
auto create_layout_simple = [=](const std::string & full_name,
|
||||||
const DictionaryStructure & dict_struct,
|
const DictionaryStructure & dict_struct,
|
||||||
const Poco::Util::AbstractConfiguration & config,
|
const Poco::Util::AbstractConfiguration & config,
|
||||||
const std::string & config_prefix,
|
const std::string & config_prefix,
|
||||||
@ -672,9 +690,32 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory)
|
|||||||
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
||||||
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
||||||
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
||||||
return std::make_unique<RangeHashedDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
return std::make_unique<RangeHashedDictionary<DictionaryKeyType::simple>>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
||||||
};
|
};
|
||||||
factory.registerLayout("range_hashed", create_layout, false);
|
factory.registerLayout("range_hashed", create_layout_simple, false);
|
||||||
|
|
||||||
|
auto create_layout_complex = [=](const std::string & full_name,
|
||||||
|
const DictionaryStructure & dict_struct,
|
||||||
|
const Poco::Util::AbstractConfiguration & config,
|
||||||
|
const std::string & config_prefix,
|
||||||
|
DictionarySourcePtr source_ptr,
|
||||||
|
ContextPtr /* context */,
|
||||||
|
bool /*created_from_ddl*/) -> DictionaryPtr
|
||||||
|
{
|
||||||
|
if (dict_struct.id)
|
||||||
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for dictionary of layout 'complex_key_range_hashed'");
|
||||||
|
|
||||||
|
if (!dict_struct.range_min || !dict_struct.range_max)
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||||
|
"{}: dictionary of layout 'complex_key_range_hashed' requires .structure.range_min and .structure.range_max",
|
||||||
|
full_name);
|
||||||
|
|
||||||
|
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
||||||
|
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
||||||
|
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
||||||
|
return std::make_unique<RangeHashedDictionary<DictionaryKeyType::complex>>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
||||||
|
};
|
||||||
|
factory.registerLayout("complex_key_range_hashed", create_layout_complex, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -16,9 +16,25 @@
|
|||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
using RangeStorageType = Int64;
|
||||||
|
|
||||||
|
struct Range
|
||||||
|
{
|
||||||
|
RangeStorageType left;
|
||||||
|
RangeStorageType right;
|
||||||
|
|
||||||
|
static bool isCorrectDate(const RangeStorageType & date);
|
||||||
|
bool contains(const RangeStorageType & value) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
class RangeHashedDictionary final : public IDictionary
|
class RangeHashedDictionary final : public IDictionary
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::simple, UInt64, StringRef>;
|
||||||
|
static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by hashed dictionary");
|
||||||
|
|
||||||
RangeHashedDictionary(
|
RangeHashedDictionary(
|
||||||
const StorageID & dict_id_,
|
const StorageID & dict_id_,
|
||||||
const DictionaryStructure & dict_struct_,
|
const DictionaryStructure & dict_struct_,
|
||||||
@ -59,7 +75,7 @@ public:
|
|||||||
|
|
||||||
bool isInjective(const std::string & attribute_name) const override
|
bool isInjective(const std::string & attribute_name) const override
|
||||||
{
|
{
|
||||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
return dict_struct.getAttribute(attribute_name).injective;
|
||||||
}
|
}
|
||||||
|
|
||||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
|
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
|
||||||
@ -73,19 +89,8 @@ public:
|
|||||||
|
|
||||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||||
|
|
||||||
using RangeStorageType = Int64;
|
|
||||||
|
|
||||||
Pipe read(const Names & column_names, size_t max_block_size) const override;
|
Pipe read(const Names & column_names, size_t max_block_size) const override;
|
||||||
|
|
||||||
struct Range
|
|
||||||
{
|
|
||||||
RangeStorageType left;
|
|
||||||
RangeStorageType right;
|
|
||||||
|
|
||||||
static bool isCorrectDate(const RangeStorageType & date);
|
|
||||||
bool contains(const RangeStorageType & value) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct Value final
|
struct Value final
|
||||||
@ -96,10 +101,12 @@ private:
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
using Values = std::vector<Value<T>>;
|
using Values = std::vector<Value<T>>;
|
||||||
template <typename T>
|
|
||||||
using Collection = HashMap<UInt64, Values<T>>;
|
template <typename Value>
|
||||||
template <typename T>
|
using CollectionType = std::conditional_t<
|
||||||
using Ptr = std::unique_ptr<Collection<T>>;
|
dictionary_key_type == DictionaryKeyType::simple,
|
||||||
|
HashMap<UInt64, Values<Value>>,
|
||||||
|
HashMapWithSavedHash<StringRef, Values<Value>, DefaultHash<StringRef>>>;
|
||||||
|
|
||||||
struct Attribute final
|
struct Attribute final
|
||||||
{
|
{
|
||||||
@ -108,27 +115,27 @@ private:
|
|||||||
bool is_nullable;
|
bool is_nullable;
|
||||||
|
|
||||||
std::variant<
|
std::variant<
|
||||||
Ptr<UInt8>,
|
CollectionType<UInt8>,
|
||||||
Ptr<UInt16>,
|
CollectionType<UInt16>,
|
||||||
Ptr<UInt32>,
|
CollectionType<UInt32>,
|
||||||
Ptr<UInt64>,
|
CollectionType<UInt64>,
|
||||||
Ptr<UInt128>,
|
CollectionType<UInt128>,
|
||||||
Ptr<UInt256>,
|
CollectionType<UInt256>,
|
||||||
Ptr<Int8>,
|
CollectionType<Int8>,
|
||||||
Ptr<Int16>,
|
CollectionType<Int16>,
|
||||||
Ptr<Int32>,
|
CollectionType<Int32>,
|
||||||
Ptr<Int64>,
|
CollectionType<Int64>,
|
||||||
Ptr<Int128>,
|
CollectionType<Int128>,
|
||||||
Ptr<Int256>,
|
CollectionType<Int256>,
|
||||||
Ptr<Decimal32>,
|
CollectionType<Decimal32>,
|
||||||
Ptr<Decimal64>,
|
CollectionType<Decimal64>,
|
||||||
Ptr<Decimal128>,
|
CollectionType<Decimal128>,
|
||||||
Ptr<Decimal256>,
|
CollectionType<Decimal256>,
|
||||||
Ptr<Float32>,
|
CollectionType<Float32>,
|
||||||
Ptr<Float64>,
|
CollectionType<Float64>,
|
||||||
Ptr<UUID>,
|
CollectionType<UUID>,
|
||||||
Ptr<StringRef>,
|
CollectionType<StringRef>,
|
||||||
Ptr<Array>>
|
CollectionType<Array>>
|
||||||
maps;
|
maps;
|
||||||
std::unique_ptr<Arena> string_arena;
|
std::unique_ptr<Arena> string_arena;
|
||||||
};
|
};
|
||||||
@ -137,9 +144,6 @@ private:
|
|||||||
|
|
||||||
void loadData();
|
void loadData();
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void addAttributeSize(const Attribute & attribute);
|
|
||||||
|
|
||||||
void calculateBytesAllocated();
|
void calculateBytesAllocated();
|
||||||
|
|
||||||
static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute);
|
static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute);
|
||||||
@ -151,35 +155,30 @@ private:
|
|||||||
ValueSetter && set_value,
|
ValueSetter && set_value,
|
||||||
DefaultValueExtractor & default_value_extractor) const;
|
DefaultValueExtractor & default_value_extractor) const;
|
||||||
|
|
||||||
template <typename AttributeType>
|
|
||||||
ColumnUInt8::Ptr hasKeysImpl(
|
|
||||||
const Attribute & attribute,
|
|
||||||
const PaddedPODArray<UInt64> & ids,
|
|
||||||
const PaddedPODArray<RangeStorageType> & dates,
|
|
||||||
size_t & keys_found) const;
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void setAttributeValueImpl(Attribute & attribute, const UInt64 id, const Range & range, const Field & value);
|
static void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value);
|
||||||
|
|
||||||
static void setAttributeValue(Attribute & attribute, const UInt64 id, const Range & range, const Field & value);
|
static void setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value);
|
||||||
|
|
||||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
|
||||||
|
|
||||||
const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const;
|
|
||||||
|
|
||||||
template <typename RangeType>
|
template <typename RangeType>
|
||||||
void getIdsAndDates(PaddedPODArray<UInt64> & ids, PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const;
|
void getKeysAndDates(
|
||||||
|
PaddedPODArray<KeyType> & keys,
|
||||||
|
PaddedPODArray<RangeType> & start_dates,
|
||||||
|
PaddedPODArray<RangeType> & end_dates) const;
|
||||||
|
|
||||||
template <typename T, typename RangeType>
|
template <typename T, typename RangeType>
|
||||||
void getIdsAndDates(
|
void getKeysAndDates(
|
||||||
const Attribute & attribute,
|
const Attribute & attribute,
|
||||||
PaddedPODArray<UInt64> & ids,
|
PaddedPODArray<KeyType> & keys,
|
||||||
PaddedPODArray<RangeType> & start_dates,
|
PaddedPODArray<RangeType> & start_dates,
|
||||||
PaddedPODArray<RangeType> & end_dates) const;
|
PaddedPODArray<RangeType> & end_dates) const;
|
||||||
|
|
||||||
template <typename RangeType>
|
template <typename RangeType>
|
||||||
Pipe readImpl(const Names & column_names, size_t max_block_size) const;
|
Pipe readImpl(const Names & column_names, size_t max_block_size) const;
|
||||||
|
|
||||||
|
StringRef copyKeyInArena(StringRef key);
|
||||||
|
|
||||||
|
template <DictionaryKeyType>
|
||||||
friend struct RangeHashedDictionaryCallGetSourceImpl;
|
friend struct RangeHashedDictionaryCallGetSourceImpl;
|
||||||
|
|
||||||
const DictionaryStructure dict_struct;
|
const DictionaryStructure dict_struct;
|
||||||
@ -189,6 +188,7 @@ private:
|
|||||||
|
|
||||||
std::map<std::string, size_t> attribute_index_by_name;
|
std::map<std::string, size_t> attribute_index_by_name;
|
||||||
std::vector<Attribute> attributes;
|
std::vector<Attribute> attributes;
|
||||||
|
Arena complex_key_arena;
|
||||||
|
|
||||||
size_t bytes_allocated = 0;
|
size_t bytes_allocated = 0;
|
||||||
size_t element_count = 0;
|
size_t element_count = 0;
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
using DictionaryConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
|
using DictionaryConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
|
||||||
|
|
||||||
/// Convert dictionary AST to Poco::AbstractConfiguration
|
/// Convert dictionary AST to Poco::AbstractConfiguration
|
||||||
@ -13,4 +14,5 @@ using DictionaryConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfigurati
|
|||||||
/// Can throw exception if query is ill-formed
|
/// Can throw exception if query is ill-formed
|
||||||
DictionaryConfigurationPtr
|
DictionaryConfigurationPtr
|
||||||
getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr context, const std::string & database_ = "");
|
getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr context, const std::string & database_ = "");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -250,12 +250,23 @@ public:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!WhichDataType(key_column_type).isUInt64())
|
/// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
|
||||||
throw Exception(
|
ColumnPtr key_column = key_column_with_type.column->convertToFullColumnIfConst();
|
||||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
DataTypePtr key_column_type = key_column_with_type.type;
|
||||||
"Second argument of function {} must be UInt64 when dictionary is range. Actual type {}.",
|
|
||||||
getName(),
|
Columns key_columns;
|
||||||
key_column_with_type.type->getName());
|
DataTypes key_types;
|
||||||
|
|
||||||
|
if (isTuple(key_column_type))
|
||||||
|
{
|
||||||
|
key_columns = assert_cast<const ColumnTuple &>(*key_column).getColumnsCopy();
|
||||||
|
key_types = assert_cast<const DataTypeTuple &>(*key_column_type).getElements();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
key_columns = {key_column, range_col};
|
||||||
|
key_types = {std::make_shared<DataTypeUInt64>(), range_col_type};
|
||||||
|
}
|
||||||
|
|
||||||
return dictionary->hasKeys({key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type});
|
return dictionary->hasKeys({key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type});
|
||||||
}
|
}
|
||||||
@ -487,18 +498,29 @@ public:
|
|||||||
}
|
}
|
||||||
else if (dictionary_key_type == DictionaryKeyType::range)
|
else if (dictionary_key_type == DictionaryKeyType::range)
|
||||||
{
|
{
|
||||||
if (!WhichDataType(key_col_with_type.type).isUInt64())
|
/// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
|
||||||
throw Exception(
|
ColumnPtr key_column = key_col_with_type.column->convertToFullColumnIfConst();
|
||||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
DataTypePtr key_column_type = key_col_with_type.type;
|
||||||
"Third argument of function {} must be UInt64 when dictionary is range. Actual type {}.",
|
|
||||||
getName(),
|
Columns key_columns;
|
||||||
key_col_with_type.type->getName());
|
DataTypes key_types;
|
||||||
|
|
||||||
|
if (isTuple(key_column_type))
|
||||||
|
{
|
||||||
|
key_columns = assert_cast<const ColumnTuple &>(*key_column).getColumnsCopy();
|
||||||
|
key_types = assert_cast<const DataTypeTuple &>(*key_column_type).getElements();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
key_columns = {key_column, range_col};
|
||||||
|
key_types = {std::make_shared<DataTypeUInt64>(), range_col_type};
|
||||||
|
}
|
||||||
|
|
||||||
result = executeDictionaryRequest(
|
result = executeDictionaryRequest(
|
||||||
dictionary,
|
dictionary,
|
||||||
attribute_names,
|
attribute_names,
|
||||||
{key_column, range_col},
|
key_columns,
|
||||||
{std::make_shared<DataTypeUInt64>(), range_col_type},
|
key_types,
|
||||||
result_type,
|
result_type,
|
||||||
default_cols);
|
default_cols);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,58 @@
|
|||||||
|
Dictionary not nullable
|
||||||
|
dictGet
|
||||||
|
0.2
|
||||||
|
0.2
|
||||||
|
0.2
|
||||||
|
0.2
|
||||||
|
0.4
|
||||||
|
dictHas
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
0
|
||||||
|
select columns from dictionary
|
||||||
|
allColumns
|
||||||
|
2019-05-05 2019-05-20 1 1 0.33
|
||||||
|
2019-05-21 2019-05-30 1 1 0.42
|
||||||
|
2019-05-21 2019-05-30 2 2 0.46
|
||||||
|
noColumns
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
onlySpecificColumns
|
||||||
|
1 2019-05-05 0.33
|
||||||
|
1 2019-05-21 0.42
|
||||||
|
2 2019-05-21 0.46
|
||||||
|
onlySpecificColumn
|
||||||
|
0.33
|
||||||
|
0.42
|
||||||
|
0.46
|
||||||
|
Dictionary nullable
|
||||||
|
dictGet
|
||||||
|
0.2
|
||||||
|
0.2
|
||||||
|
0.2
|
||||||
|
0.2
|
||||||
|
0.4
|
||||||
|
dictHas
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
0
|
||||||
|
select columns from dictionary
|
||||||
|
allColumns
|
||||||
|
2019-05-05 2019-05-20 1 1 0.33
|
||||||
|
2019-05-21 2019-05-30 1 1 0.42
|
||||||
|
2019-05-21 2019-05-30 2 2 \N
|
||||||
|
noColumns
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
onlySpecificColumns
|
||||||
|
1 2019-05-05 0.33
|
||||||
|
1 2019-05-21 0.42
|
||||||
|
2 2019-05-21 \N
|
||||||
|
onlySpecificColumn
|
||||||
|
0.33
|
||||||
|
0.42
|
||||||
|
\N
|
@ -0,0 +1,109 @@
|
|||||||
|
DROP TABLE IF EXISTS date_table;
|
||||||
|
CREATE TABLE date_table
|
||||||
|
(
|
||||||
|
CountryID UInt64,
|
||||||
|
CountryKey String,
|
||||||
|
StartDate Date,
|
||||||
|
EndDate Date,
|
||||||
|
Tax Float64
|
||||||
|
)
|
||||||
|
ENGINE = MergeTree()
|
||||||
|
ORDER BY CountryID;
|
||||||
|
|
||||||
|
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
|
||||||
|
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
|
||||||
|
INSERT INTO date_table VALUES(2, '2', toDate('2019-05-21'), toDate('2019-05-30'), 0.46);
|
||||||
|
|
||||||
|
DROP DICTIONARY IF EXISTS range_dictionary;
|
||||||
|
CREATE DICTIONARY range_dictionary
|
||||||
|
(
|
||||||
|
CountryID UInt64,
|
||||||
|
CountryKey String,
|
||||||
|
StartDate Date,
|
||||||
|
EndDate Date,
|
||||||
|
Tax Float64 DEFAULT 0.2
|
||||||
|
)
|
||||||
|
PRIMARY KEY CountryID, CountryKey
|
||||||
|
SOURCE(CLICKHOUSE(TABLE 'date_table'))
|
||||||
|
LIFETIME(MIN 1 MAX 1000)
|
||||||
|
LAYOUT(COMPLEX_KEY_RANGE_HASHED())
|
||||||
|
RANGE(MIN StartDate MAX EndDate);
|
||||||
|
|
||||||
|
SELECT 'Dictionary not nullable';
|
||||||
|
SELECT 'dictGet';
|
||||||
|
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(1), '1'), toDate('2019-05-15'));
|
||||||
|
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(1), '1'), toDate('2019-05-29'));
|
||||||
|
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||||
|
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||||
|
SELECT dictGetOrDefault('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'), 0.4);
|
||||||
|
SELECT 'dictHas';
|
||||||
|
SELECT dictHas('range_dictionary', (toUInt64(1), '1'), toDate('2019-05-15'));
|
||||||
|
SELECT dictHas('range_dictionary', (toUInt64(1), '1'), toDate('2019-05-29'));
|
||||||
|
SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||||
|
SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||||
|
SELECT 'select columns from dictionary';
|
||||||
|
SELECT 'allColumns';
|
||||||
|
SELECT * FROM range_dictionary;
|
||||||
|
SELECT 'noColumns';
|
||||||
|
SELECT 1 FROM range_dictionary;
|
||||||
|
SELECT 'onlySpecificColumns';
|
||||||
|
SELECT CountryID, StartDate, Tax FROM range_dictionary;
|
||||||
|
SELECT 'onlySpecificColumn';
|
||||||
|
SELECT Tax FROM range_dictionary;
|
||||||
|
|
||||||
|
DROP TABLE date_table;
|
||||||
|
DROP DICTIONARY range_dictionary;
|
||||||
|
|
||||||
|
CREATE TABLE date_table
|
||||||
|
(
|
||||||
|
CountryID UInt64,
|
||||||
|
CountryKey String,
|
||||||
|
StartDate Date,
|
||||||
|
EndDate Date,
|
||||||
|
Tax Nullable(Float64)
|
||||||
|
)
|
||||||
|
ENGINE = MergeTree()
|
||||||
|
ORDER BY CountryID;
|
||||||
|
|
||||||
|
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
|
||||||
|
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
|
||||||
|
INSERT INTO date_table VALUES(2, '2', toDate('2019-05-21'), toDate('2019-05-30'), NULL);
|
||||||
|
|
||||||
|
CREATE DICTIONARY range_dictionary_nullable
|
||||||
|
(
|
||||||
|
CountryID UInt64,
|
||||||
|
CountryKey String,
|
||||||
|
StartDate Date,
|
||||||
|
EndDate Date,
|
||||||
|
Tax Nullable(Float64) DEFAULT 0.2
|
||||||
|
)
|
||||||
|
PRIMARY KEY CountryID, CountryKey
|
||||||
|
SOURCE(CLICKHOUSE(TABLE 'date_table'))
|
||||||
|
LIFETIME(MIN 1 MAX 1000)
|
||||||
|
LAYOUT(COMPLEX_KEY_RANGE_HASHED())
|
||||||
|
RANGE(MIN StartDate MAX EndDate);
|
||||||
|
|
||||||
|
SELECT 'Dictionary nullable';
|
||||||
|
SELECT 'dictGet';
|
||||||
|
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(1), '1'), toDate('2019-05-15'));
|
||||||
|
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(1), '1'), toDate('2019-05-29'));
|
||||||
|
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||||
|
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||||
|
SELECT dictGetOrDefault('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'), 0.4);
|
||||||
|
SELECT 'dictHas';
|
||||||
|
SELECT dictHas('range_dictionary_nullable', (toUInt64(1), '1'), toDate('2019-05-15'));
|
||||||
|
SELECT dictHas('range_dictionary_nullable', (toUInt64(1), '1'), toDate('2019-05-29'));
|
||||||
|
SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||||
|
SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||||
|
SELECT 'select columns from dictionary';
|
||||||
|
SELECT 'allColumns';
|
||||||
|
SELECT * FROM range_dictionary_nullable;
|
||||||
|
SELECT 'noColumns';
|
||||||
|
SELECT 1 FROM range_dictionary_nullable;
|
||||||
|
SELECT 'onlySpecificColumns';
|
||||||
|
SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable;
|
||||||
|
SELECT 'onlySpecificColumn';
|
||||||
|
SELECT Tax FROM range_dictionary_nullable;
|
||||||
|
|
||||||
|
DROP TABLE date_table;
|
||||||
|
DROP DICTIONARY range_dictionary_nullable;
|
Loading…
Reference in New Issue
Block a user