Added ComplexKeyRangeHashed dictionary

This commit is contained in:
Maksim Kita 2021-08-13 00:39:20 +03:00
parent 40f5e06a8d
commit e12820ecb2
15 changed files with 731 additions and 394 deletions

View File

@ -0,0 +1,48 @@
#include "DictionaryHelpers.h"
namespace DB
{
/// Rebuilds key columns from serialized complex keys.
/// One column is created per key attribute of `dictionary_structure`; every key with
/// index in the half-open range [start, end) is then deserialized into these columns,
/// one key component per column, in key-attribute order.
MutableColumns deserializeColumnsFromKeys(
    const DictionaryStructure & dictionary_structure,
    const PaddedPODArray<StringRef> & keys,
    size_t start,
    size_t end)
{
    const auto & key_attributes = *dictionary_structure.key;

    MutableColumns key_columns;
    key_columns.reserve(key_attributes.size());

    for (const DictionaryAttribute & key_attribute : key_attributes)
        key_columns.emplace_back(key_attribute.type->createColumn());

    const size_t columns_count = key_columns.size();

    for (size_t row = start; row < end; ++row)
    {
        /// The pointer returned by one column is passed on as the read position for the next column.
        const auto * position = keys[row].data;

        for (size_t column_index = 0; column_index < columns_count; ++column_index)
            position = key_columns[column_index]->deserializeAndInsertFromArena(position);
    }

    return key_columns;
}
/// Same as deserializeColumnsFromKeys, but every resulting column is paired with the
/// type and the name of the corresponding key attribute of `dictionary_structure`.
ColumnsWithTypeAndName deserializeColumnsWithTypeAndNameFromKeys(
    const DictionaryStructure & dictionary_structure,
    const PaddedPODArray<StringRef> & keys,
    size_t start,
    size_t end)
{
    MutableColumns deserialized_columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end);
    const auto & key_attributes = *dictionary_structure.key;

    ColumnsWithTypeAndName columns_with_type_and_name;

    /// Columns come back in key-attribute order, so column and attribute share the same position.
    size_t position = 0;
    for (auto & deserialized_column : deserialized_columns)
    {
        const auto & key_attribute = key_attributes[position];
        columns_with_type_and_name.emplace_back(
            ColumnWithTypeAndName{std::move(deserialized_column), key_attribute.type, key_attribute.name});
        ++position;
    }

    return columns_with_type_and_name;
}
}

View File

@ -497,6 +497,20 @@ private:
Arena * complex_key_arena; Arena * complex_key_arena;
}; };
/** Deserialize columns from keys array using dictionary structure.
  * Creates one column per key attribute of dictionary_structure and deserializes into them
  * the keys with indexes in the half-open range [start, end).
  * Returns the columns in key attribute order.
  */
MutableColumns deserializeColumnsFromKeys(
const DictionaryStructure & dictionary_structure,
const PaddedPODArray<StringRef> & keys,
size_t start,
size_t end);
/** Deserialize columns with type and name from keys array using dictionary structure.
  * Deserializes the keys with indexes in the half-open range [start, end) and attaches to each
  * resulting column the type and name of the corresponding key attribute of dictionary_structure.
  */
ColumnsWithTypeAndName deserializeColumnsWithTypeAndNameFromKeys(
const DictionaryStructure & dictionary_structure,
const PaddedPODArray<StringRef> & keys,
size_t start,
size_t end);
/** Merge block with blocks from stream. If there are duplicate keys in block they are filtered out. /** Merge block with blocks from stream. If there are duplicate keys in block they are filtered out.
* In result block_to_update will be merged with blocks from stream. * In result block_to_update will be merged with blocks from stream.
* Note: readPrefix readImpl readSuffix will be called on stream object during function execution. * Note: readPrefix readImpl readSuffix will be called on stream object during function execution.

View File

@ -29,7 +29,7 @@ DictionarySourceData::DictionarySourceData(
, key_type(DictionaryInputStreamKeyType::ComplexKey) , key_type(DictionaryInputStreamKeyType::ComplexKey)
{ {
const DictionaryStructure & dictionary_structure = dictionary->getStructure(); const DictionaryStructure & dictionary_structure = dictionary->getStructure();
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns); key_columns = deserializeColumnsWithTypeAndNameFromKeys(dictionary_structure, keys, 0, keys.size());
} }
DictionarySourceData::DictionarySourceData( DictionarySourceData::DictionarySourceData(
@ -158,32 +158,4 @@ Block DictionarySourceData::fillBlock(
return Block(block_columns); return Block(block_columns);
} }
void DictionarySourceData::fillKeyColumns(
const PaddedPODArray<StringRef> & keys,
size_t start,
size_t size,
const DictionaryStructure & dictionary_structure,
ColumnsWithTypeAndName & result)
{
MutableColumns columns;
columns.reserve(dictionary_structure.key->size());
for (const DictionaryAttribute & attribute : *dictionary_structure.key)
columns.emplace_back(attribute.type->createColumn());
for (size_t index = start; index < size; ++index)
{
const auto & key = keys[index];
const auto *ptr = key.data;
for (auto & column : columns)
ptr = column->deserializeAndInsertFromArena(ptr);
}
for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i)
{
const auto & dictionary_attribute = (*dictionary_structure.key)[i];
result.emplace_back(ColumnWithTypeAndName{std::move(columns[i]), dictionary_attribute.type, dictionary_attribute.name});
}
}
} }

View File

@ -51,13 +51,6 @@ private:
const DataTypes & types, const DataTypes & types,
ColumnsWithTypeAndName && view) const; ColumnsWithTypeAndName && view) const;
static void fillKeyColumns(
const PaddedPODArray<StringRef> & keys,
size_t start,
size_t size,
const DictionaryStructure & dictionary_structure,
ColumnsWithTypeAndName & result);
const size_t num_rows; const size_t num_rows;
std::shared_ptr<const IDictionary> dictionary; std::shared_ptr<const IDictionary> dictionary;
std::unordered_set<std::string> column_names; std::unordered_set<std::string> column_names;

View File

@ -134,42 +134,11 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
if (id->name.empty()) if (id->name.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'id' cannot be empty"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "'id' cannot be empty");
const char * range_default_type = "Date"; if (!id->expression.empty())
if (config.has(structure_prefix + ".range_min"))
range_min.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_min", range_default_type));
if (config.has(structure_prefix + ".range_max"))
range_max.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_max", range_default_type));
if (range_min.has_value() != range_max.has_value())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary structure should have both 'range_min' and 'range_max' either specified or not.");
}
if (range_min && range_max && !range_min->type->equals(*range_max->type))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary structure 'range_min' and 'range_max' should have same type, "
"'range_min' type: {},"
"'range_max' type: {}",
range_min->type->getName(),
range_max->type->getName());
}
if (range_min)
{
if (!range_min->type->isValueRepresentedByInteger())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
" Actual 'range_min' and 'range_max' type is {}",
range_min->type->getName());
}
if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))
has_expressions = true; has_expressions = true;
} }
parseRangeConfiguration(config, structure_prefix);
attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false); attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false);
for (size_t i = 0; i < attributes.size(); ++i) for (size_t i = 0; i < attributes.size(); ++i)
@ -439,4 +408,42 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
return res_attributes; return res_attributes;
} }
/** Parse the optional 'range_min' and 'range_max' special attributes located under structure_prefix.
  * Each attribute is read only if its config key exists; "Date" is passed as the default type.
  * Throws BAD_ARGUMENTS if only one of the two is specified, if their types differ,
  * or if the type is not represented by an integer.
  * Sets has_expressions if any of the two attributes has a non-empty expression.
  */
void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix)
{
    const char * range_default_type = "Date";

    if (config.has(structure_prefix + ".range_min"))
        range_min.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_min", range_default_type));

    if (config.has(structure_prefix + ".range_max"))
        range_max.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_max", range_default_type));

    if (range_min.has_value() != range_max.has_value())
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Dictionary structure should have both 'range_min' and 'range_max' either specified or not.");
    }

    /// After the check above both attributes are either present or absent.
    if (range_min)
    {
        if (!range_min->type->equals(*range_max->type))
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Dictionary structure 'range_min' and 'range_max' should have same type, "
                "'range_min' type: {}, "
                "'range_max' type: {}",
                range_min->type->getName(),
                range_max->type->getName());
        }

        if (!range_min->type->isValueRepresentedByInteger())
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
                " Actual 'range_min' and 'range_max' type is {}",
                range_min->type->getName());

        if (!range_min->expression.empty() || !range_max->expression.empty())
            has_expressions = true;
    }
}
} }

View File

@ -67,8 +67,9 @@ using DictionaryLifetime = ExternalLoadableLifetime;
* - null_value, used as a default value for non-existent entries in the dictionary, * - null_value, used as a default value for non-existent entries in the dictionary,
* decimal representation for numeric attributes; * decimal representation for numeric attributes;
* - hierarchical, whether this attribute defines a hierarchy; * - hierarchical, whether this attribute defines a hierarchy;
* - injective, whether the mapping to parent is injective (can be used for optimization of GROUP BY?) * - injective, whether the mapping to parent is injective (can be used for optimization of GROUP BY?);
* - is_object_id, used in mongo dictionary, converts string key to objectid * - is_object_id, used in mongo dictionary, converts string key to objectid;
* - is_nullable, is attribute nullable;
*/ */
struct DictionaryAttribute final struct DictionaryAttribute final
{ {
@ -153,6 +154,10 @@ private:
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
bool complex_key_attributes); bool complex_key_attributes);
/// parse range_min and range_max
void parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix);
}; };
} }

View File

@ -133,6 +133,29 @@ void ExternalQueryBuilder::composeLoadAllQuery(WriteBuffer & out) const
writeQuoted(key.name, out); writeQuoted(key.name, out);
} }
if (dict_struct.range_min && dict_struct.range_max)
{
writeString(", ", out);
if (!dict_struct.range_min->expression.empty())
{
writeParenthesisedString(dict_struct.range_min->expression, out);
writeString(" AS ", out);
}
writeQuoted(dict_struct.range_min->name, out);
writeString(", ", out);
if (!dict_struct.range_max->expression.empty())
{
writeParenthesisedString(dict_struct.range_max->expression, out);
writeString(" AS ", out);
}
writeQuoted(dict_struct.range_max->name, out);
}
} }
for (const auto & attr : dict_struct.attributes) for (const auto & attr : dict_struct.attributes)

View File

@ -64,7 +64,7 @@ public:
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; return dict_struct.getAttribute(attribute_name).injective;
} }
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; } DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }

View File

@ -14,170 +14,213 @@
namespace DB namespace DB
{ {
template <typename RangeType> enum class RangeDictionaryType
{
simple,
complex
};
template <RangeDictionaryType range_dictionary_type, typename RangeType>
class RangeDictionarySourceData class RangeDictionarySourceData
{ {
public: public:
using Key = UInt64;
using KeyType = std::conditional_t<range_dictionary_type == RangeDictionaryType::simple, UInt64, StringRef>;
RangeDictionarySourceData( RangeDictionarySourceData(
std::shared_ptr<const IDictionary> dictionary, std::shared_ptr<const IDictionary> dictionary,
const Names & column_names, const Names & column_names,
PaddedPODArray<Key> && ids_to_fill, PaddedPODArray<KeyType> && keys,
PaddedPODArray<RangeType> && start_dates, PaddedPODArray<RangeType> && start_dates,
PaddedPODArray<RangeType> && end_dates); PaddedPODArray<RangeType> && end_dates);
Block getBlock(size_t start, size_t length) const; Block getBlock(size_t start, size_t length) const;
size_t getNumRows() const { return ids.size(); } size_t getNumRows() const { return keys.size(); }
private: private:
Block fillBlock( Block fillBlock(
const PaddedPODArray<Key> & ids_to_fill, const PaddedPODArray<KeyType> & keys_to_fill,
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const; const PaddedPODArray<RangeType> & block_end_dates,
size_t start,
size_t end) const;
PaddedPODArray<Int64> makeDateKey( PaddedPODArray<Int64> makeDateKeys(
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const; const PaddedPODArray<RangeType> & block_end_dates) const;
std::shared_ptr<const IDictionary> dictionary; std::shared_ptr<const IDictionary> dictionary;
NameSet column_names; NameSet column_names;
PaddedPODArray<Key> ids; PaddedPODArray<KeyType> keys;
PaddedPODArray<RangeType> start_dates; PaddedPODArray<RangeType> start_dates;
PaddedPODArray<RangeType> end_dates; PaddedPODArray<RangeType> end_dates;
}; };
template <typename RangeType> template <RangeDictionaryType range_dictionary_type, typename RangeType>
RangeDictionarySourceData<RangeType>::RangeDictionarySourceData( RangeDictionarySourceData<range_dictionary_type, RangeType>::RangeDictionarySourceData(
std::shared_ptr<const IDictionary> dictionary_, std::shared_ptr<const IDictionary> dictionary_,
const Names & column_names_, const Names & column_names_,
PaddedPODArray<Key> && ids_, PaddedPODArray<KeyType> && keys,
PaddedPODArray<RangeType> && block_start_dates, PaddedPODArray<RangeType> && block_start_dates,
PaddedPODArray<RangeType> && block_end_dates) PaddedPODArray<RangeType> && block_end_dates)
: dictionary(dictionary_) : dictionary(dictionary_)
, column_names(column_names_.begin(), column_names_.end()) , column_names(column_names_.begin(), column_names_.end())
, ids(std::move(ids_)) , keys(std::move(keys))
, start_dates(std::move(block_start_dates)) , start_dates(std::move(block_start_dates))
, end_dates(std::move(block_end_dates)) , end_dates(std::move(block_end_dates))
{ {
} }
/// Builds a block for rows [start, start + length) of the stored keys and ranges.
/// The requested slice of keys, range starts and range ends is copied into temporary
/// arrays that are passed to fillBlock together with the slice bounds.
template <RangeDictionaryType range_dictionary_type, typename RangeType>
Block RangeDictionarySourceData<range_dictionary_type, RangeType>::getBlock(size_t start, size_t length) const
{
    PaddedPODArray<KeyType> block_keys;
    PaddedPODArray<RangeType> block_start_dates;
    PaddedPODArray<RangeType> block_end_dates;

    block_keys.reserve(length);
    block_start_dates.reserve(length);
    block_end_dates.reserve(length);

    const size_t end = start + length;

    for (size_t index = start; index < end; ++index)
    {
        /// Read from the stored `keys` member: `block_keys` is the destination being filled
        /// and holds no element at `index` yet.
        block_keys.push_back(keys[index]);
        block_start_dates.push_back(start_dates[index]);
        block_end_dates.push_back(end_dates[index]);
    }

    return fillBlock(block_keys, block_start_dates, block_end_dates, start, end);
}
template <typename RangeType> template <RangeDictionaryType range_dictionary_type, typename RangeType>
PaddedPODArray<Int64> RangeDictionarySourceData<RangeType>::makeDateKey( PaddedPODArray<Int64> RangeDictionarySourceData<range_dictionary_type, RangeType>::makeDateKeys(
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const
{
PaddedPODArray<Int64> key(block_start_dates.size());
for (size_t i = 0; i < key.size(); ++i)
{
if (RangeHashedDictionary::Range::isCorrectDate(block_start_dates[i]))
key[i] = block_start_dates[i];
else
key[i] = block_end_dates[i];
}
return key;
}
template <typename RangeType>
Block RangeDictionarySourceData<RangeType>::fillBlock(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const const PaddedPODArray<RangeType> & block_end_dates) const
{
PaddedPODArray<Int64> keys(block_start_dates.size());
for (size_t i = 0; i < keys.size(); ++i)
{
if (Range::isCorrectDate(block_start_dates[i]))
keys[i] = block_start_dates[i];
else
keys[i] = block_end_dates[i];
}
return keys;
}
/** Builds a block that contains the requested columns (those listed in column_names):
  * key columns, 'range_min' and 'range_max' columns and attribute columns.
  *
  * keys_to_fill, block_start_dates, block_end_dates - slice of data for the block.
  * start, end - bounds of the same slice inside the stored `keys` member; they are used
  * only for complex keys, where key columns are deserialized directly from `keys`.
  *
  * Attribute values are requested from the dictionary with the key columns plus a date
  * column produced by makeDateKeys.
  */
template <RangeDictionaryType range_dictionary_type, typename RangeType>
Block RangeDictionarySourceData<range_dictionary_type, RangeType>::fillBlock(
    const PaddedPODArray<KeyType> & keys_to_fill,
    const PaddedPODArray<RangeType> & block_start_dates,
    const PaddedPODArray<RangeType> & block_end_dates,
    size_t start,
    size_t end) const
{
    ColumnsWithTypeAndName columns;
    const DictionaryStructure & dictionary_structure = dictionary->getStructure();

    DataTypes keys_types;
    Columns keys_columns;
    Strings keys_names = dictionary_structure.getKeysNames();

    if constexpr (range_dictionary_type == RangeDictionaryType::simple)
    {
        keys_columns = {getColumnFromPODArray(keys_to_fill)};
        keys_types = {std::make_shared<DataTypeUInt64>()};
    }
    else
    {
        for (const auto & attribute : *dictionary_structure.key)
            keys_types.emplace_back(attribute.type);

        auto deserialized_columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end);
        for (auto & deserialized_column : deserialized_columns)
            keys_columns.emplace_back(std::move(deserialized_column));
    }

    size_t keys_size = keys_names.size();

    assert(keys_columns.size() == keys_size);
    assert(keys_types.size() == keys_size);

    for (size_t i = 0; i < keys_size; ++i)
    {
        const auto & key_name = keys_names[i];

        if (column_names.find(key_name) != column_names.end())
            columns.emplace_back(keys_columns[i], keys_types[i], key_name);
    }

    /// The date key is appended after the key columns and is used only for attribute lookup below.
    auto date_key = makeDateKeys(block_start_dates, block_end_dates);
    auto date_column = getColumnFromPODArray(date_key);

    keys_columns.emplace_back(std::move(date_column));
    keys_types.emplace_back(std::make_shared<DataTypeInt64>());

    const auto & range_min_column_name = dictionary_structure.range_min->name;
    if (column_names.find(range_min_column_name) != column_names.end())
    {
        auto range_min_column = getColumnFromPODArray(block_start_dates);
        columns.emplace_back(range_min_column, dictionary_structure.range_min->type, range_min_column_name);
    }

    const auto & range_max_column_name = dictionary_structure.range_max->name;
    if (column_names.find(range_max_column_name) != column_names.end())
    {
        auto range_max_column = getColumnFromPODArray(block_end_dates);
        columns.emplace_back(range_max_column, dictionary_structure.range_max->type, range_max_column_name);
    }

    size_t attributes_size = dictionary_structure.attributes.size();
    for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
    {
        const auto & attribute = dictionary_structure.attributes[attribute_index];

        /// Skip attributes that were not requested.
        if (column_names.find(attribute.name) == column_names.end())
            continue;

        auto column = dictionary->getColumn(
            attribute.name,
            attribute.type,
            keys_columns,
            keys_types,
            nullptr /* default_values_column*/);

        columns.emplace_back(std::move(column), attribute.type, attribute.name);
    }

    return Block(columns);
}
/* template <RangeDictionaryType range_dictionary_type, typename RangeType>
* BlockInputStream implementation for external dictionaries
* read() returns single block consisting of the in-memory contents of the dictionaries
*/
template <typename RangeType>
class RangeDictionarySource : public DictionarySourceBase class RangeDictionarySource : public DictionarySourceBase
{ {
public: public:
using Key = UInt64;
RangeDictionarySource(RangeDictionarySourceData<RangeType> data_, size_t max_block_size); RangeDictionarySource(RangeDictionarySourceData<range_dictionary_type, RangeType> data_, size_t max_block_size);
String getName() const override { return "RangeDictionarySource"; } String getName() const override { return "RangeDictionarySource"; }
protected: protected:
Block getBlock(size_t start, size_t length) const override; Block getBlock(size_t start, size_t length) const override;
RangeDictionarySourceData<RangeType> data; RangeDictionarySourceData<range_dictionary_type, RangeType> data;
}; };
template <typename RangeType> template <RangeDictionaryType range_dictionary_type, typename RangeType>
RangeDictionarySource<RangeType>::RangeDictionarySource(RangeDictionarySourceData<RangeType> data_, size_t max_block_size) RangeDictionarySource<range_dictionary_type, RangeType>::RangeDictionarySource(RangeDictionarySourceData<range_dictionary_type, RangeType> data_, size_t max_block_size)
: DictionarySourceBase(data_.getBlock(0, 0), data_.getNumRows(), max_block_size) : DictionarySourceBase(data_.getBlock(0, 0), data_.getNumRows(), max_block_size)
, data(std::move(data_)) , data(std::move(data_))
{ {
} }
template <typename RangeType> template <RangeDictionaryType range_dictionary_type, typename RangeType>
Block RangeDictionarySource<RangeType>::getBlock(size_t start, size_t length) const Block RangeDictionarySource<range_dictionary_type, RangeType>::getBlock(size_t start, size_t length) const
{ {
return data.getBlock(start, length); return data.getBlock(start, length);
} }

View File

@ -10,7 +10,8 @@
namespace namespace
{ {
using RangeStorageType = DB::RangeHashedDictionary::RangeStorageType;
using RangeStorageType = DB::RangeStorageType;
// Null values mean that specified boundary, either min or max is not set on range. // Null values mean that specified boundary, either min or max is not set on range.
// To simplify comparison, null value of min bound should be bigger than any other value, // To simplify comparison, null value of min bound should be bigger than any other value,
@ -25,7 +26,7 @@ RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t i
return default_value; return default_value;
const RangeStorageType result = static_cast<RangeStorageType>(column.getInt(index)); const RangeStorageType result = static_cast<RangeStorageType>(column.getInt(index));
if (isDate && !DB::RangeHashedDictionary::Range::isCorrectDate(result)) if (isDate && !DB::Range::isCorrectDate(result))
return default_value; return default_value;
return result; return result;
@ -54,23 +55,23 @@ namespace ErrorCodes
extern const int UNSUPPORTED_METHOD; extern const int UNSUPPORTED_METHOD;
} }
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date) bool Range::isCorrectDate(const RangeStorageType & date)
{ {
return 0 < date && date <= DATE_LUT_MAX_DAY_NUM; return 0 < date && date <= DATE_LUT_MAX_DAY_NUM;
} }
bool RangeHashedDictionary::Range::contains(const RangeStorageType & value) const bool Range::contains(const RangeStorageType & value) const
{ {
return left <= value && value <= right; return left <= value && value <= right;
} }
static bool operator<(const RangeHashedDictionary::Range & left, const RangeHashedDictionary::Range & right) static bool operator<(const Range & left, const Range & right)
{ {
return std::tie(left.left, left.right) < std::tie(right.left, right.right); return std::tie(left.left, left.right) < std::tie(right.left, right.right);
} }
template <DictionaryKeyType dictionary_key_type>
RangeHashedDictionary::RangeHashedDictionary( RangeHashedDictionary<dictionary_key_type>::RangeHashedDictionary(
const StorageID & dict_id_, const StorageID & dict_id_,
const DictionaryStructure & dict_struct_, const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_, DictionarySourcePtr source_ptr_,
@ -87,7 +88,8 @@ RangeHashedDictionary::RangeHashedDictionary(
calculateBytesAllocated(); calculateBytesAllocated();
} }
ColumnPtr RangeHashedDictionary::getColumn( template <DictionaryKeyType dictionary_key_type>
ColumnPtr RangeHashedDictionary<dictionary_key_type>::getColumn(
const std::string & attribute_name, const std::string & attribute_name,
const DataTypePtr & result_type, const DataTypePtr & result_type,
const Columns & key_columns, const Columns & key_columns,
@ -96,20 +98,18 @@ ColumnPtr RangeHashedDictionary::getColumn(
{ {
ColumnPtr result; ColumnPtr result;
const auto & attribute = getAttribute(attribute_name);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type); const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
auto keys_size = key_columns.front()->size(); const auto & attribute = attributes[attribute_index];
/// Cast second column to storage type /// Cast second column to storage type
Columns modified_key_columns = key_columns; Columns modified_key_columns = key_columns;
auto range_storage_column = key_columns.back();
auto range_storage_column = key_columns[1]; ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""};
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
auto range_column_storage_type = std::make_shared<DataTypeInt64>(); auto range_column_storage_type = std::make_shared<DataTypeInt64>();
modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type); modified_key_columns.back() = castColumnAccurate(column_to_cast, range_column_storage_type);
size_t keys_size = key_columns.front()->size();
bool is_attribute_nullable = attribute.is_nullable; bool is_attribute_nullable = attribute.is_nullable;
ColumnUInt8::MutablePtr col_null_map_to; ColumnUInt8::MutablePtr col_null_map_to;
@ -204,24 +204,26 @@ ColumnPtr RangeHashedDictionary::getColumn(
return result; return result;
} }
ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const template <DictionaryKeyType dictionary_key_type>
ColumnUInt8::Ptr RangeHashedDictionary<dictionary_key_type>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{ {
auto range_storage_column = key_columns[1];
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
auto range_column_storage_type = std::make_shared<DataTypeInt64>(); auto range_column_storage_type = std::make_shared<DataTypeInt64>();
auto range_storage_column = key_columns.back();
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
auto range_column_updated = castColumnAccurate(column_to_cast, range_column_storage_type); auto range_column_updated = castColumnAccurate(column_to_cast, range_column_storage_type);
PaddedPODArray<UInt64> key_backup_storage;
PaddedPODArray<RangeStorageType> range_backup_storage; PaddedPODArray<RangeStorageType> range_backup_storage;
const PaddedPODArray<UInt64> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, range_column_updated, range_backup_storage); const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, range_column_updated, range_backup_storage);
auto key_columns_copy = key_columns;
key_columns_copy.pop_back();
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
const size_t keys_size = keys_extractor.getKeysSize();
const auto & attribute = attributes.front(); const auto & attribute = attributes.front();
ColumnUInt8::Ptr result; auto result = ColumnUInt8::create(keys_size);
auto & out = result->getData();
size_t keys_found = 0; size_t keys_found = 0;
auto type_call = [&](const auto & dictionary_attribute_type) auto type_call = [&](const auto & dictionary_attribute_type)
@ -229,58 +231,48 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, con
using Type = std::decay_t<decltype(dictionary_attribute_type)>; using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType; using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>; using ValueType = DictionaryValueType<AttributeType>;
result = hasKeysImpl<ValueType>(attribute, ids, dates, keys_found);
const auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
const auto key = keys_extractor.extractCurrentKey();
const auto it = collection.find(key);
if (it)
{
const auto date = dates[key_index];
const auto & ranges_and_values = it->getMapped();
const auto val_it = std::find_if(
std::begin(ranges_and_values),
std::end(ranges_and_values),
[date](const Value<ValueType> & v)
{
return v.range.contains(date);
});
out[key_index] = val_it != std::end(ranges_and_values);
keys_found += out[key_index];
}
else
{
out[key_index] = false;
}
keys_extractor.rollbackCurrentKey();
}
}; };
callOnDictionaryAttributeType(attribute.type, type_call); callOnDictionaryAttributeType(attribute.type, type_call);
query_count.fetch_add(ids.size(), std::memory_order_relaxed); query_count.fetch_add(keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result; return result;
} }
template <typename AttributeType> template <DictionaryKeyType dictionary_key_type>
ColumnUInt8::Ptr RangeHashedDictionary::hasKeysImpl( void RangeHashedDictionary<dictionary_key_type>::createAttributes()
const Attribute & attribute,
const PaddedPODArray<UInt64> & ids,
const PaddedPODArray<RangeStorageType> & dates,
size_t & keys_found) const
{
auto result = ColumnUInt8::create(ids.size());
auto& out = result->getData();
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
keys_found = 0;
for (const auto row : collections::range(0, ids.size()))
{
const auto it = attr.find(ids[row]);
if (it)
{
const auto date = dates[row];
const auto & ranges_and_values = it->getMapped();
const auto val_it = std::find_if(
std::begin(ranges_and_values),
std::end(ranges_and_values),
[date](const Value<AttributeType> & v)
{
return v.range.contains(date);
});
out[row] = val_it != std::end(ranges_and_values);
keys_found += out[row];
}
else
out[row] = false;
}
return result;
}
void RangeHashedDictionary::createAttributes()
{ {
const auto size = dict_struct.attributes.size(); const auto size = dict_struct.attributes.size();
attributes.reserve(size); attributes.reserve(size);
@ -296,7 +288,8 @@ void RangeHashedDictionary::createAttributes()
} }
} }
void RangeHashedDictionary::loadData() template <DictionaryKeyType dictionary_key_type>
void RangeHashedDictionary<dictionary_key_type>::loadData()
{ {
QueryPipeline pipeline; QueryPipeline pipeline;
pipeline.init(source_ptr->loadAll()); pipeline.init(source_ptr->loadAll());
@ -305,38 +298,57 @@ void RangeHashedDictionary::loadData()
Block block; Block block;
while (executor.pull(block)) while (executor.pull(block))
{ {
const auto & id_column = *block.safeGetByPosition(0).column; size_t skip_keys_size_offset = dict_struct.getKeysSize();
Columns key_columns;
key_columns.reserve(skip_keys_size_offset);
/// Split into keys columns and attribute columns
for (size_t i = 0; i < skip_keys_size_offset; ++i)
key_columns.emplace_back(block.safeGetByPosition(i).column);
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns, arena_holder.getComplexKeyArena());
const size_t keys_size = keys_extractor.getKeysSize();
element_count += keys_size;
// Support old behaviour, where invalid date means 'open range'. // Support old behaviour, where invalid date means 'open range'.
const bool is_date = isDate(dict_struct.range_min->type); const bool is_date = isDate(dict_struct.range_min->type);
const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(1).column); const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column);
const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(2).column); const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column);
element_count += id_column.size(); skip_keys_size_offset += 2;
for (const auto attribute_idx : collections::range(0, attributes.size())) for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
{ {
const auto & attribute_column = *block.safeGetByPosition(attribute_idx + 3).column; const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column;
auto & attribute = attributes[attribute_idx]; auto & attribute = attributes[attribute_index];
for (const auto row_idx : collections::range(0, id_column.size())) for (size_t key_index = 0; key_index < keys_size; ++key_index)
{ {
auto key = keys_extractor.extractCurrentKey();
RangeStorageType lower_bound; RangeStorageType lower_bound;
RangeStorageType upper_bound; RangeStorageType upper_bound;
if (is_date) if (is_date)
{ {
lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, 0); lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0);
upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, DATE_LUT_MAX_DAY_NUM + 1); upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1);
} }
else else
{ {
lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, RANGE_MIN_NULL_VALUE); lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE);
upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, RANGE_MAX_NULL_VALUE); upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE);
} }
setAttributeValue(attribute, id_column.getUInt(row_idx), Range{lower_bound, upper_bound}, attribute_column[row_idx]); if constexpr (std::is_same_v<KeyType, StringRef>)
key = copyKeyInArena(key);
setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]);
keys_extractor.rollbackCurrentKey();
} }
} }
} }
@ -346,22 +358,8 @@ void RangeHashedDictionary::loadData()
"{}: dictionary source is empty and 'require_nonempty' property is set."); "{}: dictionary source is empty and 'require_nonempty' property is set.");
} }
template <typename T> template <DictionaryKeyType dictionary_key_type>
void RangeHashedDictionary::addAttributeSize(const Attribute & attribute) void RangeHashedDictionary<dictionary_key_type>::calculateBytesAllocated()
{
const auto & map_ref = std::get<Ptr<T>>(attribute.maps);
bytes_allocated += sizeof(Collection<T>) + map_ref->getBufferSizeInBytes();
bucket_count = map_ref->getBufferSizeInCells();
}
template <>
void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
}
void RangeHashedDictionary::calculateBytesAllocated()
{ {
bytes_allocated += attributes.size() * sizeof(attributes.front()); bytes_allocated += attributes.size() * sizeof(attributes.front());
@ -371,14 +369,25 @@ void RangeHashedDictionary::calculateBytesAllocated()
{ {
using Type = std::decay_t<decltype(dictionary_attribute_type)>; using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType; using AttributeType = typename Type::AttributeType;
addAttributeSize<AttributeType>(attribute); using ValueType = DictionaryValueType<AttributeType>;
const auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
bytes_allocated += sizeof(CollectionType<ValueType>) + collection.getBufferSizeInBytes();
bucket_count = collection.getBufferSizeInCells();
if constexpr (std::is_same_v<ValueType, StringRef>)
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
}; };
callOnDictionaryAttributeType(attribute.type, type_call); callOnDictionaryAttributeType(attribute.type, type_call);
} }
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
bytes_allocated += complex_key_arena.size();
} }
RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute) template <DictionaryKeyType dictionary_key_type>
typename RangeHashedDictionary<dictionary_key_type>::Attribute RangeHashedDictionary<dictionary_key_type>::createAttribute(const DictionaryAttribute & dictionary_attribute)
{ {
Attribute attribute{dictionary_attribute.underlying_type, dictionary_attribute.is_nullable, {}, {}}; Attribute attribute{dictionary_attribute.underlying_type, dictionary_attribute.is_nullable, {}, {}};
@ -391,7 +400,7 @@ RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const Di
if constexpr (std::is_same_v<AttributeType, String>) if constexpr (std::is_same_v<AttributeType, String>)
attribute.string_arena = std::make_unique<Arena>(); attribute.string_arena = std::make_unique<Arena>();
attribute.maps = std::make_unique<Collection<ValueType>>(); attribute.maps = CollectionType<ValueType>();
}; };
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call); callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
@ -399,29 +408,35 @@ RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const Di
return attribute; return attribute;
} }
template <DictionaryKeyType dictionary_key_type>
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor> template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void RangeHashedDictionary::getItemsImpl( void RangeHashedDictionary<dictionary_key_type>::getItemsImpl(
const Attribute & attribute, const Attribute & attribute,
const Columns & key_columns, const Columns & key_columns,
ValueSetter && set_value, ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const DefaultValueExtractor & default_value_extractor) const
{ {
PaddedPODArray<UInt64> key_backup_storage; const auto & collection = std::get<CollectionType<AttributeType>>(attribute.maps);
PaddedPODArray<RangeStorageType> range_backup_storage;
const PaddedPODArray<UInt64> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, key_columns[1], range_backup_storage);
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
size_t keys_found = 0; size_t keys_found = 0;
for (const auto row : collections::range(0, ids.size())) PaddedPODArray<RangeStorageType> range_backup_storage;
const auto & dates = getColumnVectorData(this, key_columns.back(), range_backup_storage);
auto key_columns_copy = key_columns;
key_columns_copy.pop_back();
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
const size_t keys_size = keys_extractor.getKeysSize();
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{ {
const auto it = attr.find(ids[row]); auto key = keys_extractor.extractCurrentKey();
const auto it = collection.find(key);
if (it) if (it)
{ {
const auto date = dates[row]; const auto date = dates[key_index];
const auto & ranges_and_values = it->getMapped(); const auto & ranges_and_values = it->getMapped();
const auto val_it = std::find_if( const auto val_it = std::find_if(
std::begin(ranges_and_values), std::begin(ranges_and_values),
@ -439,35 +454,38 @@ void RangeHashedDictionary::getItemsImpl(
if constexpr (is_nullable) if constexpr (is_nullable)
{ {
if (value.has_value()) if (value.has_value())
set_value(row, *value, false); set_value(key_index, *value, false);
else else
set_value(row, default_value_extractor[row], true); set_value(key_index, default_value_extractor[key_index], true);
} }
else else
{ {
set_value(row, *value, false); set_value(key_index, *value, false);
} }
keys_extractor.rollbackCurrentKey();
continue; continue;
} }
} }
if constexpr (is_nullable) if constexpr (is_nullable)
set_value(row, default_value_extractor[row], default_value_extractor.isNullAt(row)); set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
else else
set_value(row, default_value_extractor[row], false); set_value(key_index, default_value_extractor[key_index], false);
keys_extractor.rollbackCurrentKey();
} }
query_count.fetch_add(ids.size(), std::memory_order_relaxed); query_count.fetch_add(keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed);
} }
template <DictionaryKeyType dictionary_key_type>
template <typename T> template <typename T>
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const UInt64 id, const Range & range, const Field & value) void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value)
{ {
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>; using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
auto & map = *std::get<Ptr<ValueType>>(attribute.maps); auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
Value<ValueType> value_to_insert; Value<ValueType> value_to_insert;
@ -490,61 +508,47 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const U
} }
} }
const auto it = map.find(id); const auto it = collection.find(key);
if (it) if (it)
{ {
auto & values = it->getMapped(); auto & values = it->getMapped();
const auto insert_it const auto insert_it = std::lower_bound(
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<ValueType> & lhs, const Range & rhs_range) std::begin(values),
{ std::end(values),
return lhs.range < rhs_range; range,
}); [](const Value<ValueType> & lhs, const Range & rhs_range)
{
return lhs.range < rhs_range;
});
values.insert(insert_it, std::move(value_to_insert)); values.insert(insert_it, std::move(value_to_insert));
} }
else else
map.insert({id, Values<ValueType>{std::move(value_to_insert)}}); {
collection.insert({key, Values<ValueType>{std::move(value_to_insert)}});
}
} }
void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const UInt64 id, const Range & range, const Field & value) template <DictionaryKeyType dictionary_key_type>
void RangeHashedDictionary<dictionary_key_type>::setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value)
{ {
auto type_call = [&](const auto &dictionary_attribute_type) auto type_call = [&](const auto &dictionary_attribute_type)
{ {
using Type = std::decay_t<decltype(dictionary_attribute_type)>; using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType; using AttributeType = typename Type::AttributeType;
setAttributeValueImpl<AttributeType>(attribute, id, range, value); setAttributeValueImpl<AttributeType>(attribute, key, range, value);
}; };
callOnDictionaryAttributeType(attribute.type, type_call); callOnDictionaryAttributeType(attribute.type, type_call);
} }
const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const template <DictionaryKeyType dictionary_key_type>
{
const auto it = attribute_index_by_name.find(attribute_name);
if (it == std::end(attribute_index_by_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: no such attribute '{}'", full_name, attribute_name);
return attributes[it->second];
}
const RangeHashedDictionary::Attribute &
RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name, const AttributeUnderlyingType type) const
{
const auto & attribute = getAttribute(attribute_name);
if (attribute.type != type)
throw Exception(ErrorCodes::TYPE_MISMATCH, "attribute {} has type {}",
attribute_name,
toString(attribute.type));
return attribute;
}
template <typename RangeType> template <typename RangeType>
void RangeHashedDictionary::getIdsAndDates( void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
PaddedPODArray<UInt64> & ids, PaddedPODArray<KeyType> & keys,
PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & start_dates,
PaddedPODArray<RangeType> & end_dates) const PaddedPODArray<RangeType> & end_dates) const
{ {
@ -556,32 +560,33 @@ void RangeHashedDictionary::getIdsAndDates(
using AttributeType = typename Type::AttributeType; using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>; using ValueType = DictionaryValueType<AttributeType>;
getIdsAndDates<ValueType>(attribute, ids, start_dates, end_dates); getKeysAndDates<ValueType>(attribute, keys, start_dates, end_dates);
}; };
callOnDictionaryAttributeType(attribute.type, type_call); callOnDictionaryAttributeType(attribute.type, type_call);
} }
template <DictionaryKeyType dictionary_key_type>
template <typename T, typename RangeType> template <typename T, typename RangeType>
void RangeHashedDictionary::getIdsAndDates( void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
const Attribute & attribute, const Attribute & attribute,
PaddedPODArray<UInt64> & ids, PaddedPODArray<KeyType> & keys,
PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & start_dates,
PaddedPODArray<RangeType> & end_dates) const PaddedPODArray<RangeType> & end_dates) const
{ {
const HashMap<UInt64, Values<T>> & attr = *std::get<Ptr<T>>(attribute.maps); const auto & collection = std::get<CollectionType<T>>(attribute.maps);
ids.reserve(attr.size()); keys.reserve(collection.size());
start_dates.reserve(attr.size()); start_dates.reserve(collection.size());
end_dates.reserve(attr.size()); end_dates.reserve(collection.size());
const bool is_date = isDate(dict_struct.range_min->type); const bool is_date = isDate(dict_struct.range_min->type);
for (const auto & key : attr) for (const auto & key : collection)
{ {
for (const auto & value : key.getMapped()) for (const auto & value : key.getMapped())
{ {
ids.push_back(key.getKey()); keys.push_back(key.getKey());
start_dates.push_back(value.range.left); start_dates.push_back(value.range.left);
end_dates.push_back(value.range.right); end_dates.push_back(value.range.right);
@ -592,22 +597,23 @@ void RangeHashedDictionary::getIdsAndDates(
} }
} }
template <DictionaryKeyType dictionary_key_type>
template <typename RangeType> template <typename RangeType>
Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_block_size) const Pipe RangeHashedDictionary<dictionary_key_type>::readImpl(const Names & column_names, size_t max_block_size) const
{ {
PaddedPODArray<UInt64> ids; PaddedPODArray<KeyType> keys;
PaddedPODArray<RangeType> start_dates; PaddedPODArray<RangeType> start_dates;
PaddedPODArray<RangeType> end_dates; PaddedPODArray<RangeType> end_dates;
getIdsAndDates(ids, start_dates, end_dates); getKeysAndDates(keys, start_dates, end_dates);
using RangeDictionarySourceType = RangeDictionarySource<RangeType>; static constexpr RangeDictionaryType range_dictionary_type = (dictionary_key_type == DictionaryKeyType::simple) ? RangeDictionaryType::simple : RangeDictionaryType::complex;
using RangeDictionarySourceType = RangeDictionarySource<range_dictionary_type, RangeType>;
auto source = std::make_shared<RangeDictionarySourceType>( auto source = std::make_shared<RangeDictionarySourceType>(
RangeDictionarySourceData<RangeType>( RangeDictionarySourceData<range_dictionary_type, RangeType>(
shared_from_this(), shared_from_this(),
column_names, column_names,
std::move(ids), std::move(keys),
std::move(start_dates), std::move(start_dates),
std::move(end_dates)), std::move(end_dates)),
max_block_size); max_block_size);
@ -615,10 +621,21 @@ Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_bloc
return Pipe(source); return Pipe(source);
} }
/// Copies the bytes referenced by `key` into `complex_key_arena` and returns
/// a StringRef that points at the copy (same length as the input).
/// The returned reference is backed by memory allocated from that arena.
template <DictionaryKeyType dictionary_key_type>
StringRef RangeHashedDictionary<dictionary_key_type>::copyKeyInArena(StringRef key)
{
    /// Reserve exactly key.size bytes in the arena for the copy.
    char * const owned_bytes = complex_key_arena.alloc(key.size);
    memcpy(owned_bytes, key.data, key.size);

    return StringRef{owned_bytes, key.size};
}
template <DictionaryKeyType dictionary_key_type>
struct RangeHashedDictionaryCallGetSourceImpl struct RangeHashedDictionaryCallGetSourceImpl
{ {
Pipe pipe; Pipe pipe;
const RangeHashedDictionary * dict; const RangeHashedDictionary<dictionary_key_type> * dict;
const Names * column_names; const Names * column_names;
size_t max_block_size; size_t max_block_size;
@ -627,15 +644,16 @@ struct RangeHashedDictionaryCallGetSourceImpl
{ {
const auto & type = dict->dict_struct.range_min->type; const auto & type = dict->dict_struct.range_min->type;
if (pipe.empty() && dynamic_cast<const DataTypeNumberBase<RangeType> *>(type.get())) if (pipe.empty() && dynamic_cast<const DataTypeNumberBase<RangeType> *>(type.get()))
pipe = dict->readImpl<RangeType>(*column_names, max_block_size); pipe = dict->template readImpl<RangeType>(*column_names, max_block_size);
} }
}; };
Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_size) const template <DictionaryKeyType dictionary_key_type>
Pipe RangeHashedDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size) const
{ {
using ListType = TypeList<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Int128, Float32, Float64>; using ListType = TypeList<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Int128, Float32, Float64>;
RangeHashedDictionaryCallGetSourceImpl callable; RangeHashedDictionaryCallGetSourceImpl<dictionary_key_type> callable;
callable.dict = this; callable.dict = this;
callable.column_names = &column_names; callable.column_names = &column_names;
callable.max_block_size = max_block_size; callable.max_block_size = max_block_size;
@ -653,7 +671,7 @@ Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_si
void registerDictionaryRangeHashed(DictionaryFactory & factory) void registerDictionaryRangeHashed(DictionaryFactory & factory)
{ {
auto create_layout = [=](const std::string & full_name, auto create_layout_simple = [=](const std::string & full_name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
@ -672,9 +690,32 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory)
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
return std::make_unique<RangeHashedDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); return std::make_unique<RangeHashedDictionary<DictionaryKeyType::simple>>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
}; };
factory.registerLayout("range_hashed", create_layout, false); factory.registerLayout("range_hashed", create_layout_simple, false);
auto create_layout_complex = [=](const std::string & full_name,
const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
DictionarySourcePtr source_ptr,
ContextPtr /* context */,
bool /*created_from_ddl*/) -> DictionaryPtr
{
if (dict_struct.id)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for dictionary of layout 'complex_key_range_hashed'");
if (!dict_struct.range_min || !dict_struct.range_max)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"{}: dictionary of layout 'complex_key_range_hashed' requires .structure.range_min and .structure.range_max",
full_name);
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
return std::make_unique<RangeHashedDictionary<DictionaryKeyType::complex>>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
};
factory.registerLayout("complex_key_range_hashed", create_layout_complex, true);
} }
} }

View File

@ -16,9 +16,25 @@
namespace DB namespace DB
{ {
using RangeStorageType = Int64;
/// Pair of range bounds stored alongside each attribute value of a range-hashed dictionary.
/// Lookups test a requested value against it through contains().
struct Range
{
/// Lower bound of the range (exported as the start date when the dictionary is read back).
RangeStorageType left;
/// Upper bound of the range (exported as the end date when the dictionary is read back).
RangeStorageType right;
/// NOTE(review): defined outside this chunk; presumably reports whether `date` is a valid date value — confirm.
static bool isCorrectDate(const RangeStorageType & date);
/// Returns whether `value` belongs to this range.
/// NOTE(review): inclusivity of the bounds is not visible here — confirm against the definition.
bool contains(const RangeStorageType & value) const;
};
template <DictionaryKeyType dictionary_key_type>
class RangeHashedDictionary final : public IDictionary class RangeHashedDictionary final : public IDictionary
{ {
public: public:
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::simple, UInt64, StringRef>;
static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by hashed dictionary");
RangeHashedDictionary( RangeHashedDictionary(
const StorageID & dict_id_, const StorageID & dict_id_,
const DictionaryStructure & dict_struct_, const DictionaryStructure & dict_struct_,
@ -59,7 +75,7 @@ public:
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; return dict_struct.getAttribute(attribute_name).injective;
} }
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; } DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
@ -73,19 +89,8 @@ public:
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override; ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
using RangeStorageType = Int64;
Pipe read(const Names & column_names, size_t max_block_size) const override; Pipe read(const Names & column_names, size_t max_block_size) const override;
struct Range
{
RangeStorageType left;
RangeStorageType right;
static bool isCorrectDate(const RangeStorageType & date);
bool contains(const RangeStorageType & value) const;
};
private: private:
template <typename T> template <typename T>
struct Value final struct Value final
@ -96,10 +101,12 @@ private:
template <typename T> template <typename T>
using Values = std::vector<Value<T>>; using Values = std::vector<Value<T>>;
template <typename T>
using Collection = HashMap<UInt64, Values<T>>; template <typename Value>
template <typename T> using CollectionType = std::conditional_t<
using Ptr = std::unique_ptr<Collection<T>>; dictionary_key_type == DictionaryKeyType::simple,
HashMap<UInt64, Values<Value>>,
HashMapWithSavedHash<StringRef, Values<Value>, DefaultHash<StringRef>>>;
struct Attribute final struct Attribute final
{ {
@ -108,27 +115,27 @@ private:
bool is_nullable; bool is_nullable;
std::variant< std::variant<
Ptr<UInt8>, CollectionType<UInt8>,
Ptr<UInt16>, CollectionType<UInt16>,
Ptr<UInt32>, CollectionType<UInt32>,
Ptr<UInt64>, CollectionType<UInt64>,
Ptr<UInt128>, CollectionType<UInt128>,
Ptr<UInt256>, CollectionType<UInt256>,
Ptr<Int8>, CollectionType<Int8>,
Ptr<Int16>, CollectionType<Int16>,
Ptr<Int32>, CollectionType<Int32>,
Ptr<Int64>, CollectionType<Int64>,
Ptr<Int128>, CollectionType<Int128>,
Ptr<Int256>, CollectionType<Int256>,
Ptr<Decimal32>, CollectionType<Decimal32>,
Ptr<Decimal64>, CollectionType<Decimal64>,
Ptr<Decimal128>, CollectionType<Decimal128>,
Ptr<Decimal256>, CollectionType<Decimal256>,
Ptr<Float32>, CollectionType<Float32>,
Ptr<Float64>, CollectionType<Float64>,
Ptr<UUID>, CollectionType<UUID>,
Ptr<StringRef>, CollectionType<StringRef>,
Ptr<Array>> CollectionType<Array>>
maps; maps;
std::unique_ptr<Arena> string_arena; std::unique_ptr<Arena> string_arena;
}; };
@ -137,9 +144,6 @@ private:
void loadData(); void loadData();
template <typename T>
void addAttributeSize(const Attribute & attribute);
void calculateBytesAllocated(); void calculateBytesAllocated();
static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute); static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute);
@ -151,35 +155,30 @@ private:
ValueSetter && set_value, ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const; DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType>
ColumnUInt8::Ptr hasKeysImpl(
const Attribute & attribute,
const PaddedPODArray<UInt64> & ids,
const PaddedPODArray<RangeStorageType> & dates,
size_t & keys_found) const;
template <typename T> template <typename T>
static void setAttributeValueImpl(Attribute & attribute, const UInt64 id, const Range & range, const Field & value); static void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value);
static void setAttributeValue(Attribute & attribute, const UInt64 id, const Range & range, const Field & value); static void setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value);
const Attribute & getAttribute(const std::string & attribute_name) const;
const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const;
template <typename RangeType> template <typename RangeType>
void getIdsAndDates(PaddedPODArray<UInt64> & ids, PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const; void getKeysAndDates(
PaddedPODArray<KeyType> & keys,
PaddedPODArray<RangeType> & start_dates,
PaddedPODArray<RangeType> & end_dates) const;
template <typename T, typename RangeType> template <typename T, typename RangeType>
void getIdsAndDates( void getKeysAndDates(
const Attribute & attribute, const Attribute & attribute,
PaddedPODArray<UInt64> & ids, PaddedPODArray<KeyType> & keys,
PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & start_dates,
PaddedPODArray<RangeType> & end_dates) const; PaddedPODArray<RangeType> & end_dates) const;
template <typename RangeType> template <typename RangeType>
Pipe readImpl(const Names & column_names, size_t max_block_size) const; Pipe readImpl(const Names & column_names, size_t max_block_size) const;
StringRef copyKeyInArena(StringRef key);
template <DictionaryKeyType>
friend struct RangeHashedDictionaryCallGetSourceImpl; friend struct RangeHashedDictionaryCallGetSourceImpl;
const DictionaryStructure dict_struct; const DictionaryStructure dict_struct;
@ -189,6 +188,7 @@ private:
std::map<std::string, size_t> attribute_index_by_name; std::map<std::string, size_t> attribute_index_by_name;
std::vector<Attribute> attributes; std::vector<Attribute> attributes;
Arena complex_key_arena;
size_t bytes_allocated = 0; size_t bytes_allocated = 0;
size_t element_count = 0; size_t element_count = 0;

View File

@ -6,6 +6,7 @@
namespace DB namespace DB
{ {
using DictionaryConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>; using DictionaryConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
/// Convert dictionary AST to Poco::AbstractConfiguration /// Convert dictionary AST to Poco::AbstractConfiguration
@ -13,4 +14,5 @@ using DictionaryConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfigurati
/// Can throw exception if query is ill-formed /// Can throw exception if query is ill-formed
DictionaryConfigurationPtr DictionaryConfigurationPtr
getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr context, const std::string & database_ = ""); getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr context, const std::string & database_ = "");
} }

View File

@ -250,12 +250,23 @@ public:
} }
else else
{ {
if (!WhichDataType(key_column_type).isUInt64()) /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
throw Exception( ColumnPtr key_column = key_column_with_type.column->convertToFullColumnIfConst();
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, DataTypePtr key_column_type = key_column_with_type.type;
"Second argument of function {} must be UInt64 when dictionary is range. Actual type {}.",
getName(), Columns key_columns;
key_column_with_type.type->getName()); DataTypes key_types;
if (isTuple(key_column_type))
{
key_columns = assert_cast<const ColumnTuple &>(*key_column).getColumnsCopy();
key_types = assert_cast<const DataTypeTuple &>(*key_column_type).getElements();
}
else
{
key_columns = {key_column, range_col};
key_types = {std::make_shared<DataTypeUInt64>(), range_col_type};
}
return dictionary->hasKeys({key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type}); return dictionary->hasKeys({key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type});
} }
@ -487,18 +498,29 @@ public:
} }
else if (dictionary_key_type == DictionaryKeyType::range)
{
    /// Range dictionary request: the key may be a single column (simple key)
    /// or a tuple whose elements are the parts of a complex key.

    /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
    ColumnPtr key_column = key_col_with_type.column->convertToFullColumnIfConst();
    DataTypePtr key_column_type = key_col_with_type.type;

    Columns key_columns;
    DataTypes key_types;

    if (isTuple(key_column_type))
    {
        /// Complex key: every tuple element becomes its own key column.
        key_columns = assert_cast<const ColumnTuple &>(*key_column).getColumnsCopy();
        key_types = assert_cast<const DataTypeTuple &>(*key_column_type).getElements();
    }
    else
    {
        /// Simple key: one column, declared as UInt64.
        /// NOTE(review): the actual key_column_type is no longer checked here - confirm the dictionary validates it.
        key_columns = {key_column};
        key_types = {std::make_shared<DataTypeUInt64>()};
    }

    /// The range column goes last for both key shapes. Previously the tuple branch
    /// omitted it, so complex-key requests carried no range column at all.
    key_columns.emplace_back(range_col);
    key_types.emplace_back(range_col_type);

    result = executeDictionaryRequest(
        dictionary,
        attribute_names,
        key_columns,
        key_types,
        result_type,
        default_cols);
}

View File

@ -0,0 +1,58 @@
Dictionary not nullable
dictGet
0.2
0.2
0.2
0.2
0.4
dictHas
1
1
1
0
select columns from dictionary
allColumns
2019-05-05 2019-05-20 1 1 0.33
2019-05-21 2019-05-30 1 1 0.42
2019-05-21 2019-05-30 2 2 0.46
noColumns
1
1
1
onlySpecificColumns
1 2019-05-05 0.33
1 2019-05-21 0.42
2 2019-05-21 0.46
onlySpecificColumn
0.33
0.42
0.46
Dictionary nullable
dictGet
0.2
0.2
0.2
0.2
0.4
dictHas
1
1
1
0
select columns from dictionary
allColumns
2019-05-05 2019-05-20 1 1 0.33
2019-05-21 2019-05-30 1 1 0.42
2019-05-21 2019-05-30 2 2 \N
noColumns
1
1
1
onlySpecificColumns
1 2019-05-05 0.33
1 2019-05-21 0.42
2 2019-05-21 \N
onlySpecificColumn
0.33
0.42
\N

View File

@ -0,0 +1,109 @@
-- Source table for the dictionary. The dictionary key is (CountryID, CountryKey);
-- each row carries a Tax value for the StartDate..EndDate range.
DROP TABLE IF EXISTS date_table;
CREATE TABLE date_table
(
  CountryID UInt64,
  CountryKey String,
  StartDate Date,
  EndDate Date,
  Tax Float64
)
ENGINE = MergeTree()
ORDER BY CountryID;

INSERT INTO date_table VALUES(1, '1', toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
INSERT INTO date_table VALUES(2, '2', toDate('2019-05-21'), toDate('2019-05-30'), 0.46);

-- Complex-key range dictionary with a non-nullable attribute (default 0.2).
DROP DICTIONARY IF EXISTS range_dictionary;
CREATE DICTIONARY range_dictionary
(
  CountryID UInt64,
  CountryKey String,
  StartDate Date,
  EndDate Date,
  Tax Float64 DEFAULT 0.2
)
PRIMARY KEY CountryID, CountryKey
SOURCE(CLICKHOUSE(TABLE 'date_table'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(COMPLEX_KEY_RANGE_HASHED())
RANGE(MIN StartDate MAX EndDate);

SELECT 'Dictionary not nullable';

-- Key is passed as a tuple, followed by the date to look up.
-- 2019-05-31 lies after every inserted EndDate.
SELECT 'dictGet';
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(1), '1'), toDate('2019-05-15'));
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(1), '1'), toDate('2019-05-29'));
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-29'));
SELECT dictGet('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'));
SELECT dictGetOrDefault('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'), 0.4);

SELECT 'dictHas';
SELECT dictHas('range_dictionary', (toUInt64(1), '1'), toDate('2019-05-15'));
SELECT dictHas('range_dictionary', (toUInt64(1), '1'), toDate('2019-05-29'));
SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-29'));
SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-31'));

-- Reading the dictionary as a table with different column subsets.
SELECT 'select columns from dictionary';
SELECT 'allColumns';
SELECT * FROM range_dictionary;
SELECT 'noColumns';
SELECT 1 FROM range_dictionary;
SELECT 'onlySpecificColumns';
SELECT CountryID, StartDate, Tax FROM range_dictionary;
SELECT 'onlySpecificColumn';
SELECT Tax FROM range_dictionary;

-- Teardown: drop the dictionary first, then the table it reads from.
DROP DICTIONARY range_dictionary;
DROP TABLE date_table;
-- Same scenario with a Nullable attribute; one source row stores NULL in Tax.
CREATE TABLE date_table
(
  CountryID UInt64,
  CountryKey String,
  StartDate Date,
  EndDate Date,
  Tax Nullable(Float64)
)
ENGINE = MergeTree()
ORDER BY CountryID;

INSERT INTO date_table VALUES(1, '1', toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
INSERT INTO date_table VALUES(1, '1', toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
INSERT INTO date_table VALUES(2, '2', toDate('2019-05-21'), toDate('2019-05-30'), NULL);

-- Remove a dictionary left over from an earlier aborted run so CREATE cannot fail on it.
DROP DICTIONARY IF EXISTS range_dictionary_nullable;
CREATE DICTIONARY range_dictionary_nullable
(
  CountryID UInt64,
  CountryKey String,
  StartDate Date,
  EndDate Date,
  Tax Nullable(Float64) DEFAULT 0.2
)
PRIMARY KEY CountryID, CountryKey
SOURCE(CLICKHOUSE(TABLE 'date_table'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(COMPLEX_KEY_RANGE_HASHED())
RANGE(MIN StartDate MAX EndDate);

SELECT 'Dictionary nullable';

-- Key is passed as a tuple, followed by the date to look up.
-- 2019-05-31 lies after every inserted EndDate.
SELECT 'dictGet';
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(1), '1'), toDate('2019-05-15'));
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(1), '1'), toDate('2019-05-29'));
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-29'));
SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'));
SELECT dictGetOrDefault('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'), 0.4);

SELECT 'dictHas';
SELECT dictHas('range_dictionary_nullable', (toUInt64(1), '1'), toDate('2019-05-15'));
SELECT dictHas('range_dictionary_nullable', (toUInt64(1), '1'), toDate('2019-05-29'));
SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-29'));
SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-31'));

-- Reading the dictionary as a table with different column subsets.
SELECT 'select columns from dictionary';
SELECT 'allColumns';
SELECT * FROM range_dictionary_nullable;
SELECT 'noColumns';
SELECT 1 FROM range_dictionary_nullable;
SELECT 'onlySpecificColumns';
SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable;
SELECT 'onlySpecificColumn';
SELECT Tax FROM range_dictionary_nullable;

-- Teardown: drop the dictionary first, then the table it reads from.
DROP DICTIONARY range_dictionary_nullable;
DROP TABLE date_table;