Fixed tests

This commit is contained in:
Maksim Kita 2021-01-31 18:14:26 +03:00 committed by Maksim Kita
parent add444f71c
commit ee8c51f65e
5 changed files with 100 additions and 71 deletions

View File

@ -17,14 +17,14 @@ namespace DB
* BlockInputStream implementation for external dictionaries
* read() returns single block consisting of the in-memory contents of the dictionaries
*/
template <typename DictionaryType, typename RangeType, typename Key>
template <typename RangeType>
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
using Key = UInt64;
RangeDictionaryBlockInputStream(
DictionaryPtr dictionary,
std::shared_ptr<const IDictionaryBase> dictionary,
size_t max_block_size,
const Names & column_names,
PaddedPODArray<Key> && ids_to_fill,
@ -40,35 +40,26 @@ private:
template <typename T>
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
template <typename DictionarySpecialAttributeType, typename T>
void addSpecialColumn(
const std::optional<DictionarySpecialAttributeType> & attribute,
DataTypePtr type,
const std::string & default_name,
const std::unordered_set<std::string> & column_names_set,
const PaddedPODArray<T> & values,
ColumnsWithTypeAndName & columns,
bool force = false) const;
Block fillBlock(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const;
PaddedPODArray<Int64>
makeDateKey(const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const;
PaddedPODArray<Int64> makeDateKey(
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const;
DictionaryPtr dictionary;
Names column_names;
std::shared_ptr<const IDictionaryBase> dictionary;
NameSet column_names;
PaddedPODArray<Key> ids;
PaddedPODArray<RangeType> start_dates;
PaddedPODArray<RangeType> end_dates;
};
template <typename DictionaryType, typename RangeType, typename Key>
RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::RangeDictionaryBlockInputStream(
DictionaryPtr dictionary_,
template <typename RangeType>
RangeDictionaryBlockInputStream<RangeType>::RangeDictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary_,
size_t max_block_size_,
const Names & column_names_,
PaddedPODArray<Key> && ids_,
@ -76,15 +67,15 @@ RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::RangeDictionary
PaddedPODArray<RangeType> && block_end_dates)
: DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
, dictionary(dictionary_)
, column_names(column_names_)
, column_names(column_names_.begin(), column_names_.end())
, ids(std::move(ids_))
, start_dates(std::move(block_start_dates))
, end_dates(std::move(block_end_dates))
{
}
template <typename DictionaryType, typename RangeType, typename Key>
Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(size_t start, size_t length) const
template <typename RangeType>
Block RangeDictionaryBlockInputStream<RangeType>::getBlock(size_t start, size_t length) const
{
PaddedPODArray<Key> block_ids;
PaddedPODArray<RangeType> block_start_dates;
@ -103,38 +94,19 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(
return fillBlock(block_ids, block_start_dates, block_end_dates);
}
template <typename DictionaryType, typename RangeType, typename Key>
template <typename RangeType>
template <typename T>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
ColumnPtr RangeDictionaryBlockInputStream<RangeType>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
{
auto column_vector = ColumnVector<T>::create();
column_vector->getData().reserve(array.size());
for (T value : array)
column_vector->insertValue(value);
column_vector->getData().insert(array.begin(), array.end());
return column_vector;
}
/// Appends one special dictionary column, built from `values`, to `columns`.
/// The column is named after `attribute` when it is set; otherwise `default_name` is used.
/// Nothing is appended unless `force` is true or that name is present in `column_names_set`.
template <typename DictionaryType, typename RangeType, typename Key>
template <typename DictionarySpecialAttributeType, typename T>
void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
    const std::optional<DictionarySpecialAttributeType> & attribute,
    DataTypePtr type,
    const std::string & default_name,
    const std::unordered_set<std::string> & column_names_set,
    const PaddedPODArray<T> & values,
    ColumnsWithTypeAndName & columns,
    bool force) const
{
    /// Prefer the name carried by the attribute; fall back to the supplied default.
    const std::string name = attribute.has_value() ? attribute->name : default_name;

    /// The column is materialized only when it is forced or explicitly requested by name.
    const bool requested = column_names_set.count(name) != 0;
    if (!force && !requested)
        return;

    columns.emplace_back(getColumnFromPODArray(values), type, name);
}
template <typename DictionaryType, typename RangeType, typename Key>
PaddedPODArray<Int64> RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::makeDateKey(
template <typename RangeType>
PaddedPODArray<Int64> RangeDictionaryBlockInputStream<RangeType>::makeDateKey(
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const
{
PaddedPODArray<Int64> key(block_start_dates.size());
@ -150,8 +122,8 @@ PaddedPODArray<Int64> RangeDictionaryBlockInputStream<DictionaryType, RangeType,
}
template <typename DictionaryType, typename RangeType, typename Key>
Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock(
template <typename RangeType>
Block RangeDictionaryBlockInputStream<RangeType>::fillBlock(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const
@ -159,20 +131,32 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
ColumnsWithTypeAndName columns;
const DictionaryStructure & structure = dictionary->getStructure();
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns, true);
auto ids_column = columns.back().column;
addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
auto ids_column = getColumnFromPODArray(ids_to_fill);
const std::string & id_column_name = structure.id->name;
if (column_names.find(id_column_name) != column_names.end())
columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), id_column_name);
auto date_key = makeDateKey(block_start_dates, block_end_dates);
auto date_column = getColumnFromPODArray(date_key);
const std::string & range_min_column_name = structure.range_min->name;
if (column_names.find(range_min_column_name) != column_names.end())
{
auto range_min_column = getColumnFromPODArray(block_start_dates);
columns.emplace_back(range_min_column, structure.range_max->type, range_min_column_name);
}
const std::string & range_max_column_name = structure.range_max->name;
if (column_names.find(range_max_column_name) != column_names.end())
{
auto range_max_column = getColumnFromPODArray(block_end_dates);
columns.emplace_back(range_max_column, structure.range_max->type, range_max_column_name);
}
for (const auto idx : ext::range(0, structure.attributes.size()))
{
const DictionaryAttribute & attribute = structure.attributes[idx];
if (names.find(attribute.name) != names.end())
if (column_names.find(attribute.name) != column_names.end())
{
ColumnPtr column = dictionary->getColumn(
attribute.name,

View File

@ -515,7 +515,9 @@ RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name,
template <typename RangeType>
void RangeHashedDictionary::getIdsAndDates(
PaddedPODArray<Key> & ids, PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const
PaddedPODArray<Key> & ids,
PaddedPODArray<RangeType> & start_dates,
PaddedPODArray<RangeType> & end_dates) const
{
const auto & attribute = attributes.front();
@ -523,11 +525,9 @@ void RangeHashedDictionary::getIdsAndDates(
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
if constexpr (std::is_same_v<AttributeType, String>)
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
else
getIdsAndDates<AttributeType>(attribute, ids, start_dates, end_dates);
getIdsAndDates<ValueType>(attribute, ids, start_dates, end_dates);
};
callOnDictionaryAttributeType(attribute.type, type_call);
@ -571,13 +571,20 @@ BlockInputStreamPtr RangeHashedDictionary::getBlockInputStreamImpl(const Names &
PaddedPODArray<RangeType> end_dates;
getIdsAndDates(ids, start_dates, end_dates);
using BlockInputStreamType = RangeDictionaryBlockInputStream<RangeHashedDictionary, RangeType, Key>;
auto dict_ptr = std::static_pointer_cast<const RangeHashedDictionary>(shared_from_this());
return std::make_shared<BlockInputStreamType>(
dict_ptr, max_block_size, column_names, std::move(ids), std::move(start_dates), std::move(end_dates));
using BlockInputStreamType = RangeDictionaryBlockInputStream<RangeType>;
auto stream = std::make_shared<BlockInputStreamType>(
shared_from_this(),
max_block_size,
column_names,
std::move(ids),
std::move(start_dates),
std::move(end_dates));
return stream;
}
struct RangeHashedDIctionaryCallGetBlockInputStreamImpl
struct RangeHashedDictionaryCallGetBlockInputStreamImpl
{
BlockInputStreamPtr stream;
const RangeHashedDictionary * dict;
@ -597,7 +604,7 @@ BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & col
{
using ListType = TypeList<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Int128, Float32, Float64>;
RangeHashedDIctionaryCallGetBlockInputStreamImpl callable;
RangeHashedDictionaryCallGetBlockInputStreamImpl callable;
callable.dict = this;
callable.column_names = &column_names;
callable.max_block_size = max_block_size;

View File

@ -93,8 +93,6 @@ private:
template <typename T>
using Ptr = std::unique_ptr<Collection<T>>;
using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final
{
public:
@ -187,7 +185,7 @@ private:
template <typename RangeType>
BlockInputStreamPtr getBlockInputStreamImpl(const Names & column_names, size_t max_block_size) const;
friend struct RangeHashedDIctionaryCallGetBlockInputStreamImpl;
friend struct RangeHashedDictionaryCallGetBlockInputStreamImpl;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -11,9 +11,22 @@ dictHas
1
0
select columns from dictionary
allColumns
1 2019-05-05 2019-05-20 0.33
1 2019-05-21 2019-05-30 0.42
2 2019-05-21 2019-05-30 0.46
noColumns
1
1
1
onlySpecificColumns
1 2019-05-05 0.33
1 2019-05-21 0.42
2 2019-05-21 0.46
onlySpecificColumn
0.33
0.42
0.46
Dictionary nullable
dictGet
0.33
@ -27,6 +40,19 @@ dictHas
1
0
select columns from dictionary
allColumns
1 2019-05-05 2019-05-20 0.33
1 2019-05-21 2019-05-30 0.42
2 2019-05-21 2019-05-30 \N
noColumns
1
1
1
onlySpecificColumns
1 2019-05-05 0.33
1 2019-05-21 0.42
2 2019-05-21 \N
onlySpecificColumn
0.33
0.42
\N

View File

@ -42,7 +42,14 @@ SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(1), toDate('
-- dictHas checks for key 2 on two different dates against range_dictionary.
SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-29'));
SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-31'));
-- Section: read the dictionary directly with SELECT ... FROM.
-- Every query below is preceded by a SELECT of a string label, so its rows
-- can be told apart in the test output.
SELECT 'select columns from dictionary';
SELECT 'allColumns';
-- All columns of the dictionary.
SELECT * FROM database_for_range_dict.range_dictionary;
SELECT 'noColumns';
-- Constant expression only: the query names no dictionary column.
SELECT 1 FROM database_for_range_dict.range_dictionary;
SELECT 'onlySpecificColumns';
-- An explicit subset of columns.
-- NOTE(review): presumably the id, the range start and one attribute; the
-- dictionary DDL is outside this view, confirm against it.
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary;
SELECT 'onlySpecificColumn';
-- A single column, without CountryID or StartDate in the select list.
SELECT Tax FROM database_for_range_dict.range_dictionary;
-- Drop the table and the dictionary queried above.
DROP TABLE database_for_range_dict.date_table;
DROP DICTIONARY database_for_range_dict.range_dictionary;
@ -87,7 +94,14 @@ SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(1),
-- dictHas checks for key 2 on two different dates against range_dictionary_nullable.
SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-29'));
SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-31'));
-- Section: read the nullable dictionary directly with SELECT ... FROM.
-- Every query below is preceded by a SELECT of a string label, so its rows
-- can be told apart in the test output.
SELECT 'select columns from dictionary';
SELECT 'allColumns';
-- All columns of the dictionary.
SELECT * FROM database_for_range_dict.range_dictionary_nullable;
SELECT 'noColumns';
-- Constant expression only: the query names no dictionary column.
SELECT 1 FROM database_for_range_dict.range_dictionary_nullable;
SELECT 'onlySpecificColumns';
-- An explicit subset of columns.
-- NOTE(review): presumably the id, the range start and one attribute; the
-- dictionary DDL is outside this view, confirm against it.
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable;
SELECT 'onlySpecificColumn';
-- A single column, without CountryID or StartDate in the select list.
SELECT Tax FROM database_for_range_dict.range_dictionary_nullable;
-- Drop the table and the dictionary queried above.
DROP TABLE database_for_range_dict.date_table;
DROP DICTIONARY database_for_range_dict.range_dictionary_nullable;