#pragma once
// NOTE(review): the targets of the include directives below, and the contents of every
// template parameter/argument list in this file, are absent from the reviewed text.
// They must be restored from the original source before this header can compile.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace DB
{

/*
 * Block input stream over the contents of a range dictionary.
 *
 * The stream owns three parallel arrays (ids, range start dates, range end dates).
 * getBlock(start, length) copies the requested slice of those arrays and turns it
 * into a Block that contains only the columns listed in column_names.
 */
template class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
    // Shared pointer to the dictionary being read ("Dictionaty" spelling is part of the public name).
    using DictionatyPtr = std::shared_ptr;

    /*
     * dictionary     - dictionary the attribute values are read from
     * max_block_size - forwarded to DictionaryBlockInputStreamBase together with ids.size()
     * column_names   - names of the columns that fillBlock() should emit
     * ids, start_dates, end_dates - parallel arrays, moved into the stream
     */
    RangeDictionaryBlockInputStream(
        DictionatyPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray && ids,
        PaddedPODArray && start_dates, PaddedPODArray && end_dates);

    String getName() const override { return "RangeDictionaryBlockInputStream"; }

protected:
    // Builds the Block for the rows selected by ext::range(start, start + length).
    Block getBlock(size_t start, size_t length) const override;

private:
    // Pointer to a const member function of DictionaryType taking
    // (attribute name, ids, dates, output array).
    template using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray &,
                                                               const PaddedPODArray &, PaddedPODArray &) const;

    // Creates a ColumnVector of ids.size() elements and lets `getter` fill its data.
    template ColumnPtr getColumnFromAttribute(DictionaryGetter getter,
                                              const PaddedPODArray & ids, const PaddedPODArray & dates,
                                              const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;

    // Creates a ColumnString and lets dictionary.getString() fill it.
    ColumnPtr getColumnFromAttributeString(const PaddedPODArray & ids, const PaddedPODArray & dates,
                                           const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;

    // Copies every element of `array` into a freshly created ColumnVector.
    template ColumnPtr getColumnFromPODArray(const PaddedPODArray & array) const;

    // Appends a column built from `values` to `columns` if its name is present in `column_names`.
    // The name is attribute->name when `attribute` is set, otherwise `default_name`.
    template void addSpecialColumn(
        const std::optional & attribute, DataTypePtr type,
        const std::string & default_name, const std::unordered_set & column_names,
        const PaddedPODArray & values, ColumnsWithTypeAndName& columns) const;

    // Assembles the Block (special columns first, then requested attributes) for the given rows.
    Block fillBlock(const PaddedPODArray & ids,
                    const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const;

    // Picks, per row, the date that is passed to the dictionary getters.
    PaddedPODArray makeDateKey(
        const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const;

    DictionatyPtr dictionary;
    Names column_names;
    // Parallel arrays: element i of each describes the same row.
    PaddedPODArray ids;
    PaddedPODArray start_dates;
    PaddedPODArray end_dates;
};


/*
 * Initializes the base class with the number of ids and the block size limit,
 * stores the dictionary pointer and the column names, and takes ownership of the three arrays.
 *
 * The base initializer reads ids.size() from the parameter; base classes are initialized
 * before members, so this happens before the ids member is move-constructed from it.
 *
 * NOTE(review): the size parameter is named max_block_size in the class declaration
 * but max_column_size here - consider unifying the name.
 */
template
RangeDictionaryBlockInputStream::RangeDictionaryBlockInputStream(
    DictionatyPtr dictionary, size_t max_column_size, const Names & column_names, PaddedPODArray && ids,
    PaddedPODArray && start_dates, PaddedPODArray && end_dates)
    : DictionaryBlockInputStreamBase(ids.size(), max_column_size),
      dictionary(dictionary), column_names(column_names),
      ids(std::move(ids)), start_dates(std::move(start_dates)), end_dates(std::move(end_dates))
{
}

/*
 * Copies the elements with indices from ext::range(start, start + length) out of the
 * member arrays into temporaries and delegates to fillBlock().
 * No bounds check is performed here; presumably the base class only requests valid ranges - confirm.
 */
template Block RangeDictionaryBlockInputStream::getBlock(size_t start, size_t length) const
{
    PaddedPODArray block_ids;
    PaddedPODArray block_start_dates;
    PaddedPODArray block_end_dates;
    // Capacity is reserved up front for the `length` elements pushed below.
    block_ids.reserve(length);
    block_start_dates.reserve(length);
    block_end_dates.reserve(length);

    for (auto idx : ext::range(start, start + length))
    {
        block_ids.push_back(ids[idx]);
        block_start_dates.push_back(start_dates[idx]);
        block_end_dates.push_back(end_dates[idx]);
    }

    return fillBlock(block_ids, block_start_dates, block_end_dates);
}

/*
 * Creates a ColumnVector with ids.size() elements and calls the member function `getter`
 * on `dictionary`, passing attribute.name, ids, dates and the column's data array as output.
 * Returns the filled column.
 */
template template ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAttribute(
    DictionaryGetter getter, const PaddedPODArray & ids,
    const PaddedPODArray & dates, const DictionaryAttribute& attribute, const DictionaryType& dictionary) const
{
    auto column_vector = ColumnVector::create(ids.size());
    (dictionary.*getter)(attribute.name, ids, dates, column_vector->getData());
    // NOTE(review): std::move is presumably required to convert the created column to ColumnPtr - confirm.
    return std::move(column_vector);
}

/*
 * String counterpart of getColumnFromAttribute(): creates an empty ColumnString and passes
 * the raw pointer obtained from column_string.get() to dictionary.getString() as the output.
 */
template ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAttributeString(
    const PaddedPODArray & ids, const PaddedPODArray & dates,
    const DictionaryAttribute& attribute, const DictionaryType& dictionary) const
{
    auto column_string = ColumnString::create();
    dictionary.getString(attribute.name, ids, dates, column_string.get());
    return std::move(column_string);
}

/*
 * Creates an empty ColumnVector, reserves room for array.size() elements in its data
 * and inserts the elements of `array` one by one, in order.
 */
template template ColumnPtr RangeDictionaryBlockInputStream::getColumnFromPODArray(const PaddedPODArray & array) const
{
    auto column_vector = ColumnVector::create();
    column_vector->getData().reserve(array.size());
    for (T value : array)
        column_vector->insert(value);
    return std::move(column_vector);
}


/*
 * Emits one of the "special" (non-attribute) columns if it was requested.
 *
 * attribute    - optional description from the dictionary structure; when set, its name wins
 * type         - data type recorded for the emitted column
 * default_name - column name used when `attribute` is not set
 * column_names - set of requested column names
 * values       - data for the column
 * columns      - output list; a new entry is appended only when the name is requested
 */
template template void RangeDictionaryBlockInputStream::addSpecialColumn(
    const std::optional & attribute, DataTypePtr type,
    const std::string& default_name, const std::unordered_set & column_names,
    const PaddedPODArray & values, ColumnsWithTypeAndName & columns) const
{
    std::string name = default_name;
    if (attribute)
        name = attribute->name;
    if (column_names.find(name) != column_names.end())
        columns.emplace_back(getColumnFromPODArray(values), type, name);
}

/*
 * Builds an array of start_dates.size() elements. Element i is start_dates[i] when
 * RangeHashedDictionary::Range::isCorrectDate(start_dates[i]) holds, otherwise end_dates[i].
 *
 * NOTE(review): the result is sized from start_dates only; end_dates is assumed to be
 * at least as long - verify against callers.
 */
template PaddedPODArray RangeDictionaryBlockInputStream::makeDateKey(
    const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const
{
    PaddedPODArray key(start_dates.size());
    for (size_t i = 0; i < key.size(); ++i)
    {
        if (RangeHashedDictionary::Range::isCorrectDate(start_dates[i]))
            key[i] = start_dates[i];
        else
            key[i] = end_dates[i];
    }

    return key;
}


/*
 * Assembles the resulting Block for the given rows.
 *
 * 1. The special columns for structure.id, structure.range_min and structure.range_max are added
 *    (default names "ID", "Range Start", "Range End") when requested in column_names.
 * 2. For every attribute of the dictionary structure whose name is requested, the values are
 *    fetched from the dictionary using `ids` and the per-row date key from makeDateKey().
 *
 * Columns therefore appear in structure order, not in the order of column_names.
 */
template Block RangeDictionaryBlockInputStream::fillBlock(
    const PaddedPODArray & ids,
    const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const
{
    ColumnsWithTypeAndName columns;
    const DictionaryStructure& structure = dictionary->getStructure();

    // Set of requested names for membership tests below.
    std::unordered_set names(column_names.begin(), column_names.end());

    addSpecialColumn(structure.id, std::make_shared(), "ID", names, ids, columns);
    addSpecialColumn(structure.range_min, std::make_shared(), "Range Start", names, start_dates, columns);
    addSpecialColumn(structure.range_max, std::make_shared(), "Range End", names, end_dates, columns);

    auto date_key = makeDateKey(start_dates, end_dates);

    for (const auto idx : ext::range(0, structure.attributes.size()))
    {
        const DictionaryAttribute& attribute = structure.attributes[idx];
        if (names.find(attribute.name) != names.end())
        {
            ColumnPtr column;
            // Expands to a call of getColumnFromAttribute with the dictionary getter get<TYPE>.
            // NOTE(review): no matching #undef is visible, so the macro stays defined for every
            // file that includes this header - consider adding one after the switch.
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
            column = getColumnFromAttribute(&DictionaryType::get##TYPE, ids, date_key, attribute, *dictionary)
            // NOTE(review): there is no default case; if underlying_type matches none of the
            // cases, `column` stays default-constructed and is still appended below.
            switch (attribute.underlying_type)
            {
                case AttributeUnderlyingType::UInt8:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt8);
                    break;
                case AttributeUnderlyingType::UInt16:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt16);
                    break;
                case AttributeUnderlyingType::UInt32:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt32);
                    break;
                case AttributeUnderlyingType::UInt64:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt64);
                    break;
                case AttributeUnderlyingType::UInt128:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt128);
                    break;
                case AttributeUnderlyingType::Int8:
                    GET_COLUMN_FORM_ATTRIBUTE(Int8);
                    break;
                case AttributeUnderlyingType::Int16:
                    GET_COLUMN_FORM_ATTRIBUTE(Int16);
                    break;
                case AttributeUnderlyingType::Int32:
                    GET_COLUMN_FORM_ATTRIBUTE(Int32);
                    break;
                case AttributeUnderlyingType::Int64:
                    GET_COLUMN_FORM_ATTRIBUTE(Int64);
                    break;
                case AttributeUnderlyingType::Float32:
                    GET_COLUMN_FORM_ATTRIBUTE(Float32);
                    break;
                case AttributeUnderlyingType::Float64:
                    GET_COLUMN_FORM_ATTRIBUTE(Float64);
                    break;
                case AttributeUnderlyingType::String:
                    // Strings go through the dedicated getString() path instead of a typed getter.
                    column = getColumnFromAttributeString(ids, date_key, attribute, *dictionary);
                    break;
            }

            columns.emplace_back(column, attribute.type, attribute.name);
        }
    }
    return Block(columns);
}

}