Implemented isIn methods for cache dictionaries [#CLICKHOUSE-2144].

This commit is contained in:
Alexey Milovidov 2017-03-26 02:42:04 +03:00
parent f5434cd2ae
commit d4992da546
3 changed files with 121 additions and 11 deletions

View File

@ -93,9 +93,9 @@ public:
void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
/* void isInVectorVector(const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
void isInVectorVector(const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; */
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
/// Helper macro: declares a const member function `get<TYPE>` that takes an attribute name
/// and an array of ids and writes its results into `out`, an array of TYPE.
#define DECLARE(TYPE)\
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, PaddedPODArray<TYPE> & out) const;
@ -242,6 +242,12 @@ private:
FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const;
/// Common implementation behind the isIn* methods that take a vector of children.
/// AncestorType is either PaddedPODArray<Key> (one ancestor per row) or a single Key
/// (the same ancestor for every row).
/// `out` receives one UInt8 flag per row: 1 if the ancestor was found among the parents of the child, 0 otherwise.
template <typename AncestorType>
void isInImpl(
const PaddedPODArray<Key> & child_ids,
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -1338,6 +1338,8 @@ using FunctionDictGetDateOrDefault = FunctionDictGetOrDefault<DataTypeDate>;
using FunctionDictGetDateTimeOrDefault = FunctionDictGetOrDefault<DataTypeDateTime>;
/// Functions to work with hierarchies.
class FunctionDictGetHierarchy final : public IFunction
{
public:
@ -1409,7 +1411,8 @@ private:
"Dictionary does not have a hierarchy",
ErrorCodes::UNSUPPORTED_METHOD};
const auto get_hierarchies = [&] (const PaddedPODArray<UInt64> & in, PaddedPODArray<UInt64> & out, PaddedPODArray<UInt64> & offsets) {
const auto get_hierarchies = [&] (const PaddedPODArray<UInt64> & in, PaddedPODArray<UInt64> & out, PaddedPODArray<UInt64> & offsets)
{
const auto size = in.size();
/// copy of `in` array

View File

@ -61,20 +61,121 @@ void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<K
}
/*void CacheDictionary::isIn(
/// Uniform element access for isInImpl: the same call works for a per-row array of keys
/// and for a single key that is shared by all rows.

/// Array case: returns the key stored at the given position.
static inline CacheDictionary::Key getAt(const PaddedPODArray<CacheDictionary::Key> & keys, const size_t position)
{
    return keys[position];
}

/// Single value case: the position is irrelevant, the same key is returned for every row.
static inline CacheDictionary::Key getAt(const CacheDictionary::Key & key, const size_t /*position*/)
{
    return key;
}
/** For every row decides whether the ancestor for that row is reachable from the child of that row
  *  by repeatedly taking the parent.
  *
  * child_ids    - one child id per row.
  * ancestor_ids - either an array with one ancestor per row or a single key shared by all rows (see getAt).
  * out          - result, one flag per row: 1 - ancestor found, 0 - chain ended at null_value.
  *                Its size defines the number of rows; child_ids is assumed to have the same size.
  *
  * All rows are processed together: each pass asks toParent for the parents of all rows
  *  that are still undecided, then compacts the remaining ones for the next pass.
  *
  * NOTE(review): only parents are compared with the ancestor, the child id itself is not,
  *  while isInConstantVector does count the child itself - confirm which behaviour is intended.
  * NOTE(review): the loop finishes only when every chain reaches null_value or its ancestor,
  *  so a cyclic hierarchy that does not contain the ancestor will not terminate.
  */
template <typename AncestorType>
void CacheDictionary::isInImpl(
    const PaddedPODArray<Key> & child_ids,
    const AncestorType & ancestor_ids,
    PaddedPODArray<UInt8> & out) const
{
    /// Transform all children to parents until ancestor id or null_value will be reached.

    const size_t size = out.size();
    memset(out.data(), 0xFF, size);        /// 0xFF means "not calculated"

    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);

    const PaddedPODArray<Key> * current_children = &child_ids;
    PaddedPODArray<Key> children(size);
    PaddedPODArray<Key> parents(size);

    while (true)
    {
        toParent(*current_children, parents);

        /// out_idx walks over all rows, parents_idx walks only over rows that are still not calculated:
        ///  parents[parents_idx] belongs to the parents_idx-th not calculated row.
        size_t out_idx = 0;
        size_t parents_idx = 0;
        size_t new_children_idx = 0;

        while (out_idx < size)
        {
            /// Already calculated
            if (out[out_idx] != 0xFF)
            {
                ++out_idx;
                continue;
            }

            /// No parent
            if (parents[parents_idx] == null_value)
            {
                out[out_idx] = 0;
            }
            /// Found ancestor.
            /// The ancestor must be taken by the row number (out_idx): after the first pass
            ///  parents are compacted and parents_idx does not match the row anymore.
            else if (parents[parents_idx] == getAt(ancestor_ids, out_idx))
            {
                out[out_idx] = 1;
            }
            /// Found intermediate parent, add this value to search at next loop iteration
            else
            {
                /// new_children_idx <= parents_idx, so this never overwrites a value that is not read yet
                ///  (toParent has already consumed `children` for this pass).
                children[new_children_idx] = parents[parents_idx];
                ++new_children_idx;
            }

            ++out_idx;
            ++parents_idx;
        }

        if (new_children_idx == 0)
            break;

        /// Will process new children at next loop iteration.
        children.resize(new_children_idx);
        parents.resize(new_children_idx);
        current_children = &children;
    }
}
/// Row-wise check: out[i] is set to 1 if ancestor_ids[i] is found among the parents of child_ids[i], 0 otherwise.
/// No preliminary zero-fill of `out` is needed: isInImpl initializes every element of `out` by itself.
void CacheDictionary::isInVectorVector(
    const PaddedPODArray<Key> & child_ids,
    const PaddedPODArray<Key> & ancestor_ids,
    PaddedPODArray<UInt8> & out) const
{
    isInImpl(child_ids, ancestor_ids, out);
}
const PaddedPODArray<Key> * current_child_ids = &child_ids;
PaddedPODArray<Key> child_ids_buffer;
PaddedPODArray<Key> parents(out.size());
void CacheDictionary::isInVectorConstant(
const PaddedPODArray<Key> & child_ids,
const Key ancestor_id,
PaddedPODArray<UInt8> & out) const
{
isInImpl(child_ids, ancestor_id, out);
}
toParent(*current_child_ids, parents);
for (size_t i = 0,)
}*/
/** Checks one child against many candidate ancestors.
  *
  * child_id     - the single child id.
  * ancestor_ids - candidate ancestors, one per row.
  * out          - result, one flag per row: 1 if ancestor_ids[i] is child_id itself
  *                or one of the ids on its chain of parents, 0 otherwise.
  *
  * The chain of parents is collected once and then every candidate is searched in it.
  */
void CacheDictionary::isInConstantVector(
    const Key child_id,
    const PaddedPODArray<Key> & ancestor_ids,
    PaddedPODArray<UInt8> & out) const
{
    /// Special case with single child value.

    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);

    PaddedPODArray<Key> child(1, child_id);
    PaddedPODArray<Key> parent(1);
    std::vector<Key> ancestors(1, child_id);

    /// Iteratively find all ancestors for child.
    while (true)
    {
        toParent(child, parent);

        if (parent[0] == null_value)
            break;

        /// Cyclic hierarchy: this id was already visited, so all reachable ids are collected.
        /// Without this check the loop would never stop and `ancestors` would grow without bound.
        if (std::find(ancestors.begin(), ancestors.end(), parent[0]) != ancestors.end())
            break;

        child[0] = parent[0];
        ancestors.push_back(parent[0]);
    }

    /// Assuming short hierarchy, so linear search is Ok.
    for (size_t i = 0, size = out.size(); i < size; ++i)
        out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
}
#define DECLARE(TYPE)\