Add bloom filter skip index for the Map data type.

This commit is contained in:
fuwhu 2021-09-06 10:27:28 +08:00 committed by Maksim Kita
parent e60b863898
commit 8962672e59
3 changed files with 58 additions and 0 deletions

View File

@ -1,9 +1,11 @@
#include <Interpreters/BloomFilter.h>
#include <city.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
@ -124,6 +126,14 @@ DataTypePtr BloomFilter::getPrimitiveType(const DataTypePtr & data_type)
throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
}
/// Map: the primitive type of a Map is the primitive type of its key type,
/// so the lookup continues recursively with the key type.
if (const auto * map_type = typeid_cast<const DataTypeMap *>(data_type.get()))
{
    const DataTypePtr & key_type = map_type->getKeyType();

    /// A Map used directly as the key type is rejected. The message reports the offending
    /// key type itself rather than the name of the enclosing Map type.
    if (typeid_cast<const DataTypeMap *>(key_type.get()))
        throw Exception("Unexpected key type " + key_type->getName() + " of bloom filter index for map.", ErrorCodes::BAD_ARGUMENTS);

    return getPrimitiveType(key_type);
}

/// Nullable: continue with the nested type.
if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(data_type.get()))
    return getPrimitiveType(nullable_type->getNestedType());
@ -138,6 +148,9 @@ ColumnPtr BloomFilter::getPrimitiveColumn(const ColumnPtr & column)
/// Array: continue with the array's data column.
if (const auto * array_col = typeid_cast<const ColumnArray *>(column.get()))
return getPrimitiveColumn(array_col->getDataPtr());
/// Map: continue with column 0 of the map's nested data
/// (presumably the keys column, mirroring getPrimitiveType which follows the key type).
if (const auto * map_col = typeid_cast<const ColumnMap *>(column.get()))
return getPrimitiveColumn(map_col->getNestedData().getColumnPtr(0));
/// Nullable: continue with the nested column.
if (const auto * nullable_col = typeid_cast<const ColumnNullable *>(column.get()))
return getPrimitiveColumn(nullable_col->getNestedColumnPtr());

View File

@ -5,6 +5,7 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
@ -123,6 +124,26 @@ struct BloomFilterHash
}
}
/// Map branch: translate the requested row range [pos, pos + limit) into the matching
/// range of entries inside column 0 of the map's nested data (the keys).
if (which.isMap())
{
    const auto * map_column = typeid_cast<const ColumnMap *>(column.get());

    /// Nullable keys are not accepted for the index.
    if (checkAndGetColumn<ColumnNullable>(map_column->getNestedData().getColumn(0)))
        throw Exception("Unexpected key type " + data_type->getName() + " of bloom filter index for map.", ErrorCodes::BAD_ARGUMENTS);

    const auto & row_offsets = map_column->getNestedColumn().getOffsets();

    /// Read the start offset before `pos` is overwritten; `limit` still refers to rows here.
    const auto first_entry = row_offsets[pos - 1];
    limit = row_offsets[pos + limit - 1] - first_entry;
    pos = first_entry;

    /// No entries in the requested rows: return a one-element column holding 0.
    if (limit == 0)
    {
        auto index_column = ColumnUInt64::create(1);
        index_column->getData()[0] = 0;
        return index_column;
    }
}

/// Strip wrappers from both the column and its type before hashing.
const ColumnPtr actual_col = BloomFilter::getPrimitiveColumn(column);
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(data_type);

View File

@ -1,6 +1,7 @@
#include <Common/HashTable/ClearableHashMap.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnTuple.h>
#include <Parsers/ASTLiteral.h>
@ -509,6 +510,29 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
return match_with_subtype;
}
/// Subscript on the key side of the comparison: `column[key] = value` or `column[key] != value`.
/// When `column` is a Map present in the index header, the comparison is turned into a bloom
/// filter lookup of `key`. Any shape that is not recognised falls through, so the index is
/// simply not used for this expression.
if (function->name == "arrayElement")
{
    const auto & subscript_args = function->arguments->children;

    /// Checked casts instead of assert_cast: the first argument may be an arbitrary expression
    /// and the key is normally a literal, so neither node is guaranteed to be an identifier.
    const auto * column_identifier = subscript_args.size() == 2
        ? typeid_cast<const ASTIdentifier *>(subscript_args[0].get())
        : nullptr;
    const auto * key_literal = column_identifier
        ? typeid_cast<const ASTLiteral *>(subscript_args[1].get())
        : nullptr;

    /// TODO: only constant string keys are supported; column keys and other key data types
    /// still need to be handled. An identifier key refers to another column, so its name
    /// must not be hashed as if it were the key value.
    const bool has_string_key = key_literal && key_literal->value.getType() == Field::Types::String;

    if (has_string_key && header.has(column_identifier->name()))
    {
        const size_t position = header.getPositionByName(column_identifier->name());
        const DataTypePtr & index_type = header.getByPosition(position).type;

        if (typeid_cast<const DataTypeMap *>(index_type.get()))
        {
            /// NOTE(review): a subscript with a missing key presumably yields the value type's
            /// default, so `column[key] = <default>` may match rows without the key — confirm
            /// that skipping granules on a key miss is safe for such comparisons.
            out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS;

            /// The key is hashed with the primitive type derived from the Map index type.
            const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type);
            out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), key_literal->value)));
            return true;
        }
    }
}
}
return false;