Update MergeTreeIndexFullText to support the Map data type

This commit is contained in:
Maksim Kita 2021-09-21 16:43:33 +03:00
parent e3e3b84f63
commit 6dadc521fb
4 changed files with 85 additions and 48 deletions

View File

@ -165,16 +165,16 @@ void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos,
auto * column_map = assert_cast<ColumnMap *>(const_cast<IColumn *>(column.get()));
auto & column_array = assert_cast<ColumnArray &>(column_map->getNestedColumn());
auto & column_tuple = assert_cast<ColumnTuple &>(column_array.getData());
auto & column_key = assert_cast<ColumnString &>(column_tuple.getColumn(0));
auto & column_key = column_tuple.getColumn(0);
for (size_t i = 0; i < rows_read; ++i)
{
size_t element_start_row = *pos !=0 ? column_array.getOffsets()[*pos-1] : 0;
size_t element_start_row = column_array.getOffsets()[*pos - 1];
size_t elements_size = column_array.getOffsets()[*pos] - element_start_row;
for (size_t row_num = 0; row_num < elements_size; row_num++)
{
auto ref = column_key.getDataAt(element_start_row+row_num);
auto ref = column_key.getDataAt(element_start_row + row_num);
columnToBloomFilter(ref.data, ref.size, token_extractor, granule->bloom_filters[col]);
}
@ -355,16 +355,9 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
return rpn_stack[0].can_be_true;
}
bool MergeTreeConditionFullText::getKey(const ASTPtr & node, size_t & key_column_num)
bool MergeTreeConditionFullText::getKey(const std::string & key_column_name, size_t & key_column_num)
{
String column_name = node->getColumnName();
//try to get map column name in arrayElement function
if (const auto func = node.get()->as<ASTFunction>())
if (func->name == "arrayElement")
column_name = assert_cast<ASTIdentifier *>(func->arguments.get()->children[0].get())->name();
auto it = std::find(index_columns.begin(), index_columns.end(), column_name);
auto it = std::find(index_columns.begin(), index_columns.end(), key_column_name);
if (it == index_columns.end())
return false;
@ -392,16 +385,50 @@ bool MergeTreeConditionFullText::atomFromAST(
{
key_arg_pos = 0;
}
else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) && getKey(args[0], key_column_num))
else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) && getKey(args[0]->getColumnName(), key_column_num))
{
key_arg_pos = 0;
}
else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) && getKey(args[1], key_column_num))
else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) && getKey(args[1]->getColumnName(), key_column_num))
{
key_arg_pos = 1;
}
else if (const auto * index_function = args[0].get()->as<ASTFunction>())
{
if (index_function->name == "arrayElement")
{
auto column_name = assert_cast<ASTIdentifier *>(index_function->arguments.get()->children[0].get())->name();
if (!getKey(column_name, key_column_num))
return false;
key_arg_pos = 0;
auto & argument = index_function->arguments.get()->children[1];
if (const auto * literal = argument->as<ASTLiteral>())
{
const_value = literal->value;
if (const_value.getType() != Field::Types::String)
return false;
const_type = std::make_shared<DataTypeString>();
}
else
{
return false;
}
}
else
{
return false;
}
}
else
{
return false;
}
if (const_type && const_type->getTypeId() != TypeIndex::String
&& const_type->getTypeId() != TypeIndex::FixedString
@ -409,11 +436,7 @@ bool MergeTreeConditionFullText::atomFromAST(
{
return false;
}
//try to parse arrayElement function
if (const auto map_func = args[0].get()->as<ASTFunction>())
if (map_func->name == "arrayElement")
const_value = assert_cast<ASTIdentifier *>(map_func->arguments->children[1].get())->name();
if (key_arg_pos == 1 && (func_name != "equals" && func_name != "notEquals"))
return false;
else if (!token_extractor->supportLike() && (func_name == "like" || func_name == "notLike"))
@ -536,7 +559,7 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
for (size_t i = 0; i < tuple_elements.size(); ++i)
{
size_t key = 0;
if (getKey(tuple_elements[i], key))
if (getKey(tuple_elements[i]->getColumnName(), key))
{
key_tuple_mapping.emplace_back(i, key);
data_types.push_back(index_data_types[key]);
@ -546,7 +569,7 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
else
{
size_t key = 0;
if (getKey(left_arg, key))
if (getKey(left_arg->getColumnName(), key))
{
key_tuple_mapping.emplace_back(0, key);
data_types.push_back(index_data_types[key]);
@ -880,21 +903,17 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/)
{
for (const auto & data_type : index.data_types)
{
if (data_type->getTypeId() != TypeIndex::String
&& data_type->getTypeId() != TypeIndex::FixedString)
{
DataTypePtr index_key_data_type = data_type;
if (data_type->getTypeId() != TypeIndex::Map)
throw Exception("Bloom filter index can be used only with `String`,`FixedString` or `Map` with key of String or fixedString type.", ErrorCodes::INCORRECT_QUERY);
if (data_type->getTypeId() == TypeIndex::Map)
{
DataTypeMap * map_type = assert_cast<DataTypeMap *>(const_cast<IDataType *>(data_type.get()));
index_key_data_type = map_type->getKeyType();
}
else
{
DataTypeMap * map_type = assert_cast<DataTypeMap *>(const_cast<IDataType *>(data_type.get()));
if (map_type->getKeyType()->getTypeId() != TypeIndex::String&& map_type->getKeyType()->getTypeId() != TypeIndex::FixedString)
throw Exception("Bloom filter index can be used only with `String`,`FixedString` or `Map` with key of String or fixedString type.", ErrorCodes::INCORRECT_QUERY);
}
}
if (index_key_data_type->getTypeId() != TypeIndex::String && index_key_data_type->getTypeId() != TypeIndex::FixedString)
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Bloom filter index can be used only with `String`,`FixedString` or `Map` with key of `String` or `FixedString` type.");
}
if (index.type == NgramTokenExtractor::getName())

View File

@ -147,7 +147,7 @@ private:
bool atomFromAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out);
bool getKey(const ASTPtr & node, size_t & key_column_num);
bool getKey(const std::string & key_column_name, size_t & key_column_num);
bool tryPrepareSetBloomFilter(const ASTs & args, RPNElement & out);
static bool createFunctionEqualsCondition(

View File

@ -1,2 +1,4 @@
3 {'K3':'V3'}
2 {'K2':'V2'}
1 {'K1':'V1'} {'K1':'V1'}
2 {'K2':'V2'} {'K2':'V2'}
1 {'K1':'V1'} {'K1':'V1'}
2 {'K2':'V2'} {'K2':'V2'}

View File

@ -1,16 +1,32 @@
CREATE DATABASE IF NOT EXISTS test;
DROP TABLE IF EXISTS bf_tokenbf_map_test;
DROP TABLE IF EXISTS bf_ngram_map_test;
DROP TABLE IF EXISTS test.bf_tokenbf_map_test;
DROP TABLE IF EXISTS test.bf_ngram_map_test;
CREATE TABLE bf_tokenbf_map_test
(
row_id UInt32,
map Map(String, String),
map_fixed Map(FixedString(2), String),
INDEX map_tokenbf map TYPE tokenbf_v1(256,2,0) GRANULARITY 1,
INDEX map_fixed_tokenbf map_fixed TYPE tokenbf_v1(256,2,0) GRANULARITY 1
) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 2;
CREATE TABLE test.bf_tokenbf_map_test (row_id UInt32, map Map(String, String), INDEX map_tokenbf map TYPE tokenbf_v1(256,2,0) GRANULARITY 1) Engine=MergeTree() ORDER BY row_id settings index_granularity = 2;
CREATE TABLE test.bf_ngram_map_test (row_id UInt32, map Map(String, String), INDEX map_tokenbf map TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1) Engine=MergeTree() ORDER BY row_id settings index_granularity = 2;
CREATE TABLE bf_ngram_map_test
(
row_id UInt32,
map Map(String, String),
map_fixed Map(FixedString(2), String),
INDEX map_ngram map TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1,
INDEX map_fixed_ngram map_fixed TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1
) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 2;
INSERT INTO test.bf_tokenbf_map_test VALUES (1, {'K1':'V1'}),(2,{'K2':'V2'}),(3,{'K3':'V3'}),(4,{'K4':'V4'});
INSERT INTO test.bf_ngram_map_test VALUES (1, {'K1':'V1'}),(2,{'K2':'V2'}),(3,{'K3':'V3'}),(4,{'K4':'V4'});
INSERT INTO bf_tokenbf_map_test VALUES (1, {'K1':'V1'}, {'K1':'V1'}), (2, {'K2':'V2'}, {'K2':'V2'});
INSERT INTO bf_ngram_map_test VALUES (1, {'K1':'V1'}, {'K1':'V1'}), (2, {'K2':'V2'}, {'K2':'V2'});
SELECT * FROM test.bf_tokenbf_map_test WHERE map['K3']='V3';
SELECT * FROM test.bf_tokenbf_map_test WHERE map['K2']='V2';
SELECT * FROM bf_tokenbf_map_test WHERE map['K1']='V1';
SELECT * FROM bf_ngram_map_test WHERE map['K2']='V2';
DROP TABLE test.bf_tokenbf_map_test;
DROP TABLE test.bf_ngram_map_test;
SELECT * FROM bf_tokenbf_map_test WHERE map_fixed['K1']='V1';
SELECT * FROM bf_ngram_map_test WHERE map_fixed['K2']='V2';
DROP TABLE bf_tokenbf_map_test;
DROP TABLE bf_ngram_map_test;