MergeTreeIndexFullText updated for data type map

This commit is contained in:
Maksim Kita 2021-09-21 16:43:33 +03:00
parent e3e3b84f63
commit 6dadc521fb
4 changed files with 85 additions and 48 deletions

View File

@ -165,16 +165,16 @@ void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos,
auto * column_map = assert_cast<ColumnMap *>(const_cast<IColumn *>(column.get())); auto * column_map = assert_cast<ColumnMap *>(const_cast<IColumn *>(column.get()));
auto & column_array = assert_cast<ColumnArray &>(column_map->getNestedColumn()); auto & column_array = assert_cast<ColumnArray &>(column_map->getNestedColumn());
auto & column_tuple = assert_cast<ColumnTuple &>(column_array.getData()); auto & column_tuple = assert_cast<ColumnTuple &>(column_array.getData());
auto & column_key = assert_cast<ColumnString &>(column_tuple.getColumn(0)); auto & column_key = column_tuple.getColumn(0);
for (size_t i = 0; i < rows_read; ++i) for (size_t i = 0; i < rows_read; ++i)
{ {
size_t element_start_row = *pos !=0 ? column_array.getOffsets()[*pos-1] : 0; size_t element_start_row = column_array.getOffsets()[*pos - 1];
size_t elements_size = column_array.getOffsets()[*pos] - element_start_row; size_t elements_size = column_array.getOffsets()[*pos] - element_start_row;
for (size_t row_num = 0; row_num < elements_size; row_num++) for (size_t row_num = 0; row_num < elements_size; row_num++)
{ {
auto ref = column_key.getDataAt(element_start_row+row_num); auto ref = column_key.getDataAt(element_start_row + row_num);
columnToBloomFilter(ref.data, ref.size, token_extractor, granule->bloom_filters[col]); columnToBloomFilter(ref.data, ref.size, token_extractor, granule->bloom_filters[col]);
} }
@ -355,16 +355,9 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
return rpn_stack[0].can_be_true; return rpn_stack[0].can_be_true;
} }
bool MergeTreeConditionFullText::getKey(const ASTPtr & node, size_t & key_column_num) bool MergeTreeConditionFullText::getKey(const std::string & key_column_name, size_t & key_column_num)
{ {
String column_name = node->getColumnName(); auto it = std::find(index_columns.begin(), index_columns.end(), key_column_name);
//try to get map column name in arrayElement function
if (const auto func = node.get()->as<ASTFunction>())
if (func->name == "arrayElement")
column_name = assert_cast<ASTIdentifier *>(func->arguments.get()->children[0].get())->name();
auto it = std::find(index_columns.begin(), index_columns.end(), column_name);
if (it == index_columns.end()) if (it == index_columns.end())
return false; return false;
@ -392,16 +385,50 @@ bool MergeTreeConditionFullText::atomFromAST(
{ {
key_arg_pos = 0; key_arg_pos = 0;
} }
else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) && getKey(args[0], key_column_num)) else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) && getKey(args[0]->getColumnName(), key_column_num))
{ {
key_arg_pos = 0; key_arg_pos = 0;
} }
else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) && getKey(args[1], key_column_num)) else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) && getKey(args[1]->getColumnName(), key_column_num))
{ {
key_arg_pos = 1; key_arg_pos = 1;
} }
else if (const auto * index_function = args[0].get()->as<ASTFunction>())
{
if (index_function->name == "arrayElement")
{
auto column_name = assert_cast<ASTIdentifier *>(index_function->arguments.get()->children[0].get())->name();
if (!getKey(column_name, key_column_num))
return false;
key_arg_pos = 0;
auto & argument = index_function->arguments.get()->children[1];
if (const auto * literal = argument->as<ASTLiteral>())
{
const_value = literal->value;
if (const_value.getType() != Field::Types::String)
return false;
const_type = std::make_shared<DataTypeString>();
}
else
{
return false;
}
}
else
{
return false;
}
}
else else
{
return false; return false;
}
if (const_type && const_type->getTypeId() != TypeIndex::String if (const_type && const_type->getTypeId() != TypeIndex::String
&& const_type->getTypeId() != TypeIndex::FixedString && const_type->getTypeId() != TypeIndex::FixedString
@ -409,11 +436,7 @@ bool MergeTreeConditionFullText::atomFromAST(
{ {
return false; return false;
} }
//try to parse arrayElement function
if (const auto map_func = args[0].get()->as<ASTFunction>())
if (map_func->name == "arrayElement")
const_value = assert_cast<ASTIdentifier *>(map_func->arguments->children[1].get())->name();
if (key_arg_pos == 1 && (func_name != "equals" && func_name != "notEquals")) if (key_arg_pos == 1 && (func_name != "equals" && func_name != "notEquals"))
return false; return false;
else if (!token_extractor->supportLike() && (func_name == "like" || func_name == "notLike")) else if (!token_extractor->supportLike() && (func_name == "like" || func_name == "notLike"))
@ -536,7 +559,7 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
for (size_t i = 0; i < tuple_elements.size(); ++i) for (size_t i = 0; i < tuple_elements.size(); ++i)
{ {
size_t key = 0; size_t key = 0;
if (getKey(tuple_elements[i], key)) if (getKey(tuple_elements[i]->getColumnName(), key))
{ {
key_tuple_mapping.emplace_back(i, key); key_tuple_mapping.emplace_back(i, key);
data_types.push_back(index_data_types[key]); data_types.push_back(index_data_types[key]);
@ -546,7 +569,7 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
else else
{ {
size_t key = 0; size_t key = 0;
if (getKey(left_arg, key)) if (getKey(left_arg->getColumnName(), key))
{ {
key_tuple_mapping.emplace_back(0, key); key_tuple_mapping.emplace_back(0, key);
data_types.push_back(index_data_types[key]); data_types.push_back(index_data_types[key]);
@ -880,21 +903,17 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/)
{ {
for (const auto & data_type : index.data_types) for (const auto & data_type : index.data_types)
{ {
if (data_type->getTypeId() != TypeIndex::String DataTypePtr index_key_data_type = data_type;
&& data_type->getTypeId() != TypeIndex::FixedString)
{
if (data_type->getTypeId() != TypeIndex::Map) if (data_type->getTypeId() == TypeIndex::Map)
throw Exception("Bloom filter index can be used only with `String`,`FixedString` or `Map` with key of String or fixedString type.", ErrorCodes::INCORRECT_QUERY); {
DataTypeMap * map_type = assert_cast<DataTypeMap *>(const_cast<IDataType *>(data_type.get()));
index_key_data_type = map_type->getKeyType();
}
else if (index_key_data_type->getTypeId() != TypeIndex::String && index_key_data_type->getTypeId() != TypeIndex::FixedString)
{ throw Exception(ErrorCodes::INCORRECT_QUERY,
DataTypeMap * map_type = assert_cast<DataTypeMap *>(const_cast<IDataType *>(data_type.get())); "Bloom filter index can be used only with `String`,`FixedString` or `Map` with key of `String` or `FixedString` type.");
if (map_type->getKeyType()->getTypeId() != TypeIndex::String&& map_type->getKeyType()->getTypeId() != TypeIndex::FixedString)
throw Exception("Bloom filter index can be used only with `String`,`FixedString` or `Map` with key of String or fixedString type.", ErrorCodes::INCORRECT_QUERY);
}
}
} }
if (index.type == NgramTokenExtractor::getName()) if (index.type == NgramTokenExtractor::getName())

View File

@ -147,7 +147,7 @@ private:
bool atomFromAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); bool atomFromAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out);
bool getKey(const ASTPtr & node, size_t & key_column_num); bool getKey(const std::string & key_column_name, size_t & key_column_num);
bool tryPrepareSetBloomFilter(const ASTs & args, RPNElement & out); bool tryPrepareSetBloomFilter(const ASTs & args, RPNElement & out);
static bool createFunctionEqualsCondition( static bool createFunctionEqualsCondition(

View File

@ -1,2 +1,4 @@
3 {'K3':'V3'} 1 {'K1':'V1'} {'K1':'V1'}
2 {'K2':'V2'} 2 {'K2':'V2'} {'K2':'V2'}
1 {'K1':'V1'} {'K1':'V1'}
2 {'K2':'V2'} {'K2':'V2'}

View File

@ -1,16 +1,32 @@
CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS bf_tokenbf_map_test;
DROP TABLE IF EXISTS bf_ngram_map_test;
DROP TABLE IF EXISTS test.bf_tokenbf_map_test; CREATE TABLE bf_tokenbf_map_test
DROP TABLE IF EXISTS test.bf_ngram_map_test; (
row_id UInt32,
map Map(String, String),
map_fixed Map(FixedString(2), String),
INDEX map_tokenbf map TYPE tokenbf_v1(256,2,0) GRANULARITY 1,
INDEX map_fixed_tokenbf map_fixed TYPE tokenbf_v1(256,2,0) GRANULARITY 1
) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 2;
CREATE TABLE test.bf_tokenbf_map_test (row_id UInt32, map Map(String, String), INDEX map_tokenbf map TYPE tokenbf_v1(256,2,0) GRANULARITY 1) Engine=MergeTree() ORDER BY row_id settings index_granularity = 2; CREATE TABLE bf_ngram_map_test
CREATE TABLE test.bf_ngram_map_test (row_id UInt32, map Map(String, String), INDEX map_tokenbf map TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1) Engine=MergeTree() ORDER BY row_id settings index_granularity = 2; (
row_id UInt32,
map Map(String, String),
map_fixed Map(FixedString(2), String),
INDEX map_ngram map TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1,
INDEX map_fixed_ngram map_fixed TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1
) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 2;
INSERT INTO test.bf_tokenbf_map_test VALUES (1, {'K1':'V1'}),(2,{'K2':'V2'}),(3,{'K3':'V3'}),(4,{'K4':'V4'}); INSERT INTO bf_tokenbf_map_test VALUES (1, {'K1':'V1'}, {'K1':'V1'}), (2, {'K2':'V2'}, {'K2':'V2'});
INSERT INTO test.bf_ngram_map_test VALUES (1, {'K1':'V1'}),(2,{'K2':'V2'}),(3,{'K3':'V3'}),(4,{'K4':'V4'}); INSERT INTO bf_ngram_map_test VALUES (1, {'K1':'V1'}, {'K1':'V1'}), (2, {'K2':'V2'}, {'K2':'V2'});
SELECT * FROM test.bf_tokenbf_map_test WHERE map['K3']='V3'; SELECT * FROM bf_tokenbf_map_test WHERE map['K1']='V1';
SELECT * FROM test.bf_tokenbf_map_test WHERE map['K2']='V2'; SELECT * FROM bf_ngram_map_test WHERE map['K2']='V2';
DROP TABLE test.bf_tokenbf_map_test; SELECT * FROM bf_tokenbf_map_test WHERE map_fixed['K1']='V1';
DROP TABLE test.bf_ngram_map_test; SELECT * FROM bf_ngram_map_test WHERE map_fixed['K2']='V2';
DROP TABLE bf_tokenbf_map_test;
DROP TABLE bf_ngram_map_test;