convert type with condition

This commit is contained in:
zhang2014 2019-06-19 18:50:37 +08:00
parent 6c8ff6dc31
commit d145295163
5 changed files with 84 additions and 22 deletions

View File

@ -30,10 +30,12 @@ struct BloomFilterHash
{
WhichDataType which(data_type);
if (which.isUInt())
if (which.isUInt() || which.isDateOrDateTime())
return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet<UInt64>())), 1);
else if (which.isInt())
else if (which.isInt() || which.isEnum())
return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Int64>()))), 1);
else if (which.isFloat())
return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Float64>()))), 1);
else if (which.isString() || which.isFixedString())
{
const auto & value = field.safeGet<String>();

View File

@ -36,15 +36,18 @@ MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const
bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const
{
const String column_name = node->getColumnName();
const String & column_name = node->getColumnName();
for (const auto & name : columns)
if (column_name == name)
return true;
if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
if (func->arguments->children.size() == 1)
return mayBenefitFromIndexForIn(func->arguments->children.front());
{
for (const auto & children : func->arguments->children)
if (mayBenefitFromIndexForIn(children))
return true;
}
return false;
}
@ -59,6 +62,24 @@ IndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQu
return std::make_shared<MergeTreeIndexConditionBloomFilter>(query_info, context, header, hash_functions);
}
static void assertIndexColumnsType(const Block &header)
{
if (!header || !header.columns())
throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY);
const DataTypes & columns_data_types = header.getDataTypes();
for (size_t index = 0; index < columns_data_types.size(); ++index)
{
WhichDataType which(columns_data_types[index]);
if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() &&
!which.isDateOrDateTime() && !which.isEnum())
throw Exception("Unexpected type " + columns_data_types[index]->getName() + " of bloom filter index.",
ErrorCodes::ILLEGAL_COLUMN);
}
}
std::unique_ptr<IMergeTreeIndex> bloomFilterIndexCreatorNew(const NamesAndTypesList & columns, std::shared_ptr<ASTIndexDeclaration> node, const Context & context)
{
if (node->name.empty())
@ -70,8 +91,7 @@ std::unique_ptr<IMergeTreeIndex> bloomFilterIndexCreatorNew(const NamesAndTypesL
auto index_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false);
auto index_sample = ExpressionAnalyzer(expr_list, syntax, context).getActions(true)->getSampleBlock();
if (!index_sample || !index_sample.columns())
throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY);
assertIndexColumnsType(index_sample);
double max_conflict_probability = 0.025;
if (node->type->arguments && !node->type->arguments->children.empty())

View File

@ -10,7 +10,8 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTIdentifier.h>
#include <Columns/ColumnTuple.h>
#include "MergeTreeIndexConditionBloomFilter.h"
#include <Interpreters/castColumn.h>
#include <Interpreters/convertFieldToType.h>
namespace DB
@ -76,7 +77,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr &
MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter(
const SelectQueryInfo & info, const Context & context, const Block & header, size_t hash_functions)
: header(header), query_info(info), hash_functions(hash_functions)
: header(header), context(context), query_info(info), hash_functions(hash_functions)
{
auto atomFromAST = [this](auto & node, auto &, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); };
rpn = std::move(RPNBuilder<RPNElement>(info, context, atomFromAST).extractRPN());
@ -249,7 +250,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
{
size_t row_size = column->size();
size_t position = header.getPositionByName(key_ast->getColumnName());
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, column, 0, row_size)));
const DataTypePtr & index_type = header.getByPosition(position).type;
const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type, context);
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size)));
if (function_name == "in" || function_name == "globalIn")
out.function = RPNElement::FUNCTION_IN;
@ -293,7 +296,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
if (header.has(key_ast->getColumnName()))
{
size_t position = header.getPositionByName(key_ast->getColumnName());
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*value_type, value_field)));
const DataTypePtr & index_type = header.getByPosition(position).type;
Field converted_field = convertFieldToType(value_field, *index_type, &*value_type);
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*index_type, converted_field)));
out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS;
return true;
}

View File

@ -53,6 +53,7 @@ public:
private:
const Block & header;
const Context & context;
const SelectQueryInfo & query_info;
const size_t hash_functions;
std::vector<RPNElement> rpn;
@ -63,13 +64,11 @@ private:
bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out);
bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const SetPtr &prepared_set, RPNElement &out);
bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out);
bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &type, const ColumnPtr &column,
RPNElement &out);
bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out);
bool traverseASTEquals(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &value_type, const Field &value_field,
RPNElement &out);
bool traverseASTEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out);
};
}

View File

@ -1,7 +1,7 @@
DROP TABLE IF EXISTS test.single_column_bloom_filter;
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS test.single_column_bloom_filter;
CREATE TABLE test.single_column_bloom_filter (u64 UInt64, i32 Int32, i64 UInt64, INDEX idx (i32) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 6;
INSERT INTO test.single_column_bloom_filter SELECT number AS u64, number AS i32, number AS i64 FROM system.numbers LIMIT 100;
@ -19,13 +19,49 @@ SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1
SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(1, 1), (2, 2)])) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) SETTINGS max_rows_to_read = 6;
WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 6;
WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 }
WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 }
WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 }
WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 }
WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 }
DROP TABLE IF EXISTS test.single_column_bloom_filter;
DROP TABLE IF EXISTS test.bloom_filter_types_test;
CREATE TABLE test.bloom_filter_types_test (order_key UInt64, i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, f32 Float32, f64 Float64, date Date, date_time DateTime, str String, fixed_string FixedString(3), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6;
INSERT INTO test.bloom_filter_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number) AS date, toDateTime(number) AS date_time, toString(number) AS str, toFixedString(toString(number), 3) AS fixed_string FROM system.numbers LIMIT 100;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE i16 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE i32 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE i64 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE u8 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE u16 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE u32 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE u64 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE f32 = 1.0 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM test.bloom_filter_types_test WHERE f64 = 1.0 SETTINGS max_rows_to_read = 6;
SELECT * FROM test.bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6;
DROP TABLE IF EXISTS test.bloom_filter_types_test;