2019-05-10 03:42:28 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeIndexBloomFilter.h>
|
|
|
|
#include <Storages/MergeTree/MergeTreeData.h>
|
|
|
|
#include <Interpreters/SyntaxAnalyzer.h>
|
|
|
|
#include <Interpreters/ExpressionAnalyzer.h>
|
|
|
|
#include <Core/Types.h>
|
|
|
|
#include <ext/bit_cast.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2019-09-17 11:18:04 +00:00
|
|
|
#include <DataTypes/DataTypeArray.h>
|
2019-05-10 03:42:28 +00:00
|
|
|
#include <DataTypes/DataTypeNullable.h>
|
|
|
|
#include <Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h>
|
|
|
|
#include <Parsers/queryToString.h>
|
|
|
|
#include <Columns/ColumnConst.h>
|
2019-10-15 04:22:51 +00:00
|
|
|
#include <Columns/ColumnLowCardinality.h>
|
2019-06-19 15:09:07 +00:00
|
|
|
#include <Interpreters/BloomFilterHash.h>
|
2019-05-10 03:42:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
{
    /// Error codes used by the exceptions thrown in this file; only declared here.
    extern const int INCORRECT_QUERY;
    extern const int ILLEGAL_COLUMN;
}
|
|
|
|
|
|
|
|
/// Passes the generic index description (name, expression, columns, types, header, granularity)
/// to IMergeTreeIndex and stores the two bloom filter parameters.
MergeTreeIndexBloomFilter::MergeTreeIndexBloomFilter(
    const String & name_,
    const ExpressionActionsPtr & expr_,
    const Names & columns_,
    const DataTypes & data_types_,
    const Block & header_,
    size_t granularity_,
    size_t bits_per_row_,
    size_t hash_functions_)
    : IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_)
    , bits_per_row(bits_per_row_)
    , hash_functions(hash_functions_)
{
}
|
|
|
|
|
|
|
|
/// Creates a bloom filter granule configured with this index's bits-per-row,
/// hash function count and number of index columns.
MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const
{
    const size_t index_columns_count = columns.size();
    return std::make_shared<MergeTreeIndexGranuleBloomFilter>(bits_per_row, hash_functions, index_columns_count);
}
|
|
|
|
|
|
|
|
bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const
|
|
|
|
{
|
2019-06-19 10:50:37 +00:00
|
|
|
const String & column_name = node->getColumnName();
|
2019-05-10 03:42:28 +00:00
|
|
|
|
2019-08-03 11:02:40 +00:00
|
|
|
for (const auto & cname : columns)
|
|
|
|
if (column_name == cname)
|
2019-05-10 03:42:28 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
|
2019-06-19 10:50:37 +00:00
|
|
|
{
|
|
|
|
for (const auto & children : func->arguments->children)
|
|
|
|
if (mayBenefitFromIndexForIn(children))
|
|
|
|
return true;
|
|
|
|
}
|
2019-05-10 03:42:28 +00:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Creates a bloom filter aggregator configured with this index's bits-per-row,
/// hash function count and column names.
MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() const
{
    auto aggregator = std::make_shared<MergeTreeIndexAggregatorBloomFilter>(bits_per_row, hash_functions, columns);
    return aggregator;
}
|
|
|
|
|
2019-06-19 15:30:48 +00:00
|
|
|
/// Creates the bloom filter condition for a query from the query info, the context,
/// the index header and the number of hash functions.
MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(
    const SelectQueryInfo & query_info, const Context & context) const
{
    auto condition = std::make_shared<MergeTreeIndexConditionBloomFilter>(query_info, context, header, hash_functions);
    return condition;
}
|
|
|
|
|
2019-06-19 15:30:48 +00:00
|
|
|
static void assertIndexColumnsType(const Block & header)
|
2019-06-19 10:50:37 +00:00
|
|
|
{
|
|
|
|
if (!header || !header.columns())
|
|
|
|
throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY);
|
|
|
|
|
|
|
|
const DataTypes & columns_data_types = header.getDataTypes();
|
|
|
|
|
2019-11-01 15:31:02 +00:00
|
|
|
for (auto & type : columns_data_types)
|
2019-06-19 10:50:37 +00:00
|
|
|
{
|
2019-11-01 15:31:02 +00:00
|
|
|
const IDataType * actual_type = BloomFilter::getPrimitiveType(type).get();
|
2019-10-15 04:22:51 +00:00
|
|
|
WhichDataType which(actual_type);
|
2019-09-17 11:18:04 +00:00
|
|
|
|
2019-06-19 10:50:37 +00:00
|
|
|
if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() &&
|
2019-09-23 03:40:48 +00:00
|
|
|
!which.isDateOrDateTime() && !which.isEnum())
|
2019-11-01 15:31:02 +00:00
|
|
|
throw Exception("Unexpected type " + type->getName() + " of bloom filter index.",
|
2019-06-19 10:50:37 +00:00
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-19 15:30:48 +00:00
|
|
|
std::unique_ptr<IMergeTreeIndex> bloomFilterIndexCreatorNew(
|
|
|
|
const NamesAndTypesList & columns, std::shared_ptr<ASTIndexDeclaration> node, const Context & context)
|
2019-05-10 03:42:28 +00:00
|
|
|
{
|
|
|
|
if (node->name.empty())
|
|
|
|
throw Exception("Index must have unique name.", ErrorCodes::INCORRECT_QUERY);
|
|
|
|
|
|
|
|
ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone());
|
|
|
|
|
2020-02-26 19:33:09 +00:00
|
|
|
auto syntax = SyntaxAnalyzer(context).analyze(expr_list, columns);
|
2019-05-10 03:42:28 +00:00
|
|
|
auto index_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false);
|
|
|
|
auto index_sample = ExpressionAnalyzer(expr_list, syntax, context).getActions(true)->getSampleBlock();
|
|
|
|
|
2019-06-19 10:50:37 +00:00
|
|
|
assertIndexColumnsType(index_sample);
|
2019-05-10 03:42:28 +00:00
|
|
|
|
|
|
|
double max_conflict_probability = 0.025;
|
|
|
|
if (node->type->arguments && !node->type->arguments->children.empty())
|
|
|
|
max_conflict_probability = typeid_cast<const ASTLiteral &>(*node->type->arguments->children[0]).value.get<Float64>();
|
|
|
|
|
2019-06-19 15:09:07 +00:00
|
|
|
const auto & bits_per_row_and_size_of_hash_functions = BloomFilterHash::calculationBestPractices(max_conflict_probability);
|
2019-05-10 03:42:28 +00:00
|
|
|
|
|
|
|
return std::make_unique<MergeTreeIndexBloomFilter>(
|
|
|
|
node->name, std::move(index_expr), index_sample.getNames(), index_sample.getDataTypes(), index_sample, node->granularity,
|
|
|
|
bits_per_row_and_size_of_hash_functions.first, bits_per_row_and_size_of_hash_functions.second);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|