ClickHouse/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp

215 lines
6.5 KiB
C++
Raw Normal View History

2019-01-08 17:27:44 +00:00
#include <Storages/MergeTree/MergeTreeMinMaxIndex.h>
2019-01-10 13:50:41 +00:00
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/SyntaxAnalyzer.h>
2019-01-09 14:15:23 +00:00
#include <Poco/Logger.h>
2019-01-08 17:27:44 +00:00
namespace DB
{
2019-01-22 19:43:52 +00:00
/// Error codes referenced by the exceptions thrown in this file.
/// They are only declared here (`extern`); their definitions live elsewhere.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int INCORRECT_QUERY;
}
2019-01-08 17:27:44 +00:00
/// Creates a granule bound to the given minmax index with an empty list of ranges.
MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index_)
    : IMergeTreeIndexGranule()
    , index(index_)
    , parallelogram()
{
}
/// Creates a granule bound to the given minmax index and takes over
/// the already computed per-column ranges by moving them in.
MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(
    const MergeTreeMinMaxIndex & index_, std::vector<Range> && parallelogram_)
    : IMergeTreeIndexGranule()
    , index(index_)
    , parallelogram(std::move(parallelogram_))
{
}
2019-01-08 17:27:44 +00:00
/// Writes the (left, right) bounds of every index column to `ostr`, in column order.
///
/// For a column whose type reports isNullable(), a boolean flag is written first;
/// the two bounds follow only when that flag is false.
/// Throws an Exception with LOGICAL_ERROR when empty() is true.
void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const
{
    if (empty())
        throw Exception(
            "Attempt to write empty minmax index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR);

    const size_t columns_count = index.columns.size();
    for (size_t col = 0; col < columns_count; ++col)
    {
        const DataTypePtr & type = index.data_types[col];
        const Range & range = parallelogram[col];

        if (type->isNullable())
        {
            /// A Null on either side is enough to mark the whole range as null.
            const bool is_null = range.left.isNull() || range.right.isNull();
            writeBinary(is_null, ostr);

            /// Nothing else is stored for a null range.
            if (is_null)
                continue;
        }

        type->serializeBinary(range.left, ostr);
        type->serializeBinary(range.right, ostr);
    }
}
/// Discards the stored ranges and reads one (min, max) pair per index column from `istr`.
///
/// For a column whose type reports isNullable(), a boolean flag is read first;
/// when it is set, both bounds become Null and no values are read for that column.
/// Every range is appended with both ends flagged `true`.
void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr)
{
    parallelogram.clear();

    /// Reused across iterations; each iteration overwrites both before use.
    Field lower;
    Field upper;

    const size_t columns_count = index.columns.size();
    for (size_t col = 0; col < columns_count; ++col)
    {
        const DataTypePtr & type = index.data_types[col];

        /// Only nullable columns carry the leading flag in the stream.
        bool is_null = false;
        if (type->isNullable())
            readBinary(is_null, istr);

        if (is_null)
        {
            lower = Null();
            upper = Null();
        }
        else
        {
            type->deserializeBinary(lower, istr);
            type->deserializeBinary(upper, istr);
        }

        parallelogram.emplace_back(lower, true, upper, true);
    }
}
2019-03-08 19:52:21 +00:00
/// Creates an aggregator bound to the given minmax index.
MergeTreeMinMaxAggregator::MergeTreeMinMaxAggregator(const MergeTreeMinMaxIndex & index_)
    : index(index_)
{
}
/// Builds a granule from the ranges accumulated so far.
/// The ranges are moved into the granule, so the aggregator no longer holds them afterwards.
MergeTreeIndexGranulePtr MergeTreeMinMaxAggregator::getGranuleAndReset()
{
    MergeTreeIndexGranulePtr granule
        = std::make_shared<MergeTreeMinMaxGranule>(index, std::move(parallelogram));
    return granule;
}
void MergeTreeMinMaxAggregator::update(const Block & block, size_t * pos, size_t limit)
2019-01-08 17:27:44 +00:00
{
2019-02-06 07:49:18 +00:00
if (*pos >= block.rows())
throw Exception(
"The provided position is not less than the number of block rows. Position: "
+ toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR);
2019-01-26 19:31:12 +00:00
size_t rows_read = std::min(limit, block.rows() - *pos);
2019-02-14 17:35:50 +00:00
Field field_min;
Field field_max;
2019-01-08 17:27:44 +00:00
for (size_t i = 0; i < index.columns.size(); ++i)
{
2019-01-26 19:31:12 +00:00
const auto & column = block.getByName(index.columns[i]).column;
column->cut(*pos, rows_read)->getExtremes(field_min, field_max);
if (parallelogram.size() <= i)
2019-01-08 17:27:44 +00:00
{
2019-01-26 19:31:12 +00:00
parallelogram.emplace_back(field_min, true, field_max, true);
2019-01-08 17:27:44 +00:00
}
2019-01-26 19:31:12 +00:00
else
{
parallelogram[i].left = std::min(parallelogram[i].left, field_min);
parallelogram[i].right = std::max(parallelogram[i].right, field_max);
}
2019-01-08 17:27:44 +00:00
}
*pos += rows_read;
2019-01-27 18:23:08 +00:00
}
2019-01-08 17:27:44 +00:00
/// Creates the condition object for `index_`.
/// The wrapped `condition` is constructed from the query, the context,
/// and the index's column names and expression.
MinMaxCondition::MinMaxCondition(
    const SelectQueryInfo & query,
    const Context & context,
    const MergeTreeMinMaxIndex & index_)
    : IIndexCondition()
    , index(index_)
    , condition(query, context, index_.columns, index_.expr)
{
}
2019-01-08 17:27:44 +00:00
/// Returns whatever the wrapped condition reports from its own alwaysUnknownOrTrue().
bool MinMaxCondition::alwaysUnknownOrTrue() const
{
    const bool always_unknown_or_true = condition.alwaysUnknownOrTrue();
    return always_unknown_or_true;
}
/// Evaluates the wrapped condition against the ranges stored in `idx_granule`.
///
/// Returns the result of condition.mayBeTrueInParallelogram() for the granule's
/// ranges and the index's data types.
/// Throws an Exception with LOGICAL_ERROR when `idx_granule` cannot be cast to
/// MergeTreeMinMaxGranule (a null pointer also ends up here).
bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
{
    const auto minmax_granule = std::dynamic_pointer_cast<MergeTreeMinMaxGranule>(idx_granule);

    if (minmax_granule)
        return condition.mayBeTrueInParallelogram(minmax_granule->parallelogram, index.data_types);

    throw Exception(
        "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
}
/// Creates a new minmax granule bound to this index, with no ranges yet.
MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const
{
    MergeTreeIndexGranulePtr granule = std::make_shared<MergeTreeMinMaxGranule>(*this);
    return granule;
}
2019-03-08 19:52:21 +00:00
/// Creates a new minmax aggregator bound to this index.
MergeTreeIndexAggregatorPtr MergeTreeMinMaxIndex::createIndexAggregator() const
{
    MergeTreeIndexAggregatorPtr aggregator = std::make_shared<MergeTreeMinMaxAggregator>(*this);
    return aggregator;
}
2019-01-08 17:27:44 +00:00
/// Creates the minmax condition for this index from the given query and context.
///
/// @param query    Query information forwarded to the MinMaxCondition constructor.
/// @param context  Context forwarded to the MinMaxCondition constructor.
/// @return         A shared pointer to the newly created MinMaxCondition.
IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition(
    const SelectQueryInfo & query, const Context & context) const
{
    return std::make_shared<MinMaxCondition>(query, context, *this);
}
2019-02-25 08:43:19 +00:00
bool MergeTreeMinMaxIndex::mayBenefitFromIndexForIn(const ASTPtr & node) const
{
const String column_name = node->getColumnName();
2019-02-25 10:12:05 +00:00
for (const auto & name : columns)
if (column_name == name)
return true;
if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
if (func->arguments->children.size() == 1)
return mayBenefitFromIndexForIn(func->arguments->children.front());
return false;
2019-02-25 08:43:19 +00:00
}
2019-01-08 17:27:44 +00:00
2019-02-06 09:05:05 +00:00
std::unique_ptr<IMergeTreeIndex> minmaxIndexCreator(
2019-01-30 10:18:59 +00:00
const NamesAndTypesList & new_columns,
2019-01-09 17:05:52 +00:00
std::shared_ptr<ASTIndexDeclaration> node,
const Context & context)
2019-01-08 17:27:44 +00:00
{
if (node->name.empty())
throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY);
if (node->type->arguments)
throw Exception("Minmax index have not any arguments", ErrorCodes::INCORRECT_QUERY);
ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone());
auto syntax = SyntaxAnalyzer(context, {}).analyze(
2019-02-05 16:43:52 +00:00
expr_list, new_columns);
2019-01-08 17:27:44 +00:00
auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false);
2019-01-09 14:15:23 +00:00
auto sample = ExpressionAnalyzer(expr_list, syntax, context)
2019-02-05 16:43:52 +00:00
.getActions(true)->getSampleBlock();
2019-01-08 17:27:44 +00:00
2019-01-10 12:57:12 +00:00
Names columns;
DataTypes data_types;
2019-01-09 14:15:23 +00:00
for (size_t i = 0; i < expr_list->children.size(); ++i)
2019-01-08 17:27:44 +00:00
{
2019-01-09 14:15:23 +00:00
const auto & column = sample.getByPosition(i);
2019-01-10 12:57:12 +00:00
columns.emplace_back(column.name);
data_types.emplace_back(column.type);
2019-01-08 17:27:44 +00:00
}
2019-01-10 12:57:12 +00:00
return std::make_unique<MergeTreeMinMaxIndex>(
2019-02-06 08:43:54 +00:00
node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity);
2019-01-08 17:27:44 +00:00
}
}