ClickHouse/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp

208 lines
6.5 KiB
C++
Raw Normal View History

2019-06-19 15:30:48 +00:00
#include <Storages/MergeTree/MergeTreeIndexMinMax.h>
2019-01-10 13:50:41 +00:00
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/TreeRewriter.h>
2019-01-10 13:50:41 +00:00
2019-01-09 14:15:23 +00:00
#include <Poco/Logger.h>
2019-01-08 17:27:44 +00:00
namespace DB
{
2019-01-22 19:43:52 +00:00
/// Error codes referenced by the exceptions thrown in this file.
/// Only declared here (extern); the definition lives in another translation unit.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
2020-05-28 12:37:05 +00:00
/// Builds a granule that only remembers the index name and the sample block
/// describing the indexed columns. The hyperrectangle is not initialised here,
/// so it is left default-constructed.
MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(
    const String & index_name_,
    const Block & index_sample_block_)
    : index_name(index_name_)
    , index_sample_block(index_sample_block_)
{
}
2019-03-08 19:52:21 +00:00
2019-06-19 15:30:48 +00:00
/// Builds a granule from already computed per-column ranges.
/// `hyperrectangle_` is taken by rvalue reference and moved into the granule,
/// so the caller's vector must not be relied upon afterwards.
MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(
    const String & index_name_, const Block & index_sample_block_, std::vector<Range> && hyperrectangle_)
    : index_name(index_name_)
    , index_sample_block(index_sample_block_)
    , hyperrectangle(std::move(hyperrectangle_))
{
}
2019-01-08 17:27:44 +00:00
2019-06-19 15:30:48 +00:00
/// Writes the granule to `ostr`, one (min, max) pair per index column, in column order.
///
/// For a Nullable column a boolean flag is written first; the flag is set when either
/// bound is NULL, and in that case no values follow it. For every other column the two
/// bounds are written unconditionally with the type's default serialization.
///
/// Throws LOGICAL_ERROR if the granule is empty.
void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const
{
    if (empty())
        throw Exception(
            "Attempt to write empty minmax index " + backQuote(index_name), ErrorCodes::LOGICAL_ERROR);

    const size_t columns_count = index_sample_block.columns();
    for (size_t column_idx = 0; column_idx < columns_count; ++column_idx)
    {
        const DataTypePtr & type = index_sample_block.getByPosition(column_idx).type;
        auto serialization = type->getDefaultSerialization();
        const auto & range = hyperrectangle[column_idx];

        /// Only Nullable columns carry the leading "is null" marker.
        bool bounds_are_null = false;
        if (type->isNullable())
        {
            /// A single NULL bound is enough to treat the whole range as NULL.
            bounds_are_null = range.left.isNull() || range.right.isNull();
            writeBinary(bounds_are_null, ostr);
        }

        if (bounds_are_null)
            continue;

        serialization->serializeBinary(range.left, ostr);
        serialization->serializeBinary(range.right, ostr);
    }
}
2019-06-19 15:30:48 +00:00
/// Replaces the granule contents with the ranges read from `istr`.
///
/// The layout mirrors what serializeBinary() in this file produces: for a Nullable
/// column a boolean flag comes first and, when it is set, both bounds become NULL
/// without reading any values; otherwise the min and max values are read with the
/// type's default serialization. Each range is stored with both ends inclusive.
void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr)
{
    hyperrectangle.clear();

    /// Shared across iterations, exactly like the bounds are reused column after column.
    Field range_min;
    Field range_max;

    const size_t columns_count = index_sample_block.columns();
    for (size_t column_idx = 0; column_idx < columns_count; ++column_idx)
    {
        const DataTypePtr & type = index_sample_block.getByPosition(column_idx).type;
        auto serialization = type->getDefaultSerialization();

        /// The marker is present in the stream only for Nullable columns.
        bool stored_as_null = false;
        if (type->isNullable())
            readBinary(stored_as_null, istr);

        if (stored_as_null)
        {
            range_min = Null();
            range_max = Null();
        }
        else
        {
            serialization->deserializeBinary(range_min, istr);
            serialization->deserializeBinary(range_max, istr);
        }

        hyperrectangle.emplace_back(range_min, true, range_max, true);
    }
}
2020-05-28 12:37:05 +00:00
/// Builds an aggregator for the index `index_name_`; `index_sample_block_` lists the
/// columns whose extremes will be tracked by update().
MergeTreeIndexAggregatorMinMax::MergeTreeIndexAggregatorMinMax(
    const String & index_name_,
    const Block & index_sample_block_)
    : index_name(index_name_)
    , index_sample_block(index_sample_block_)
{
}
2019-03-08 19:52:21 +00:00
2019-06-19 15:30:48 +00:00
/// Packs the ranges accumulated so far into a new minmax granule and returns it.
/// The aggregator's hyperrectangle is moved into the granule, so the aggregated
/// state is handed over rather than copied.
MergeTreeIndexGranulePtr MergeTreeIndexAggregatorMinMax::getGranuleAndReset()
{
    MergeTreeIndexGranulePtr granule = std::make_shared<MergeTreeIndexGranuleMinMax>(
        index_name, index_sample_block, std::move(hyperrectangle));
    return granule;
}
2019-06-19 15:30:48 +00:00
void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, size_t limit)
2019-01-08 17:27:44 +00:00
{
2019-02-06 07:49:18 +00:00
if (*pos >= block.rows())
throw Exception(
"The provided position is not less than the number of block rows. Position: "
+ toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR);
2019-01-26 19:31:12 +00:00
size_t rows_read = std::min(limit, block.rows() - *pos);
FieldRef field_min;
FieldRef field_max;
2020-05-27 18:38:34 +00:00
for (size_t i = 0; i < index_sample_block.columns(); ++i)
2019-01-08 17:27:44 +00:00
{
2020-05-27 18:38:34 +00:00
auto index_column_name = index_sample_block.getByPosition(i).name;
const auto & column = block.getByName(index_column_name).column;
2019-01-26 19:31:12 +00:00
column->cut(*pos, rows_read)->getExtremes(field_min, field_max);
2020-03-10 14:56:55 +00:00
if (hyperrectangle.size() <= i)
2019-01-08 17:27:44 +00:00
{
2020-03-10 14:56:55 +00:00
hyperrectangle.emplace_back(field_min, true, field_max, true);
2019-01-08 17:27:44 +00:00
}
2019-01-26 19:31:12 +00:00
else
{
2020-03-10 14:56:55 +00:00
hyperrectangle[i].left = std::min(hyperrectangle[i].left, field_min);
hyperrectangle[i].right = std::max(hyperrectangle[i].right, field_max);
2019-01-26 19:31:12 +00:00
}
2019-01-08 17:27:44 +00:00
}
*pos += rows_read;
2019-01-27 18:23:08 +00:00
}
2019-01-08 17:27:44 +00:00
2019-06-19 15:30:48 +00:00
/// Prepares the condition used to test granules of `index` against `query`.
/// Keeps the index data types and constructs the underlying condition from the query,
/// the context, and the index column names and expression.
MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax(
    const IndexDescription & index, const SelectQueryInfo & query, const Context & context)
    : index_data_types(index.data_types)
    , condition(query, context, index.column_names, index.expression)
{
}
2019-01-08 17:27:44 +00:00
2019-06-19 15:30:48 +00:00
bool MergeTreeIndexConditionMinMax::alwaysUnknownOrTrue() const
2019-01-08 17:27:44 +00:00
{
return condition.alwaysUnknownOrTrue();
}
2019-06-19 15:30:48 +00:00
/// Tells whether the condition may hold for some row covered by `idx_granule`.
///
/// Returns true immediately when any range of the granule has a NULL bound; otherwise
/// the answer is the `can_be_true` part of the hyperrectangle check of the underlying condition.
///
/// Throws LOGICAL_ERROR if `idx_granule` is not a MergeTreeIndexGranuleMinMax.
bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
{
    const auto minmax_granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleMinMax>(idx_granule);
    if (minmax_granule == nullptr)
        throw Exception(
            "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);

    const auto & ranges = minmax_granule->hyperrectangle;

    /// A range with a NULL bound cannot be used to exclude the granule.
    for (const auto & bounds : ranges)
    {
        const bool has_null_bound = bounds.left.isNull() || bounds.right.isNull();
        if (has_null_bound)
            return true;
    }

    return condition.checkInHyperrectangle(ranges, index_data_types).can_be_true;
}
2019-06-19 15:30:48 +00:00
/// Creates a new minmax granule bound to this index's name and sample block.
MergeTreeIndexGranulePtr MergeTreeIndexMinMax::createIndexGranule() const
{
    MergeTreeIndexGranulePtr granule = std::make_shared<MergeTreeIndexGranuleMinMax>(index.name, index.sample_block);
    return granule;
}
2019-03-08 19:52:21 +00:00
2019-06-19 15:30:48 +00:00
/// Creates a new minmax aggregator bound to this index's name and sample block.
MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator() const
{
    MergeTreeIndexAggregatorPtr aggregator
        = std::make_shared<MergeTreeIndexAggregatorMinMax>(index.name, index.sample_block);
    return aggregator;
}
2019-06-19 15:30:48 +00:00
/// Creates the condition object that evaluates `query` against granules of this index.
///
/// @param query    Query whose filter is analysed.
/// @param context  Query context forwarded to the condition.
/// @return A MergeTreeIndexConditionMinMax built from this index's description.
MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition(
    const SelectQueryInfo & query, const Context & context) const
{
    return std::make_shared<MergeTreeIndexConditionMinMax>(index, query, context);
}
2019-06-19 15:30:48 +00:00
bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const
2019-02-25 08:43:19 +00:00
{
const String column_name = node->getColumnName();
2019-02-25 10:12:05 +00:00
2020-05-27 18:38:34 +00:00
for (const auto & cname : index.column_names)
2019-08-03 11:02:40 +00:00
if (column_name == cname)
2019-02-25 10:12:05 +00:00
return true;
if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
if (func->arguments->children.size() == 1)
return mayBenefitFromIndexForIn(func->arguments->children.front());
return false;
2019-02-25 08:43:19 +00:00
}
2019-01-08 17:27:44 +00:00
2020-05-28 13:45:08 +00:00
/// Factory function: wraps the index description into a MergeTreeIndexMinMax.
MergeTreeIndexPtr minmaxIndexCreator(const IndexDescription & index)
{
    MergeTreeIndexPtr minmax_index = std::make_shared<MergeTreeIndexMinMax>(index);
    return minmax_index;
}
2020-05-28 13:09:03 +00:00
/// Validation hook for minmax index descriptions.
/// Both parameters are unused and the body is empty, so no checks are performed
/// and any description is accepted.
void minmaxIndexValidator(const IndexDescription & /* index */, bool /* attach */)
{
    /// Intentionally left empty.
}
}