2019-01-08 17:27:44 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeMinMaxIndex.h>
|
2019-01-10 13:50:41 +00:00
|
|
|
|
|
|
|
#include <Interpreters/ExpressionActions.h>
|
|
|
|
#include <Interpreters/ExpressionAnalyzer.h>
|
|
|
|
#include <Interpreters/SyntaxAnalyzer.h>
|
|
|
|
|
2019-01-09 14:15:23 +00:00
|
|
|
#include <Poco/Logger.h>
|
2019-01-08 17:27:44 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-01-22 19:43:52 +00:00
|
|
|
/// Error codes referenced by the exceptions thrown in this file.
/// They are only declared here; the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int INCORRECT_QUERY;
}
|
|
|
|
|
|
|
|
|
2019-01-08 17:27:44 +00:00
|
|
|
/// Creates a granule for the given minmax index.
/// The `index` argument is stored in the member of the same name and the
/// list of per-column ranges (`parallelogram`) starts out empty.
MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index)
    : MergeTreeIndexGranule()
    , index(index)
    , parallelogram()
{
}
|
|
|
|
|
|
|
|
/// Writes the granule into `ostr`.
/// For every index column the lower bound of its range is written first and
/// the upper bound second, both through the column's data type.
/// Throws LOGICAL_ERROR when the granule is empty.
void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const
{
    if (empty())
        throw Exception(
            "Attempt to write empty minmax index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR);

    Poco::Logger * log = &Poco::Logger::get("minmax_idx");
    LOG_DEBUG(log, "serializeBinary Granule");

    const size_t columns_count = index.columns.size();
    for (size_t col = 0; col != columns_count; ++col)
    {
        const DataTypePtr & data_type = index.data_types[col];
        const auto & range = parallelogram[col];

        LOG_DEBUG(log, "parallel " << col << " :: "
            << applyVisitor(FieldVisitorToString(), range.left) << " "
            << applyVisitor(FieldVisitorToString(), range.right));

        /// Order matters: deserializeBinary reads the values back in the same sequence.
        data_type->serializeBinary(range.left, ostr);
        data_type->serializeBinary(range.right, ostr);
    }
}
|
|
|
|
|
|
|
|
/// Restores the granule from `istr`, discarding any ranges it held before.
/// For every index column two values are read through the column's data type
/// (first the minimum, then the maximum) and stored as a range with both
/// bound flags set to true.
void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr)
{
    Poco::Logger * log = &Poco::Logger::get("minmax_idx");
    LOG_DEBUG(log, "deserializeBinary Granule");

    parallelogram.clear();

    for (size_t col = 0, columns_count = index.columns.size(); col < columns_count; ++col)
    {
        const DataTypePtr & data_type = index.data_types[col];

        Field lower_bound;
        Field upper_bound;
        data_type->deserializeBinary(lower_bound, istr);
        data_type->deserializeBinary(upper_bound, istr);

        LOG_DEBUG(log, "parallel " << col << " :: "
            << applyVisitor(FieldVisitorToString(), lower_bound) << " "
            << applyVisitor(FieldVisitorToString(), upper_bound));

        parallelogram.emplace_back(lower_bound, true, upper_bound, true);
    }
}
|
|
|
|
|
|
|
|
/// Returns a human-readable description of the granule: the prefix
/// "minmax granule: " followed by one "[left, right]" pair per stored range.
String MergeTreeMinMaxGranule::toString() const
{
    String description = "minmax granule: ";

    for (const auto & range : parallelogram)
    {
        description += "[";
        description += applyVisitor(FieldVisitorToString(), range.left);
        description += ", ";
        description += applyVisitor(FieldVisitorToString(), range.right);
        description += "]";
    }

    return description;
}
|
|
|
|
|
|
|
|
void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t limit)
|
|
|
|
{
|
2019-01-09 14:15:23 +00:00
|
|
|
Poco::Logger * log = &Poco::Logger::get("minmax_idx");
|
|
|
|
|
|
|
|
LOG_DEBUG(log, "update Granule " << parallelogram.size()
|
|
|
|
<< " pos: "<< *pos << " limit: " << limit << " rows: " << block.rows());
|
|
|
|
|
2019-01-08 17:27:44 +00:00
|
|
|
size_t rows_read = 0;
|
|
|
|
for (size_t i = 0; i < index.columns.size(); ++i)
|
|
|
|
{
|
2019-01-09 14:15:23 +00:00
|
|
|
LOG_DEBUG(log, "granule column: " << index.columns[i]);
|
|
|
|
|
2019-01-08 17:27:44 +00:00
|
|
|
auto column = block.getByName(index.columns[i]).column;
|
|
|
|
size_t cur;
|
|
|
|
/// TODO: more effective (index + getExtremes??)
|
|
|
|
for (cur = 0; cur < limit && cur + *pos < column->size(); ++cur)
|
|
|
|
{
|
|
|
|
Field field;
|
2019-01-09 14:15:23 +00:00
|
|
|
column->get(cur + *pos, field);
|
|
|
|
LOG_DEBUG(log, "upd:: " << applyVisitor(FieldVisitorToString(), field));
|
|
|
|
if (parallelogram.size() <= i)
|
2019-01-08 17:27:44 +00:00
|
|
|
{
|
2019-01-09 14:15:23 +00:00
|
|
|
LOG_DEBUG(log, "emplaced");
|
2019-01-08 17:27:44 +00:00
|
|
|
parallelogram.emplace_back(field, true, field, true);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
parallelogram[i].left = std::min(parallelogram[i].left, field);
|
|
|
|
parallelogram[i].right = std::max(parallelogram[i].right, field);
|
|
|
|
}
|
|
|
|
}
|
2019-01-09 14:15:23 +00:00
|
|
|
LOG_DEBUG(log, "res:: ["
|
|
|
|
<< applyVisitor(FieldVisitorToString(), parallelogram[i].left) << ", "
|
|
|
|
<< applyVisitor(FieldVisitorToString(), parallelogram[i].right) << "]");
|
2019-01-08 17:27:44 +00:00
|
|
|
rows_read = cur;
|
|
|
|
}
|
2019-01-09 14:15:23 +00:00
|
|
|
LOG_DEBUG(log, "updated rows_read: " << rows_read);
|
2019-01-08 17:27:44 +00:00
|
|
|
|
|
|
|
*pos += rows_read;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// Builds the condition for a minmax index.
/// The nested `condition` member is constructed from the query, the context
/// and the columns and expression of `index`.
MinMaxCondition::MinMaxCondition(
    const SelectQueryInfo & query,
    const Context & context,
    const MergeTreeMinMaxIndex & index)
    : IndexCondition()
    , index(index)
    , condition(query, context, index.columns, index.expr)
{
}
|
|
|
|
|
|
|
|
bool MinMaxCondition::alwaysUnknownOrTrue() const
|
|
|
|
{
|
|
|
|
return condition.alwaysUnknownOrTrue();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Checks the nested condition against the ranges stored in `idx_granule`.
/// The granule must be a MergeTreeMinMaxGranule; any other granule type
/// (or a null pointer) results in a LOGICAL_ERROR exception.
bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
{
    const auto minmax_granule = std::dynamic_pointer_cast<MergeTreeMinMaxGranule>(idx_granule);

    if (minmax_granule)
        return condition.mayBeTrueInParallelogram(minmax_granule->parallelogram, index.data_types);

    throw Exception(
        "Minmax index condition got wrong granule", ErrorCodes::LOGICAL_ERROR);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Creates a new MergeTreeMinMaxGranule constructed from this index.
MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const
{
    MergeTreeIndexGranulePtr granule = std::make_shared<MergeTreeMinMaxGranule>(*this);
    return granule;
}
|
|
|
|
|
|
|
|
/// Creates a MinMaxCondition for the given query and context, constructed from this index.
IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition(
    const SelectQueryInfo & query, const Context & context) const
{
    IndexConditionPtr index_condition = std::make_shared<MinMaxCondition>(query, context, *this);
    return index_condition;
}
|
|
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<MergeTreeIndex> MergeTreeMinMaxIndexCreator(
|
2019-01-09 17:05:52 +00:00
|
|
|
const MergeTreeData & data,
|
|
|
|
std::shared_ptr<ASTIndexDeclaration> node,
|
|
|
|
const Context & context)
|
2019-01-08 17:27:44 +00:00
|
|
|
{
|
|
|
|
if (node->name.empty())
|
|
|
|
throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY);
|
|
|
|
|
|
|
|
if (node->type->arguments)
|
|
|
|
throw Exception("Minmax index have not any arguments", ErrorCodes::INCORRECT_QUERY);
|
|
|
|
|
|
|
|
ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone());
|
|
|
|
auto syntax = SyntaxAnalyzer(context, {}).analyze(
|
|
|
|
expr_list, data.getColumns().getAllPhysical());
|
|
|
|
auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false);
|
|
|
|
|
2019-01-09 14:15:23 +00:00
|
|
|
auto sample = ExpressionAnalyzer(expr_list, syntax, context)
|
|
|
|
.getActions(true)->getSampleBlock();
|
2019-01-08 17:27:44 +00:00
|
|
|
|
2019-01-10 12:57:12 +00:00
|
|
|
Names columns;
|
|
|
|
DataTypes data_types;
|
|
|
|
|
2019-01-09 14:15:23 +00:00
|
|
|
Poco::Logger * log = &Poco::Logger::get("minmax_idx");
|
2019-01-15 17:39:10 +00:00
|
|
|
LOG_DEBUG(log, "new minmax index" << node->name);
|
2019-01-09 14:15:23 +00:00
|
|
|
for (size_t i = 0; i < expr_list->children.size(); ++i)
|
2019-01-08 17:27:44 +00:00
|
|
|
{
|
2019-01-09 14:15:23 +00:00
|
|
|
const auto & column = sample.getByPosition(i);
|
|
|
|
|
2019-01-10 12:57:12 +00:00
|
|
|
columns.emplace_back(column.name);
|
|
|
|
data_types.emplace_back(column.type);
|
2019-01-09 14:15:23 +00:00
|
|
|
LOG_DEBUG(log, ">" << column.name << " " << column.type->getName());
|
2019-01-08 17:27:44 +00:00
|
|
|
}
|
|
|
|
|
2019-01-10 12:57:12 +00:00
|
|
|
return std::make_unique<MergeTreeMinMaxIndex>(
|
|
|
|
node->name, std::move(minmax_expr), columns, data_types, node->granularity.get<size_t>());;
|
2019-01-08 17:27:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|