hypothesis basic

This commit is contained in:
Nikita Vasilev 2021-04-26 12:40:54 +03:00
parent 4601623d61
commit 2896f9aa75
7 changed files with 296 additions and 1 deletions

View File

@ -64,7 +64,7 @@ add_subdirectory (Coordination)
set(dbms_headers)
set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h Storages/MergeTree/SubstituteColumnOptimizer.cpp Storages/MergeTree/SubstituteColumnOptimizer.h)
set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h Storages/MergeTree/SubstituteColumnOptimizer.cpp Storages/MergeTree/SubstituteColumnOptimizer.h Storages/MergeTree/MergeTreeIndexHypothesis.cpp Storages/MergeTree/MergeTreeIndexHypothesis.h)
add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)

View File

@ -0,0 +1,164 @@
#include <Storages/MergeTree/MergeTreeIndexHypothesis.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/TreeRewriter.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
namespace DB
{
/// Error codes referenced by this translation unit; the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int INCORRECT_QUERY;
}
/// Creates a granule that holds no data yet: it reports itself as empty and
/// its `met` flag is cleared.
MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & index_name_)
    : index_name(index_name_)
    , is_empty(true)
    , met(false)
{
}
/// Creates a populated (non-empty) granule carrying the given `met_` flag.
MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & index_name_, const bool met_)
    : index_name(index_name_)
    , is_empty(false)
    , met(met_)
{
}
/// Writes the granule to `ostr` as a single UInt8 value holding the `met` flag.
/// Only `met` is written; `is_empty` is not part of the serialized form.
void MergeTreeIndexGranuleHypothesis::serializeBinary(WriteBuffer & ostr) const
{
    const DataTypePtr flag_type = std::make_shared<DataTypeUInt8>();
    const auto met_as_byte = static_cast<UInt8>(met);
    flag_type->serializeBinary(met_as_byte, ostr);
}
/// Reads one UInt8 value from `istr` into `met` and marks the granule as non-empty.
void MergeTreeIndexGranuleHypothesis::deserializeBinary(ReadBuffer & istr)
{
    const DataTypePtr flag_type = std::make_shared<DataTypeUInt8>();

    Field stored_flag;
    flag_type->deserializeBinary(stored_flag, istr);

    met = stored_flag.get<UInt8>();
    is_empty = false;
}
/// Remembers the index name and the name of the block column this aggregator reads in update().
MergeTreeIndexAggregatorHypothesis::MergeTreeIndexAggregatorHypothesis(
    const String & index_name_,
    const String & column_name_)
    : index_name(index_name_)
    , column_name(column_name_)
{
}
/// Packs the currently accumulated `met` flag into a new granule and resets the
/// aggregator to its initial state (`met == true`, empty).
MergeTreeIndexGranulePtr MergeTreeIndexAggregatorHypothesis::getGranuleAndReset()
{
    const auto finished = std::make_shared<MergeTreeIndexGranuleHypothesis>(index_name, met);

    /// Start accumulating the next granule from scratch.
    is_empty = true;
    met = true;

    return finished;
}
void MergeTreeIndexAggregatorHypothesis::update(const Block & block, size_t * pos, size_t limit)
{
size_t rows_read = std::min(limit, block.rows() - *pos);
if (rows_read == 0)
return;
const auto & column = block.getByName(column_name).column->cut(*pos, rows_read);
if (!column->hasEqualValues() || column->get64(0) == 0)
met = false;
is_empty = false;
*pos += rows_read;
}
/// Captures the filter of the SELECT query that this condition will be checked against.
///
/// `expression_ast` becomes a clone of WHERE, a clone of PREWHERE, or `and(WHERE, PREWHERE)`
/// when both are present. If the query has neither clause, `expression_ast` is left unset.
/// The context argument is not used.
MergeTreeIndexConditionHypothesis::MergeTreeIndexConditionHypothesis(
    const String & index_name_,
    const String & column_name_,
    const SelectQueryInfo & query_,
    const Context &)
    : index_name(index_name_)
    , column_name(column_name_)
{
    const auto & select = query_.query->as<ASTSelectQuery &>();
    const auto where_clause = select.where();
    const auto prewhere_clause = select.prewhere();

    if (!where_clause && !prewhere_clause)
        return;

    if (!prewhere_clause)
        expression_ast = where_clause->clone();
    else if (!where_clause)
        expression_ast = prewhere_clause->clone();
    else
        expression_ast = makeASTFunction("and", where_clause->clone(), prewhere_clause->clone());
}
/// Estimates what `ast` can evaluate to on a granule whose hypothesis flag is `value`.
///
/// Returns a pair {may be true, may be false}. {true, true} means "nothing is known".
///
/// `value == true` means the hypothesis expression is non-zero for every row of the
/// granule, so the expression itself can only be true: {true, false}.
/// `value == false` only says that the hypothesis does not hold for *all* rows; individual
/// rows may still satisfy it, so nothing can be concluded: {true, true}. Reporting
/// "cannot be true" here would drop granules that contain matching rows.
///
/// `not(x)` swaps the two components of the result for `x`. Every other node
/// (including `and` / `or`, which are not analysed yet) is treated as unknown.
std::pair<bool, bool> MergeTreeIndexConditionHypothesis::mayBeTrue(const ASTPtr & ast, const bool value) const
{
    /// No expression at all (e.g. the query has neither WHERE nor PREWHERE): nothing to prune.
    if (!ast)
        return {true, true};

    if (ast->getColumnName() == column_name)
        return {true, !value};

    const auto * func = ast->as<ASTFunction>();
    if (!func)
        return {true, true};

    if (func->name == "not" && func->arguments && func->arguments->children.size() == 1)
    {
        const auto inner = mayBeTrue(func->arguments->children.front(), value);
        return {inner.second, inner.first};
    }

    /// TODO: analyse "and" / "or".
    return {true, true};
}
/// Decides whether the stored query filter may hold for some row of the given granule.
///
/// An empty granule is never pruned. Otherwise the granule's `met` flag is passed to
/// mayBeTrue() together with the stored filter expression and the "may be true"
/// component of its result is returned.
///
/// Throws LOGICAL_ERROR if the granule is not a MergeTreeIndexGranuleHypothesis.
bool MergeTreeIndexConditionHypothesis::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
{
    if (idx_granule->empty())
        return true;

    const auto granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleHypothesis>(idx_granule);
    if (!granule)
        throw Exception(
            "Hypothesis index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);

    return mayBeTrue(expression_ast, granule->met).first;
}
/// Creates an empty granule for this index.
MergeTreeIndexGranulePtr MergeTreeIndexHypothesis::createIndexGranule() const
{
    MergeTreeIndexGranulePtr empty_granule = std::make_shared<MergeTreeIndexGranuleHypothesis>(index.name);
    return empty_granule;
}
/// Creates an aggregator that reads the first column of the index sample block.
MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator() const
{
    const String hypothesis_column = index.sample_block.getNames().front();
    return std::make_shared<MergeTreeIndexAggregatorHypothesis>(index.name, hypothesis_column);
}
/// Creates the condition object that matches `query` against the first column
/// of the index sample block.
MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition(
    const SelectQueryInfo & query,
    const Context & context) const
{
    const String hypothesis_column = index.sample_block.getNames().front();
    return std::make_shared<MergeTreeIndexConditionHypothesis>(index.name, hypothesis_column, query, context);
}
/// Always answers "no", regardless of the node passed in.
bool MergeTreeIndexHypothesis::mayBenefitFromIndexForIn(const ASTPtr & /*node*/) const
{
    constexpr bool can_help_with_in = false;
    return can_help_with_in;
}
/// Factory callback: builds a hypothesis index object from its description.
MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index)
{
    MergeTreeIndexPtr created = std::make_shared<MergeTreeIndexHypothesis>(index);
    return created;
}
void hypothesisIndexValidator(const IndexDescription &, bool /*attach*/)
{
}
}

View File

@ -0,0 +1,107 @@
#pragma once
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/SetVariants.h>
#include <memory>
#include <set>
namespace DB
{
/// Forward declaration of the index class defined at the end of this header.
class MergeTreeIndexHypothesis;
/// Granule of the hypothesis index: a single boolean flag (`met`) per granule.
struct MergeTreeIndexGranuleHypothesis : public IMergeTreeIndexGranule
{
    /// Granule without data.
    explicit MergeTreeIndexGranuleHypothesis(const String & index_name_);

    /// Granule holding an already computed flag.
    MergeTreeIndexGranuleHypothesis(const String & index_name_, const bool met_);

    ~MergeTreeIndexGranuleHypothesis() override = default;

    void serializeBinary(WriteBuffer & ostr) const override;
    void deserializeBinary(ReadBuffer & istr) override;

    bool empty() const override { return is_empty; }

    String index_name;
    bool is_empty = true;
    bool met = true;
};
/// Accumulates the `met` flag over the rows passed to update() and emits it
/// as a granule from getGranuleAndReset().
struct MergeTreeIndexAggregatorHypothesis : IMergeTreeIndexAggregator
{
    explicit MergeTreeIndexAggregatorHypothesis(const String & index_name_, const String & column_name_);

    ~MergeTreeIndexAggregatorHypothesis() override = default;

    bool empty() const override { return is_empty; }

    void update(const Block & block, size_t * pos, size_t limit) override;
    MergeTreeIndexGranulePtr getGranuleAndReset() override;

private:
    String index_name;
    String column_name;

    bool met = true;
    bool is_empty = true;
};
/// Query-side part of the hypothesis index: holds the filter expression of a SELECT
/// and answers whether it may hold on a given granule.
class MergeTreeIndexConditionHypothesis : public IMergeTreeIndexCondition
{
public:
    MergeTreeIndexConditionHypothesis(
        const String & index_name_,
        const String & column_name_,
        const SelectQueryInfo & query,
        const Context & context);

    ~MergeTreeIndexConditionHypothesis() override = default;

    /// This condition never declares itself useless.
    bool alwaysUnknownOrTrue() const override { return false; }

    bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override;

private:
    /// Recursive helper used by mayBeTrueOnGranule(), which consumes the first
    /// component of the returned pair.
    std::pair<bool, bool> mayBeTrue(const ASTPtr & ast, const bool value) const;

    String index_name;
    String column_name;
    ASTPtr expression_ast;
};
/// The hypothesis index itself: a factory for its granules, aggregators and conditions.
class MergeTreeIndexHypothesis : public IMergeTreeIndex
{
public:
    MergeTreeIndexHypothesis(const IndexDescription & index_)
        : IMergeTreeIndex(index_)
    {
    }

    ~MergeTreeIndexHypothesis() override = default;

    MergeTreeIndexGranulePtr createIndexGranule() const override;
    MergeTreeIndexAggregatorPtr createIndexAggregator() const override;

    MergeTreeIndexConditionPtr createIndexCondition(
        const SelectQueryInfo & query,
        const Context & context) const override;

    bool mayBenefitFromIndexForIn(const ASTPtr & node) const override;

    size_t max_rows = 0;
};
}

View File

@ -98,6 +98,9 @@ MergeTreeIndexFactory::MergeTreeIndexFactory()
registerCreator("bloom_filter", bloomFilterIndexCreatorNew);
registerValidator("bloom_filter", bloomFilterIndexValidatorNew);
registerCreator("hypothesis", hypothesisIndexCreator);
registerValidator("hypothesis", hypothesisIndexValidator);
}
MergeTreeIndexFactory & MergeTreeIndexFactory::instance()

View File

@ -135,4 +135,7 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool attach);
MergeTreeIndexPtr bloomFilterIndexCreatorNew(const IndexDescription & index);
void bloomFilterIndexValidatorNew(const IndexDescription & index, bool attach);
/// Creator and validator callbacks for the `hypothesis` index type; MergeTreeIndexFactory
/// registers both under the name "hypothesis".
MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index);
void hypothesisIndexValidator(const IndexDescription & index, bool attach);
}

View File

@ -0,0 +1 @@
2

View File

@ -0,0 +1,17 @@
-- Smoke test for the `hypothesis` skip index.
SET convert_query_to_cnf = 1;
SET optimize_using_constraints = 1;
SET optimize_move_to_prewhere = 1;
-- Start from a clean state.
DROP DATABASE IF EXISTS constraint_test;
DROP TABLE IF EXISTS constraint_test.test;
CREATE DATABASE constraint_test;
-- The index stores the hypothesis `a = b`; index_granularity=1 with GRANULARITY 1 is requested for both the table and the index.
CREATE TABLE constraint_test.test (i UInt64, a UInt64, b UInt64, INDEX t (a = b) TYPE hypothesis GRANULARITY 1) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity=1;
-- Rows with i = 1 and i = 4 satisfy a = b; rows with i = 2 and i = 3 do not.
INSERT INTO constraint_test.test VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1), (4, 2, 2);
-- Two of the four inserted rows match the filter.
SELECT count() FROM constraint_test.test WHERE a = b;
-- Clean up.
DROP TABLE constraint_test.test;
DROP DATABASE constraint_test;