ClickHouse/src/Interpreters/TreeCNFConverter.h

168 lines
4.9 KiB
C++
Raw Normal View History

2021-01-04 20:55:32 +00:00
#pragma once
#include <Parsers/IAST_fwd.h>
2021-01-05 20:51:19 +00:00
#include <Parsers/ASTLiteral.h>
2021-01-04 20:55:32 +00:00
#include <vector>
#include <set>
#include <unordered_map>
namespace DB
{
class CNFQuery
{
public:
2021-03-04 12:11:43 +00:00
struct AtomicFormula
{
bool negative = false;
ASTPtr ast;
/// for set
bool operator<(const AtomicFormula & rhs) const
{
2021-04-28 17:35:51 +00:00
return ast->getTreeHash() == rhs.ast->getTreeHash()
? negative < rhs.negative
: ast->getTreeHash() < rhs.ast->getTreeHash();
}
bool operator==(const AtomicFormula & rhs) const
{
return negative == rhs.negative &&
ast->getTreeHash() == rhs.ast->getTreeHash() &&
ast->getColumnName() == rhs.ast->getColumnName();
2021-03-04 12:11:43 +00:00
}
};
using OrGroup = std::set<AtomicFormula>;
2021-01-04 20:55:32 +00:00
using AndGroup = std::set<OrGroup>;
2021-01-05 20:51:19 +00:00
CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { }
2021-01-04 20:55:32 +00:00
template <typename P>
2021-01-06 10:41:06 +00:00
CNFQuery & filterAlwaysTrueGroups(P predicate_is_unknown) /// delete always true groups
2021-01-05 20:51:19 +00:00
{
2021-01-04 20:55:32 +00:00
AndGroup filtered;
for (const auto & or_group : statements)
{
2021-01-06 10:41:06 +00:00
if (predicate_is_unknown(or_group))
2021-01-04 20:55:32 +00:00
filtered.insert(or_group);
}
std::swap(statements, filtered);
2021-01-05 20:51:19 +00:00
return *this;
2021-01-04 20:55:32 +00:00
}
template <typename P>
2021-01-06 10:41:06 +00:00
CNFQuery & filterAlwaysFalseAtoms(P predicate_is_unknown) /// delete always false atoms
2021-01-05 20:51:19 +00:00
{
2021-01-04 20:55:32 +00:00
AndGroup filtered;
for (const auto & or_group : statements)
{
OrGroup filtered_group;
2021-01-05 20:51:19 +00:00
for (auto ast : or_group)
{
2021-01-06 10:41:06 +00:00
if (predicate_is_unknown(ast))
2021-01-04 20:55:32 +00:00
filtered_group.insert(ast);
}
if (!filtered_group.empty())
filtered.insert(filtered_group);
2021-01-05 20:51:19 +00:00
else
{
2021-11-10 17:57:59 +00:00
/// all atoms false -> group false -> CNF false
2021-01-05 20:51:19 +00:00
filtered.clear();
filtered_group.clear();
2021-03-04 12:11:43 +00:00
filtered_group.insert(AtomicFormula{false, std::make_shared<ASTLiteral>(static_cast<UInt8>(0))});
2021-01-05 20:51:19 +00:00
filtered.insert(filtered_group);
std::swap(statements, filtered);
return *this;
}
2021-01-04 20:55:32 +00:00
}
std::swap(statements, filtered);
2021-01-05 20:51:19 +00:00
return *this;
}
2021-04-28 17:35:51 +00:00
template <typename F>
2021-05-02 19:16:40 +00:00
const CNFQuery & iterateGroups(F func) const
2021-04-28 17:35:51 +00:00
{
for (const auto & group : statements)
func(group);
return *this;
}
CNFQuery & appendGroup(AndGroup&& and_group)
{
for (auto && or_group : and_group)
statements.emplace(std::move(or_group));
return *this;
}
2021-01-05 20:51:19 +00:00
template <typename F>
CNFQuery & transformGroups(F func)
{
AndGroup result;
for (const auto & group : statements)
{
auto new_group = func(group);
if (!new_group.empty())
result.insert(std::move(new_group));
}
std::swap(statements, result);
return *this;
}
template <typename F>
CNFQuery & transformAtoms(F func)
{
transformGroups([func](const OrGroup & group) -> OrGroup
{
OrGroup result;
2021-03-04 12:11:43 +00:00
for (const auto & atom : group)
2021-01-05 20:51:19 +00:00
{
2021-03-04 12:11:43 +00:00
auto new_atom = func(atom);
if (new_atom.ast)
result.insert(std::move(new_atom));
2021-01-05 20:51:19 +00:00
}
return result;
});
return *this;
2021-01-04 20:55:32 +00:00
}
const AndGroup & getStatements() const { return statements; }
std::string dump() const;
2021-01-05 20:51:19 +00:00
/// Converts != -> NOT =; <,>= -> (NOT) <; >,<= -> (NOT) <= for simpler matching
CNFQuery & pullNotOutFunctions();
/// Revert pullNotOutFunctions actions
CNFQuery & pushNotInFuntions();
2021-05-05 08:51:25 +00:00
/// (a OR b OR ...) AND (NOT a OR b OR ...) -> (b OR ...)
CNFQuery & reduce();
2021-01-04 20:55:32 +00:00
private:
AndGroup statements;
};
class TreeCNFConverter
{
public:
2021-11-18 14:24:06 +00:00
static constexpr size_t DEFAULT_MAX_GROWTH_MULTIPLIER = 20;
static constexpr size_t MAX_ATOMS_WITHOUT_CHECK = 200;
/// @max_growth_multipler means that it's allowed to grow size of formula only
/// in that amount of times. It's needed to avoid exponential explosion of formula.
/// CNF of boolean formula with N clauses can have 2^N clauses.
2021-11-19 14:14:56 +00:00
/// If amount of atomic formulas will be exceeded nullopt will be returned.
2021-11-18 14:24:06 +00:00
/// 0 - means unlimited.
static std::optional<CNFQuery> tryConvertToCNF(
const ASTPtr & query, size_t max_growth_multipler = DEFAULT_MAX_GROWTH_MULTIPLIER);
static CNFQuery toCNF(
const ASTPtr & query, size_t max_growth_multipler = DEFAULT_MAX_GROWTH_MULTIPLIER);
2021-01-04 20:55:32 +00:00
static ASTPtr fromCNF(const CNFQuery & cnf);
};
2021-03-05 09:54:13 +00:00
void pushNotIn(CNFQuery::AtomicFormula & atom);
2021-01-04 20:55:32 +00:00
}