2021-01-04 20:55:32 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <set>
|
|
|
|
#include <unordered_map>
|
2021-11-26 18:27:16 +00:00
|
|
|
#include <vector>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <Parsers/IAST_fwd.h>
|
2021-01-04 20:55:32 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
class CNFQuery
|
|
|
|
{
|
|
|
|
public:
|
2021-03-04 12:11:43 +00:00
|
|
|
struct AtomicFormula
|
|
|
|
{
|
|
|
|
bool negative = false;
|
|
|
|
ASTPtr ast;
|
|
|
|
|
|
|
|
/// for set
|
|
|
|
bool operator<(const AtomicFormula & rhs) const
|
|
|
|
{
|
2023-11-13 09:55:58 +00:00
|
|
|
return ast->getTreeHash(/*ignore_aliases=*/ true) == rhs.ast->getTreeHash(/*ignore_aliases=*/ true)
|
2021-04-28 17:35:51 +00:00
|
|
|
? negative < rhs.negative
|
2023-11-13 09:55:58 +00:00
|
|
|
: ast->getTreeHash(/*ignore_aliases=*/ true) < rhs.ast->getTreeHash(/*ignore_aliases=*/ true);
|
2021-04-28 17:35:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool operator==(const AtomicFormula & rhs) const
|
|
|
|
{
|
|
|
|
return negative == rhs.negative &&
|
2023-11-13 09:55:58 +00:00
|
|
|
ast->getTreeHash(/*ignore_aliases=*/ true) == rhs.ast->getTreeHash(/*ignore_aliases=*/ true) &&
|
2021-04-28 17:35:51 +00:00
|
|
|
ast->getColumnName() == rhs.ast->getColumnName();
|
2021-03-04 12:11:43 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
using OrGroup = std::set<AtomicFormula>;
|
2021-01-04 20:55:32 +00:00
|
|
|
using AndGroup = std::set<OrGroup>;
|
|
|
|
|
2022-03-13 11:59:20 +00:00
|
|
|
CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { } /// NOLINT
|
2021-01-04 20:55:32 +00:00
|
|
|
|
|
|
|
template <typename P>
|
2021-01-06 10:41:06 +00:00
|
|
|
CNFQuery & filterAlwaysTrueGroups(P predicate_is_unknown) /// delete always true groups
|
2021-01-05 20:51:19 +00:00
|
|
|
{
|
2021-01-04 20:55:32 +00:00
|
|
|
AndGroup filtered;
|
|
|
|
for (const auto & or_group : statements)
|
|
|
|
{
|
2021-01-06 10:41:06 +00:00
|
|
|
if (predicate_is_unknown(or_group))
|
2021-01-04 20:55:32 +00:00
|
|
|
filtered.insert(or_group);
|
|
|
|
}
|
|
|
|
std::swap(statements, filtered);
|
2021-01-05 20:51:19 +00:00
|
|
|
return *this;
|
2021-01-04 20:55:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename P>
|
2021-01-06 10:41:06 +00:00
|
|
|
CNFQuery & filterAlwaysFalseAtoms(P predicate_is_unknown) /// delete always false atoms
|
2021-01-05 20:51:19 +00:00
|
|
|
{
|
2021-01-04 20:55:32 +00:00
|
|
|
AndGroup filtered;
|
|
|
|
for (const auto & or_group : statements)
|
|
|
|
{
|
|
|
|
OrGroup filtered_group;
|
2021-01-05 20:51:19 +00:00
|
|
|
for (auto ast : or_group)
|
|
|
|
{
|
2021-01-06 10:41:06 +00:00
|
|
|
if (predicate_is_unknown(ast))
|
2021-01-04 20:55:32 +00:00
|
|
|
filtered_group.insert(ast);
|
|
|
|
}
|
|
|
|
if (!filtered_group.empty())
|
|
|
|
filtered.insert(filtered_group);
|
2021-01-05 20:51:19 +00:00
|
|
|
else
|
|
|
|
{
|
2021-11-10 17:57:59 +00:00
|
|
|
/// all atoms false -> group false -> CNF false
|
2021-01-05 20:51:19 +00:00
|
|
|
filtered.clear();
|
|
|
|
filtered_group.clear();
|
2021-03-04 12:11:43 +00:00
|
|
|
filtered_group.insert(AtomicFormula{false, std::make_shared<ASTLiteral>(static_cast<UInt8>(0))});
|
2021-01-05 20:51:19 +00:00
|
|
|
filtered.insert(filtered_group);
|
|
|
|
std::swap(statements, filtered);
|
|
|
|
return *this;
|
|
|
|
}
|
2021-01-04 20:55:32 +00:00
|
|
|
}
|
|
|
|
std::swap(statements, filtered);
|
2021-01-05 20:51:19 +00:00
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2021-04-28 17:35:51 +00:00
|
|
|
template <typename F>
|
2021-05-02 19:16:40 +00:00
|
|
|
const CNFQuery & iterateGroups(F func) const
|
2021-04-28 17:35:51 +00:00
|
|
|
{
|
|
|
|
for (const auto & group : statements)
|
|
|
|
func(group);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
CNFQuery & appendGroup(AndGroup&& and_group)
|
|
|
|
{
|
|
|
|
for (auto && or_group : and_group)
|
2022-03-13 11:59:20 +00:00
|
|
|
statements.emplace(or_group);
|
2021-04-28 17:35:51 +00:00
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2021-01-05 20:51:19 +00:00
|
|
|
template <typename F>
|
|
|
|
CNFQuery & transformGroups(F func)
|
|
|
|
{
|
|
|
|
AndGroup result;
|
|
|
|
for (const auto & group : statements)
|
|
|
|
{
|
|
|
|
auto new_group = func(group);
|
|
|
|
if (!new_group.empty())
|
|
|
|
result.insert(std::move(new_group));
|
|
|
|
}
|
|
|
|
std::swap(statements, result);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename F>
|
|
|
|
CNFQuery & transformAtoms(F func)
|
|
|
|
{
|
|
|
|
transformGroups([func](const OrGroup & group) -> OrGroup
|
|
|
|
{
|
|
|
|
OrGroup result;
|
2021-03-04 12:11:43 +00:00
|
|
|
for (const auto & atom : group)
|
2021-01-05 20:51:19 +00:00
|
|
|
{
|
2021-03-04 12:11:43 +00:00
|
|
|
auto new_atom = func(atom);
|
|
|
|
if (new_atom.ast)
|
|
|
|
result.insert(std::move(new_atom));
|
2021-01-05 20:51:19 +00:00
|
|
|
}
|
|
|
|
return result;
|
|
|
|
});
|
|
|
|
return *this;
|
2021-01-04 20:55:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
const AndGroup & getStatements() const { return statements; }
|
|
|
|
|
|
|
|
std::string dump() const;
|
|
|
|
|
2021-01-05 20:51:19 +00:00
|
|
|
/// Converts != -> NOT =; <,>= -> (NOT) <; >,<= -> (NOT) <= for simpler matching
|
|
|
|
CNFQuery & pullNotOutFunctions();
|
|
|
|
/// Revert pullNotOutFunctions actions
|
2022-09-05 01:50:24 +00:00
|
|
|
CNFQuery & pushNotInFunctions();
|
2021-01-05 20:51:19 +00:00
|
|
|
|
2021-05-05 08:51:25 +00:00
|
|
|
/// (a OR b OR ...) AND (NOT a OR b OR ...) -> (b OR ...)
|
|
|
|
CNFQuery & reduce();
|
|
|
|
|
2021-01-04 20:55:32 +00:00
|
|
|
private:
|
|
|
|
AndGroup statements;
|
|
|
|
};
|
|
|
|
|
|
|
|
class TreeCNFConverter
|
|
|
|
{
|
|
|
|
public:
|
2021-11-18 14:24:06 +00:00
|
|
|
static constexpr size_t DEFAULT_MAX_GROWTH_MULTIPLIER = 20;
|
|
|
|
static constexpr size_t MAX_ATOMS_WITHOUT_CHECK = 200;
|
|
|
|
|
2021-11-26 14:47:40 +00:00
|
|
|
/// @max_growth_multiplier means that it's allowed to grow size of formula only
|
2021-11-18 14:24:06 +00:00
|
|
|
/// in that amount of times. It's needed to avoid exponential explosion of formula.
|
|
|
|
/// CNF of boolean formula with N clauses can have 2^N clauses.
|
2021-11-19 14:14:56 +00:00
|
|
|
/// If amount of atomic formulas will be exceeded nullopt will be returned.
|
2021-11-18 14:24:06 +00:00
|
|
|
/// 0 - means unlimited.
|
|
|
|
static std::optional<CNFQuery> tryConvertToCNF(
|
2021-11-26 14:47:40 +00:00
|
|
|
const ASTPtr & query, size_t max_growth_multiplier = DEFAULT_MAX_GROWTH_MULTIPLIER);
|
2021-11-18 14:24:06 +00:00
|
|
|
|
|
|
|
static CNFQuery toCNF(
|
2021-11-26 14:47:40 +00:00
|
|
|
const ASTPtr & query, size_t max_growth_multiplier = DEFAULT_MAX_GROWTH_MULTIPLIER);
|
2021-01-04 20:55:32 +00:00
|
|
|
|
|
|
|
static ASTPtr fromCNF(const CNFQuery & cnf);
|
|
|
|
};
|
|
|
|
|
2021-03-05 09:54:13 +00:00
|
|
|
void pushNotIn(CNFQuery::AtomicFormula & atom);
|
|
|
|
|
2023-03-15 11:41:29 +00:00
|
|
|
template <typename TAndGroup>
|
|
|
|
TAndGroup reduceOnceCNFStatements(const TAndGroup & groups)
|
|
|
|
{
|
|
|
|
TAndGroup result;
|
|
|
|
for (const auto & group : groups)
|
|
|
|
{
|
|
|
|
using GroupType = std::decay_t<decltype(group)>;
|
|
|
|
GroupType copy(group);
|
|
|
|
bool inserted = false;
|
|
|
|
for (const auto & atom : group)
|
|
|
|
{
|
|
|
|
copy.erase(atom);
|
|
|
|
using AtomType = std::decay_t<decltype(atom)>;
|
|
|
|
AtomType negative_atom(atom);
|
|
|
|
negative_atom.negative = !atom.negative;
|
|
|
|
copy.insert(negative_atom);
|
|
|
|
|
|
|
|
if (groups.contains(copy))
|
|
|
|
{
|
|
|
|
copy.erase(negative_atom);
|
|
|
|
result.insert(copy);
|
|
|
|
inserted = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
copy.erase(negative_atom);
|
|
|
|
copy.insert(atom);
|
|
|
|
}
|
|
|
|
if (!inserted)
|
|
|
|
result.insert(group);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TOrGroup>
|
|
|
|
bool isCNFGroupSubset(const TOrGroup & left, const TOrGroup & right)
|
|
|
|
{
|
|
|
|
if (left.size() > right.size())
|
|
|
|
return false;
|
|
|
|
for (const auto & elem : left)
|
|
|
|
if (!right.contains(elem))
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TAndGroup>
|
|
|
|
TAndGroup filterCNFSubsets(const TAndGroup & groups)
|
|
|
|
{
|
|
|
|
TAndGroup result;
|
|
|
|
for (const auto & group : groups)
|
|
|
|
{
|
|
|
|
bool insert = true;
|
|
|
|
|
|
|
|
for (const auto & other_group : groups)
|
|
|
|
{
|
|
|
|
if (isCNFGroupSubset(other_group, group) && group != other_group)
|
|
|
|
{
|
|
|
|
insert = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (insert)
|
|
|
|
result.insert(group);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-01-04 20:55:32 +00:00
|
|
|
}
|