LogicalExpressionsOptimizer: optimize for LowCardinality

This commit is contained in:
Wangyang Guo 2022-10-25 14:44:09 +08:00
parent f4483ed19e
commit b08961be8a
3 changed files with 41 additions and 5 deletions

View File

@ -1,13 +1,17 @@
#include <Interpreters/LogicalExpressionsOptimizer.h>
#include <Interpreters/IdentifierSemantic.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Core/Settings.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/typeid_cast.h>
#include <deque>
#include <vector>
#include <base/sort.h>
@ -32,8 +36,9 @@ bool LogicalExpressionsOptimizer::OrWithExpression::operator<(const OrWithExpres
return std::tie(this->or_function, this->expression) < std::tie(rhs.or_function, rhs.expression);
}
LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length)
: select_query(select_query_), settings(optimize_min_equality_disjunction_chain_length)
/// Builds an optimizer for the given SELECT query.
/// `tables_with_columns_` is handed to the `tables_with_columns` member and the chain-length
/// threshold is wrapped into `settings`.
/// NOTE(review): the header declares `tables_with_columns` as a const reference, so the caller's
/// object presumably has to outlive this optimizer - confirm at call sites.
LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(
    ASTSelectQuery * select_query_,
    const TablesWithColumns & tables_with_columns_,
    UInt64 optimize_min_equality_disjunction_chain_length)
    : select_query(select_query_)
    , tables_with_columns(tables_with_columns_)
    , settings(optimize_min_equality_disjunction_chain_length)
{
}
@ -196,13 +201,39 @@ inline ASTs & getFunctionOperands(const ASTFunction * or_function)
}
/// Returns true when `functions` holds more than one equality and the first operand of the
/// first equality is an identifier that resolves to a column whose type is DataTypeLowCardinality.
/// Only the first function of the chain is inspected.
bool LogicalExpressionsOptimizer::isLowCardinalityEqualityChain(const std::vector<ASTFunction *> & functions) const
{
    /// Zero or one equality: nothing to report as a chain.
    if (functions.size() <= 1)
        return false;

    /// The first operand must be a plain identifier; any other expression is rejected.
    const auto * column_identifier = getFunctionOperands(functions[0])[0]->as<ASTIdentifier>();
    if (!column_identifier)
        return false;

    /// Take the table position from getMembership; if it yields nothing, fall back to chooseTableColumnMatch.
    auto table_pos = IdentifierSemantic::getMembership(*column_identifier);
    if (!table_pos)
        table_pos = IdentifierSemantic::chooseTableColumnMatch(*column_identifier, tables_with_columns, true);
    if (!table_pos)
        return false;

    /// Look the column up by its short name in the chosen table.
    const auto column = tables_with_columns[*table_pos].columns.tryGetByName(column_identifier->shortName());
    if (!column)
        return false;

    return typeid_cast<const DataTypeLowCardinality *>(column->type.get()) != nullptr;
}
bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const
{
const auto & equalities = chain.second;
const auto & equality_functions = equalities.functions;
/// We eliminate too short chains.
if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length)
if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length &&
!isLowCardinalityEqualityChain(equality_functions))
return false;
/// We check that the right-hand sides of all equalities have the same type.

View File

@ -1,6 +1,7 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <string>
#include <vector>
@ -36,7 +37,7 @@ class LogicalExpressionsOptimizer final
public:
/// Constructor. Accepts the root of the query DAG.
LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length);
LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length);
/** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN
* on the expressions `expr` IN (x1, ..., xN).
@ -79,6 +80,9 @@ private:
*/
bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const;
/// Check whether the OR chain is an equality chain over a LowCardinality column.
bool isLowCardinalityEqualityChain(const std::vector<ASTFunction *> & functions) const;
/// Insert the IN expression into the OR chain.
static void addInExpression(const DisjunctiveEqualityChain & chain);
@ -96,6 +100,7 @@ private:
using ColumnToPosition = std::unordered_map<const IAST *, size_t>;
ASTSelectQuery * select_query;
const TablesWithColumns & tables_with_columns;
const ExtractedSettings settings;
/// Information about the OR-chains inside the query.
DisjunctiveEqualityChainsMap disjunctive_equality_chains_map;

View File

@ -1246,7 +1246,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns);
/// Optimizes logical expressions.
LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform();
LogicalExpressionsOptimizer(select_query, tables_with_columns, settings.optimize_min_equality_disjunction_chain_length.value).perform();
NameSet all_source_columns_set = source_columns_set;
if (table_join)