mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Refactor constant folding and make it reusable for primary_key_expr
This commit is contained in:
parent
57c5dbcd2b
commit
6df757c6f7
3
.gitignore
vendored
3
.gitignore
vendored
@ -248,3 +248,6 @@ website/package-lock.json
|
||||
|
||||
# Ignore files for locally disabled tests
|
||||
/dbms/tests/queries/**/*.disabled
|
||||
|
||||
# cquery cache
|
||||
/.cquery-cache
|
||||
|
@ -94,8 +94,8 @@ public:
|
||||
/// Approximate number of allocated bytes in memory - for profiling and limits.
|
||||
size_t allocatedBytes() const;
|
||||
|
||||
operator bool() const { return !data.empty(); }
|
||||
bool operator!() const { return data.empty(); }
|
||||
operator bool() const { return !!columns(); }
|
||||
bool operator!() const { return !this->operator bool(); }
|
||||
|
||||
/** Get a list of column names separated by commas. */
|
||||
std::string dumpNames() const;
|
||||
|
@ -1,18 +1,20 @@
|
||||
#include <Core/Block.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
#include <Core/Block.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/SyntaxAnalyzer.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/SyntaxAnalyzer.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -57,7 +59,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
|
||||
|
||||
ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context & context)
|
||||
{
|
||||
/// Branch with string in qery.
|
||||
/// Branch with string in query.
|
||||
if (typeid_cast<const ASTLiteral *>(node.get()))
|
||||
return node;
|
||||
|
||||
@ -77,4 +79,238 @@ ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(const ASTPtr & node, cons
|
||||
return evaluateConstantExpressionAsLiteral(node, context);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
using Conjunction = ColumnsWithTypeAndName;
|
||||
using Disjunction = std::vector<Conjunction>;
|
||||
|
||||
Disjunction analyzeEquals(const ASTIdentifier * identifier, const ASTLiteral * literal, const ExpressionActionsPtr & expr)
|
||||
{
|
||||
if (!identifier || !literal)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
for (const auto & name_and_type : expr->getRequiredColumnsWithTypes())
|
||||
{
|
||||
const auto & name = name_and_type.name;
|
||||
const auto & type = name_and_type.type;
|
||||
|
||||
if (name == identifier->name)
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
// FIXME: what to do if field is not convertable?
|
||||
column.column = type->createColumnConst(1, convertFieldToType(literal->value, *type));
|
||||
column.name = name;
|
||||
column.type = type;
|
||||
return {{std::move(column)}};
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
Disjunction andDNF(const Disjunction & left, const Disjunction & right)
|
||||
{
|
||||
if (left.empty())
|
||||
{
|
||||
return right;
|
||||
}
|
||||
|
||||
Disjunction result;
|
||||
|
||||
for (const auto & conjunct1 : left)
|
||||
{
|
||||
for (const auto & conjunct2 : right)
|
||||
{
|
||||
Conjunction new_conjunct{conjunct1};
|
||||
new_conjunct.insert(new_conjunct.end(), conjunct2.begin(), conjunct2.end());
|
||||
result.emplace_back(new_conjunct);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Disjunction analyzeFunction(const ASTFunction * fn, const ExpressionActionsPtr & expr)
|
||||
{
|
||||
if (!fn)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
// TODO: enumerate all possible function names!
|
||||
|
||||
if (fn->name == "equals")
|
||||
{
|
||||
const auto * left = fn->arguments->children.front().get();
|
||||
const auto * right = fn->arguments->children.back().get();
|
||||
const auto * identifier = typeid_cast<const ASTIdentifier *>(left) ? typeid_cast<const ASTIdentifier *>(left)
|
||||
: typeid_cast<const ASTIdentifier *>(right);
|
||||
const auto * literal = typeid_cast<const ASTLiteral *>(left) ? typeid_cast<const ASTLiteral *>(left)
|
||||
: typeid_cast<const ASTLiteral *>(right);
|
||||
|
||||
return analyzeEquals(identifier, literal, expr);
|
||||
}
|
||||
else if (fn->name == "in")
|
||||
{
|
||||
const auto * left = fn->arguments->children.front().get();
|
||||
const auto * right = fn->arguments->children.back().get();
|
||||
const auto * identifier = typeid_cast<const ASTIdentifier *>(left);
|
||||
const auto * inner_fn = typeid_cast<const ASTFunction *>(right);
|
||||
|
||||
if (!inner_fn)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto * tuple = typeid_cast<const ASTExpressionList *>(inner_fn->children.front().get());
|
||||
|
||||
if (!tuple)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
Disjunction result;
|
||||
|
||||
for (const auto & child : tuple->children)
|
||||
{
|
||||
const auto * literal = typeid_cast<const ASTLiteral *>(child.get());
|
||||
const auto dnf = analyzeEquals(identifier, literal, expr);
|
||||
|
||||
if (dnf.empty())
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
result.insert(result.end(), dnf.begin(), dnf.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
else if (fn->name == "or")
|
||||
{
|
||||
const auto * args = typeid_cast<const ASTExpressionList *>(fn->children.front().get());
|
||||
|
||||
if (!args)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
Disjunction result;
|
||||
|
||||
for (const auto & arg : args->children)
|
||||
{
|
||||
const auto dnf = analyzeFunction(typeid_cast<const ASTFunction *>(arg.get()), expr);
|
||||
|
||||
if (dnf.empty())
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
result.insert(result.end(), dnf.begin(), dnf.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
else if (fn->name == "and")
|
||||
{
|
||||
const auto * args = typeid_cast<const ASTExpressionList *>(fn->children.front().get());
|
||||
|
||||
if (!args)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
Disjunction result;
|
||||
|
||||
for (const auto & arg : args->children)
|
||||
{
|
||||
const auto dnf = analyzeFunction(typeid_cast<const ASTFunction *>(arg.get()), expr);
|
||||
|
||||
if (dnf.empty())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
result = andDNF(result, dnf);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: distinguish always-false and failed evaluation results,
|
||||
// assume failed if returned empty `Blocks` for now.
|
||||
Blocks evaluateConstantExpressionAsBlock(const ASTPtr & node, const ExpressionActionsPtr & target_expr)
|
||||
{
|
||||
Blocks result;
|
||||
|
||||
// TODO: `node` may be always-false literal.
|
||||
|
||||
if (const auto fn = typeid_cast<const ASTFunction *>(node.get()))
|
||||
{
|
||||
const auto dnf = analyzeFunction(fn, target_expr);
|
||||
|
||||
if (dnf.empty())
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
auto hasRequiredColumns = [&target_expr](const Block & block) -> bool
|
||||
{
|
||||
for (const auto & name : target_expr->getRequiredColumns())
|
||||
{
|
||||
bool hasColumn = false;
|
||||
for (const auto & column_name : block.getNames())
|
||||
{
|
||||
if (column_name == name)
|
||||
{
|
||||
hasColumn = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasColumn)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
for (const auto & conjunct : dnf)
|
||||
{
|
||||
Block block(conjunct);
|
||||
|
||||
// Block should contain all required columns from `target_expr`
|
||||
if (!hasRequiredColumns(block))
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
target_expr->execute(block);
|
||||
|
||||
if (block.rows() == 1)
|
||||
{
|
||||
result.push_back(block);
|
||||
}
|
||||
else if (block.rows() == 0)
|
||||
{
|
||||
// filter out cases like "WHERE a = 1 AND a = 2"
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// FIXME: shouldn't happen
|
||||
return {};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/IParser.h>
|
||||
@ -10,8 +11,10 @@ namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
class ExpressionActions;
|
||||
class IDataType;
|
||||
|
||||
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
||||
|
||||
/** Evaluate constant expression and its type.
|
||||
* Used in rare cases - for elements of set for IN, for data to INSERT.
|
||||
@ -33,4 +36,7 @@ ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context &
|
||||
*/
|
||||
ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(const ASTPtr & node, const Context & context);
|
||||
|
||||
// FIXME: collapse returned blocks into a single block.
|
||||
Blocks evaluateConstantExpressionAsBlock(const ASTPtr & node, const ExpressionActionsPtr & target_expr);
|
||||
|
||||
}
|
||||
|
@ -313,7 +313,7 @@ bool KeyCondition::addCondition(const String & column, const Range & range)
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Computes value of constant expression and it data type.
|
||||
/** Computes value of constant expression and its data type.
|
||||
* Returns false, if expression isn't constant.
|
||||
*/
|
||||
static bool getConstant(const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type)
|
||||
|
@ -253,7 +253,7 @@ public:
|
||||
/// Get the maximum number of the key element used in the condition.
|
||||
size_t getMaxKeyColumn() const;
|
||||
|
||||
/// Impose an additional condition: the value in the column column must be in the `range` range.
|
||||
/// Impose an additional condition: the value in the column `column` must be in the range `range`.
|
||||
/// Returns whether there is such a column in the key.
|
||||
bool addCondition(const String & column, const Range & range);
|
||||
|
||||
|
@ -1,266 +1,23 @@
|
||||
#include <Storages/StorageDistributed.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
#include <Interpreters/createBlockSelector.h>
|
||||
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TYPE_MISMATCH;
|
||||
|
||||
extern const int TYPE_MISMATCH;
|
||||
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Contains a list of columns for conjunction: columns[0] AND columns[1] AND ...
|
||||
struct Conjunction
|
||||
{
|
||||
ColumnsWithTypeAndName columns;
|
||||
};
|
||||
|
||||
/// Contains a list of disjunctions: disjunctions[0] OR disjunctions[1] OR ...
|
||||
struct Disjunction
|
||||
{
|
||||
std::vector<Conjunction> conjunctions;
|
||||
};
|
||||
|
||||
using Disjunctions = std::vector<Disjunction>;
|
||||
using DisjunctionsPtr = std::shared_ptr<Disjunctions>;
|
||||
|
||||
static constexpr auto and_function_name = "and";
|
||||
static constexpr auto equals_function_name = "equals";
|
||||
static constexpr auto in_function_name = "in";
|
||||
static constexpr auto or_function_name = "or";
|
||||
static constexpr auto tuple_function_name = "tuple";
|
||||
|
||||
void logDebug(std::string message)
|
||||
{
|
||||
LOG_DEBUG(&Logger::get("(StorageDistributedShardsOptimizer)"), message);
|
||||
}
|
||||
|
||||
/// Returns disjunction equivalent to `disjunctions AND another`.
|
||||
Disjunctions pairwiseAnd(const Disjunctions & disjunctions, const Disjunctions & another)
|
||||
{
|
||||
Disjunctions new_disjunctions;
|
||||
|
||||
if (disjunctions.empty())
|
||||
{
|
||||
return another;
|
||||
}
|
||||
|
||||
if (another.empty())
|
||||
{
|
||||
return disjunctions;
|
||||
}
|
||||
|
||||
for (const auto disjunction : disjunctions)
|
||||
{
|
||||
for (const auto another_disjunction : another)
|
||||
{
|
||||
std::vector<Conjunction> new_conjunctions;
|
||||
|
||||
for (const auto conjunction : disjunction.conjunctions)
|
||||
{
|
||||
for (const auto another_conjunction : another_disjunction.conjunctions)
|
||||
{
|
||||
ColumnsWithTypeAndName new_columns;
|
||||
new_columns.insert(std::end(new_columns), conjunction.columns.begin(), conjunction.columns.end());
|
||||
new_columns.insert(std::end(new_columns), another_conjunction.columns.begin(), another_conjunction.columns.end());
|
||||
|
||||
new_conjunctions.push_back(Conjunction{new_columns});
|
||||
}
|
||||
}
|
||||
|
||||
new_disjunctions.push_back(Disjunction{new_conjunctions});
|
||||
}
|
||||
}
|
||||
|
||||
return new_disjunctions;
|
||||
}
|
||||
|
||||
/// Given `ident = literal` expr, returns disjunctions relevant for constant folding in sharding_key_expr.
|
||||
DisjunctionsPtr analyzeEquals(const ASTIdentifier * ident, const ASTLiteral * literal, ExpressionActionsPtr sharding_key_expr)
|
||||
{
|
||||
for (const auto name_and_type : sharding_key_expr->getRequiredColumnsWithTypes())
|
||||
{
|
||||
const auto type = name_and_type.type;
|
||||
const auto name = name_and_type.name;
|
||||
|
||||
if (name == ident->name)
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
|
||||
column.column = type->createColumnConst(1, convertFieldToType(literal->value, *type));
|
||||
column.type = type;
|
||||
column.name = name;
|
||||
|
||||
const auto columns = ColumnsWithTypeAndName{column};
|
||||
const auto conjunction = Conjunction{columns};
|
||||
const auto disjunction = Disjunction{{conjunction}};
|
||||
const Disjunctions disjunctions = {disjunction};
|
||||
|
||||
return std::make_shared<Disjunctions>(disjunctions);
|
||||
}
|
||||
}
|
||||
|
||||
const Disjunctions disjunctions = {};
|
||||
return std::make_shared<Disjunctions>(disjunctions);
|
||||
}
|
||||
|
||||
/// Given `ident IN (..literals)` expr, returns disjunctions relevant for constant folding in sharding_key_expr.
|
||||
DisjunctionsPtr analyzeIn(
|
||||
const ASTIdentifier * ident, const std::vector<const ASTLiteral *> literals, ExpressionActionsPtr sharding_key_expr)
|
||||
{
|
||||
Disjunctions disjunctions;
|
||||
|
||||
for (const auto literal : literals)
|
||||
{
|
||||
const auto inner_disjunctions = analyzeEquals(ident, literal, sharding_key_expr);
|
||||
|
||||
if (!inner_disjunctions)
|
||||
return nullptr;
|
||||
|
||||
disjunctions.insert(std::end(disjunctions), inner_disjunctions->begin(), inner_disjunctions->end());
|
||||
}
|
||||
|
||||
return std::make_shared<Disjunctions>(disjunctions);
|
||||
}
|
||||
|
||||
/// Given WHERE condition, returns disjunctions relevant for constant folding in sharding_key_expr.
|
||||
DisjunctionsPtr analyzeQuery(const ASTFunction * function, ExpressionActionsPtr sharding_key_expr)
|
||||
{
|
||||
if (function->name == equals_function_name)
|
||||
{
|
||||
auto left_arg = function->arguments->children.front().get();
|
||||
auto right_arg = function->arguments->children.back().get();
|
||||
|
||||
// try to ensure left_arg points to ASTIdentifier
|
||||
if (!typeid_cast<const ASTIdentifier *>(left_arg) && typeid_cast<const ASTIdentifier *>(right_arg))
|
||||
std::swap(left_arg, right_arg);
|
||||
|
||||
const auto ident = typeid_cast<const ASTIdentifier *>(left_arg);
|
||||
const auto literal = typeid_cast<const ASTLiteral *>(right_arg);
|
||||
|
||||
if (!ident || !literal)
|
||||
{
|
||||
logDebug("didn't match pattern ident = <literal>");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return analyzeEquals(ident, literal, sharding_key_expr);
|
||||
}
|
||||
else if (function->name == in_function_name)
|
||||
{
|
||||
const auto left_arg = function->arguments->children.front().get();
|
||||
const auto right_arg = function->arguments->children.back().get();
|
||||
|
||||
const auto ident = typeid_cast<const ASTIdentifier *>(left_arg);
|
||||
const auto inner_function = typeid_cast<const ASTFunction *>(right_arg);
|
||||
|
||||
if (!ident || !inner_function || inner_function->name != tuple_function_name)
|
||||
{
|
||||
logDebug("didn't match pattern ident IN tuple(...)");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<const ASTLiteral *> literals;
|
||||
const auto expr_list = typeid_cast<const ASTExpressionList *>(inner_function->children.front().get());
|
||||
|
||||
if (!expr_list)
|
||||
{
|
||||
logDebug("expected ExpressionList in tuple, got: " + inner_function->getID());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
for (const auto child : expr_list->children)
|
||||
{
|
||||
if (const auto child_literal = typeid_cast<const ASTLiteral *>(child.get()))
|
||||
{
|
||||
literals.push_back(child_literal);
|
||||
}
|
||||
else
|
||||
{
|
||||
logDebug("non-literal in IN expression, got: " + child->getID());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return analyzeIn(ident, literals, sharding_key_expr);
|
||||
}
|
||||
else if (function->name == or_function_name)
|
||||
{
|
||||
const auto expr_list = typeid_cast<const ASTExpressionList *>(function->children.front().get());
|
||||
|
||||
if (!expr_list)
|
||||
{
|
||||
logDebug("expected ExpressionList in IN, got: " + function->getID());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Disjunctions disjunctions;
|
||||
|
||||
for (const auto child : expr_list->children)
|
||||
{
|
||||
// we can't ignore expr we can't analyze because it can widden the set of shards
|
||||
if (const auto child_function = typeid_cast<const ASTFunction *>(child.get()))
|
||||
{
|
||||
const auto child_disjunctions = analyzeQuery(child_function, sharding_key_expr);
|
||||
|
||||
if (!child_disjunctions)
|
||||
return nullptr;
|
||||
|
||||
disjunctions.insert(std::end(disjunctions), child_disjunctions->begin(), child_disjunctions->end());
|
||||
}
|
||||
else
|
||||
{
|
||||
logDebug("non-function expression in OR, got: " + child->getID());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_shared<Disjunctions>(disjunctions);
|
||||
}
|
||||
else if (function->name == and_function_name)
|
||||
{
|
||||
const auto expr_list = typeid_cast<const ASTExpressionList *>(function->children.front().get());
|
||||
|
||||
if (!expr_list)
|
||||
{
|
||||
logDebug("expected ExpressionList in AND, got: " + function->getID());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Disjunctions disjunctions;
|
||||
|
||||
for (const auto child : expr_list->children)
|
||||
{
|
||||
// we can skip everything we can't analyze because it can only narrow the set of shards
|
||||
if (const auto child_function = typeid_cast<const ASTFunction *>(child.get()))
|
||||
{
|
||||
const auto child_disjunctions = analyzeQuery(child_function, sharding_key_expr);
|
||||
|
||||
if (!child_disjunctions)
|
||||
continue;
|
||||
|
||||
disjunctions = pairwiseAnd(disjunctions, *child_disjunctions);
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_shared<Disjunctions>(disjunctions);
|
||||
}
|
||||
else
|
||||
{
|
||||
logDebug("unsupported function: " + function->name);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// the same as DistributedBlockOutputStream::createSelector, should it be static?
|
||||
IColumn::Selector createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result)
|
||||
@ -285,73 +42,38 @@ IColumn::Selector createSelector(const ClusterPtr cluster, const ColumnWithTypeA
|
||||
throw Exception{"Sharding key expression does not evaluate to an integer type", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
|
||||
/// Returns true if block has all columns required by sharding_key_expr.
|
||||
bool hasRequiredColumns(const Block & block, ExpressionActionsPtr sharding_key_expr)
|
||||
{
|
||||
for (const auto name : sharding_key_expr->getRequiredColumns())
|
||||
{
|
||||
bool hasColumn = false;
|
||||
for (const auto column_name : block.getNames())
|
||||
{
|
||||
if (column_name == name)
|
||||
{
|
||||
hasColumn = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasColumn)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Returns a new cluster with fewer shards if constant folding for sharding_key_expr is possible
|
||||
* using constraints from WHERE condition, otherwise, returns nullptr. */
|
||||
/// Returns a new cluster with fewer shards if constant folding for `sharding_key_expr` is possible
|
||||
/// using constraints from "WHERE" condition, otherwise returns `nullptr`
|
||||
ClusterPtr StorageDistributed::skipUnusedShards(ClusterPtr cluster, const SelectQueryInfo & query_info)
|
||||
{
|
||||
const auto & select = typeid_cast<ASTSelectQuery &>(*query_info.query);
|
||||
|
||||
if (!select.where_expression)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const auto function = typeid_cast<const ASTFunction *>(select.where_expression.get());
|
||||
const auto blocks = evaluateConstantExpressionAsBlock(select.where_expression, sharding_key_expr);
|
||||
|
||||
if (!function)
|
||||
return nullptr;
|
||||
|
||||
const auto disjunctions = analyzeQuery(function, sharding_key_expr);
|
||||
|
||||
// didn't get definite answer from analysis, about optimization
|
||||
if (!disjunctions)
|
||||
// Can't get definite answer if we can skip any shards
|
||||
if (blocks.empty())
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::set<int> shards;
|
||||
|
||||
for (const auto disjunction : *disjunctions)
|
||||
for (const auto & block : blocks)
|
||||
{
|
||||
for (const auto conjunction : disjunction.conjunctions)
|
||||
{
|
||||
Block block(conjunction.columns);
|
||||
if (!block.has(sharding_key_column_name))
|
||||
throw Exception("sharding_key_expr should evaluate as a single row", ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
// check if sharding_key_expr requires column that we don't know anything about
|
||||
// if so, we don't have enough information to optimize
|
||||
if (!hasRequiredColumns(block, sharding_key_expr))
|
||||
return nullptr;
|
||||
const auto result = block.getByName(sharding_key_column_name);
|
||||
const auto selector = createSelector(cluster, result);
|
||||
|
||||
sharding_key_expr->execute(block);
|
||||
|
||||
if (!block || block.rows() != 1 || !block.has(sharding_key_column_name))
|
||||
throw Exception("Logical error: sharding_key_expr should evaluate as 1 row", ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
const auto result = block.getByName(sharding_key_column_name);
|
||||
const auto selector = createSelector(cluster, result);
|
||||
|
||||
shards.insert(selector.begin(), selector.end());
|
||||
}
|
||||
shards.insert(selector.begin(), selector.end());
|
||||
}
|
||||
|
||||
return cluster->getClusterWithMultipleShards({shards.begin(), shards.end()});
|
||||
|
@ -1,4 +1,15 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
OK
|
||||
OK
|
||||
1
|
||||
OK
|
||||
4
|
||||
2
|
||||
1
|
||||
1
|
||||
1
|
||||
4
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
|
@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. $CURDIR/../shell_config.sh
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.mergetree;"
|
||||
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.distributed;"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test.mergetree (a Int64, b Int64, c Int64) ENGINE = MergeTree ORDER BY (a, b);"
|
||||
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test.distributed AS test.mergetree ENGINE = Distributed(test_unavailable_shard, test, mergetree, jumpConsistentHash(a+b, 2));"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "INSERT INTO test.mergetree VALUES (0, 0, 0);"
|
||||
${CLICKHOUSE_CLIENT} --query "INSERT INTO test.mergetree VALUES (1, 0, 0);"
|
||||
${CLICKHOUSE_CLIENT} --query "INSERT INTO test.mergetree VALUES (0, 1, 1);"
|
||||
${CLICKHOUSE_CLIENT} --query "INSERT INTO test.mergetree VALUES (1, 1, 1);"
|
||||
|
||||
# Should fail because second shard is unavailable
|
||||
${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM test.distributed;" 2>&1 \
|
||||
| fgrep -q "All connection tries failed" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
# Should fail without setting `distributed_optimize_skip_select_on_unused_shards`
|
||||
${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM test.distributed WHERE a = 0 AND b = 0;" 2>&1 \
|
||||
| fgrep -q "All connection tries failed" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
# Should pass now
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND b = 0;
|
||||
"
|
||||
|
||||
# Should still fail because of matching unavailable shard
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 2 AND b = 2;
|
||||
" 2>&1 \ | fgrep -q "All connection tries failed" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
# Try more complext expressions for constant folding - all should pass.
|
||||
|
||||
# TODO: should pass one day.
|
||||
#${CLICKHOUSE_CLIENT} -n --query="
|
||||
# SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
# SELECT count(*) FROM test.distributed WHERE a = 1 AND a = 0;
|
||||
#"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a IN (0, 1) AND b IN (0, 1);
|
||||
"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND b = 0 OR a = 1 AND b = 1;
|
||||
"
|
||||
|
||||
# TODO: should pass one day.
|
||||
#${CLICKHOUSE_CLIENT} -n --query="
|
||||
# SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
# SELECT count(*) FROM test.distributed WHERE a = 0 AND b >= 0 AND b <= 1;
|
||||
#"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND b = 0 AND c = 0;
|
||||
"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND b = 0 AND c != 10;
|
||||
"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND b = 0 AND (a+b)*b != 12;
|
||||
"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE (a = 0 OR a = 1) AND (b = 0 OR b = 1);
|
||||
"
|
||||
|
||||
# These ones should fail.
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND b <= 1;
|
||||
" 2>&1 \ | fgrep -q "All connection tries failed" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND c = 0;
|
||||
" 2>&1 \ | fgrep -q "All connection tries failed" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 OR a = 1 AND b = 0;
|
||||
" 2>&1 \ | fgrep -q "All connection tries failed" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND b = 0 OR a = 2 AND b = 2;
|
||||
" 2>&1 \ | fgrep -q "All connection tries failed" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n --query="
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
SELECT count(*) FROM test.distributed WHERE a = 0 AND b = 0 OR c = 0;
|
||||
" 2>&1 \ | fgrep -q "All connection tries failed" && echo 'OK' || echo 'FAIL'
|
@ -1,17 +0,0 @@
|
||||
SET distributed_optimize_skip_select_on_unused_shards = 1;
|
||||
|
||||
DROP TABLE IF EXISTS test.mergetree;
|
||||
DROP TABLE IF EXISTS test.distributed;
|
||||
|
||||
CREATE TABLE test.mergetree (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b);
|
||||
CREATE TABLE test.distributed AS test.mergetree ENGINE = Distributed(test_unavailable_shard, test, mergetree, jumpConsistentHash(a+b, 2));
|
||||
|
||||
INSERT INTO test.mergetree VALUES (0, 0);
|
||||
INSERT INTO test.mergetree VALUES (1, 0);
|
||||
INSERT INTO test.mergetree VALUES (0, 1);
|
||||
INSERT INTO test.mergetree VALUES (1, 1);
|
||||
|
||||
/* without setting, quering of the second shard will fail because it isn't available */
|
||||
|
||||
SELECT jumpConsistentHash(a+b, 2) FROM test.distributed
|
||||
WHERE (a+b > 0 AND a = 0 AND b = 0) OR (a IN (0, 1) AND b IN (0, 1)) OR ((a = 1 OR a = 1) AND b = 2);
|
Loading…
Reference in New Issue
Block a user