Removed the WithAlias option and added an ignore_constant option for QueryTreeNodeTable. Moved the cache to QueryAnalysisPass and adjusted the test to check with alias - 42648 Support scalar subqueries cache

This commit is contained in:
Smita Kulkarni 2022-12-13 16:55:00 +01:00
parent 60ec65eb21
commit 97024f8ee5
10 changed files with 87 additions and 73 deletions

View File

@ -48,6 +48,34 @@ using QueryTreeNodePtrWithHashMap = std::unordered_map<QueryTreeNodePtrWithHash,
template <typename Value>
using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map<QueryTreeNodeConstRawPtrWithHash, Value>;
/** Query tree node pointer bundled with the first half of the node tree hash.
  *
  * The hash is taken once, at construction time, from `node->getTreeHash().first`,
  * so the pointer passed to the constructor must be dereferenceable.
  * The constructor is intentionally implicit: a node pointer converts to the wrapper directly.
  */
template <typename QueryTreeNodePtrType>
struct QueryTreeNodeWithHashIgnoreConstant
{
    /// Wrapped node pointer. Declared before `hash`, because `hash` is initialized through it.
    QueryTreeNodePtrType node = nullptr;

    /// First component of the tree hash of `node`.
    size_t hash = 0;

    QueryTreeNodeWithHashIgnoreConstant(QueryTreeNodePtrType node_) /// NOLINT
        : node(std::move(node_))
        , hash(static_cast<size_t>(node->getTreeHash().first))
    {
    }
};
/// Two wrappers are equal when their stored hashes match and the wrapped nodes
/// compare equal through `isEqual` called with `ignore_constant` set to true.
template <typename T>
inline bool operator==(const QueryTreeNodeWithHashIgnoreConstant<T> & lhs, const QueryTreeNodeWithHashIgnoreConstant<T> & rhs)
{
    /// Compare the stored hashes first; the tree comparison runs only when they match.
    if (lhs.hash != rhs.hash)
        return false;

    return lhs.node->isEqual(*rhs.node, /* ignore_constant */true);
}
/// Negation of `operator==` for the same pair of wrappers.
template <typename T>
inline bool operator!=(const QueryTreeNodeWithHashIgnoreConstant<T> & lhs, const QueryTreeNodeWithHashIgnoreConstant<T> & rhs)
{
    const bool are_equal = (lhs == rhs);
    return !are_equal;
}
/// Instantiation of the wrapper for raw `const IQueryTreeNode *` pointers.
using QueryTreeNodeConstRawPtrWithHashIgnoreConstant = QueryTreeNodeWithHashIgnoreConstant<const IQueryTreeNode *>;
}
template <typename T>
@ -58,3 +86,12 @@ struct std::hash<DB::QueryTreeNodeWithHash<T>>
return node_with_hash.hash;
}
};
/// std::hash specialization for the wrapper: returns the hash value stored in it,
/// nothing is recomputed here.
template <typename T>
struct std::hash<DB::QueryTreeNodeWithHashIgnoreConstant<T>>
{
    size_t operator()(const DB::QueryTreeNodeWithHashIgnoreConstant<T> & key) const
    {
        const size_t stored_hash = key.hash;
        return stored_hash;
    }
};

View File

@ -9,6 +9,7 @@
#include <IO/Operators.h>
#include <Parsers/ASTWithAlias.h>
#include <Analyzer/QueryNode.h>
namespace DB
{
@ -74,7 +75,7 @@ struct NodePairHash
}
bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs) const
bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs, bool ignore_constant) const
{
std::vector<NodePair> nodes_to_process;
std::unordered_set<NodePair, NodePairHash> equals_pairs;
@ -96,11 +97,17 @@ bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs) const
assert(rhs_node_to_compare);
if (lhs_node_to_compare->getNodeType() != rhs_node_to_compare->getNodeType() ||
lhs_node_to_compare->alias != rhs_node_to_compare->alias ||
!lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare))
{
lhs_node_to_compare->alias != rhs_node_to_compare->alias)
return false;
/// ignore_constant is used for scalar subqueries cache
if (ignore_constant && lhs_node_to_compare->as<QueryNode>() && rhs_node_to_compare->as<QueryNode>())
{
if(!lhs_node_to_compare->as<QueryNode>()->isEqualImplIgnoreConstant(*rhs_node_to_compare))
return false;
}
else if (!lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare))
return false;
const auto & lhs_children = lhs_node_to_compare->children;
const auto & rhs_children = rhs_node_to_compare->children;
@ -153,7 +160,7 @@ bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs) const
return true;
}
IQueryTreeNode::Hash IQueryTreeNode::getTreeHash(bool withAlias) const
IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
{
HashState hash_state;
@ -177,13 +184,10 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash(bool withAlias) const
node_to_identifier.emplace(node_to_process, node_to_identifier.size());
hash_state.update(static_cast<size_t>(node_to_process->getNodeType()));
if (withAlias)
if (!node_to_process->alias.empty())
{
if (!node_to_process->alias.empty())
{
hash_state.update(node_to_process->alias.size());
hash_state.update(node_to_process->alias);
}
hash_state.update(node_to_process->alias.size());
hash_state.update(node_to_process->alias);
}
node_to_process->updateTreeHashImpl(hash_state);
@ -216,12 +220,6 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash(bool withAlias) const
return result;
}
/// Format the tree hash as "<first>_<second>", where both parts are the components
/// of the pair returned by getTreeHash(withAlias).
String IQueryTreeNode::getTreeHashAsString(bool withAlias) const
{
    const Hash tree_hash = getTreeHash(withAlias);

    String formatted = toString(tree_hash.first);
    formatted += "_";
    formatted += toString(tree_hash.second);
    return formatted;
}
QueryTreeNodePtr IQueryTreeNode::clone() const
{
/** Clone tree with this node as root.

View File

@ -120,7 +120,7 @@ public:
* Aliases of query tree nodes are compared during isEqual call.
* Original ASTs of query tree nodes are not compared during isEqual call.
*/
bool isEqual(const IQueryTreeNode & rhs) const;
/// If ignore_constant is true, pairs of QueryNode are compared with
/// isEqualImplIgnoreConstant instead of isEqualImpl (used for scalar subqueries cache).
bool isEqual(const IQueryTreeNode & rhs, bool ignore_constant = false) const;
using Hash = std::pair<UInt64, UInt64>;
using HashState = SipHash;
@ -130,9 +130,7 @@ public:
* Alias of query tree node is part of query tree hash.
* Original AST is not part of query tree hash.
*/
Hash getTreeHash(bool withAlias = true) const;
String getTreeHashAsString(bool withAlias = true) const;
Hash getTreeHash() const;
/// Get a deep copy of the query tree
QueryTreeNodePtr clone() const;

View File

@ -68,6 +68,7 @@
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Common/ProfileEvents.h>
#include <Analyzer/HashUtils.h>
namespace ProfileEvents
{
@ -663,10 +664,6 @@ struct IdentifierResolveScope
ContextPtr context;
/// Results of scalar sub queries
std::unordered_map<size_t, std::shared_ptr<ConstantValue>> scalars;
std::unordered_map<size_t, std::shared_ptr<ConstantValue>> local_scalars;
/// Identifier lookup to result
std::unordered_map<IdentifierLookup, IdentifierResolveResult, IdentifierLookupHash> identifier_lookup_to_result;
@ -1102,7 +1099,7 @@ private:
static QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context);
static void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, size_t subquery_depth, ContextPtr context);
void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, size_t subquery_depth, ContextPtr context);
static void mergeWindowWithParentWindow(const QueryTreeNodePtr & window_node, const QueryTreeNodePtr & parent_window_node, IdentifierResolveScope & scope);
@ -1212,6 +1209,9 @@ private:
/// Global resolve expression node to projection names map
std::unordered_map<QueryTreeNodePtr, ProjectionNames> resolved_expressions;
/// Results of scalar sub queries
std::unordered_map<QueryTreeNodeConstRawPtrWithHashIgnoreConstant, std::shared_ptr<ConstantValue>> scalars;
};
/// Utility functions implementation
@ -1697,12 +1697,12 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, size
bool hit = false;
std::shared_ptr<ConstantValue> constant_value;
auto hash = node->getTreeHashAsString(false);
if (context->getQueryContext()->hasAnalyzerScalar(hash))
auto scalars_iterator = scalars.find(node.get());
if (scalars_iterator != scalars.end())
{
hit = true;
constant_value = context->getQueryContext()->getAnalyzerScalar(hash);
constant_value = scalars_iterator->second;
ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit);
}
@ -1797,7 +1797,8 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, size
}
constant_value = std::make_shared<ConstantValue>(std::move(scalar_value), std::move(scalar_type));
context->getQueryContext()->addAnalyzerScalar(hash, constant_value);
scalars[node.get()] = constant_value;
}
if (query_node)

View File

@ -205,6 +205,23 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
is_group_by_all == rhs_typed.is_group_by_all;
}
/** Compare this query node with rhs by the subquery / CTE flags, the CTE name,
  * the projection columns and the DISTINCT / LIMIT WITH TIES / GROUP BY modifiers.
  * rhs must be a QueryNode (it is cast with assert_cast).
  * Called from IQueryTreeNode::isEqual when ignore_constant is set.
  */
bool QueryNode::isEqualImplIgnoreConstant(const IQueryTreeNode & rhs) const
{
    const auto & other = assert_cast<const QueryNode &>(rhs);

    /// Subquery and CTE identity
    if (is_subquery != other.is_subquery || is_cte != other.is_cte || !(cte_name == other.cte_name))
        return false;

    if (!(projection_columns == other.projection_columns))
        return false;

    if (is_distinct != other.is_distinct || is_limit_with_ties != other.is_limit_with_ties)
        return false;

    /// GROUP BY modifiers
    return is_group_by_with_totals == other.is_group_by_with_totals
        && is_group_by_with_rollup == other.is_group_by_with_rollup
        && is_group_by_with_cube == other.is_group_by_with_cube
        && is_group_by_with_grouping_sets == other.is_group_by_with_grouping_sets
        && is_group_by_all == other.is_group_by_all;
}
void QueryNode::updateTreeHashImpl(HashState & state) const
{
state.update(is_subquery);

View File

@ -573,6 +573,7 @@ public:
}
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
/// Equality check used by IQueryTreeNode::isEqual when ignore_constant is set; rhs must be a QueryNode.
bool isEqualImplIgnoreConstant(const IQueryTreeNode & rhs) const;
protected:
bool isEqualImpl(const IQueryTreeNode & rhs) const override;

View File

@ -1083,43 +1083,6 @@ const Block & Context::getScalar(const String & name) const
return it->second;
}
/// Read-only access to the whole map of analyzer scalars stored in this context.
const std::unordered_map<String, std::shared_ptr<ConstantValue>> & Context::getAnalyzerScalars() const
{
return analyzer_scalars;
}
/// Look up the analyzer scalar stored under `hash`.
/// Throws Exception with BAD_ARGUMENTS code if there is no such entry.
const std::shared_ptr<ConstantValue> & Context::getAnalyzerScalar(const String & hash) const
{
    if (auto found = analyzer_scalars.find(hash); found != analyzer_scalars.end())
        return found->second;

    /// This should be a logical error, but it fails the sql_fuzz test too
    /// often, so 'bad arguments' for now.
    throw Exception("Scalar doesn't exist (internal bug)", ErrorCodes::BAD_ARGUMENTS);
}
/// Store `constant_value` under `hash`, replacing an existing entry with the same key.
/// Throws LOGICAL_ERROR when called on the global context.
void Context::addAnalyzerScalar(const String & hash, const std::shared_ptr<ConstantValue> & constant_value)
{
    const bool is_global = isGlobalContext();
    if (is_global)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars");

    analyzer_scalars.insert_or_assign(hash, constant_value);
}
/// Check whether an analyzer scalar is stored under `hash`.
/// Throws LOGICAL_ERROR when called on the global context.
bool Context::hasAnalyzerScalar(const String & hash) const
{
    const bool is_global = isGlobalContext();
    if (is_global)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars");

    return analyzer_scalars.find(hash) != analyzer_scalars.end();
}
const Block * Context::tryGetSpecialScalar(const String & name) const
{
auto it = special_scalars.find(name);

View File

@ -247,7 +247,6 @@ private:
TemporaryTablesMapping external_tables_mapping;
Scalars scalars;
std::unordered_map<String, std::shared_ptr<ConstantValue>> analyzer_scalars;
/// Used to store constant values which are different on each instance during distributed plan, such as _shard_num.
Scalars special_scalars;

View File

@ -1,3 +1,3 @@
02177_CTE_GLOBAL_ON 5 500 11 0 5
02177_CTE_GLOBAL_OFF 1 100 5 0 1
02177_CTE_NEW_ANALYZER 1 100 4 0 1
02177_CTE_NEW_ANALYZER 2 200 3 0 2

View File

@ -19,10 +19,10 @@ SELECT '02177_CTE_GLOBAL_OFF', a5 FROM system.numbers LIMIT 100
SETTINGS enable_global_with_statement = 0;
WITH
( SELECT sleep(0.0001) FROM system.one ) as a1,
( SELECT sleep(0.0001) FROM system.one ) as a2,
( SELECT sleep(0.0001) FROM system.one ) as a3,
( SELECT sleep(0.0001) FROM system.one ) as a4,
( SELECT sleep(0.0001) FROM system.one ),
( SELECT sleep(0.0001) FROM system.one ),
( SELECT sleep(0.0001) FROM system.one ),
( SELECT sleep(0.0001) FROM system.one ),
( SELECT sleep(0.0001) FROM system.one ) as a5
SELECT '02177_CTE_NEW_ANALYZER', a5 FROM system.numbers LIMIT 100
FORMAT Null