Merge pull request #32756 from CurtizJ/fix-index-hypothesis

Fix race in skipping index of type `hypothesis`
This commit is contained in:
Anton Popov 2021-12-15 15:26:35 +03:00 committed by GitHub
commit ec46cbef20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 45 additions and 9 deletions

View File

@ -146,10 +146,15 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree
values.push_back(granule->met);
}
if (const auto it = answer_cache.find(values); it != std::end(answer_cache))
return it->second;
const ComparisonGraph * graph = nullptr;
const auto & graph = getGraph(values);
{
std::lock_guard lock(cache_mutex);
if (const auto it = answer_cache.find(values); it != std::end(answer_cache))
return it->second;
graph = getGraph(values);
}
bool always_false = false;
expression_cnf->iterateGroups(
@ -166,7 +171,7 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree
if (func && func->arguments->children.size() == 2)
{
const auto expected = ComparisonGraph::atomToCompareResult(atom);
if (graph.isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]))
if (graph->isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]))
{
/// If graph failed use matching.
/// We don't need to check constraints.
@ -177,6 +182,8 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree
always_false = true;
});
std::lock_guard lock(cache_mutex);
answer_cache[values] = !always_false;
return !always_false;
}
@ -195,11 +202,13 @@ std::unique_ptr<ComparisonGraph> MergeTreeIndexhypothesisMergedCondition::buildG
return std::make_unique<ComparisonGraph>(active_atomic_formulas);
}
const ComparisonGraph & MergeTreeIndexhypothesisMergedCondition::getGraph(const std::vector<bool> & values) const
const ComparisonGraph * MergeTreeIndexhypothesisMergedCondition::getGraph(const std::vector<bool> & values) const
{
if (!graph_cache.contains(values))
graph_cache[values] = buildGraph(values);
return *graph_cache.at(values);
auto [it, inserted] = graph_cache.try_emplace(values);
if (inserted)
it->second = buildGraph(values);
return it->second.get();
}
}

View File

@ -21,11 +21,14 @@ public:
private:
void addConstraints(const ConstraintsDescription & constraints_description);
std::unique_ptr<ComparisonGraph> buildGraph(const std::vector<bool> & values) const;
const ComparisonGraph & getGraph(const std::vector<bool> & values) const;
const ComparisonGraph * getGraph(const std::vector<bool> & values) const;
ASTPtr expression_ast;
std::unique_ptr<CNFQuery> expression_cnf;
/// Part analysis can be done in parallel.
/// So, we have shared answer and graph cache.
mutable std::mutex cache_mutex;
mutable std::unordered_map<std::vector<bool>, std::unique_ptr<ComparisonGraph>> graph_cache;
mutable std::unordered_map<std::vector<bool>, bool> answer_cache;

View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_index_hypothesis"
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_index_hypothesis (a UInt32, b UInt32, INDEX t a != b TYPE hypothesis GRANULARITY 1) ENGINE = MergeTree ORDER BY a"
$CLICKHOUSE_CLIENT -q "INSERT INTO t_index_hypothesis SELECT number, number + 1 FROM numbers(10000000)"
for _ in {0..30}; do
output=`$CLICKHOUSE_CLIENT -q "SELECT count() FROM t_index_hypothesis WHERE a = b"`
if [[ $output != "0" ]]; then
echo "output: $output, expected: 0"
exit 1
fi
done
echo OK
$CLICKHOUSE_CLIENT -q "DROP TABLE t_index_hypothesis"