diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp index 6900ae1e69b..84195eb71b2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp @@ -146,10 +146,15 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree values.push_back(granule->met); } - if (const auto it = answer_cache.find(values); it != std::end(answer_cache)) - return it->second; + const ComparisonGraph * graph = nullptr; - const auto & graph = getGraph(values); + { + std::lock_guard lock(cache_mutex); + if (const auto it = answer_cache.find(values); it != std::end(answer_cache)) + return it->second; + + graph = getGraph(values); + } bool always_false = false; expression_cnf->iterateGroups( @@ -166,7 +171,7 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree if (func && func->arguments->children.size() == 2) { const auto expected = ComparisonGraph::atomToCompareResult(atom); - if (graph.isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1])) + if (graph->isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1])) { /// If graph failed use matching. /// We don't need to check constraints. @@ -177,6 +182,8 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree always_false = true; }); + std::lock_guard lock(cache_mutex); + answer_cache[values] = !always_false; return !always_false; } @@ -195,11 +202,13 @@ std::unique_ptr MergeTreeIndexhypothesisMergedCondition::buildG return std::make_unique(active_atomic_formulas); } -const ComparisonGraph & MergeTreeIndexhypothesisMergedCondition::getGraph(const std::vector & values) const +const ComparisonGraph * MergeTreeIndexhypothesisMergedCondition::getGraph(const std::vector & values) const { - if (!graph_cache.contains(values)) - graph_cache[values] = buildGraph(values); - return *graph_cache.at(values); + auto [it, inserted] = graph_cache.try_emplace(values); + if (inserted) + it->second = buildGraph(values); + + return it->second.get(); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h index 530e14e15cc..9ebcbe9d7dc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h @@ -21,11 +21,14 @@ public: private: void addConstraints(const ConstraintsDescription & constraints_description); std::unique_ptr buildGraph(const std::vector & values) const; - const ComparisonGraph & getGraph(const std::vector & values) const; + const ComparisonGraph * getGraph(const std::vector & values) const; ASTPtr expression_ast; std::unique_ptr expression_cnf; + /// Part analysis can be done in parallel. + /// So, we have shared answer and graph cache. + mutable std::mutex cache_mutex; mutable std::unordered_map, std::unique_ptr> graph_cache; mutable std::unordered_map, bool> answer_cache; diff --git a/tests/queries/0_stateless/02150_index_hypothesis_race_long.reference b/tests/queries/0_stateless/02150_index_hypothesis_race_long.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02150_index_hypothesis_race_long.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh new file mode 100755 index 00000000000..da2dcd055ea --- /dev/null +++ b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_index_hypothesis" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_index_hypothesis (a UInt32, b UInt32, INDEX t a != b TYPE hypothesis GRANULARITY 1) ENGINE = MergeTree ORDER BY a" + +$CLICKHOUSE_CLIENT -q "INSERT INTO t_index_hypothesis SELECT number, number + 1 FROM numbers(10000000)" + +for _ in {0..30}; do + output=`$CLICKHOUSE_CLIENT -q "SELECT count() FROM t_index_hypothesis WHERE a = b"` + if [[ $output != "0" ]]; then + echo "output: $output, expected: 0" + exit 1 + fi +done + +echo OK + +$CLICKHOUSE_CLIENT -q "DROP TABLE t_index_hypothesis"