From 9a1c286ade68895bb095eb28ed471dfbcd4a2156 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 16 May 2018 13:25:10 +0300
Subject: [PATCH 1/3] Changed prepared set indexation from AST* to StringRange.
 [#CLICKHOUSE-3734]

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp  | 18 ++++++------
 dbms/src/Interpreters/ExpressionAnalyzer.h    |  8 ++---
 dbms/src/Parsers/IAST.h                       |  2 +-
 dbms/src/Parsers/StringRange.h                | 20 +++++++++++--
 dbms/src/Storages/MergeTree/KeyCondition.cpp  |  6 ++--
 .../MergeTree/MergeTreeWhereOptimizer.cpp     |  2 +-
 dbms/src/Storages/SelectQueryInfo.h           |  4 +--
 dbms/src/Storages/StorageMerge.cpp            | 29 -------------------
 8 files changed, 38 insertions(+), 51 deletions(-)
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index c7810666da8..ba9dca82822 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -1486,7 +1486,7 @@ void ExpressionAnalyzer::tryMakeSetFromSubquery(const ASTPtr & subquery_or_table
             return;
     }
 
-    prepared_sets[subquery_or_table_name.get()] = std::move(set);
+    prepared_sets[subquery_or_table_name->range] = std::move(set);
 }
 
 
@@ -1515,7 +1515,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block &
         {
             const ASTPtr & arg = args.children.at(1);
 
-            if (!prepared_sets.count(arg.get())) /// Not already prepared.
+            if (!prepared_sets.count(arg->range)) /// Not already prepared.
             {
                 if (typeid_cast<ASTSubquery *>(arg.get()) || typeid_cast<ASTIdentifier *>(arg.get()))
                 {
@@ -1550,7 +1550,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
     const ASTPtr & arg = args.children.at(1);
 
     /// Already converted.
-    if (prepared_sets.count(arg.get()))
+    if (prepared_sets.count(arg->range))
         return;
 
     /// If the subquery or table name for SELECT.
@@ -1573,7 +1573,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
 
                 if (storage_set)
                 {
-                    prepared_sets[arg.get()] = storage_set->getSet();
+                    prepared_sets[arg->range] = storage_set->getSet();
                     return;
                 }
             }
@@ -1584,7 +1584,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
         /// If you already created a Set with the same subquery / table.
         if (subquery_for_set.set)
         {
-            prepared_sets[arg.get()] = subquery_for_set.set;
+            prepared_sets[arg->range] = subquery_for_set.set;
             return;
         }
 
@@ -1630,7 +1630,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
         }
 
         subquery_for_set.set = set;
-        prepared_sets[arg.get()] = set;
+        prepared_sets[arg->range] = set;
     }
     else
     {
@@ -1712,7 +1712,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block &
 
     SetPtr set = std::make_shared<Set>(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode));
     set->createFromAST(set_element_types, elements_ast, context, create_ordered_set);
-    prepared_sets[right_arg.get()] = std::move(set);
+    prepared_sets[right_arg->range] = std::move(set);
 }
 
 
@@ -2102,12 +2102,12 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
                 /// Select the name in the next cycle.
                 argument_names.emplace_back();
             }
-            else if (prepared_sets.count(child.get()) && functionIsInOrGlobalInOperator(node->name) && arg == 1)
+            else if (prepared_sets.count(child->range) && functionIsInOrGlobalInOperator(node->name) && arg == 1)
             {
                 ColumnWithTypeAndName column;
                 column.type = std::make_shared<DataTypeSet>();
 
-                const SetPtr & set = prepared_sets[child.get()];
+                const SetPtr & set = prepared_sets[child->range];
 
                 /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
                 ///  so that sets with the same literal representation do not fuse together (they can have different types).
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h
index d118e06334d..b8a0bd5a17b 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.h
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.h
@@ -3,9 +3,9 @@
 #include <Interpreters/AggregateDescription.h>
 #include <Interpreters/Settings.h>
 #include <Core/Block.h>
-#include "ExpressionActions.h"
-#include "ProjectionManipulation.h"
-
+#include <Interpreters/ExpressionActions.h>
+#include <Interpreters/ProjectionManipulation.h>
+#include <Parsers/StringRange.h>
 
 namespace DB
 {
@@ -23,7 +23,7 @@ using ASTPtr = std::shared_ptr<IAST>;
 
 class Set;
 using SetPtr = std::shared_ptr<Set>;
-using PreparedSets = std::unordered_map<IAST*, SetPtr>;
+using PreparedSets = std::unordered_map<StringRange, SetPtr, StringRangeHash>;
 
 class IBlockInputStream;
 using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h
index 9c4f33ebf49..1558ebb6681 100644
--- a/dbms/src/Parsers/IAST.h
+++ b/dbms/src/Parsers/IAST.h
@@ -65,7 +65,7 @@ public:
 
     ASTPtr ptr() { return shared_from_this(); }
 
-    /** Get a deep copy of the tree. */
+    /** Get a deep copy of the tree. Cloned object must have the same range. */
     virtual ASTPtr clone() const = 0;
 
     /** Get hash code, identifying this element and its subtree.
diff --git a/dbms/src/Parsers/StringRange.h b/dbms/src/Parsers/StringRange.h
index b919a899293..3de66f2cd00 100644
--- a/dbms/src/Parsers/StringRange.h
+++ b/dbms/src/Parsers/StringRange.h
@@ -4,6 +4,7 @@
 #include <Parsers/TokenIterator.h>
 #include <map>
 #include <memory>
+#include <Common/SipHash.h>
 
 
 namespace DB
@@ -14,9 +15,10 @@ struct StringRange
     const char * first = nullptr;
     const char * second = nullptr;
 
-    StringRange() {}
+    StringRange() = default;
+    StringRange(const StringRange & other) = default;
     StringRange(const char * begin, const char * end) : first(begin), second(end) {}
-    StringRange(TokenIterator token) : first(token->begin), second(token->end) {}
+    explicit StringRange(TokenIterator token) : first(token->begin), second(token->end) {}
 
     StringRange(TokenIterator token_begin, TokenIterator token_end)
     {
@@ -34,6 +36,8 @@ struct StringRange
         first = token_begin->begin;
         second = token_last->end;
     }
+
+    bool operator==(const StringRange & rhs) const { return std::tie(first, second) == std::tie(rhs.first, rhs.second); }
 };
 
 using StringPtr = std::shared_ptr<String>;
@@ -44,4 +48,16 @@ inline String toString(const StringRange & range)
     return range.first ? String(range.first, range.second) : String();
 }
 
+struct StringRangeHash
+{
+    UInt64 operator()(const StringRange & range) const
+    {
+        SipHash hash;
+        hash.update(range.first);
+        hash.update(range.second);
+        return hash.get64();
+    }
+};
+
 }
+
diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp
index ffed82ad773..bc618f8d655 100644
--- a/dbms/src/Storages/MergeTree/KeyCondition.cpp
+++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp
@@ -641,8 +641,8 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
         bool is_set_const = false;
         bool is_constant_transformed = false;
 
-        if (prepared_sets.count(args[1].get())
-            && isTupleIndexable(args[0], context, out, prepared_sets[args[1].get()], key_column_num))
+        if (prepared_sets.count(args[1]->range)
+            && isTupleIndexable(args[0], context, out, prepared_sets[args[1]->range], key_column_num))
         {
             key_arg_pos = 0;
             is_set_const = true;
@@ -1016,7 +1016,7 @@ bool KeyCondition::mayBeTrueInRangeImpl(const std::vector<Range> & key_ranges, c
         {
             auto in_func = typeid_cast<const ASTFunction *>(element.in_function.get());
             const ASTs & args = typeid_cast<const ASTExpressionList &>(*in_func->arguments).children;
-            PreparedSets::const_iterator it = prepared_sets.find(args[1].get());
+            PreparedSets::const_iterator it = prepared_sets.find(args[1]->range);
             if (in_func && it != prepared_sets.end())
             {
                 rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(key_ranges, data_types));
diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
index 3b35c127511..b26b4f9bdcd 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
@@ -334,7 +334,7 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const IAST * const ast) const
         if ((primary_key_columns.count(first_arg_name) && isConstant(args[1])) ||
             (primary_key_columns.count(second_arg_name) && isConstant(args[0])) ||
             (primary_key_columns.count(first_arg_name)
-                && (prepared_sets.count(args[1].get()) || typeid_cast<const ASTSubquery *>(args[1].get()))))
+                && (prepared_sets.count(args[1]->range) || typeid_cast<const ASTSubquery *>(args[1].get()))))
             return true;
     }
 
diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h
index 5443434fd40..608ebde5301 100644
--- a/dbms/src/Storages/SelectQueryInfo.h
+++ b/dbms/src/Storages/SelectQueryInfo.h
@@ -2,7 +2,7 @@
 
 #include <memory>
 #include <unordered_map>
-
+#include <Parsers/StringRange.h>
 
 namespace DB
 {
@@ -14,7 +14,7 @@ class Set;
 using SetPtr = std::shared_ptr<Set>;
 
 /// Information about calculated sets in right hand side of IN.
-using PreparedSets = std::unordered_map<IAST*, SetPtr>;
+using PreparedSets = std::unordered_map<StringRange, SetPtr, StringRangeHash>;
 
 
 /** Query along with some additional data,
diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp
index c0f264e7f85..93254187ec9 100644
--- a/dbms/src/Storages/StorageMerge.cpp
+++ b/dbms/src/Storages/StorageMerge.cpp
@@ -84,33 +84,6 @@ bool StorageMerge::isRemote() const
 }
 
 
-namespace
-{
-    using NodeHashToSet = std::map<IAST::Hash, SetPtr>;
-
-    void relinkSetsImpl(const ASTPtr & query, const NodeHashToSet & node_hash_to_set, PreparedSets & new_sets)
-    {
-        auto hash = query->getTreeHash();
-        auto it = node_hash_to_set.find(hash);
-        if (node_hash_to_set.end() != it)
-            new_sets[query.get()] = it->second;
-
-        for (const auto & child : query->children)
-            relinkSetsImpl(child, node_hash_to_set, new_sets);
-    }
-
-    /// Re-link prepared sets onto cloned and modified AST.
-    void relinkSets(const ASTPtr & query, const PreparedSets & old_sets, PreparedSets & new_sets)
-    {
-        NodeHashToSet node_hash_to_set;
-        for (const auto & node_set : old_sets)
-            node_hash_to_set.emplace(node_set.first->getTreeHash(), node_set.second);
-
-        relinkSetsImpl(query, node_hash_to_set, new_sets);
-    }
-}
-
-
 bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const
 {
     /// It's beneficial if it is true for at least one table.
@@ -211,8 +184,6 @@ BlockInputStreams StorageMerge::read(
         SelectQueryInfo modified_query_info;
         modified_query_info.query = modified_query_ast;
 
-        relinkSets(modified_query_info.query, query_info.sets, modified_query_info.sets);
-
         BlockInputStreams source_streams;
 
         if (curr_table_number < num_streams)

From d0341077ee4fd04c501a175e9aa072a310d975c9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 17 May 2018 17:54:22 +0300
Subject: [PATCH 2/3] Added test with bug caused set indexing by ptr.
 [#CLICKHOUSE-3734]

---
 .../00628_in_lambda_on_merge_table_bug.reference  | 10 ++++++++++
 .../00628_in_lambda_on_merge_table_bug.sql        | 15 +++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.reference
 create mode 100644 dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql

diff --git a/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.reference b/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.reference
new file mode 100644
index 00000000000..7bb480beb03
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.reference
@@ -0,0 +1,10 @@
+1	1	1	test_in_tuple_1
+1	2	2	test_in_tuple_1
+2	1	1	test_in_tuple_2
+2	2	2	test_in_tuple_2
+-
+1	1	1	test_in_tuple_1
+2	1	1	test_in_tuple_2
+-
+1	1	1	test_in_tuple_1
+2	1	1	test_in_tuple_2
diff --git a/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql b/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql
new file mode 100644
index 00000000000..beedf511795
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql
@@ -0,0 +1,15 @@
+drop table if exists test_in_tuple_1;
+drop table if exists test_in_tuple_2;
+drop table if exists test_in_tuple;
+
+create table test_in_tuple_1 (key Int32, key_2 Int32, x Array(Int32), y Array(Int32)) engine = MergeTree order by (key, key_2);
+create table test_in_tuple_2 (key Int32, key_2 Int32, x Array(Int32), y Array(Int32)) engine = MergeTree order by (key, key_2);
+create table test_in_tuple as test_in_tuple_1 engine = Merge('default', '^test_in_tuple_[0-9]+$');
+
+insert into test_in_tuple_1 values (1, 1, [1, 2], [1, 2]);
+insert into test_in_tuple_2 values (2, 1, [1, 2], [1, 2]);
+select key, arr_x, arr_y, _table from test_in_tuple left array join x as arr_x, y as arr_y order by _table;
+select '-';
+select key, arr_x, arr_y, _table from test_in_tuple left array join x as arr_x, y as arr_y where (key_2, arr_x, arr_y) in (1, 1, 1) order by _table;
+select '-';
+select key, arr_x, arr_y, _table from test_in_tuple left array join arrayFilter((t, x_0, x_1) -> (key_2, x_0, x_1) in (1, 1, 1), x, x ,y) as arr_x, arrayFilter((t, x_0, x_1) -> (key_2, x_0, x_1) in (1, 1, 1), y, x ,y) as arr_y where (key_2, arr_x, arr_y) in (1, 1, 1) order by _table;

From 5e22f19258dd459cf79d3979da9bd3ebc88eccd9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 18 May 2018 20:27:58 +0300
Subject: [PATCH 3/3] Added fake range for ASTIdentifier in order to fix
 multiple global in. [#CLICKHOUSE-3734]

---
 dbms/src/Parsers/ASTIdentifier.h   | 2 +-
 dbms/src/Storages/StorageMerge.cpp | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h
index 017e33af500..4374255b874 100644
--- a/dbms/src/Parsers/ASTIdentifier.h
+++ b/dbms/src/Parsers/ASTIdentifier.h
@@ -26,7 +26,7 @@ public:
     Kind kind;
 
     ASTIdentifier(const String & name_, const Kind kind_ = Column)
-        : name(name_), kind(kind_) {}
+        : name(name_), kind(kind_) { range = StringRange(name.data(), name.data() + name.size()); }
 
     /** Get the text that identifies this element. */
     String getID() const override { return "Identifier_" + name; }
diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp
index 93254187ec9..4fdd9a5d04d 100644
--- a/dbms/src/Storages/StorageMerge.cpp
+++ b/dbms/src/Storages/StorageMerge.cpp
@@ -183,6 +183,7 @@ BlockInputStreams StorageMerge::read(
 
         SelectQueryInfo modified_query_info;
         modified_query_info.query = modified_query_ast;
+        modified_query_info.sets = query_info.sets;
 
         BlockInputStreams source_streams;