Fixed: eliminate AMBIGUOUS_COLUMN_NAME exception

Eliminate the AMBIGUOUS_COLUMN_NAME exception in hdfs/s3 cluster when running a query with a join
2024-11-25 17:12:03 +00:00 · 2022-03-30 16:19:16 +08:00 · 2022-03-30 16:19:16 +08:00 · 08e1fdc6b8
commit 08e1fdc6b8
parent a842a81aba
4 changed files with 31 additions and 2 deletions
--- a/src/Storages/HDFS/StorageHDFSCluster.cpp
+++ b/src/Storages/HDFS/StorageHDFSCluster.cpp
@ -93,7 +93,7 @@ Pipe StorageHDFSCluster::read(
            /// So, task_identifier is passed as constructor argument. It is more obvious.
            auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                connection,
-                queryToString(query_info.query),
+                queryToString(query_info.original_query),
                header,
                context,
                /*throttler=*/nullptr,
--- a/src/Storages/StorageS3Cluster.cpp
+++ b/src/Storages/StorageS3Cluster.cpp
@ -134,7 +134,7 @@ Pipe StorageS3Cluster::read(
            /// So, task_identifier is passed as constructor argument. It is more obvious.
            auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                connection,
-                queryToString(query_info.query),
+                queryToString(query_info.original_query),
                header,
                context,
                /*throttler=*/nullptr,
--- a/tests/integration/test_s3_cluster/test.py
+++ b/tests/integration/test_s3_cluster/test.py
@ -153,3 +153,22 @@ def test_wrong_cluster(started_cluster):
    )

    assert "not found" in error
+
+
+def test_ambiguous_join(started_cluster):
+    node = started_cluster.instances["s0_0_0"]
+    result = node.query(
+    """
+    SELECT l.name, r.value from s3Cluster(
+        'cluster_simple', 
+        'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 
+        'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as l
+    JOIN s3Cluster(
+        'cluster_simple', 
+        'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 
+        'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as r
+    ON l.name = r.name
+    """
+    )
+    assert "AMBIGUOUS_COLUMN_NAME" not in result
+    
--- a/tests/integration/test_storage_hdfs/test.py
+++ b/tests/integration/test_storage_hdfs/test.py
@ -553,6 +553,16 @@ def test_insert_select_schema_inference(started_cluster):
    result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test.native.zst')")
    assert int(result) == 1

+def test_cluster_join(started_cluster):
+    result = node1.query(
+        '''
+        SELECT l.id,r.id FROM hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') as l
+        JOIN hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') as r
+        ON l.id = r.id
+        '''
+    )
+    assert "AMBIGUOUS_COLUMN_NAME" not in result
+

 if __name__ == "__main__":
    cluster.start()