Fixed: eliminate AMBIGUOUS_COLUMN_NAME exception

Eliminate the AMBIGUOUS_COLUMN_NAME exception in hdfs/s3 cluster when running a query with a join
2024-11-29 11:02:08 +00:00 · 2022-03-30 16:19:16 +08:00 · 2022-03-30 16:19:16 +08:00 · 08e1fdc6b8
commit 08e1fdc6b8
parent a842a81aba
4 changed files with 31 additions and 2 deletions
--- a/src/Storages/HDFS/StorageHDFSCluster.cpp
+++ b/src/Storages/HDFS/StorageHDFSCluster.cpp
@ -93,7 +93,7 @@ Pipe StorageHDFSCluster::read(
            /// So, task_identifier is passed as constructor argument. It is more obvious.
            auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                connection,
-                queryToString(query_info.query),
+                queryToString(query_info.original_query),
                header,
                context,
                /*throttler=*/nullptr,
--- a/src/Storages/StorageS3Cluster.cpp
+++ b/src/Storages/StorageS3Cluster.cpp
@ -134,7 +134,7 @@ Pipe StorageS3Cluster::read(
            /// So, task_identifier is passed as constructor argument. It is more obvious.
            auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                connection,
-                queryToString(query_info.query),
+                queryToString(query_info.original_query),
                header,
                context,
                /*throttler=*/nullptr,
--- a/tests/integration/test_s3_cluster/test.py
+++ b/tests/integration/test_s3_cluster/test.py
@ -153,3 +153,22 @@ def test_wrong_cluster(started_cluster):
    )
    assert "not found" in error
 # Regression test: self-join two s3Cluster() table functions that declare the
 # same column names and check the output does not mention AMBIGUOUS_COLUMN_NAME.
 def test_ambiguous_join(started_cluster):
    node = started_cluster.instances["s0_0_0"]
    # Both sides of the JOIN use the same cluster, URL glob and schema; only
    # the aliases `l` and `r` tell the `name` / `value` columns apart.
    result = node.query(
    """
    SELECT l.name, r.value from s3Cluster(
        'cluster_simple', 
        'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 
        'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as l
    JOIN s3Cluster(
        'cluster_simple', 
        'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 
        'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as r
    ON l.name = r.name
    """
    )
    # NOTE(review): this only inspects the text returned by query(); whether a
    # server-side error would show up here or be raised by query() is not
    # visible in this diff -- confirm.
    assert "AMBIGUOUS_COLUMN_NAME" not in result
--- a/tests/integration/test_storage_hdfs/test.py
+++ b/tests/integration/test_storage_hdfs/test.py
@ -553,6 +553,16 @@ def test_insert_select_schema_inference(started_cluster):
    result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test.native.zst')")
    assert int(result) == 1
 # Regression test: self-join two hdfsCluster() table functions that both expose
 # an `id` column and check the output does not mention AMBIGUOUS_COLUMN_NAME.
 def test_cluster_join(started_cluster):
    # `node1` is defined outside this function (not visible in this hunk).
    # Both sides read the same HDFS glob with the same single-column schema;
    # only the aliases `l` and `r` disambiguate `id`.
    result = node1.query(
        '''
        SELECT l.id,r.id FROM hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') as l
        JOIN hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') as r
        ON l.id = r.id
        '''
    )
    # NOTE(review): this only inspects the text returned by query(); whether a
    # server-side error would show up here or be raised by query() is not
    # visible in this diff -- confirm.
    assert "AMBIGUOUS_COLUMN_NAME" not in result
 if __name__ == "__main__":
    cluster.start()