fix tests

2024-11-21 15:12:02 +00:00 · 2024-08-21 19:43:45 +02:00 · 2024-08-21 19:43:45 +02:00 · a52eff299e
commit a52eff299e
parent 2e58ac5611
5 changed files with 100 additions and 116 deletions
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -162,7 +162,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
                return;

            if (storage_columns.size() == 1)
-                throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use a file with one column {}, that is ised during hive partitioning", name);
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot implement partition by all columns in a file");
            auto local_type = storage_columns.get(name).type;
            storage_columns.remove(name);
            desc.addEphemeral(name, local_type, "");
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -1518,14 +1518,14 @@ def test_hive_partitioning_with_one_parameter(cluster):
    )

    query = (
-        f"SELECT column1, column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, "
+        f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
        f"blob_path='{path}', format='CSV', structure='{table_format}')"
    )
    assert azure_query(
        node, query, settings={"use_hive_partitioning": 1}
    ).splitlines() == [
-        "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format(
+        "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}".format(
            bucket="cont", max_path=path
        )
    ]
@@ -1533,14 +1533,14 @@ def test_hive_partitioning_with_one_parameter(cluster):
    query = (
        f"SELECT column2 FROM azureBlobStorage(azure_conf2, "
        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
-        f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
+        f"blob_path='{path}', format='CSV', structure='{table_format}');"
    )
    assert azure_query(
        node, query, settings={"use_hive_partitioning": 1}
    ).splitlines() == ["Gordon"]


-def test_hive_partitioning_with_two_parameters(cluster):
+def test_hive_partitioning_with_all_parameters(cluster):
    # type: (ClickHouseCluster) -> None
    node = cluster.instances["node"]  # type: ClickHouseInstance
    table_format = "column1 String, column2 String"
@@ -1556,35 +1556,14 @@ def test_hive_partitioning_with_two_parameters(cluster):
    )

    query = (
-        f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, "
+        f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
-        f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
+        f"blob_path='{path}', format='CSV', structure='{table_format}');"
    )
-    assert azure_query(
-        node, query, settings={"use_hive_partitioning": 1}
-    ).splitlines() == [
-        "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format(
-            bucket="cont", max_path=path
-        )
-    ]
+    pattern = r"DB::Exception: Cannot implement partition by all columns in a file"

-    query = (
-        f"SELECT column1 FROM azureBlobStorage(azure_conf2, "
-        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
-        f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;"
-    )
-    assert azure_query(
-        node, query, settings={"use_hive_partitioning": 1}
-    ).splitlines() == ["Elizabeth"]
-
-    query = (
-        f"SELECT column1 FROM azureBlobStorage(azure_conf2, "
-        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
-        f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;"
-    )
-    assert azure_query(
-        node, query, settings={"use_hive_partitioning": 1}
-    ).splitlines() == ["Elizabeth"]
+    with pytest.raises(Exception, match=pattern):
+        azure_query(node, query, settings={"use_hive_partitioning": 1})


 def test_hive_partitioning_without_setting(cluster):
@@ -1603,9 +1582,9 @@ def test_hive_partitioning_without_setting(cluster):
    )

    query = (
-        f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, "
+        f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
-        f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
+        f"blob_path='{path}', format='CSV', structure='{table_format}');"
    )
    pattern = re.compile(
        r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL
--- a/tests/integration/test_storage_hdfs/test.py
+++ b/tests/integration/test_storage_hdfs/test.py
@@ -1263,13 +1263,19 @@ def test_hive_partitioning_with_one_parameter(started_cluster):
    assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n"

    r = node1.query(
-        "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
+        "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
        settings={"use_hive_partitioning": 1},
    )
    assert r == f"Elizabeth\n"

+    r = node1.query(
+        "SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
+        settings={"use_hive_partitioning": 1},
+    )
+    assert r == f"Gordon\n"

-def test_hive_partitioning_with_two_parameters(started_cluster):
+
+def test_hive_partitioning_with_all_parameters(started_cluster):
    hdfs_api = started_cluster.hdfs_api
    hdfs_api.write_data(
        f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n"
@@ -1279,11 +1285,13 @@ def test_hive_partitioning_with_two_parameters(started_cluster):
        == f"Elizabeth\tGordon\n"
    )

-    r = node1.query(
-        "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
-        settings={"use_hive_partitioning": 1},
-    )
-    assert r == f"Gordon\n"
+    pattern = r"DB::Exception: Cannot implement partition by all columns in a file"
+
+    with pytest.raises(QueryRuntimeException, match=pattern):
+        node1.query(
+            f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
+            settings={"use_hive_partitioning": 1},
+        )


 def test_hive_partitioning_without_setting(started_cluster):
@@ -1301,7 +1309,7 @@ def test_hive_partitioning_without_setting(started_cluster):

    with pytest.raises(QueryRuntimeException, match=pattern):
        node1.query(
-            f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
+            f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
            settings={"use_hive_partitioning": 0},
        )

--- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference
@@ -1,4 +1,14 @@
 TESTING THE FILE HIVE PARTITIONING
+last	Elizabeth
+Frank	Elizabeth
+Moreno	Elizabeth
+Guzman	Elizabeth
+Stephens	Elizabeth
+Franklin	Elizabeth
+Gibson	Elizabeth
+Greer	Elizabeth
+Delgado	Elizabeth
+Cross	Elizabeth
 first	 last	Elizabeth
 Jorge	Frank	Elizabeth
 Hunter	Moreno	Elizabeth
@@ -10,25 +20,16 @@ Eugenia	Greer	Elizabeth
 Jeffery	Delgado	Elizabeth
 Clara	Cross	Elizabeth
 Elizabeth	Gordon	Elizabeth
-Eva	Schmidt	Elizabeth	Schmidt
-Samuel	Schmidt	Elizabeth	Schmidt
-Eva	Schmidt	Elizabeth
-Samuel	Schmidt	Elizabeth
-Elizabeth	Gordon	Elizabeth	Gordon
-Elizabeth	Gordon	Elizabeth
-Elizabeth	Gordon	Elizabeth	Gordon
-Elizabeth	Gordon	Elizabeth
-first	 last	Elizabeth
-Jorge	Frank	Elizabeth
-Hunter	Moreno	Elizabeth
-Esther	Guzman	Elizabeth
-Dennis	Stephens	Elizabeth
-Nettie	Franklin	Elizabeth
-Stanley	Gibson	Elizabeth
-Eugenia	Greer	Elizabeth
-Jeffery	Delgado	Elizabeth
-Clara	Cross	Elizabeth
-Elizabeth	Gordon	Elizabeth
+last	Elizabeth
+Frank	Elizabeth
+Moreno	Elizabeth
+Guzman	Elizabeth
+Stephens	Elizabeth
+Franklin	Elizabeth
+Gibson	Elizabeth
+Greer	Elizabeth
+Delgado	Elizabeth
+Cross	Elizabeth
 42	2020-01-01
 [1,2,3]	42.42
 Array(Int64)	LowCardinality(Float64)
@@ -37,10 +38,20 @@ Array(Int64)	LowCardinality(Float64)
 4081
 2070
 2070
-1
-1
 b
+1
+1
 TESTING THE URL PARTITIONING
+last	Elizabeth
+Frank	Elizabeth
+Moreno	Elizabeth
+Guzman	Elizabeth
+Stephens	Elizabeth
+Franklin	Elizabeth
+Gibson	Elizabeth
+Greer	Elizabeth
+Delgado	Elizabeth
+Cross	Elizabeth
 first	 last	Elizabeth
 Jorge	Frank	Elizabeth
 Hunter	Moreno	Elizabeth
@@ -52,26 +63,18 @@ Eugenia	Greer	Elizabeth
 Jeffery	Delgado	Elizabeth
 Clara	Cross	Elizabeth
 Elizabeth	Gordon	Elizabeth
-Eva	Schmidt	Elizabeth	Schmidt
-Samuel	Schmidt	Elizabeth	Schmidt
-Eva	Schmidt	Elizabeth
-Samuel	Schmidt	Elizabeth
-Elizabeth	Gordon	Elizabeth	Gordon
-Elizabeth	Gordon	Elizabeth
-Elizabeth	Gordon	Elizabeth	Gordon
-Elizabeth	Gordon	Elizabeth
-first	 last	Elizabeth
-Jorge	Frank	Elizabeth
-Hunter	Moreno	Elizabeth
-Esther	Guzman	Elizabeth
-Dennis	Stephens	Elizabeth
-Nettie	Franklin	Elizabeth
-Stanley	Gibson	Elizabeth
-Eugenia	Greer	Elizabeth
-Jeffery	Delgado	Elizabeth
-Clara	Cross	Elizabeth
 1
 TESTING THE S3 PARTITIONING
+last	Elizabeth
+Frank	Elizabeth
+Moreno	Elizabeth
+Guzman	Elizabeth
+Stephens	Elizabeth
+Franklin	Elizabeth
+Gibson	Elizabeth
+Greer	Elizabeth
+Delgado	Elizabeth
+Cross	Elizabeth
 first	 last	Elizabeth
 Jorge	Frank	Elizabeth
 Hunter	Moreno	Elizabeth
@@ -83,39 +86,35 @@ Eugenia	Greer	Elizabeth
 Jeffery	Delgado	Elizabeth
 Clara	Cross	Elizabeth
 Elizabeth	Gordon	Elizabeth
-Eva	Schmidt	Elizabeth	Schmidt
-Samuel	Schmidt	Elizabeth	Schmidt
-Eva	Schmidt	Elizabeth
-Samuel	Schmidt	Elizabeth
-Elizabeth	Gordon	Elizabeth	Gordon
-Elizabeth	Gordon	Elizabeth
-Elizabeth	Gordon	Elizabeth	Gordon
-Elizabeth	Gordon	Elizabeth
-first	 last	Elizabeth
-Jorge	Frank	Elizabeth
-Hunter	Moreno	Elizabeth
-Esther	Guzman	Elizabeth
-Dennis	Stephens	Elizabeth
-Nettie	Franklin	Elizabeth
-Stanley	Gibson	Elizabeth
-Eugenia	Greer	Elizabeth
-Jeffery	Delgado	Elizabeth
-Clara	Cross	Elizabeth
-Elizabeth	Gordon	Elizabeth
+last	Elizabeth
+Frank	Elizabeth
+Moreno	Elizabeth
+Guzman	Elizabeth
+Stephens	Elizabeth
+Franklin	Elizabeth
+Gibson	Elizabeth
+Greer	Elizabeth
+Delgado	Elizabeth
+Cross	Elizabeth
 OK
 TESTING THE S3CLUSTER PARTITIONING
-first	 last	Elizabeth
-Jorge	Frank	Elizabeth
-Hunter	Moreno	Elizabeth
-Esther	Guzman	Elizabeth
-Dennis	Stephens	Elizabeth
-Nettie	Franklin	Elizabeth
-Stanley	Gibson	Elizabeth
-Eugenia	Greer	Elizabeth
-Jeffery	Delgado	Elizabeth
-Clara	Cross	Elizabeth
-Elizabeth	Gordon	Elizabeth
-Eva	Schmidt	Elizabeth	Schmidt
-Samuel	Schmidt	Elizabeth	Schmidt
-Eva	Schmidt	Elizabeth
-Samuel	Schmidt	Elizabeth
+last	Elizabeth
+Frank	Elizabeth
+Moreno	Elizabeth
+Guzman	Elizabeth
+Stephens	Elizabeth
+Franklin	Elizabeth
+Gibson	Elizabeth
+Greer	Elizabeth
+Delgado	Elizabeth
+Cross	Elizabeth
+last	Elizabeth
+Frank	Elizabeth
+Moreno	Elizabeth
+Guzman	Elizabeth
+Stephens	Elizabeth
+Franklin	Elizabeth
+Gibson	Elizabeth
+Greer	Elizabeth
+Delgado	Elizabeth
+Cross	Elizabeth
--- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
@@ -13,8 +13,6 @@ set use_hive_partitioning = 1;

 SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;

-SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
-
 SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
 SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;