fix tests

This commit is contained in:
Yarik Briukhovetskyi 2024-08-21 19:43:45 +02:00 committed by GitHub
parent 2e58ac5611
commit a52eff299e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 100 additions and 116 deletions

View File

@ -162,7 +162,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
return;
if (storage_columns.size() == 1)
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use a file with one column {}, that is ised during hive partitioning", name);
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot implement partition by all columns in a file");
auto local_type = storage_columns.get(name).type;
storage_columns.remove(name);
desc.addEphemeral(name, local_type, "");

View File

@ -1518,14 +1518,14 @@ def test_hive_partitioning_with_one_parameter(cluster):
)
query = (
f"SELECT column1, column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, "
f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}')"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == [
"Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format(
"Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}".format(
bucket="cont", max_path=path
)
]
@ -1533,14 +1533,14 @@ def test_hive_partitioning_with_one_parameter(cluster):
query = (
f"SELECT column2 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSV', structure='{table_format}');"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Gordon"]
def test_hive_partitioning_with_two_parameters(cluster):
def test_hive_partitioning_with_all_parameters(cluster):
# type: (ClickHouseCluster) -> None
node = cluster.instances["node"] # type: ClickHouseInstance
table_format = "column1 String, column2 String"
@ -1556,35 +1556,14 @@ def test_hive_partitioning_with_two_parameters(cluster):
)
query = (
f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, "
f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSV', structure='{table_format}');"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == [
"Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format(
bucket="cont", max_path=path
)
]
pattern = r"DB::Exception: Cannot implement partition by all columns in a file"
query = (
f"SELECT column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Elizabeth"]
query = (
f"SELECT column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Elizabeth"]
with pytest.raises(Exception, match=pattern):
azure_query(node, query, settings={"use_hive_partitioning": 1})
def test_hive_partitioning_without_setting(cluster):
@ -1603,9 +1582,9 @@ def test_hive_partitioning_without_setting(cluster):
)
query = (
f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, "
f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSV', structure='{table_format}');"
)
pattern = re.compile(
r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL

View File

@ -1263,13 +1263,19 @@ def test_hive_partitioning_with_one_parameter(started_cluster):
assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n"
r = node1.query(
"SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
"SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
settings={"use_hive_partitioning": 1},
)
assert r == f"Elizabeth\n"
r = node1.query(
"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
settings={"use_hive_partitioning": 1},
)
assert r == f"Gordon\n"
def test_hive_partitioning_with_two_parameters(started_cluster):
def test_hive_partitioning_with_all_parameters(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(
f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n"
@ -1279,11 +1285,13 @@ def test_hive_partitioning_with_two_parameters(started_cluster):
== f"Elizabeth\tGordon\n"
)
r = node1.query(
"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
settings={"use_hive_partitioning": 1},
)
assert r == f"Gordon\n"
pattern = r"DB::Exception: Cannot implement partition by all columns in a file"
with pytest.raises(QueryRuntimeException, match=pattern):
node1.query(
f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
settings={"use_hive_partitioning": 1},
)
def test_hive_partitioning_without_setting(started_cluster):
@ -1301,7 +1309,7 @@ def test_hive_partitioning_without_setting(started_cluster):
with pytest.raises(QueryRuntimeException, match=pattern):
node1.query(
f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
settings={"use_hive_partitioning": 0},
)

View File

@ -1,4 +1,14 @@
TESTING THE FILE HIVE PARTITIONING
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -10,25 +20,16 @@ Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
42 2020-01-01
[1,2,3] 42.42
Array(Int64) LowCardinality(Float64)
@ -37,10 +38,20 @@ Array(Int64) LowCardinality(Float64)
4081
2070
2070
1
1
b
1
1
TESTING THE URL PARTITIONING
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -52,26 +63,18 @@ Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
1
TESTING THE S3 PARTITIONING
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -83,39 +86,35 @@ Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
OK
TESTING THE S3CLUSTER PARTITIONING
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth

View File

@ -13,8 +13,6 @@ set use_hive_partitioning = 1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;