try to fix tests

This commit is contained in:
Yarik Briukhovetskyi 2024-08-22 13:20:04 +02:00 committed by GitHub
parent 8cf6323125
commit 95f45d2eaf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 23 additions and 50 deletions

View File

@ -1513,14 +1513,14 @@ def test_hive_partitioning_with_one_parameter(cluster):
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}')"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}')"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
@ -1533,7 +1533,7 @@ def test_hive_partitioning_with_one_parameter(cluster):
query = (
f"SELECT column2 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}');"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
@ -1551,14 +1551,14 @@ def test_hive_partitioning_with_all_parameters(cluster):
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}');"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
pattern = r"DB::Exception: Cannot use hive partitioning for file"
@ -1577,14 +1577,14 @@ def test_hive_partitioning_without_setting(cluster):
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}');"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
pattern = re.compile(
r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL

View File

@ -1259,35 +1259,16 @@ def test_respect_object_existence_on_partitioned_write(started_cluster):
def test_hive_partitioning_with_one_parameter(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n")
assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n"
hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n")
assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n"
r = node1.query(
"SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
"SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')",
settings={"use_hive_partitioning": 1},
)
assert r == f"Elizabeth\n"
def test_hive_partitioning_with_all_parameters(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(
f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n"
)
assert (
hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2")
== f"Elizabeth\tGordon\n"
)
pattern = r"DB::Exception: Cannot use hive partitioning for file"
with pytest.raises(QueryRuntimeException, match=pattern):
node1.query(
f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
settings={"use_hive_partitioning": 1},
)
def test_hive_partitioning_without_setting(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(

View File

@ -1,5 +1,5 @@
TESTING THE FILE HIVE PARTITIONING
last Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
@ -19,8 +19,7 @@ Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
last Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
@ -35,12 +34,13 @@ Cross Elizabeth
Array(Int64) LowCardinality(Float64)
101
2070
4081
2070
2070
b
1
1
TESTING THE URL PARTITIONING
last Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
@ -60,10 +60,9 @@ Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
1
TESTING THE S3 PARTITIONING
last Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
@ -83,8 +82,7 @@ Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
last Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
@ -96,7 +94,7 @@ Delgado Elizabeth
Cross Elizabeth
OK
TESTING THE S3CLUSTER PARTITIONING
last Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
@ -106,7 +104,7 @@ Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
last Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth

View File

@ -14,7 +14,7 @@ set use_hive_partitioning = 1;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth';
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1;
SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
@ -29,16 +29,10 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c
SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1;
"""
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
""" 2>&1 | grep -c "INCORRECT_DATA"
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 0;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER"
@ -68,7 +62,7 @@ set use_hive_partitioning = 1;
SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth';
SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
"""
$CLICKHOUSE_CLIENT -n -q """
@ -84,5 +78,5 @@ set use_hive_partitioning = 1;
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth';
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
"""