ClickHouse/tests/integration/test_storage_url/test.py

135 lines
4.6 KiB
Python
Raw Normal View History

2021-10-26 09:31:01 +00:00
import pytest
from helpers.cluster import ClickHouseCluster
2022-09-04 16:57:51 +00:00
from helpers.test_tools import TSV
2021-10-26 09:31:01 +00:00
2022-09-04 16:57:51 +00:00
# Test cluster for this module: a single ClickHouse instance named "node1",
# started with nginx enabled (with_nginx=True). The tests below reach the
# HTTP endpoint as http://nginx:80.
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance(
    "node1",
    # Server-level configuration, including the named collections file.
    main_configs=[
        "configs/conf.xml",
        "configs/named_collections.xml",
    ],
    # User/profile configuration.
    user_configs=[
        "configs/users.xml",
    ],
    with_nginx=True,
)
2021-10-26 09:31:01 +00:00
2022-09-04 16:57:51 +00:00
@pytest.fixture(scope="module", autouse=True)
def setup_node():
    """Start the cluster for the module, seed the test data, then shut down.

    The seed INSERT writes three rows through the ``url`` table function,
    partitioned by ``column3``. The tests in this module read those rows back
    from ``http://nginx:80/test_1`` .. ``test_3``.
    """
    # NOTE(review): `url1` is presumably a named collection defined in
    # configs/named_collections.xml -- confirm against that file.
    seed_query = (
        "insert into table function url(url1) partition by column3 "
        "values (1, 2, 3), (3, 2, 1), (1, 3, 2)"
    )
    try:
        cluster.start()
        node1.query(seed_query)
        yield
    finally:
        # Runs even when start-up or seeding fails.
        cluster.shutdown()
2022-09-04 16:57:51 +00:00
def test_partition_by():
    """Check that each partition file holds the row with the matching column3.

    Reads ``test_1``, ``test_2`` and ``test_3`` through the ``url`` table
    function and compares the stripped output with the single expected row.
    """
    structure = "column1 UInt32, column2 UInt32, column3 UInt32"
    # (file suffix, expected row) -- checked in this order.
    expected_rows = (
        (1, "3\t2\t1"),
        (2, "1\t3\t2"),
        (3, "1\t2\t3"),
    )
    for suffix, expected in expected_rows:
        output = node1.query(
            f"select * from url('http://nginx:80/test_{suffix}', 'TSV', '{structure}')"
        )
        assert output.strip() == expected
2022-09-04 16:57:51 +00:00
2023-01-19 08:49:44 +00:00
2023-05-16 14:54:52 +00:00
def test_url_cluster():
    """Read the three partition files through ``urlCluster``.

    Uses the ``test_cluster_two_shards`` cluster and expects the same single
    row per file as the plain ``url`` table function test.
    """
    structure = "column1 UInt32, column2 UInt32, column3 UInt32"
    # (file suffix, expected row) -- checked in this order.
    expected_rows = (
        (1, "3\t2\t1"),
        (2, "1\t3\t2"),
        (3, "1\t2\t3"),
    )
    for suffix, expected in expected_rows:
        output = node1.query(
            f"select * from urlCluster('test_cluster_two_shards', 'http://nginx:80/test_{suffix}', 'TSV', '{structure}')"
        )
        assert output.strip() == expected
2022-09-04 16:57:51 +00:00
2023-01-19 08:49:44 +00:00
2023-05-16 14:54:52 +00:00
def test_url_cluster_with_named_collection():
    """Query ``urlCluster`` using the ``test_url`` named collection.

    Runs the query once with the collection alone and once with
    ``structure='auto'`` added; both must return the row ``3 2 1``.
    """
    # NOTE(review): `test_url` is presumably defined in
    # configs/named_collections.xml -- confirm against that file.
    queries = (
        "select * from urlCluster(test_cluster_one_shard_three_replicas_localhost, test_url)",
        "select * from urlCluster(test_cluster_one_shard_three_replicas_localhost, test_url, structure='auto')",
    )
    for query in queries:
        output = node1.query(query)
        assert output.strip() == "3\t2\t1"
2022-09-04 16:57:51 +00:00
def test_table_function_url_access_rights():
    """Check which grants user ``u1`` needs to use the ``url`` table function.

    Sequence, all run as ``u1``:
      1. SELECT (with or without an explicit structure) must fail with the
         "CREATE TEMPORARY TABLE, URL" grant error.
      2. DESCRIBE with an explicit structure must succeed without any grant,
         both for ``test_1`` and for the ``not-exist`` URL.
      3. DESCRIBE without a structure must fail with the "URL" grant error.
      4. After ``GRANT URL``, DESCRIBE without a structure must return the
         inferred columns c1..c3 as Nullable(Int64).
    """
    existing_url = "http://nginx:80/test_1"
    structure = "column1 UInt32, column2 UInt32, column3 UInt32"
    select_grant_error = (
        "necessary to have the grant CREATE TEMPORARY TABLE, URL ON *.*"
    )
    url_grant_error = "necessary to have the grant URL ON *.*"

    node1.query("CREATE USER OR REPLACE u1")

    # 1. SELECT is rejected for a user without grants.
    error = node1.query_and_get_error(
        f"SELECT * FROM url('{existing_url}', 'TSV', '{structure}')",
        user="u1",
    )
    assert select_grant_error in error

    error = node1.query_and_get_error(
        f"SELECT * FROM url('{existing_url}', 'TSV')", user="u1"
    )
    assert select_grant_error in error

    # 2. DESCRIBE with an explicit structure works without grants.
    for target in (existing_url, "http://nginx:80/not-exist"):
        described = node1.query(
            f"DESCRIBE TABLE url('{target}', 'TSV', '{structure}')",
            user="u1",
        )
        assert described == TSV(
            [["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]
        )

    # 3. DESCRIBE without a structure is rejected until URL is granted.
    error = node1.query_and_get_error(
        f"DESCRIBE TABLE url('{existing_url}', 'TSV')", user="u1"
    )
    assert url_grant_error in error

    # 4. With the URL grant, the same DESCRIBE returns the inferred columns.
    node1.query("GRANT URL ON *.* TO u1")
    described = node1.query(
        f"DESCRIBE TABLE url('{existing_url}', 'TSV')",
        user="u1",
    )
    assert described == TSV(
        [
            ["c1", "Nullable(Int64)"],
            ["c2", "Nullable(Int64)"],
            ["c3", "Nullable(Int64)"],
        ]
    )
2023-02-28 14:22:44 +00:00
@pytest.mark.parametrize("file_format", ["Parquet", "CSV", "TSV", "JSONEachRow"])
def test_file_formats(file_format):
    """Write 100 rows in ``file_format`` via ``url`` and read the first 10 back.

    The read is repeated with ``max_download_threads`` set to 1, 4 and 16,
    each time with the read/download buffer settings set to 1. Every run must
    return rows ``(i, i + 1, i + 2)`` for ``i`` in 0..9, in that order.
    """
    url = f"http://nginx:80/{file_format}_file"

    # Rows are sent on stdin as the VALUES payload of the INSERT.
    row_literals = [f"({i}, {i + 1}, {i + 2})" for i in range(100)]
    values = ", ".join(row_literals)
    node1.query(
        f"insert into table function url(url_file, url = '{url}', format = '{file_format}') values",
        stdin=values,
    )

    # Identical for every thread count, so build it once.
    expected_result = "".join(f"{i}\t{i + 1}\t{i + 2}\n" for i in range(10))

    for download_threads in [1, 4, 16]:
        select_query = (
            "\n"
            "SELECT *\n"
            f"FROM url('{url}', '{file_format}')\n"
            "LIMIT 10\n"
            "SETTINGS remote_read_min_bytes_for_seek = 1, max_read_buffer_size = 1, "
            f"max_download_buffer_size = 1, max_download_threads = {download_threads}\n"
        )
        result = node1.query(select_query)
        assert result == expected_result