import os
import subprocess
import time
from tempfile import NamedTemporaryFile

import pytest
import requests

from helpers.cluster import ClickHouseCluster
from helpers.hdfs_api import HDFSApi

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_hdfs=True, config_dir="configs", main_configs=['configs/log_conf.xml'])

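
# Module-scoped fixture: start the cluster once for all tests in this file and
# shut it down afterwards, even if startup fails partway through.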
@pytest.fixture(scope="module")
def started_cluster():
    try:
        cluster.start()
        yield cluster
    except Exception as ex:
        print(ex)
        raise ex
    finally:
        cluster.shutdown()

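
# Basic round trip through the HDFS table engine: rows inserted via SQL must be
# visible both in the backing HDFS file and through a SELECT on the table.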
def test_read_write_storage(started_cluster):
    hdfs_api = HDFSApi("root")

    node1.query("create table SimpleHDFSStorage (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/simple_storage', 'TSV')")
    node1.query("insert into SimpleHDFSStorage values (1, 'Mark', 72.53)")
    assert hdfs_api.read_data("/simple_storage") == "1\tMark\t72.53\n"
    assert node1.query("select * from SimpleHDFSStorage") == "1\tMark\t72.53\n"

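
# Tables whose URI contains a glob ({N..M} ranges, {a,b} enums, ? or *) read
# from every matching file but are read-only: inserts into them must fail.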
def test_read_write_storage_with_globs(started_cluster):
    hdfs_api = HDFSApi("root")

    node1.query("create table HDFSStorageWithRange (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/storage{1..5}', 'TSV')")
    node1.query("create table HDFSStorageWithEnum (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/storage{1,2,3,4,5}', 'TSV')")
    node1.query("create table HDFSStorageWithQuestionMark (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/storage?', 'TSV')")
    node1.query("create table HDFSStorageWithAsterisk (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/storage*', 'TSV')")

    for i in ["1", "2", "3"]:
        hdfs_api.write_data("/storage" + i, i + "\tMark\t72.53\n")
        assert hdfs_api.read_data("/storage" + i) == i + "\tMark\t72.53\n"

    assert node1.query("select count(*) from HDFSStorageWithRange") == "3\n"
    assert node1.query("select count(*) from HDFSStorageWithEnum") == "3\n"
    assert node1.query("select count(*) from HDFSStorageWithQuestionMark") == "3\n"
    assert node1.query("select count(*) from HDFSStorageWithAsterisk") == "3\n"

    try:
        node1.query("insert into HDFSStorageWithEnum values (1, 'NEW', 4.2)")
        assert False, "Exception should have been thrown"
    except Exception as ex:
        print(ex)
        assert "in readonly mode" in str(ex)

    try:
        node1.query("insert into HDFSStorageWithQuestionMark values (1, 'NEW', 4.2)")
        assert False, "Exception should have been thrown"
    except Exception as ex:
        print(ex)
        assert "in readonly mode" in str(ex)

    try:
        node1.query("insert into HDFSStorageWithAsterisk values (1, 'NEW', 4.2)")
        assert False, "Exception should have been thrown"
    except Exception as ex:
        print(ex)
        assert "in readonly mode" in str(ex)

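
# The hdfs() table function should return exactly the bytes that were written
# to the file through the HDFS API.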
def test_read_write_table(started_cluster):
    hdfs_api = HDFSApi("root")
    data = "1\tSerialize\t555.222\n2\tData\t777.333\n"
    hdfs_api.write_data("/simple_table_function", data)

    assert hdfs_api.read_data("/simple_table_function") == data

    assert node1.query("select * from hdfs('hdfs://hdfs1:9000/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')") == data

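
# Writing through the table engine: a multi-row INSERT must land in the backing
# HDFS file byte-for-byte.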
def test_write_table(started_cluster):
    hdfs_api = HDFSApi("root")

    node1.query("create table OtherHDFSStorage (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/other_storage', 'TSV')")
    node1.query("insert into OtherHDFSStorage values (10, 'tomas', 55.55), (11, 'jack', 32.54)")

    result = "10\ttomas\t55.55\n11\tjack\t32.54\n"
    assert hdfs_api.read_data("/other_storage") == result
    assert node1.query("select * from OtherHDFSStorage order by id") == result

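
# Malformed or unreachable HDFS URIs must be rejected with a descriptive error:
# a bad scheme, an unresolvable host, and an invalid path, respectively.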
def test_bad_hdfs_uri(started_cluster):
    try:
        node1.query("create table BadStorage1 (id UInt32, name String, weight Float64) ENGINE = HDFS('hads:hgsdfs100500:9000/other_storage', 'TSV')")
    except Exception as ex:
        print(ex)
        assert "Illegal HDFS URI" in str(ex)

    try:
        node1.query("create table BadStorage2 (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs100500:9000/other_storage', 'TSV')")
    except Exception as ex:
        print(ex)
        assert "Unable to create builder to connect to HDFS" in str(ex)

    try:
        node1.query("create table BadStorage3 (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/<>', 'TSV')")
    except Exception as ex:
        print(ex)
        assert "Unable to open HDFS file" in str(ex)

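
# Glob patterns in the hdfs() table function: each pattern should match the
# expected number of files, so the result is the sample data repeated that
# many times.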
def test_globs_in_read_table(started_cluster):
    hdfs_api = HDFSApi("root")
    some_data = "1\tSerialize\t555.222\n2\tData\t777.333\n"
    globs_dir = "/dir_for_test_with_globs/"
    files = ["dir1/dir_dir/file1", "dir2/file2", "simple_table_function", "dir/file", "some_dir/dir1/file", "some_dir/dir2/file", "some_dir/file", "table1_function", "table2_function", "table3_function"]
    for filename in files:
        hdfs_api.write_data(globs_dir + filename, some_data)

    test_requests = [("dir{1..5}/dir_dir/file1", 1),
                     ("*_table_functio?", 1),
                     ("dir/fil?", 1),
                     ("table{3..8}_function", 1),
                     ("table{2..8}_function", 2),
                     ("dir/*", 1),
                     ("dir/*?*?*?*?*", 1),
                     ("dir/*?*?*?*?*?*", 0),
                     ("some_dir/*/file", 2),
                     ("some_dir/dir?/*", 2),
                     ("*/*/*", 3),
                     ("?", 0)]

    for pattern, value in test_requests:
        assert node1.query("select * from hdfs('hdfs://hdfs1:9000" + globs_dir + pattern + "', 'TSV', 'id UInt64, text String, number Float64')") == value * some_data