2019-08-09 17:06:29 +00:00
import pytest
from helpers . cluster import ClickHouseCluster
# Cluster with a single ClickHouse instance; the tests in this file run their
# shell commands and queries against `node`.
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance("node")

# Directory in which the tests create their files and which they prepend to
# build absolute paths; should be the same as in config file.
path_to_userfiles_from_defaut_config = "/var/lib/clickhouse/user_files/"
2020-09-16 04:26:10 +00:00
2019-08-09 17:06:29 +00:00
@pytest.fixture(scope="module")
def start_cluster():
    """Start the module-level cluster and shut it down afterwards.

    Yields:
        The module-level ``cluster`` object once ``cluster.start()`` has
        returned.

    Raises:
        Exception: anything raised inside the ``try`` body is printed and
            re-raised unchanged.
    """
    try:
        cluster.start()
        yield cluster
    except Exception as ex:
        # Print the error so it is visible in the captured test output, then
        # re-raise the same exception without adding an extra raise site.
        print(ex)
        raise
    finally:
        # Runs on both the success and the failure path.
        cluster.shutdown()
2020-09-16 04:26:10 +00:00
2019-08-10 16:00:01 +00:00
def test_strange_filenames(start_cluster):
    """Check file() globs against file names that contain dots and braces.

    Creates three two-row TSV files under ``strange_names/`` and asserts that
    every glob pattern selects exactly one of them (row count 2), both when
    the path is given relative to the user files directory and when it is
    given as an absolute path.
    """
    # 2 rows data
    some_data = "\t111.222\nData\t333.444"

    node.exec_in_container(
        [
            "bash",
            "-c",
            "mkdir {}strange_names/".format(path_to_userfiles_from_defaut_config),
        ],
        privileged=True,
        user="root",
    )

    files = ["p.o.i.n.t.s", "b}{ra{ces", "b}.o{t.h"]

    # filename inside testing data for debug simplicity
    for filename in files:
        node.exec_in_container(
            [
                "bash",
                "-c",
                'echo "{}{}" > {}strange_names/{}'.format(
                    filename, some_data, path_to_userfiles_from_defaut_config, filename
                ),
            ],
            privileged=True,
            user="root",
        )

    # (glob pattern, expected row count as returned by the query)
    test_requests = [
        ("p.o.??n.t.s", "2"),
        ("p.o.*t.s", "2"),
        ("b}{r?{ces", "2"),
        ("b}*ces", "2"),
        ("b}.?{t.h", "2"),
    ]

    # Each pattern is checked twice: relative path first, then absolute path.
    path_prefixes = ("", path_to_userfiles_from_defaut_config)

    for pattern, value in test_requests:
        for prefix in path_prefixes:
            query = (
                "select count(*) from file('{}strange_names/{}', 'TSV', "
                "'text String, number Float64')"
            ).format(prefix, pattern)
            assert node.query(query) == "{}\n".format(value)
2019-08-10 16:00:01 +00:00
2020-09-16 04:26:10 +00:00
2019-08-10 16:00:01 +00:00
def test_linear_structure(start_cluster):
    """Check file() globs against a flat set of files in the user files dir.

    Writes twenty two-row TSV files directly into the user files directory and
    asserts the row count returned for each glob pattern, once with a relative
    path and once with an absolute path.
    """
    # 2 rows data
    some_data = "\t123.456\nData\t789.012"

    files = [
        "file1",
        "file2",
        "file3",
        "file4",
        "file5",
        "file000",
        "file111",
        "file222",
        "file333",
        "file444",
        "a_file",
        "b_file",
        "c_file",
        "d_file",
        "e_file",
        "a_data",
        "b_data",
        "c_data",
        "d_data",
        "e_data",
    ]

    # filename inside testing data for debug simplicity
    for filename in files:
        node.exec_in_container(
            [
                "bash",
                "-c",
                'echo "{}{}" > {}{}'.format(
                    filename, some_data, path_to_userfiles_from_defaut_config, filename
                ),
            ],
            privileged=True,
            user="root",
        )

    # (glob pattern, expected row count as returned by the query)
    test_requests = [
        ("file{0..9}", "10"),
        ("file?", "10"),
        ("nothing*", "0"),
        ("file{0..9}{0..9}{0..9}", "10"),
        ("file{000..999}", "10"),
        ("file???", "10"),
        ("file*", "20"),
        ("a_{file,data}", "4"),
        ("?_{file,data}", "20"),
        ("{a,b,c,d,e}_{file,data}", "20"),
        ("{a,b,c,d,e}?{file,data}", "20"),
        ("*", "40"),
    ]

    # Each pattern is checked twice: relative path first, then absolute path.
    path_prefixes = ("", path_to_userfiles_from_defaut_config)

    for pattern, value in test_requests:
        for prefix in path_prefixes:
            query = (
                "select count(*) from file('{}{}', 'TSV', "
                "'text String, number Float64')"
            ).format(prefix, pattern)
            assert node.query(query) == "{}\n".format(value)
2019-08-10 16:00:01 +00:00
2020-09-16 04:26:10 +00:00
2019-08-10 16:00:01 +00:00
def test_deep_structure(start_cluster):
    """Check file() globs that span several directory levels.

    Creates a directory tree under the user files directory, puts one two-row
    TSV file named ``file`` into every directory plus ``file000`` ..
    ``file999`` into ``directory1/big_dir/``, and asserts the row count
    returned for each glob pattern with both a relative and an absolute path.
    """
    # 2 rows data
    some_data = "\t135.791\nData\t246.802"

    # Parents are listed before their children because plain `mkdir` is used.
    dirs = [
        "directory1/",
        "directory2/",
        "some_more_dir/",
        "we/",
        "directory1/big_dir/",
        "directory1/dir1/",
        "directory1/dir2/",
        "directory1/dir3/",
        "directory2/dir1/",
        "directory2/dir2/",
        "directory2/one_more_dir/",
        "some_more_dir/yet_another_dir/",
        "we/need/",
        "we/need/to/",
        "we/need/to/go/",
        "we/need/to/go/deeper/",
    ]

    for directory in dirs:
        node.exec_in_container(
            [
                "bash",
                "-c",
                "mkdir {}{}".format(path_to_userfiles_from_defaut_config, directory),
            ],
            privileged=True,
            user="root",
        )

    # all directories appeared in files must be listed in dirs
    # file000 .. file999, in the same order the three nested digit loops produced.
    files = ["directory1/big_dir/file{:03d}".format(n) for n in range(1000)]
    files.extend(directory + "file" for directory in dirs)

    # filename inside testing data for debug simplicity
    for filename in files:
        node.exec_in_container(
            [
                "bash",
                "-c",
                'echo "{}{}" > {}{}'.format(
                    filename, some_data, path_to_userfiles_from_defaut_config, filename
                ),
            ],
            privileged=True,
            user="root",
        )

    # (glob pattern, expected row count as returned by the query)
    test_requests = [
        ("directory{1..5}/big_dir/*", "2002"),
        ("directory{0..6}/big_dir/*{0..9}{0..9}{0..9}", "2000"),
        ("?", "0"),
        ("directory{0..5}/dir{1..3}/file", "10"),
        ("directory{0..5}/dir?/file", "10"),
        ("we/need/to/go/deeper/file", "2"),
        ("*/*/*/*/*/*", "2"),
        ("we/need/??/go/deeper/*?*?*?*?*", "2"),
    ]

    # Each pattern is checked twice: relative path first, then absolute path.
    path_prefixes = ("", path_to_userfiles_from_defaut_config)

    for pattern, value in test_requests:
        for prefix in path_prefixes:
            query = (
                "select count(*) from file('{}{}', 'TSV', "
                "'text String, number Float64')"
            ).format(prefix, pattern)
            assert node.query(query) == "{}\n".format(value)
2019-11-13 18:35:35 +00:00
2020-09-16 04:26:10 +00:00
2020-01-15 07:52:45 +00:00
def test_table_function_and_virtual_columns(start_cluster):
    """Check inserts through file() and its ``_path`` / ``_file`` columns.

    Inserts 100000 rows into a pre-created CSV file in a nested directory and
    one row into a file that is not created beforehand, reads both back, and
    then checks the ``_path`` and ``_file`` virtual columns for the second
    file using both a glob and the literal file name.
    """
    node.exec_in_container(
        [
            "bash",
            "-c",
            "mkdir -p {}some/path/to/".format(path_to_userfiles_from_defaut_config),
        ]
    )
    node.exec_in_container(
        [
            "bash",
            "-c",
            "touch {}some/path/to/data.CSV".format(
                path_to_userfiles_from_defaut_config
            ),
        ]
    )

    node.query(
        "insert into table function file('some/path/to/data.CSV', CSV, 'n UInt8, s String') "
        "select number, concat('str_', toString(number)) from numbers(100000)"
    )
    assert (
        node.query(
            "select count() from file('some/path/to/data.CSV', CSV, 'n UInt8, s String')"
        ).rstrip()
        == "100000"
    )

    # This file is not created by the test before the insert.
    node.query(
        "insert into table function file('nonexist.csv', 'CSV', 'val1 UInt32') values (1)"
    )
    assert (
        node.query("select * from file('nonexist.csv', 'CSV', 'val1 UInt32')").rstrip()
        == "1"
    )

    # Virtual columns are checked for a glob and for the exact name.
    name_patterns = ("nonexis?.csv", "nonexist.csv")

    for name_pattern in name_patterns:
        # `_path` is only checked for containing the file name.
        found_path = node.query(
            "select _path from file('{}', 'CSV', 'val1 UInt32')".format(name_pattern)
        ).rstrip()
        assert "nonexist.csv" in found_path

    for name_pattern in name_patterns:
        # `_file` must be exactly the file name.
        found_file = node.query(
            "select _file from file('{}', 'CSV', 'val1 UInt32')".format(name_pattern)
        ).rstrip()
        assert "nonexist.csv" == found_file