ClickHouse/tests/integration/test_globs_in_filepath/test.py

import pytest

from helpers.cluster import ClickHouseCluster

# Cluster object and its single instance, shared by every test in this module.
# The start_cluster fixture below starts it and shuts it down.
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance("node")
# Absolute directory prefix used by the tests when creating files inside the
# container and when building the absolute-path variants of file() queries.
path_to_userfiles_from_defaut_config = (
    "/var/lib/clickhouse/user_files/"  # should be the same as in config file
)


@pytest.fixture(scope="module")
def start_cluster():
    """Start the module-level cluster once per module and always shut it down.

    Yields:
        The module-level ``cluster`` object, after ``cluster.start()`` returned.

    Raises:
        Exception: anything raised inside the ``try`` body is printed and then
            re-raised unchanged.
    """
    try:
        cluster.start()

        yield cluster

    except Exception as ex:
        # Print the failure so it shows up in the test output, then re-raise
        # with a bare ``raise`` so the active exception propagates as-is.
        print(ex)
        raise
    finally:
        # Executed on both the success and the failure path.
        cluster.shutdown()


def test_strange_filenames(start_cluster):
    """Check that globs match file names containing dots and stray braces.

    Three oddly named files are written into ``strange_names/`` under the
    user-files directory, each holding two TSV rows. Every glob below is then
    queried twice, once with a relative path and once prefixed with
    ``path_to_userfiles_from_defaut_config``; both must return the same count.
    """
    base = path_to_userfiles_from_defaut_config
    # The file name is prepended to this, giving two rows per file.
    payload = "\t111.222\nData\t333.444"

    def run_as_root(command):
        # Every shell command in this test runs privileged as root.
        node.exec_in_container(["bash", "-c", command], privileged=True, user="root")

    run_as_root("mkdir " + base + "strange_names/")

    # The file name is embedded in the data to make debugging easier.
    for odd_name in ("p.o.i.n.t.s", "b}{ra{ces", "b}.o{t.h"):
        run_as_root(
            'echo "' + odd_name + payload + '" > ' + base + "strange_names/" + odd_name
        )

    # First placeholder: path prefix ("" for the relative form); second: glob.
    query_template = (
        "\n            select count(*) from file('{}strange_names/{}', 'TSV', 'text String, number Float64')\n        "
    )

    expectations = (
        ("p.o.??n.t.s", "2"),
        ("p.o.*t.s", "2"),
        ("b}{r?{ces", "2"),
        ("b}*ces", "2"),
        ("b}.?{t.h", "2"),
    )

    for glob, row_count in expectations:
        expected_output = row_count + "\n"
        # Relative path first, absolute path second.
        for prefix in ("", base):
            assert node.query(query_template.format(prefix, glob)) == expected_output


def test_linear_structure(start_cluster):
    """Check glob patterns against files placed directly in the user-files dir.

    Twenty files with two TSV rows each are created, so an expected count is
    twice the number of files the pattern is supposed to match. Every pattern
    is queried with a relative path and with the absolute path; both results
    must equal the expected count.
    """
    base = path_to_userfiles_from_defaut_config
    # The file name is prepended to this, giving two rows per file.
    payload = "\t123.456\nData\t789.012"

    # file1..file5, file000..file444, a_file..e_file, a_data..e_data
    letters = "abcde"
    created = ["file" + str(digit) for digit in range(1, 6)]
    created += ["file" + str(digit) * 3 for digit in range(5)]
    created += [letter + "_file" for letter in letters]
    created += [letter + "_data" for letter in letters]

    # The file name is embedded in the data to make debugging easier.
    for name in created:
        write_command = 'echo "' + name + payload + '" > ' + base + name
        node.exec_in_container(
            ["bash", "-c", write_command], privileged=True, user="root"
        )

    # First placeholder: path prefix ("" for the relative form); second: glob.
    query_template = (
        "\n            select count(*) from file('{}{}', 'TSV', 'text String, number Float64')\n        "
    )

    expectations = (
        ("file{0..9}", "10"),
        ("file?", "10"),
        ("nothing*", "0"),
        ("file{0..9}{0..9}{0..9}", "10"),
        ("file{000..999}", "10"),
        ("file???", "10"),
        ("file*", "20"),
        ("a_{file,data}", "4"),
        ("?_{file,data}", "20"),
        ("{a,b,c,d,e}_{file,data}", "20"),
        ("{a,b,c,d,e}?{file,data}", "20"),
        ("*", "40"),
    )

    for glob, row_count in expectations:
        expected_output = row_count + "\n"
        # Relative path first, absolute path second.
        for prefix in ("", base):
            assert node.query(query_template.format(prefix, glob)) == expected_output


def test_deep_structure(start_cluster):
    """Check glob patterns that span several directory levels.

    A directory tree is created under the user-files directory. It is filled
    with 1000 numbered files in ``directory1/big_dir/`` plus one file named
    ``file`` in every directory, each holding two TSV rows. Every pattern is
    queried with a relative path and with the absolute path; both results
    must equal the expected count.
    """
    base = path_to_userfiles_from_defaut_config
    # The file name is prepended to this, giving two rows per file.
    payload = "\t135.791\nData\t246.802"

    # Parents are listed before their children because plain mkdir is used.
    directories = (
        "directory1/",
        "directory2/",
        "some_more_dir/",
        "we/",
        "directory1/big_dir/",
        "directory1/dir1/",
        "directory1/dir2/",
        "directory1/dir3/",
        "directory2/dir1/",
        "directory2/dir2/",
        "directory2/one_more_dir/",
        "some_more_dir/yet_another_dir/",
        "we/need/",
        "we/need/to/",
        "we/need/to/go/",
        "we/need/to/go/deeper/",
    )

    def run_as_root(command):
        # Every shell command in this test runs privileged as root.
        node.exec_in_container(["bash", "-c", command], privileged=True, user="root")

    for directory in directories:
        run_as_root("mkdir " + base + directory)

    # Every directory that appears in a file path must be listed above.
    # file000 .. file999, followed by one "file" per directory.
    created = [
        "directory1/big_dir/file{:03d}".format(number) for number in range(1000)
    ]
    created.extend(directory + "file" for directory in directories)

    # The file name is embedded in the data to make debugging easier.
    for name in created:
        run_as_root('echo "' + name + payload + '" > ' + base + name)

    # First placeholder: path prefix ("" for the relative form); second: glob.
    query_template = (
        "\n            select count(*) from file('{}{}', 'TSV', 'text String, number Float64')\n        "
    )

    expectations = (
        ("directory{1..5}/big_dir/*", "2002"),
        ("directory{0..6}/big_dir/*{0..9}{0..9}{0..9}", "2000"),
        ("?", "0"),
        ("directory{0..5}/dir{1..3}/file", "10"),
        ("directory{0..5}/dir?/file", "10"),
        ("we/need/to/go/deeper/file", "2"),
        ("*/*/*/*/*/*", "2"),
        ("we/need/??/go/deeper/*?*?*?*?*", "2"),
    )

    for glob, row_count in expectations:
        expected_output = row_count + "\n"
        # Relative path first, absolute path second.
        for prefix in ("", base):
            assert node.query(query_template.format(prefix, glob)) == expected_output


def test_table_function_and_virtual_columns(start_cluster):
    """Insert through the file() table function and read _path / _file back.

    The test first inserts 100000 rows into an existing, empty CSV file in a
    nested directory and checks the count. It then inserts one row into
    ``nonexist.csv`` and checks the stored value. Finally it checks the
    ``_path`` and ``_file`` virtual columns, addressing the file both by a
    glob and by its exact name.
    """
    nested_dir = path_to_userfiles_from_defaut_config + "some/path/to/"
    # Create the directory, then an empty target file inside it.
    for shell_command in ("mkdir -p " + nested_dir, "touch " + nested_dir + "data.CSV"):
        node.exec_in_container(["bash", "-c", shell_command])

    nested_source = "file('some/path/to/data.CSV', CSV, 'n UInt8, s String')"
    node.query(
        "insert into table function "
        + nested_source
        + " select number, concat('str_', toString(number)) from numbers(100000)"
    )
    inserted_rows = node.query("select count() from " + nested_source).rstrip()
    assert inserted_rows == "100000"

    node.query(
        "insert into table function file('nonexist.csv', 'CSV', 'val1 UInt32') values (1)"
    )
    stored_value = node.query(
        "select * from file('nonexist.csv', 'CSV', 'val1 UInt32')"
    ).rstrip()
    assert stored_value == "1"

    # Glob form first, exact name second, for each virtual column.
    sources = ("nonexis?.csv", "nonexist.csv")

    for source in sources:
        reported_path = node.query(
            "select _path from file('{}', 'CSV', 'val1 UInt32')".format(source)
        ).rstrip()
        assert "nonexist.csv" in reported_path

    for source in sources:
        reported_name = node.query(
            "select _file from file('{}', 'CSV', 'val1 UInt32')".format(source)
        ).rstrip()
        assert "nonexist.csv" == reported_name
fix and add test for storage file version 2019-08-09 17:06:29 +00:00			`import pytest`

			`from helpers.cluster import ClickHouseCluster`

			`cluster = ClickHouseCluster(__file__)`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node = cluster.add_instance("node")`
			`path_to_userfiles_from_defaut_config = (`
			`"/var/lib/clickhouse/user_files/" # should be the same as in config file`
			`)`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00
fix and add test for storage file version 2019-08-09 17:06:29 +00:00
			`@pytest.fixture(scope="module")`
			`def start_cluster():`
			`try:`
			`cluster.start()`
Fix segfault in table function file while inserting into it (#8177) * Fix segfault in table function file while inserting into it 2019-12-17 08:06:39 +00:00
fix and add test for storage file version 2019-08-09 17:06:29 +00:00			`yield cluster`
Fix segfault in table function file while inserting into it (#8177) * Fix segfault in table function file while inserting into it 2019-12-17 08:06:39 +00:00
			`except Exception as ex:`
			`print(ex)`
			`raise ex`
fix and add test for storage file version 2019-08-09 17:06:29 +00:00			`finally:`
			`cluster.shutdown()`

Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`def test_strange_filenames(start_cluster):`
			`# 2 rows data`
			`some_data = "\t111.222\nData\t333.444"`

Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node.exec_in_container(`
			`[`
			`"bash",`
			`"-c",`
			`"mkdir {}strange_names/".format(path_to_userfiles_from_defaut_config),`
			`],`
			`privileged=True,`
			`user="root",`
			`)`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`files = ["p.o.i.n.t.s", "b}{ra{ces", "b}.o{t.h"]`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
			`# filename inside testing data for debug simplicity`
			`for filename in files:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node.exec_in_container(`
			`[`
			`"bash",`
			`"-c",`
			`'echo "{}{}" > {}strange_names/{}'.format(`
			`filename, some_data, path_to_userfiles_from_defaut_config, filename`
			`),`
			`],`
			`privileged=True,`
			`user="root",`
			`)`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`test_requests = [`
			`("p.o.??n.t.s", "2"),`
			`("p.o.*t.s", "2"),`
			`("b}{r?{ces", "2"),`
			`("b}*ces", "2"),`
			`("b}.?{t.h", "2"),`
			`]`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
			`for pattern, value in test_requests:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`assert (`
			`node.query(`
			`"""`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`select count(*) from file('strange_names/{}', 'TSV', 'text String, number Float64')`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`""".format(`
			`pattern`
			`)`
			`)`
			`== "{}\n".format(value)`
			`)`
			`assert (`
			`node.query(`
			`"""`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`select count(*) from file('{}strange_names/{}', 'TSV', 'text String, number Float64')`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`""".format(`
			`path_to_userfiles_from_defaut_config, pattern`
			`)`
			`)`
			`== "{}\n".format(value)`
			`)`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`def test_linear_structure(start_cluster):`
			`# 2 rows data`
			`some_data = "\t123.456\nData\t789.012"`

Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`files = [`
			`"file1",`
			`"file2",`
			`"file3",`
			`"file4",`
			`"file5",`
			`"file000",`
			`"file111",`
			`"file222",`
			`"file333",`
			`"file444",`
			`"a_file",`
			`"b_file",`
			`"c_file",`
			`"d_file",`
			`"e_file",`
			`"a_data",`
			`"b_data",`
			`"c_data",`
			`"d_data",`
			`"e_data",`
			`]`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
			`# filename inside testing data for debug simplicity`
			`for filename in files:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node.exec_in_container(`
			`[`
			`"bash",`
			`"-c",`
			`'echo "{}{}" > {}{}'.format(`
			`filename, some_data, path_to_userfiles_from_defaut_config, filename`
			`),`
			`],`
			`privileged=True,`
			`user="root",`
			`)`

			`test_requests = [`
			`("file{0..9}", "10"),`
			`("file?", "10"),`
			`("nothing*", "0"),`
			`("file{0..9}{0..9}{0..9}", "10"),`
			`("file{000..999}", "10"),`
			`("file???", "10"),`
			`("file*", "20"),`
			`("a_{file,data}", "4"),`
			`("?_{file,data}", "20"),`
			`("{a,b,c,d,e}_{file,data}", "20"),`
			`("{a,b,c,d,e}?{file,data}", "20"),`
			`("*", "40"),`
			`]`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
			`for pattern, value in test_requests:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`assert (`
			`node.query(`
			`"""`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`select count(*) from file('{}', 'TSV', 'text String, number Float64')`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`""".format(`
			`pattern`
			`)`
			`)`
			`== "{}\n".format(value)`
			`)`
			`assert (`
			`node.query(`
			`"""`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`select count(*) from file('{}{}', 'TSV', 'text String, number Float64')`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`""".format(`
			`path_to_userfiles_from_defaut_config, pattern`
			`)`
			`)`
			`== "{}\n".format(value)`
			`)`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`def test_deep_structure(start_cluster):`
			`# 2 rows data`
			`some_data = "\t135.791\nData\t246.802"`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`dirs = [`
			`"directory1/",`
			`"directory2/",`
			`"some_more_dir/",`
			`"we/",`
			`"directory1/big_dir/",`
			`"directory1/dir1/",`
			`"directory1/dir2/",`
			`"directory1/dir3/",`
			`"directory2/dir1/",`
			`"directory2/dir2/",`
			`"directory2/one_more_dir/",`
			`"some_more_dir/yet_another_dir/",`
			`"we/need/",`
			`"we/need/to/",`
			`"we/need/to/go/",`
			`"we/need/to/go/deeper/",`
			`]`
fix and add test for storage file version 2019-08-09 17:06:29 +00:00
			`for dir in dirs:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node.exec_in_container(`
			`[`
			`"bash",`
			`"-c",`
			`"mkdir {}{}".format(path_to_userfiles_from_defaut_config, dir),`
			`],`
			`privileged=True,`
			`user="root",`
			`)`
fix and add test for storage file version 2019-08-09 17:06:29 +00:00
			`# all directories appeared in files must be listed in dirs`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`files = []`
			`for i in range(10):`
			`for j in range(10):`
			`for k in range(10):`
Fix segfault in table function file while inserting into it (#8177) * Fix segfault in table function file while inserting into it 2019-12-17 08:06:39 +00:00			`files.append("directory1/big_dir/file" + str(i) + str(j) + str(k))`
fix and add test for storage file version 2019-08-09 17:06:29 +00:00
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`for dir in dirs:`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00			`files.append(dir + "file")`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00
			`# filename inside testing data for debug simplicity`
fix and add test for storage file version 2019-08-09 17:06:29 +00:00			`for filename in files:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node.exec_in_container(`
			`[`
			`"bash",`
			`"-c",`
			`'echo "{}{}" > {}{}'.format(`
			`filename, some_data, path_to_userfiles_from_defaut_config, filename`
			`),`
			`],`
			`privileged=True,`
			`user="root",`
			`)`
fix and add test for storage file version 2019-08-09 17:06:29 +00:00
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`test_requests = [`
			`("directory{1..5}/big_dir/*", "2002"),`
			`("directory{0..6}/big_dir/*{0..9}{0..9}{0..9}", "2000"),`
			`("?", "0"),`
			`("directory{0..5}/dir{1..3}/file", "10"),`
			`("directory{0..5}/dir?/file", "10"),`
			`("we/need/to/go/deeper/file", "2"),`
			`("/////", "2"),`
			`("we/need/??/go/deeper/????*", "2"),`
			`]`
fix and add test for storage file version 2019-08-09 17:06:29 +00:00
			`for pattern, value in test_requests:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`assert (`
			`node.query(`
			`"""`
fix and add test for storage file version 2019-08-09 17:06:29 +00:00			`select count(*) from file('{}', 'TSV', 'text String, number Float64')`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`""".format(`
			`pattern`
			`)`
			`)`
			`== "{}\n".format(value)`
			`)`
			`assert (`
			`node.query(`
			`"""`
Add tests, comments and fix 2019-08-10 16:00:01 +00:00			`select count(*) from file('{}{}', 'TSV', 'text String, number Float64')`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`""".format(`
			`path_to_userfiles_from_defaut_config, pattern`
			`)`
			`)`
			`== "{}\n".format(value)`
			`)`
more tests for table functions 2019-11-13 18:35:35 +00:00
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00
Add virtual columns to hdfs and file table functions (#8489) * Add virtual column _path to hdfs and file table functions with test * Fix const of headers * Add column _file with tests * Add docs * Fix improper resolve conflicts * Fix links in docs * Better condition for virtual columns proccessing in StorageFile * better condition for virtual columns processing in StorageHDFS 2020-01-15 07:52:45 +00:00			`def test_table_function_and_virtual_columns(start_cluster):`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node.exec_in_container(`
			`[`
			`"bash",`
			`"-c",`
			`"mkdir -p {}some/path/to/".format(path_to_userfiles_from_defaut_config),`
			`]`
			`)`
			`node.exec_in_container(`
			`[`
			`"bash",`
			`"-c",`
			`"touch {}some/path/to/data.CSV".format(`
			`path_to_userfiles_from_defaut_config`
			`),`
			`]`
			`)`
			`node.query(`
			`"insert into table function file('some/path/to/data.CSV', CSV, 'n UInt8, s String') select number, concat('str_', toString(number)) from numbers(100000)"`
			`)`
			`assert (`
			`node.query(`
			`"select count() from file('some/path/to/data.CSV', CSV, 'n UInt8, s String')"`
			`).rstrip()`
			`== "100000"`
			`)`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00			`node.query(`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`"insert into table function file('nonexist.csv', 'CSV', 'val1 UInt32') values (1)"`
			`)`
			`assert (`
			`node.query("select * from file('nonexist.csv', 'CSV', 'val1 UInt32')").rstrip()`
			`== "1"`
			`)`
			`assert (`
			`"nonexist.csv"`
			`in node.query(`
			`"select _path from file('nonexis?.csv', 'CSV', 'val1 UInt32')"`
			`).rstrip()`
			`)`
			`assert (`
			`"nonexist.csv"`
			`in node.query(`
			`"select _path from file('nonexist.csv', 'CSV', 'val1 UInt32')"`
			`).rstrip()`
			`)`
			`assert (`
			`"nonexist.csv"`
			`== node.query(`
			`"select _file from file('nonexis?.csv', 'CSV', 'val1 UInt32')"`
			`).rstrip()`
			`)`
			`assert (`
			`"nonexist.csv"`
			`== node.query(`
			`"select _file from file('nonexist.csv', 'CSV', 'val1 UInt32')"`
			`).rstrip()`
			`)`