Merge pull request #48525 from ClickHouse/parallel-reading-from-file

Parallel processing right after reading `FROM file()`
This commit is contained in:
Igor Nikonov 2023-04-10 18:30:05 +02:00 committed by GitHub
commit e3b5072eb8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 125 additions and 102 deletions

View File

@ -10,7 +10,7 @@ namespace DB
/** Has arbitrary non zero number of inputs and arbitrary non zero number of outputs.
* All of them have the same structure.
*
* Pulls data from arbitrary input (whenever it is ready) and pushes it to arbitrary output (whenever is is not full).
* Pulls data from arbitrary input (whenever it is ready) and pushes it to arbitrary output (whenever it is not full).
* Doesn't do any heavy calculations.
* Doesn't preserve an order of data.
*

View File

@ -34,6 +34,7 @@
#include <Processors/Formats/ISchemaReader.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/ResizeProcessor.h>
#include <Common/escapeForFileName.h>
#include <Common/typeid_cast.h>
@ -700,7 +701,7 @@ Pipe StorageFile::read(
ContextPtr context,
QueryProcessingStage::Enum /*processed_stage*/,
size_t max_block_size,
size_t num_streams)
const size_t max_num_streams)
{
if (use_table_fd)
{
@ -731,7 +732,8 @@ Pipe StorageFile::read(
auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());
if (num_streams > paths.size())
size_t num_streams = max_num_streams;
if (max_num_streams > paths.size())
num_streams = paths.size();
Pipes pipes;
@ -789,7 +791,15 @@ Pipe StorageFile::read(
std::move(read_buffer)));
}
return Pipe::unitePipes(std::move(pipes));
Pipe pipe = Pipe::unitePipes(std::move(pipes));
/// Parallelize output as much as possible
/// Note: the number of streams can be 0 if paths is empty.
/// This happens when the globs in file(path, ...) expand to an empty set, i.e. there are no files to process.
if (num_streams > 0 && num_streams < max_num_streams)
{
pipe.resize(max_num_streams);
}
return pipe;
}

View File

@ -0,0 +1,9 @@
<test>
<!-- Test definition (appears to be a performance test) that reads a Parquet file back through the file() table function and runs a per-row computation on it. -->
<!-- Setup: write 100000 URL values from test.hits into reading_from_file.parquet. engine_file_truncate_on_insert=1 is set so the insert replaces existing file content rather than appending (per the setting's name; confirm against the settings reference). -->
<fill_query>INSERT INTO function file(reading_from_file.parquet) SELECT URL FROM test.hits LIMIT 100000 SETTINGS engine_file_truncate_on_insert=1</fill_query>
<!-- Query under test: applies base58Encode and length to every URL read from the file and sums the lengths; FORMAT Null means no result rows are printed. -->
<query>SELECT sum(length(base58Encode(URL))) FROM file(reading_from_file.parquet) FORMAT Null</query>
<!-- Cleanup: inserts zero rows (numbers(0)) with truncate-on-insert set, presumably to discard the data written by fill_query. -->
<drop_query>INSERT INTO FUNCTION file(reading_from_file.parquet) SELECT * FROM numbers(0) SETTINGS engine_file_truncate_on_insert=1</drop_query>
</test>

View File

@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > ${CLICKHOUSE_TMP}/1g.csv
$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String')"
$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=50Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String') settings max_threads=1"

View File

@ -15,8 +15,8 @@ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query "
set min_chunk_bytes_for_parallel_parsing=10485760;
set max_read_buffer_size = 65536;
set input_format_parallel_parsing = 0;
select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null;
select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') order by foo limit 30 format Null;
set input_format_parallel_parsing = 1;
select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null;
select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') order by foo limit 30 format Null;
"

View File

@ -28,5 +28,5 @@ function cleanup()
trap cleanup EXIT
${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'";
${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')";
${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String') order by a";

View File

@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
DATA_FILE=$USER_FILES_PATH/test_02103.data
@ -14,7 +14,7 @@ FORMATS=('TSVWithNames' 'TSVWithNamesAndTypes' 'TSVRawWithNames' 'TSVRawWithName
for format in "${FORMATS[@]}"
do
$CLICKHOUSE_CLIENT -q "SELECT number, range(number + 10) AS array, toString(number) AS string FROM numbers(10) FORMAT $format" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40"
done
rm $DATA_FILE

View File

@ -5,13 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str'"
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str', engine_file_truncate_on_insert=1"
$CLICKHOUSE_CLIENT -q "select * from file('uuid_str.msgpack', 'MsgPack', 'uuid UUID')"
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin'"
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin', engine_file_truncate_on_insert=1"
$CLICKHOUSE_CLIENT -q "select * from file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID')"
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext'"
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext', engine_file_truncate_on_insert=1"
$CLICKHOUSE_CLIENT -q "select * from file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID')"
$CLICKHOUSE_CLIENT -q "select c1, toTypeName(c1) from file('uuid_ext.msgpack') settings input_format_msgpack_number_of_columns=1"

View File

@ -1,3 +1,3 @@
-- Tags: no-fasttest
insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10);
select * from file('data.jsonl');
-- Tags: no-fasttest, no-parallel
insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10) SETTINGS engine_file_truncate_on_insert=1;
select * from file('data.jsonl') order by x;

View File

@ -12,13 +12,13 @@
4 \N
5 \N
6 7
7 6
\N 1
\N 2
\N 3
\N 3
\N 4
\N 5
7 6
OK
1
2

View File

@ -9,140 +9,140 @@ USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonex
cp $CURDIR/data_mysql_dump/dump*.sql $USER_FILES_PATH
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)')"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'a Nullable(Int32), b Nullable(Int32)') settings input_format_mysql_dump_map_column_names = 0"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'y Nullable(Int32), x Nullable(Int32)') settings input_format_mysql_dump_map_column_names = 1"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') settings input_format_skip_unknown_fields = 0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') settings input_format_skip_unknown_fields = 1"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)') order by x, y"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'a Nullable(Int32), b Nullable(Int32)') order by a, b settings input_format_mysql_dump_map_column_names = 0"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'y Nullable(Int32), x Nullable(Int32)') order by y, x settings input_format_mysql_dump_map_column_names = 1"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') order by x, z settings input_format_skip_unknown_fields = 0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') order by x, z settings input_format_skip_unknown_fields = 1"
echo "dump1"
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32)') settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'EMPTY_DATA_PASSED' && echo 'OK' || echo 'FAIL'
echo "dump2"
$CLICKHOUSE_CLIENT -q "desc file(dump2.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
echo "dump3"
$CLICKHOUSE_CLIENT -q "desc file(dump3.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
echo "dump4"
$CLICKHOUSE_CLIENT -q "desc file(dump4.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
echo "dump5"
$CLICKHOUSE_CLIENT -q "desc file(dump5.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
echo "dump6"
$CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
echo "dump7"
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
echo "dump8"
$CLICKHOUSE_CLIENT -q "desc file(dump8.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump8.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump8.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump8.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
echo "dump9"
$CLICKHOUSE_CLIENT -q "desc file(dump9.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
echo "dump10"
$CLICKHOUSE_CLIENT -q "desc file(dump10.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
echo "dump11"
$CLICKHOUSE_CLIENT -q "desc file(dump11.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
echo "dump12"
$CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
echo "dump13"
$CLICKHOUSE_CLIENT -q "desc file(dump13.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits'"
$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits'"
$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits', max_threads=1"
echo "dump14"
$CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
echo "dump15"
$CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump)"
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
rm $USER_FILES_PATH/dump*.sql

View File

@ -1,3 +1,3 @@
-- Tags: no-fasttest
-- Tags: no-fasttest, no-parallel
insert into function file(02293_data.arrow) select toLowCardinality(toString(number)) from numbers(300) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1;
select * from file(02293_data.arrow);
select * from file(02293_data.arrow) settings max_threads=1;

View File

@ -86,18 +86,18 @@ d Nullable(String)
\N \N 3 \N
\N \N \N String
OK
3
2
1
2
3
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(String)
1 1 \N
2 2 \N
3 3 \N
1 \N \N
2 \N \N
3 \N \N
1 2 String
1 \N \N
2 2 \N
2 \N \N
3 3 \N
3 \N \N
OK
OK

View File

@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
DATA_FILE=$USER_FILES_PATH/data_02293
@ -17,13 +17,13 @@ echo "JSONColumns"
$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns"
$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns) order by a"
echo "JSONCompactColumns"
$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns"
$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns) order by c1, c2, c3"
echo "JSONColumnsWithMetadata"
$CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata" --extremes=1 | grep -v "elapsed"
@ -49,9 +49,9 @@ echo '
' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=1"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns) order by b, a, c, d"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') order by a, t settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') order by a, t settings input_format_skip_unknown_fields=1"
echo '
[
@ -75,8 +75,8 @@ echo '
' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns) order by c1, c2, c3"
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32') order by a, t" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
echo '
{

View File

@ -3,10 +3,10 @@
insert into function file(data_02314.csv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1;
insert into function file(data_02314.csv) select number, number + 1, number + 2 from numbers(5);
desc file(data_02314.csv) settings input_format_csv_skip_first_lines=5;
select * from file(data_02314.csv) settings input_format_csv_skip_first_lines=5;
select * from file(data_02314.csv) order by c1 settings input_format_csv_skip_first_lines=5;
insert into function file(data_02314.tsv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1;
insert into function file(data_02314.tsv) select number, number + 1, number + 2 from numbers(5);
desc file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5;
select * from file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5;
select * from file(data_02314.tsv) order by c1 settings input_format_tsv_skip_first_lines=5;

View File

@ -6,24 +6,24 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
UNIQ_DEST_PATH=$USER_FILES_PATH/test-02383-$RANDOM-$RANDOM
mkdir -p $UNIQ_DEST_PATH
mkdir -p $USER_FILES_PATH/test_02383
cp $CURDIR/data_arrow/dictionary*.arrow $USER_FILES_PATH/test_02383/
cp $CURDIR/data_arrow/corrupted.arrow $USER_FILES_PATH/test_02383/
cp $CURDIR/data_arrow/dict_with_nulls.arrow $USER_FILES_PATH/test_02383/
cp $CURDIR/data_arrow/dictionary*.arrow $UNIQ_DEST_PATH/
cp $CURDIR/data_arrow/corrupted.arrow $UNIQ_DEST_PATH/
cp $CURDIR/data_arrow/dict_with_nulls.arrow $UNIQ_DEST_PATH/
$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary1.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary1.arrow')"
$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary2.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary2.arrow')"
$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary3.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary3.arrow')"
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary1.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary1.arrow') settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary2.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary2.arrow') settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary3.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary3.arrow') settings max_threads=1"
$CLICKHOUSE_CLIENT -q "desc file('test_02383/corrupted.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/corrupted.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL
$CLICKHOUSE_CLIENT -q "desc file('test_02383/dict_with_nulls.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dict_with_nulls.arrow')"
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dict_with_nulls.arrow')"
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dict_with_nulls.arrow') settings max_threads=1"
rm -rf $USER_FILES_PATH/test_02383
rm -rf $UNIQ_DEST_PATH

View File

@ -1,11 +1,15 @@
#!/usr/bin/env bash
# Tags: no-parallel
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $user_files_path/
USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=1000000000"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=100000000"
cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=1000000000"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=100000000"
rm $USER_FILES_PATH/10m_rows.csv.xz