mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 00:22:29 +00:00
Merge pull request #48525 from ClickHouse/parallel-reading-from-file
Parallel processing right after reading `FROM file()`
This commit is contained in:
commit
e3b5072eb8
@ -10,7 +10,7 @@ namespace DB
|
||||
/** Has arbitrary non zero number of inputs and arbitrary non zero number of outputs.
|
||||
* All of them have the same structure.
|
||||
*
|
||||
* Pulls data from arbitrary input (whenever it is ready) and pushes it to arbitrary output (whenever is is not full).
|
||||
* Pulls data from arbitrary input (whenever it is ready) and pushes it to arbitrary output (whenever it is not full).
|
||||
* Doesn't do any heavy calculations.
|
||||
* Doesn't preserve an order of data.
|
||||
*
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <Processors/Formats/ISchemaReader.h>
|
||||
#include <Processors/Sources/NullSource.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <Processors/ResizeProcessor.h>
|
||||
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -700,7 +701,7 @@ Pipe StorageFile::read(
|
||||
ContextPtr context,
|
||||
QueryProcessingStage::Enum /*processed_stage*/,
|
||||
size_t max_block_size,
|
||||
size_t num_streams)
|
||||
const size_t max_num_streams)
|
||||
{
|
||||
if (use_table_fd)
|
||||
{
|
||||
@ -731,7 +732,8 @@ Pipe StorageFile::read(
|
||||
|
||||
auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());
|
||||
|
||||
if (num_streams > paths.size())
|
||||
size_t num_streams = max_num_streams;
|
||||
if (max_num_streams > paths.size())
|
||||
num_streams = paths.size();
|
||||
|
||||
Pipes pipes;
|
||||
@ -789,7 +791,15 @@ Pipe StorageFile::read(
|
||||
std::move(read_buffer)));
|
||||
}
|
||||
|
||||
return Pipe::unitePipes(std::move(pipes));
|
||||
Pipe pipe = Pipe::unitePipes(std::move(pipes));
|
||||
/// Parallelize output as much as possible
|
||||
/// Note: number of streams can be 0 if paths is empty
|
||||
/// It happens if globs in file(path, ...) expands to empty set i.e. no files to process
|
||||
if (num_streams > 0 && num_streams < max_num_streams)
|
||||
{
|
||||
pipe.resize(max_num_streams);
|
||||
}
|
||||
return pipe;
|
||||
}
|
||||
|
||||
|
||||
|
9
tests/performance/reading_from_file.xml
Normal file
9
tests/performance/reading_from_file.xml
Normal file
@ -0,0 +1,9 @@
|
||||
<test>
|
||||
|
||||
<fill_query>INSERT INTO function file(reading_from_file.parquet) SELECT URL FROM test.hits LIMIT 100000 SETTINGS engine_file_truncate_on_insert=1</fill_query>
|
||||
|
||||
<query>SELECT sum(length(base58Encode(URL))) FROM file(reading_from_file.parquet) FORMAT Null</query>
|
||||
|
||||
<drop_query>INSERT INTO FUNCTION file(reading_from_file.parquet) SELECT * FROM numbers(0) SETTINGS engine_file_truncate_on_insert=1</drop_query>
|
||||
|
||||
</test>
|
@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
|
||||
yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > ${CLICKHOUSE_TMP}/1g.csv
|
||||
|
||||
$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String')"
|
||||
$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=50Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String') settings max_threads=1"
|
||||
|
@ -15,8 +15,8 @@ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query "
|
||||
set min_chunk_bytes_for_parallel_parsing=10485760;
|
||||
set max_read_buffer_size = 65536;
|
||||
set input_format_parallel_parsing = 0;
|
||||
select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null;
|
||||
select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') order by foo limit 30 format Null;
|
||||
set input_format_parallel_parsing = 1;
|
||||
select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null;
|
||||
select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') order by foo limit 30 format Null;
|
||||
"
|
||||
|
||||
|
@ -28,5 +28,5 @@ function cleanup()
|
||||
trap cleanup EXIT
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'";
|
||||
${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')";
|
||||
${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String') order by a";
|
||||
|
||||
|
@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
|
||||
DATA_FILE=$USER_FILES_PATH/test_02103.data
|
||||
|
||||
@ -14,7 +14,7 @@ FORMATS=('TSVWithNames' 'TSVWithNamesAndTypes' 'TSVRawWithNames' 'TSVRawWithName
|
||||
for format in "${FORMATS[@]}"
|
||||
do
|
||||
$CLICKHOUSE_CLIENT -q "SELECT number, range(number + 10) AS array, toString(number) AS string FROM numbers(10) FORMAT $format" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40"
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40"
|
||||
done
|
||||
|
||||
rm $DATA_FILE
|
||||
|
@ -5,13 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str'"
|
||||
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str', engine_file_truncate_on_insert=1"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('uuid_str.msgpack', 'MsgPack', 'uuid UUID')"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin'"
|
||||
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin', engine_file_truncate_on_insert=1"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID')"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext'"
|
||||
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext', engine_file_truncate_on_insert=1"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID')"
|
||||
$CLICKHOUSE_CLIENT -q "select c1, toTypeName(c1) from file('uuid_ext.msgpack') settings input_format_msgpack_number_of_columns=1"
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
-- Tags: no-fasttest
|
||||
insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10);
|
||||
select * from file('data.jsonl');
|
||||
-- Tags: no-fasttest, no-parallel
|
||||
insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10) SETTINGS engine_file_truncate_on_insert=1;
|
||||
select * from file('data.jsonl') order by x;
|
||||
|
@ -12,13 +12,13 @@
|
||||
4 \N
|
||||
5 \N
|
||||
6 7
|
||||
7 6
|
||||
\N 1
|
||||
\N 2
|
||||
\N 3
|
||||
\N 3
|
||||
\N 4
|
||||
\N 5
|
||||
7 6
|
||||
OK
|
||||
1
|
||||
2
|
||||
|
@ -9,140 +9,140 @@ USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonex
|
||||
|
||||
cp $CURDIR/data_mysql_dump/dump*.sql $USER_FILES_PATH
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'a Nullable(Int32), b Nullable(Int32)') settings input_format_mysql_dump_map_column_names = 0"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'y Nullable(Int32), x Nullable(Int32)') settings input_format_mysql_dump_map_column_names = 1"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') settings input_format_skip_unknown_fields = 0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') settings input_format_skip_unknown_fields = 1"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)') order by x, y"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'a Nullable(Int32), b Nullable(Int32)') order by a, b settings input_format_mysql_dump_map_column_names = 0"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'y Nullable(Int32), x Nullable(Int32)') order by y, x settings input_format_mysql_dump_map_column_names = 1"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') order by x, z settings input_format_skip_unknown_fields = 0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') order by x, z settings input_format_skip_unknown_fields = 1"
|
||||
|
||||
echo "dump1"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32)') settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'EMPTY_DATA_PASSED' && echo 'OK' || echo 'FAIL'
|
||||
|
||||
echo "dump2"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump2.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
|
||||
echo "dump3"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump3.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
|
||||
|
||||
echo "dump4"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump4.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
|
||||
|
||||
echo "dump5"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump5.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
|
||||
echo "dump6"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
|
||||
|
||||
echo "dump7"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
|
||||
|
||||
echo "dump8"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump8.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump8.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump8.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump8.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
|
||||
echo "dump9"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump9.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
|
||||
echo "dump10"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump10.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
|
||||
echo "dump11"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump11.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
|
||||
|
||||
echo "dump12"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
|
||||
|
||||
echo "dump13"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump13.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits', max_threads=1"
|
||||
|
||||
echo "dump14"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
|
||||
|
||||
echo "dump15"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1"
|
||||
|
||||
rm $USER_FILES_PATH/dump*.sql
|
||||
|
@ -1,3 +1,3 @@
|
||||
-- Tags: no-fasttest
|
||||
-- Tags: no-fasttest, no-parallel
|
||||
insert into function file(02293_data.arrow) select toLowCardinality(toString(number)) from numbers(300) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1;
|
||||
select * from file(02293_data.arrow);
|
||||
select * from file(02293_data.arrow) settings max_threads=1;
|
||||
|
@ -86,18 +86,18 @@ d Nullable(String)
|
||||
\N \N 3 \N
|
||||
\N \N \N String
|
||||
OK
|
||||
3
|
||||
2
|
||||
1
|
||||
2
|
||||
3
|
||||
c1 Nullable(Int64)
|
||||
c2 Nullable(Int64)
|
||||
c3 Nullable(String)
|
||||
1 1 \N
|
||||
2 2 \N
|
||||
3 3 \N
|
||||
1 \N \N
|
||||
2 \N \N
|
||||
3 \N \N
|
||||
1 2 String
|
||||
1 \N \N
|
||||
2 2 \N
|
||||
2 \N \N
|
||||
3 3 \N
|
||||
3 \N \N
|
||||
OK
|
||||
OK
|
||||
|
@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
|
||||
DATA_FILE=$USER_FILES_PATH/data_02293
|
||||
|
||||
@ -17,13 +17,13 @@ echo "JSONColumns"
|
||||
$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns"
|
||||
$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns) order by a"
|
||||
|
||||
echo "JSONCompactColumns"
|
||||
$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns"
|
||||
$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns) order by c1, c2, c3"
|
||||
|
||||
echo "JSONColumnsWithMetadata"
|
||||
$CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata" --extremes=1 | grep -v "elapsed"
|
||||
@ -49,9 +49,9 @@ echo '
|
||||
' > $DATA_FILE
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=1"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns) order by b, a, c, d"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') order by a, t settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') order by a, t settings input_format_skip_unknown_fields=1"
|
||||
|
||||
echo '
|
||||
[
|
||||
@ -75,8 +75,8 @@ echo '
|
||||
' > $DATA_FILE
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns) order by c1, c2, c3"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32') order by a, t" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL'
|
||||
|
||||
echo '
|
||||
{
|
||||
|
@ -3,10 +3,10 @@
|
||||
insert into function file(data_02314.csv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1;
|
||||
insert into function file(data_02314.csv) select number, number + 1, number + 2 from numbers(5);
|
||||
desc file(data_02314.csv) settings input_format_csv_skip_first_lines=5;
|
||||
select * from file(data_02314.csv) settings input_format_csv_skip_first_lines=5;
|
||||
select * from file(data_02314.csv) order by c1 settings input_format_csv_skip_first_lines=5;
|
||||
|
||||
insert into function file(data_02314.tsv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1;
|
||||
insert into function file(data_02314.tsv) select number, number + 1, number + 2 from numbers(5);
|
||||
desc file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5;
|
||||
select * from file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5;
|
||||
select * from file(data_02314.tsv) order by c1 settings input_format_tsv_skip_first_lines=5;
|
||||
|
||||
|
@ -6,24 +6,24 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
UNIQ_DEST_PATH=$USER_FILES_PATH/test-02383-$RANDOM-$RANDOM
|
||||
mkdir -p $UNIQ_DEST_PATH
|
||||
|
||||
mkdir -p $USER_FILES_PATH/test_02383
|
||||
cp $CURDIR/data_arrow/dictionary*.arrow $USER_FILES_PATH/test_02383/
|
||||
cp $CURDIR/data_arrow/corrupted.arrow $USER_FILES_PATH/test_02383/
|
||||
cp $CURDIR/data_arrow/dict_with_nulls.arrow $USER_FILES_PATH/test_02383/
|
||||
cp $CURDIR/data_arrow/dictionary*.arrow $UNIQ_DEST_PATH/
|
||||
cp $CURDIR/data_arrow/corrupted.arrow $UNIQ_DEST_PATH/
|
||||
cp $CURDIR/data_arrow/dict_with_nulls.arrow $UNIQ_DEST_PATH/
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary1.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary1.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary2.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary2.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary3.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary3.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary1.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary1.arrow') settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary2.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary2.arrow') settings max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary3.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary3.arrow') settings max_threads=1"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file('test_02383/corrupted.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL
|
||||
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/corrupted.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file('test_02383/dict_with_nulls.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dict_with_nulls.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dict_with_nulls.arrow')"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dict_with_nulls.arrow') settings max_threads=1"
|
||||
|
||||
|
||||
rm -rf $USER_FILES_PATH/test_02383
|
||||
rm -rf $UNIQ_DEST_PATH
|
||||
|
@ -1,11 +1,15 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-parallel
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $user_files_path/
|
||||
USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=1000000000"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=100000000"
|
||||
cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=1000000000"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=100000000"
|
||||
|
||||
rm $USER_FILES_PATH/10m_rows.csv.xz
|
||||
|
Loading…
Reference in New Issue
Block a user