From 0875d4632b03ea6d59217c1f17126a72ee878af6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Sep 2022 23:03:49 +0200 Subject: [PATCH 1/7] Fix trash in schema inference --- src/Formats/ReadSchemaUtils.cpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index e6f2f694763..8468f540253 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -65,7 +65,7 @@ ColumnsDescription readSchemaFromFormat( } catch (Exception & e) { - e.addMessage("Cannot extract table structure from {} format file. You can specify the structure manually", format_name); + e.addMessage(fmt::format("Cannot extract table structure from {} format file. You can specify the structure manually", format_name)); throw; } } @@ -86,6 +86,12 @@ ColumnsDescription readSchemaFromFormat( break; is_eof = buf->eof(); } + catch (Exception & e) + { + e.addMessage(fmt::format( + "Cannot extract table structure from {} format file. You can specify the structure manually", format_name)); + throw; + } catch (...) { auto exception_message = getCurrentExceptionMessage(false); @@ -137,7 +143,21 @@ ColumnsDescription readSchemaFromFormat( } if (!retry || !isRetryableSchemaInferenceError(getCurrentExceptionCode())) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}. You can specify the structure manually", format_name, exception_message); + { + try + { + throw; + } + catch (Exception & e) + { + e.addMessage(fmt::format("Cannot extract table structure from {} format file. You can specify the structure manually", format_name)); + throw; + } + catch (...) + { + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}. You can specify the structure manually", format_name, exception_message); + } + } exception_messages += "\n" + exception_message; } From 2149a4f2fc2745b38a2d0e23ebb2d118a712faa7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Sep 2022 00:52:20 +0200 Subject: [PATCH 2/7] Fix test --- .../02268_json_wrong_root_type_in_schema_inference.sql | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql index 9d435820ce2..ab4c6c8216d 100644 --- a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql +++ b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql @@ -1,6 +1,5 @@ insert into function file('02268_data.jsonl', 'TSV') select 1; -select * from file('02268_data.jsonl'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +select * from file('02268_data.jsonl'); --{serverError 117} insert into function file('02268_data.jsonCompactEachRow', 'TSV') select 1; -select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} - +select * from file('02268_data.jsonCompactEachRow'); --{serverError 117} From 795eb8a125b1becfab17bab2c889750a3314158a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Sep 2022 00:53:51 +0200 Subject: [PATCH 3/7] Fix test --- .../02268_json_wrong_root_type_in_schema_inference.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql index ab4c6c8216d..7427426602a 100644 --- a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql +++ b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + insert into function file('02268_data.jsonl', 'TSV') select 1; select * from file('02268_data.jsonl'); --{serverError 117} From 6d1e851dcc31206e1271b6270f13e875049826bc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Sep 2022 05:25:21 +0200 Subject: [PATCH 4/7] Fix tests --- tests/queries/0_stateless/02185_orc_corrupted_file.sh | 3 +-- .../0_stateless/02245_parquet_skip_unknown_type.sh | 3 +-- .../0_stateless/02267_file_globs_schema_inference.sql | 2 +- .../queries/0_stateless/02286_mysql_dump_input_format.sh | 3 +-- tests/queries/0_stateless/02293_formats_json_columns.sh | 4 +--- .../02327_capnproto_protobuf_empty_messages.sh | 8 ++++---- 6 files changed, 9 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02185_orc_corrupted_file.sh b/tests/queries/0_stateless/02185_orc_corrupted_file.sh index 7d7a714cccc..c5f5e8710ca 100755 --- a/tests/queries/0_stateless/02185_orc_corrupted_file.sh +++ b/tests/queries/0_stateless/02185_orc_corrupted_file.sh @@ -8,5 +8,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') cp $CUR_DIR/data_orc/corrupted.orc $USER_FILES_PATH/ -${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' - +${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh index 005c089e434..23c46f4815e 100755 --- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh @@ -12,7 +12,6 @@ DATA_FILE=$USER_FILES_PATH/$FILE_NAME cp $CUR_DIR/data_parquet_bad_column/metadata_0.parquet $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "Cannot extract table structure" && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" $CLICKHOUSE_CLIENT -q "select count(*) from file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" - diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql index 56b99d80286..aaf4d0ce582 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql @@ -7,4 +7,4 @@ select * from file('02267_data*.jsonl') order by x; insert into function file('02267_data1.jsonl', 'TSV') select 1 as x; insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x; -select * from file('02267_data*.jsonl') settings schema_inference_use_cache_for_file=0; --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +select * from file('02267_data*.jsonl') settings schema_inference_use_cache_for_file=0; --{serverError INCORRECT_DATA} diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh index f3253e0bf13..891734e9ad3 100755 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mys $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32)') settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'EMPTY_DATA_PASSED' && echo 'OK' || echo 'FAIL' echo "dump2" @@ -146,4 +146,3 @@ $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_my $CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" rm $USER_FILES_PATH/dump*.sql - diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index 74d9a4f5aab..7a21f8d9bab 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -88,6 +88,4 @@ echo ' } ' > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' - - +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh index 9de01dbe294..69e65112305 100755 --- a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh +++ b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh @@ -15,11 +15,11 @@ mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/02327_* $SCHEMADIR/$SERVER_SCHEMADIR/ -$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="create table test_protobuf engine=File(Protobuf) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="create table test_capnp engine=File(CapnProto) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="create table test_protobuf engine=File(Protobuf) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="create table test_capnp engine=File(CapnProto) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty', input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference=1"; $CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty', input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference=1"; From 1d279ad6b60df111d5c801c3628f73cddef23d0f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Sep 2022 08:47:55 +0200 Subject: [PATCH 5/7] Fix tests --- tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh | 2 +- tests/queries/0_stateless/02267_file_globs_schema_inference.sql | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh index 23c46f4815e..1e416f23b69 100755 --- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql index aaf4d0ce582..6862d6f0602 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql @@ -1,4 +1,5 @@ -- Tags: no-fasttest, no-parallel + insert into function file('02267_data2.jsonl') select NULL as x; insert into function file('02267_data3.jsonl') select * from numbers(0); insert into function file('02267_data4.jsonl') select 1 as x; From 12e950ae95eb9e5f130dca564754614b87eeb8f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Sep 2022 08:49:48 +0200 Subject: [PATCH 6/7] Fix tests --- tests/queries/0_stateless/02267_file_globs_schema_inference.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql index 6862d6f0602..0a8cce9ef50 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql @@ -3,7 +3,7 @@ insert into function file('02267_data2.jsonl') select NULL as x; insert into function file('02267_data3.jsonl') select * from numbers(0); insert into function file('02267_data4.jsonl') select 1 as x; -select * from file('02267_data*.jsonl') order by x; +select * from file('02267_data{2..4}.jsonl') order by x; insert into function file('02267_data1.jsonl', 'TSV') select 1 as x; insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x; From 7e0f61791029fe5f7233d91d7e790f3dec4ff160 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Sep 2022 08:54:48 +0200 Subject: [PATCH 7/7] Fix tests --- tests/queries/0_stateless/02267_file_globs_schema_inference.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql index 0a8cce9ef50..6862d6f0602 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql @@ -3,7 +3,7 @@ insert into function file('02267_data2.jsonl') select NULL as x; insert into function file('02267_data3.jsonl') select * from numbers(0); insert into function file('02267_data4.jsonl') select 1 as x; -select * from file('02267_data{2..4}.jsonl') order by x; +select * from file('02267_data*.jsonl') order by x; insert into function file('02267_data1.jsonl', 'TSV') select 1 as x; insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x;