mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Fixes
This commit is contained in:
parent
5e4796ae16
commit
93fbe1d9c8
@ -673,7 +673,7 @@ std::optional<String> FormatFactory::tryGetFormatFromFileDescriptor(int fd)
|
||||
#elif defined(OS_DARWIN)
|
||||
char file_path[PATH_MAX] = {'\0'};
|
||||
if (fcntl(fd, F_GETPATH, file_path) != -1)
|
||||
return tryGetFormatFromFileName(file_path, false);
|
||||
return tryGetFormatFromFileName(file_path);
|
||||
return std::nullopt;
|
||||
#else
|
||||
(void)fd;
|
||||
|
@ -127,7 +127,6 @@ try
|
||||
IReadBufferIterator::Data iterator_data;
|
||||
std::vector<std::pair<NamesAndTypesList, String>> schemas_for_union_mode;
|
||||
std::string exception_messages;
|
||||
SchemaReaderPtr schema_reader;
|
||||
size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference
|
||||
: context->getSettingsRef().input_format_max_rows_to_read_for_schema_inference;
|
||||
size_t max_bytes_to_read = format_settings ? format_settings->max_bytes_to_read_for_schema_inference
|
||||
@ -227,6 +226,8 @@ try
|
||||
continue;
|
||||
}
|
||||
|
||||
SchemaReaderPtr schema_reader;
|
||||
|
||||
if (format_name)
|
||||
{
|
||||
try
|
||||
@ -417,12 +418,11 @@ try
|
||||
if (!format_name)
|
||||
throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files. You can specify the format manually");
|
||||
|
||||
/// If we got all schemas from cache, schema_reader can be uninitialized.
|
||||
/// But we still need some stateless methods of ISchemaReader,
|
||||
/// let's initialize it with empty buffer.
|
||||
/// We need some stateless methods of ISchemaReader, but during reading schema we
|
||||
/// could not even create a schema reader (for example when we got schema from cache).
|
||||
/// Let's create stateless schema reader from empty read buffer.
|
||||
EmptyReadBuffer empty;
|
||||
if (!schema_reader)
|
||||
schema_reader = FormatFactory::instance().getSchemaReader(*format_name, empty, context, format_settings);
|
||||
SchemaReaderPtr stateless_schema_reader = FormatFactory::instance().getSchemaReader(*format_name, empty, context, format_settings);
|
||||
|
||||
if (mode == SchemaInferenceMode::UNION)
|
||||
{
|
||||
@ -449,7 +449,7 @@ try
|
||||
/// If types are not the same, try to transform them according
|
||||
/// to the format to find common type.
|
||||
auto new_type_copy = type;
|
||||
schema_reader->transformTypesFromDifferentFilesIfNeeded(it->second, new_type_copy);
|
||||
stateless_schema_reader->transformTypesFromDifferentFilesIfNeeded(it->second, new_type_copy);
|
||||
|
||||
/// If types are not the same after transform, we cannot do anything, throw an exception.
|
||||
if (!it->second->equals(*new_type_copy))
|
||||
@ -495,7 +495,7 @@ try
|
||||
/// It will allow to execute simple data loading with query
|
||||
/// "INSERT INTO table SELECT * FROM ..."
|
||||
const auto & insertion_table = context->getInsertionTable();
|
||||
if (schema_reader && !schema_reader->hasStrictOrderOfColumns() && !insertion_table.empty())
|
||||
if (!stateless_schema_reader->hasStrictOrderOfColumns() && !insertion_table.empty())
|
||||
{
|
||||
auto storage = DatabaseCatalog::instance().getTable(insertion_table, context);
|
||||
auto metadata = storage->getInMemoryMetadataPtr();
|
||||
|
@ -8,4 +8,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
cp $CUR_DIR/data_orc/corrupted.orc $USER_FILES_PATH/
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'
|
||||
${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL'
|
||||
|
@ -12,6 +12,6 @@ DATA_FILE=$USER_FILES_PATH/$FILE_NAME
|
||||
cp $CUR_DIR/data_parquet_bad_column/metadata_0.parquet $DATA_FILE
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "Cannot extract table structure" && echo "OK" || echo "FAIL"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1"
|
||||
$CLICKHOUSE_CLIENT -q "select count(*) from file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1"
|
||||
|
@ -1,7 +1,7 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
insert into function file('02268_data.jsonl', 'TSV') select 1;
|
||||
select * from file('02268_data.jsonl'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
|
||||
select * from file('02268_data.jsonl'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
|
||||
insert into function file('02268_data.jsonCompactEachRow', 'TSV') select 1;
|
||||
select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
|
||||
select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
|
@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mys
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1"
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32)') settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'EMPTY_DATA_PASSED' && echo 'OK' || echo 'FAIL'
|
||||
|
||||
echo "dump2"
|
||||
|
@ -88,4 +88,4 @@ echo '
|
||||
}
|
||||
' > $DATA_FILE
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3, input_format_json_infer_incomplete_types_as_strings=0" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'
|
||||
$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3, input_format_json_infer_incomplete_types_as_strings=0" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL'
|
||||
|
@ -15,11 +15,11 @@ mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR
|
||||
cp -r $CLIENT_SCHEMADIR/02327_* $SCHEMADIR/$SERVER_SCHEMADIR/
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL';
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="create table test_protobuf engine=File(Protobuf) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT --query="create table test_capnp engine=File(CapnProto) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT --query="create table test_protobuf engine=File(Protobuf) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT --query="create table test_capnp engine=File(CapnProto) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL';
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty', input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference=1";
|
||||
$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty', input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference=1";
|
||||
|
@ -2,5 +2,5 @@
|
||||
set allow_experimental_object_type=1;
|
||||
desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}');
|
||||
set allow_experimental_object_type=0, input_format_json_read_objects_as_strings=0, input_format_json_try_infer_named_tuples_from_objects=0, input_format_json_read_numbers_as_strings=0;
|
||||
desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError 652}
|
||||
desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError INCORRECT_DATA}
|
||||
desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError INCORRECT_DATA}
|
||||
desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError INCORRECT_DATA}
|
||||
desc format(CSV, '1,2,3') settings column_names_for_schema_inference='a, b, a'; -- {serverError INCORRECT_DATA}
|
||||
desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
desc format(CSV, '1,2,3') settings column_names_for_schema_inference='a, b, a'; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
|
||||
|
@ -10,14 +10,14 @@ set input_format_json_infer_incomplete_types_as_strings=0;
|
||||
insert into test select * from file(02458_data.jsonl);
|
||||
insert into test select x, 1 from file(02458_data.jsonl);
|
||||
insert into test select x, y from file(02458_data.jsonl);
|
||||
insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA}
|
||||
insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
insert into test select x, z from file(02458_data.jsonl);
|
||||
|
||||
insert into test select * from file(02458_data.jsoncompacteachrow);
|
||||
insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA}
|
||||
insert into test select x, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA}
|
||||
insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA}
|
||||
insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA}
|
||||
insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
insert into test select x, y from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
|
||||
insert into test select * from input() format CSV 1,2
|
||||
insert into test select x, y from input() format CSV 1,2 -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
|
||||
|
@ -46,7 +46,7 @@ DROP DATABASE IF EXISTS test3;
|
||||
CREATE DATABASE test3 ENGINE = S3;
|
||||
USE test3;
|
||||
SELECT * FROM \"http://localhost:11111/test/a.myext\"
|
||||
""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||:
|
||||
""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "S3_ERROR" > /dev/null && echo "OK" || echo 'FAIL' ||:
|
||||
|
||||
${CLICKHOUSE_CLIENT} --multiline --multiquery -q """
|
||||
USE test3;
|
||||
|
@ -58,7 +58,7 @@ SELECT * FROM \"abacaba/file.tsv\"
|
||||
""" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||:
|
||||
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||:
|
||||
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "HDFS_ERROR" > /dev/null && echo "OK" || echo 'FAIL' ||:
|
||||
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||:
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||:
|
||||
|
@ -39,13 +39,13 @@ desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data{1,2,3}.jsonl');
|
||||
"
|
||||
|
||||
echo 'Error' > $CLICKHOUSE_TEST_UNIQUE_NAME/data4.jsonl
|
||||
$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "Cannot extract table structure"
|
||||
$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "CANNOT_EXTRACT_TABLE_STRUCTURE"
|
||||
|
||||
$CLICKHOUSE_LOCAL -nm -q "
|
||||
set schema_inference_mode = 'union';
|
||||
desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{2,3}.jsonl');
|
||||
desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl');
|
||||
" 2>&1 | grep -c -F "Cannot extract table structure"
|
||||
" 2>&1 | grep -c -F "CANNOT_EXTRACT_TABLE_STRUCTURE"
|
||||
|
||||
echo 42 > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.csv
|
||||
echo 42, 43 > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.csv
|
||||
|
Loading…
Reference in New Issue
Block a user