Merge pull request #68298 from Avogar/fix-nullable-schema-inference

Fix using schema_inference_make_columns_nullable=0
This commit is contained in:
Kruglov Pavel 2024-08-26 11:52:34 +00:00 committed by GitHub
commit e361417ff6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 78 additions and 35 deletions

View File

@ -1389,7 +1389,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
#### schema_inference_make_columns_nullable #### schema_inference_make_columns_nullable
Controls making inferred types `Nullable` in schema inference for formats without information about nullability. Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if `input_format_null_as_default` is disabled and the column contains `NULL` in a sample that is parsed during schema inference. If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will never be `Nullable`; if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or the file metadata contains information about column nullability.
Enabled by default. Enabled by default.
@ -1412,15 +1412,13 @@ DESC format(JSONEachRow, $$
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
``` ```
```sql ```sql
SET schema_inference_make_columns_nullable = 0; SET schema_inference_make_columns_nullable = 'auto';
SET input_format_null_as_default = 0;
DESC format(JSONEachRow, $$ DESC format(JSONEachRow, $$
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
$$) $$)
``` ```
```response ```response
┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ ┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ Int64 │ │ │ │ │ │ │ id │ Int64 │ │ │ │ │ │
│ age │ Int64 │ │ │ │ │ │ │ age │ Int64 │ │ │ │ │ │
@ -1432,7 +1430,6 @@ DESC format(JSONEachRow, $$
```sql ```sql
SET schema_inference_make_columns_nullable = 0; SET schema_inference_make_columns_nullable = 0;
SET input_format_null_as_default = 1;
DESC format(JSONEachRow, $$ DESC format(JSONEachRow, $$
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}

View File

@ -171,8 +171,8 @@ If the `schema_inference_hints` is not formated properly, or if there is a typo
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} ## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
Controls making inferred types `Nullable` in schema inference for formats without information about nullability. Controls making inferred types `Nullable` in schema inference.
If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will never be `Nullable`; if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or the file metadata contains information about column nullability.
Default value: `true`. Default value: `true`.

View File

@ -1120,7 +1120,7 @@ class IColumn;
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \ M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \ M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \ M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. When set to 'auto', ClickHouse will use information about nullability from the data.", 0) \
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \ M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \

View File

@ -257,7 +257,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference; format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference;
format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;
format_settings.schema_inference_hints = settings.schema_inference_hints; format_settings.schema_inference_hints = settings.schema_inference_hints;
format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable; format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable.valueOr(2);
format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name;
format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names;
format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size; format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size;

View File

@ -77,7 +77,7 @@ struct FormatSettings
Raw Raw
}; };
bool schema_inference_make_columns_nullable = true; UInt64 schema_inference_make_columns_nullable = 1;
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;

View File

@ -1344,7 +1344,11 @@ namespace
if (checkCharCaseInsensitive('n', buf)) if (checkCharCaseInsensitive('n', buf))
{ {
if (checkStringCaseInsensitive("ull", buf)) if (checkStringCaseInsensitive("ull", buf))
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()); {
if (settings.schema_inference_make_columns_nullable == 0)
return std::make_shared<DataTypeNothing>();
return makeNullable(std::make_shared<DataTypeNothing>());
}
else if (checkStringCaseInsensitive("an", buf)) else if (checkStringCaseInsensitive("an", buf))
return std::make_shared<DataTypeFloat64>(); return std::make_shared<DataTypeFloat64>();
} }

View File

@ -54,13 +54,8 @@ void checkFinalInferredType(
type = default_type; type = default_type;
} }
if (settings.schema_inference_make_columns_nullable) if (settings.schema_inference_make_columns_nullable == 1)
type = makeNullableRecursively(type); type = makeNullableRecursively(type);
/// In case when data for some column could contain nulls and regular values,
/// resulting inferred type is Nullable.
/// If input_format_null_as_default is enabled, we should remove Nullable type.
else if (settings.null_as_default)
type = removeNullable(type);
} }
void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type) void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)

View File

@ -204,8 +204,11 @@ NamesAndTypesList ArrowSchemaReader::readSchema()
schema = file_reader->schema(); schema = file_reader->schema();
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
*schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference); *schema,
if (format_settings.schema_inference_make_columns_nullable) stream ? "ArrowStream" : "Arrow",
format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference,
format_settings.schema_inference_make_columns_nullable != 0);
if (format_settings.schema_inference_make_columns_nullable == 1)
return getNamesAndRecursivelyNullableTypes(header); return getNamesAndRecursivelyNullableTypes(header);
return header.getNamesAndTypesList(); return header.getNamesAndTypesList();
} }

View File

@ -727,6 +727,7 @@ struct ReadColumnFromArrowColumnSettings
FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior; FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior;
bool allow_arrow_null_type; bool allow_arrow_null_type;
bool skip_columns_with_unsupported_types; bool skip_columns_with_unsupported_types;
bool allow_inferring_nullable_columns;
}; };
static ColumnWithTypeAndName readColumnFromArrowColumn( static ColumnWithTypeAndName readColumnFromArrowColumn(
@ -1109,7 +1110,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
bool is_map_nested_column, bool is_map_nested_column,
const ReadColumnFromArrowColumnSettings & settings) const ReadColumnFromArrowColumnSettings & settings)
{ {
bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable()); bool read_as_nullable_column = (arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable())) && settings.allow_inferring_nullable_columns;
if (read_as_nullable_column && if (read_as_nullable_column &&
arrow_column->type()->id() != arrow::Type::LIST && arrow_column->type()->id() != arrow::Type::LIST &&
arrow_column->type()->id() != arrow::Type::LARGE_LIST && arrow_column->type()->id() != arrow::Type::LARGE_LIST &&
@ -1173,14 +1174,16 @@ static std::shared_ptr<arrow::ChunkedArray> createArrowColumn(const std::shared_
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
const arrow::Schema & schema, const arrow::Schema & schema,
const std::string & format_name, const std::string & format_name,
bool skip_columns_with_unsupported_types) bool skip_columns_with_unsupported_types,
bool allow_inferring_nullable_columns)
{ {
ReadColumnFromArrowColumnSettings settings ReadColumnFromArrowColumnSettings settings
{ {
.format_name = format_name, .format_name = format_name,
.date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore,
.allow_arrow_null_type = false, .allow_arrow_null_type = false,
.skip_columns_with_unsupported_types = skip_columns_with_unsupported_types .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types,
.allow_inferring_nullable_columns = allow_inferring_nullable_columns,
}; };
ColumnsWithTypeAndName sample_columns; ColumnsWithTypeAndName sample_columns;
@ -1254,7 +1257,8 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam
.format_name = format_name, .format_name = format_name,
.date_time_overflow_behavior = date_time_overflow_behavior, .date_time_overflow_behavior = date_time_overflow_behavior,
.allow_arrow_null_type = true, .allow_arrow_null_type = true,
.skip_columns_with_unsupported_types = false .skip_columns_with_unsupported_types = false,
.allow_inferring_nullable_columns = true
}; };
Columns columns; Columns columns;

View File

@ -34,7 +34,8 @@ public:
static Block arrowSchemaToCHHeader( static Block arrowSchemaToCHHeader(
const arrow::Schema & schema, const arrow::Schema & schema,
const std::string & format_name, const std::string & format_name,
bool skip_columns_with_unsupported_types = false); bool skip_columns_with_unsupported_types = false,
bool allow_inferring_nullable_columns = true);
struct DictionaryInfo struct DictionaryInfo
{ {

View File

@ -1002,7 +1002,7 @@ NamesAndTypesList NativeORCSchemaReader::readSchema()
header.insert(ColumnWithTypeAndName{type, name}); header.insert(ColumnWithTypeAndName{type, name});
} }
if (format_settings.schema_inference_make_columns_nullable) if (format_settings.schema_inference_make_columns_nullable == 1)
return getNamesAndRecursivelyNullableTypes(header); return getNamesAndRecursivelyNullableTypes(header);
return header.getNamesAndTypesList(); return header.getNamesAndTypesList();
} }

View File

@ -160,8 +160,11 @@ NamesAndTypesList ORCSchemaReader::readSchema()
{ {
initializeIfNeeded(); initializeIfNeeded();
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
*schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); *schema,
if (format_settings.schema_inference_make_columns_nullable) "ORC",
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference,
format_settings.schema_inference_make_columns_nullable != 0);
if (format_settings.schema_inference_make_columns_nullable == 1)
return getNamesAndRecursivelyNullableTypes(header); return getNamesAndRecursivelyNullableTypes(header);
return header.getNamesAndTypesList(); return header.getNamesAndTypesList();
} }

View File

@ -869,8 +869,11 @@ NamesAndTypesList ParquetSchemaReader::readSchema()
THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema)); THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema));
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
*schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference); *schema,
if (format_settings.schema_inference_make_columns_nullable) "Parquet",
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference,
format_settings.schema_inference_make_columns_nullable != 0);
if (format_settings.schema_inference_make_columns_nullable == 1)
return getNamesAndRecursivelyNullableTypes(header); return getNamesAndRecursivelyNullableTypes(header);
return header.getNamesAndTypesList(); return header.getNamesAndTypesList();
} }

View File

@ -18,7 +18,7 @@ desc format(JSONEachRow, '{"x" : [[], [null], [1, 2, 3]]}');
desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}'); desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}');
desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}'); desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}');
set schema_inference_make_columns_nullable=0; set schema_inference_make_columns_nullable='auto';
desc format(JSONEachRow, '{"x" : [1, 2]}'); desc format(JSONEachRow, '{"x" : [1, 2]}');
desc format(JSONEachRow, '{"x" : [null, 1]}'); desc format(JSONEachRow, '{"x" : [null, 1]}');
desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}'); desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}');
@ -40,7 +40,7 @@ desc format(JSONCompactEachRow, '[[[], [null], [1, 2, 3]]]');
desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]'); desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]');
desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]'); desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]');
set schema_inference_make_columns_nullable=0; set schema_inference_make_columns_nullable='auto';
desc format(JSONCompactEachRow, '[[1, 2]]'); desc format(JSONCompactEachRow, '[[1, 2]]');
desc format(JSONCompactEachRow, '[[null, 1]]'); desc format(JSONCompactEachRow, '[[null, 1]]');
desc format(JSONCompactEachRow, '[[1, 2]], [[3]]'); desc format(JSONCompactEachRow, '[[1, 2]], [[3]]');
@ -59,7 +59,7 @@ desc format(CSV, '"[[], [null], [1, 2, 3]]"');
desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"'); desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"');
desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"'); desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"');
set schema_inference_make_columns_nullable=0; set schema_inference_make_columns_nullable='auto';
desc format(CSV, '"[1,2]"'); desc format(CSV, '"[1,2]"');
desc format(CSV, '"[NULL, 1]"'); desc format(CSV, '"[NULL, 1]"');
desc format(CSV, '"[1, 2]"\n"[3]"'); desc format(CSV, '"[1, 2]"\n"[3]"');

View File

@ -1,7 +1,7 @@
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1; desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1;
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1;
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0; desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable='auto', input_format_null_as_default=0;
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable='auto', input_format_null_as_default=0;
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1; desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1;
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1;

View File

@ -1,40 +1,66 @@
Parquet Parquet
a UInt64 a UInt64
a_nullable Nullable(UInt64) a_nullable Nullable(UInt64)
a UInt64
a_nullable UInt64
Arrow Arrow
a UInt64 a UInt64
a_nullable Nullable(UInt64) a_nullable Nullable(UInt64)
a UInt64
a_nullable UInt64
Parquet Parquet
b Array(UInt64) b Array(UInt64)
b_nullable Array(Nullable(UInt64)) b_nullable Array(Nullable(UInt64))
b Array(UInt64)
b_nullable Array(UInt64)
Arrow Arrow
b Array(Nullable(UInt64)) b Array(Nullable(UInt64))
b_nullable Array(Nullable(UInt64)) b_nullable Array(Nullable(UInt64))
b Array(UInt64)
b_nullable Array(UInt64)
Parquet Parquet
c Tuple(\n a UInt64,\n b String) c Tuple(\n a UInt64,\n b String)
c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String))
c Tuple(\n a UInt64,\n b String)
c_nullable Tuple(\n a UInt64,\n b String)
Arrow Arrow
c Tuple(\n a UInt64,\n b String) c Tuple(\n a UInt64,\n b String)
c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String))
c Tuple(\n a UInt64,\n b String)
c_nullable Tuple(\n a UInt64,\n b String)
Parquet Parquet
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String))))
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String)))
Arrow Arrow
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String))))
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String)))
Parquet Parquet
e Map(UInt64, String) e Map(UInt64, String)
e_nullable Map(UInt64, Nullable(String)) e_nullable Map(UInt64, Nullable(String))
e Map(UInt64, String)
e_nullable Map(UInt64, String)
Arrow Arrow
e Map(UInt64, Nullable(String)) e Map(UInt64, Nullable(String))
e_nullable Map(UInt64, Nullable(String)) e_nullable Map(UInt64, Nullable(String))
e Map(UInt64, String)
e_nullable Map(UInt64, String)
Parquet Parquet
f Map(UInt64, Map(UInt64, String)) f Map(UInt64, Map(UInt64, String))
f_nullables Map(UInt64, Map(UInt64, Nullable(String))) f_nullables Map(UInt64, Map(UInt64, Nullable(String)))
f Map(UInt64, Map(UInt64, String))
f_nullables Map(UInt64, Map(UInt64, String))
Arrow Arrow
f Map(UInt64, Map(UInt64, Nullable(String))) f Map(UInt64, Map(UInt64, Nullable(String)))
f_nullables Map(UInt64, Map(UInt64, Nullable(String))) f_nullables Map(UInt64, Map(UInt64, Nullable(String)))
f Map(UInt64, Map(UInt64, String))
f_nullables Map(UInt64, Map(UInt64, String))
Parquet Parquet
g String g String
g_nullable Nullable(String) g_nullable Nullable(String)
g String
g_nullable String
Arrow Arrow
g LowCardinality(String) g LowCardinality(String)
g_nullable LowCardinality(String) g_nullable LowCardinality(String)
g LowCardinality(String)
g_nullable LowCardinality(String)

View File

@ -14,6 +14,7 @@ for format in $formats
do do
echo $format echo $format
$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
done done
@ -21,6 +22,7 @@ for format in $formats
do do
echo $format echo $format
$CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
done done
@ -28,6 +30,7 @@ for format in $formats
do do
echo $format echo $format
$CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
done done
@ -35,6 +38,7 @@ for format in $formats
do do
echo $format echo $format
$CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 format $format" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 format $format" > $DATA_FILE
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
done done
@ -42,6 +46,7 @@ for format in $formats
do do
echo $format echo $format
$CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
done done
@ -49,6 +54,7 @@ for format in $formats
do do
echo $format echo $format
$CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
done done
@ -56,6 +62,7 @@ for format in $formats
do do
echo $format echo $format
$CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
done done

View File

@ -1,2 +1,2 @@
x Nullable(Int64) x Nullable(Int64)
schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_datetimes_only_datetime64=false, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=1, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_datetimes_only_datetime64=false, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false