mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Backport #68298 to 24.8: Fix using schema_inference_make_columns_nullable=0
This commit is contained in:
parent
3b0c895582
commit
34a7efc53f
@ -1389,7 +1389,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
|
|||||||
#### schema_inference_make_columns_nullable
|
#### schema_inference_make_columns_nullable
|
||||||
|
|
||||||
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
||||||
If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if `input_format_null_as_default` is disabled and the column contains `NULL` in a sample that is parsed during schema inference.
|
If the setting is enabled, all inferred types will be `Nullable`. If disabled, the inferred type will never be `Nullable`. If set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or if the file metadata contains information about column nullability.
|
||||||
|
|
||||||
Enabled by default.
|
Enabled by default.
|
||||||
|
|
||||||
@ -1412,15 +1412,13 @@ DESC format(JSONEachRow, $$
|
|||||||
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||||
```
|
```
|
||||||
```sql
|
```sql
|
||||||
SET schema_inference_make_columns_nullable = 0;
|
SET schema_inference_make_columns_nullable = 'auto';
|
||||||
SET input_format_null_as_default = 0;
|
|
||||||
DESC format(JSONEachRow, $$
|
DESC format(JSONEachRow, $$
|
||||||
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
|
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
|
||||||
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
|
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
|
||||||
$$)
|
$$)
|
||||||
```
|
```
|
||||||
```response
|
```response
|
||||||
|
|
||||||
┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||||
│ id │ Int64 │ │ │ │ │ │
|
│ id │ Int64 │ │ │ │ │ │
|
||||||
│ age │ Int64 │ │ │ │ │ │
|
│ age │ Int64 │ │ │ │ │ │
|
||||||
@ -1432,7 +1430,6 @@ DESC format(JSONEachRow, $$
|
|||||||
|
|
||||||
```sql
|
```sql
|
||||||
SET schema_inference_make_columns_nullable = 0;
|
SET schema_inference_make_columns_nullable = 0;
|
||||||
SET input_format_null_as_default = 1;
|
|
||||||
DESC format(JSONEachRow, $$
|
DESC format(JSONEachRow, $$
|
||||||
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
|
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
|
||||||
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
|
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
|
||||||
|
@ -171,8 +171,8 @@ If the `schema_inference_hints` is not formated properly, or if there is a typo
|
|||||||
|
|
||||||
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
|
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
|
||||||
|
|
||||||
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
Controls making inferred types `Nullable` in schema inference.
|
||||||
If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference.
|
If the setting is enabled, all inferred types will be `Nullable`. If disabled, the inferred type will never be `Nullable`. If set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or if the file metadata contains information about column nullability.
|
||||||
|
|
||||||
Default value: `true`.
|
Default value: `true`.
|
||||||
|
|
||||||
|
@ -1118,7 +1118,7 @@ class IColumn;
|
|||||||
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
|
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
|
||||||
M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
|
M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
|
||||||
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
|
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
|
||||||
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
|
M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. When set to 'auto', ClickHouse will use information about nullability from the data.", 0) \
|
||||||
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
||||||
M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
|
M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
|
||||||
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
|
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
|
||||||
|
@ -257,7 +257,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
|||||||
format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference;
|
format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference;
|
||||||
format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;
|
format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;
|
||||||
format_settings.schema_inference_hints = settings.schema_inference_hints;
|
format_settings.schema_inference_hints = settings.schema_inference_hints;
|
||||||
format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable;
|
format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable.valueOr(2);
|
||||||
format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name;
|
format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name;
|
||||||
format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names;
|
format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names;
|
||||||
format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size;
|
format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size;
|
||||||
|
@ -76,7 +76,7 @@ struct FormatSettings
|
|||||||
Raw
|
Raw
|
||||||
};
|
};
|
||||||
|
|
||||||
bool schema_inference_make_columns_nullable = true;
|
UInt64 schema_inference_make_columns_nullable = 1;
|
||||||
|
|
||||||
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;
|
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;
|
||||||
|
|
||||||
|
@ -1302,7 +1302,11 @@ namespace
|
|||||||
if (checkCharCaseInsensitive('n', buf))
|
if (checkCharCaseInsensitive('n', buf))
|
||||||
{
|
{
|
||||||
if (checkStringCaseInsensitive("ull", buf))
|
if (checkStringCaseInsensitive("ull", buf))
|
||||||
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
|
{
|
||||||
|
if (settings.schema_inference_make_columns_nullable == 0)
|
||||||
|
return std::make_shared<DataTypeNothing>();
|
||||||
|
return makeNullable(std::make_shared<DataTypeNothing>());
|
||||||
|
}
|
||||||
else if (checkStringCaseInsensitive("an", buf))
|
else if (checkStringCaseInsensitive("an", buf))
|
||||||
return std::make_shared<DataTypeFloat64>();
|
return std::make_shared<DataTypeFloat64>();
|
||||||
}
|
}
|
||||||
|
@ -54,13 +54,8 @@ void checkFinalInferredType(
|
|||||||
type = default_type;
|
type = default_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (settings.schema_inference_make_columns_nullable)
|
if (settings.schema_inference_make_columns_nullable == 1)
|
||||||
type = makeNullableRecursively(type);
|
type = makeNullableRecursively(type);
|
||||||
/// In case when data for some column could contain nulls and regular values,
|
|
||||||
/// resulting inferred type is Nullable.
|
|
||||||
/// If input_format_null_as_default is enabled, we should remove Nullable type.
|
|
||||||
else if (settings.null_as_default)
|
|
||||||
type = removeNullable(type);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)
|
void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)
|
||||||
|
@ -204,8 +204,11 @@ NamesAndTypesList ArrowSchemaReader::readSchema()
|
|||||||
schema = file_reader->schema();
|
schema = file_reader->schema();
|
||||||
|
|
||||||
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
||||||
*schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference);
|
*schema,
|
||||||
if (format_settings.schema_inference_make_columns_nullable)
|
stream ? "ArrowStream" : "Arrow",
|
||||||
|
format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference,
|
||||||
|
format_settings.schema_inference_make_columns_nullable != 0);
|
||||||
|
if (format_settings.schema_inference_make_columns_nullable == 1)
|
||||||
return getNamesAndRecursivelyNullableTypes(header);
|
return getNamesAndRecursivelyNullableTypes(header);
|
||||||
return header.getNamesAndTypesList();
|
return header.getNamesAndTypesList();
|
||||||
}
|
}
|
||||||
|
@ -712,6 +712,7 @@ struct ReadColumnFromArrowColumnSettings
|
|||||||
FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior;
|
FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior;
|
||||||
bool allow_arrow_null_type;
|
bool allow_arrow_null_type;
|
||||||
bool skip_columns_with_unsupported_types;
|
bool skip_columns_with_unsupported_types;
|
||||||
|
bool allow_inferring_nullable_columns;
|
||||||
};
|
};
|
||||||
|
|
||||||
static ColumnWithTypeAndName readColumnFromArrowColumn(
|
static ColumnWithTypeAndName readColumnFromArrowColumn(
|
||||||
@ -1094,7 +1095,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
|
|||||||
bool is_map_nested_column,
|
bool is_map_nested_column,
|
||||||
const ReadColumnFromArrowColumnSettings & settings)
|
const ReadColumnFromArrowColumnSettings & settings)
|
||||||
{
|
{
|
||||||
bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable());
|
bool read_as_nullable_column = (arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable())) && settings.allow_inferring_nullable_columns;
|
||||||
if (read_as_nullable_column &&
|
if (read_as_nullable_column &&
|
||||||
arrow_column->type()->id() != arrow::Type::LIST &&
|
arrow_column->type()->id() != arrow::Type::LIST &&
|
||||||
arrow_column->type()->id() != arrow::Type::LARGE_LIST &&
|
arrow_column->type()->id() != arrow::Type::LARGE_LIST &&
|
||||||
@ -1158,14 +1159,16 @@ static std::shared_ptr<arrow::ChunkedArray> createArrowColumn(const std::shared_
|
|||||||
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
||||||
const arrow::Schema & schema,
|
const arrow::Schema & schema,
|
||||||
const std::string & format_name,
|
const std::string & format_name,
|
||||||
bool skip_columns_with_unsupported_types)
|
bool skip_columns_with_unsupported_types,
|
||||||
|
bool allow_inferring_nullable_columns)
|
||||||
{
|
{
|
||||||
ReadColumnFromArrowColumnSettings settings
|
ReadColumnFromArrowColumnSettings settings
|
||||||
{
|
{
|
||||||
.format_name = format_name,
|
.format_name = format_name,
|
||||||
.date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore,
|
.date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore,
|
||||||
.allow_arrow_null_type = false,
|
.allow_arrow_null_type = false,
|
||||||
.skip_columns_with_unsupported_types = skip_columns_with_unsupported_types
|
.skip_columns_with_unsupported_types = skip_columns_with_unsupported_types,
|
||||||
|
.allow_inferring_nullable_columns = allow_inferring_nullable_columns,
|
||||||
};
|
};
|
||||||
|
|
||||||
ColumnsWithTypeAndName sample_columns;
|
ColumnsWithTypeAndName sample_columns;
|
||||||
@ -1239,7 +1242,8 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam
|
|||||||
.format_name = format_name,
|
.format_name = format_name,
|
||||||
.date_time_overflow_behavior = date_time_overflow_behavior,
|
.date_time_overflow_behavior = date_time_overflow_behavior,
|
||||||
.allow_arrow_null_type = true,
|
.allow_arrow_null_type = true,
|
||||||
.skip_columns_with_unsupported_types = false
|
.skip_columns_with_unsupported_types = false,
|
||||||
|
.allow_inferring_nullable_columns = true
|
||||||
};
|
};
|
||||||
|
|
||||||
Columns columns;
|
Columns columns;
|
||||||
|
@ -34,7 +34,8 @@ public:
|
|||||||
static Block arrowSchemaToCHHeader(
|
static Block arrowSchemaToCHHeader(
|
||||||
const arrow::Schema & schema,
|
const arrow::Schema & schema,
|
||||||
const std::string & format_name,
|
const std::string & format_name,
|
||||||
bool skip_columns_with_unsupported_types = false);
|
bool skip_columns_with_unsupported_types = false,
|
||||||
|
bool allow_inferring_nullable_columns = true);
|
||||||
|
|
||||||
struct DictionaryInfo
|
struct DictionaryInfo
|
||||||
{
|
{
|
||||||
|
@ -1002,7 +1002,7 @@ NamesAndTypesList NativeORCSchemaReader::readSchema()
|
|||||||
header.insert(ColumnWithTypeAndName{type, name});
|
header.insert(ColumnWithTypeAndName{type, name});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (format_settings.schema_inference_make_columns_nullable)
|
if (format_settings.schema_inference_make_columns_nullable == 1)
|
||||||
return getNamesAndRecursivelyNullableTypes(header);
|
return getNamesAndRecursivelyNullableTypes(header);
|
||||||
return header.getNamesAndTypesList();
|
return header.getNamesAndTypesList();
|
||||||
}
|
}
|
||||||
|
@ -160,8 +160,11 @@ NamesAndTypesList ORCSchemaReader::readSchema()
|
|||||||
{
|
{
|
||||||
initializeIfNeeded();
|
initializeIfNeeded();
|
||||||
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
||||||
*schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference);
|
*schema,
|
||||||
if (format_settings.schema_inference_make_columns_nullable)
|
"ORC",
|
||||||
|
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference,
|
||||||
|
format_settings.schema_inference_make_columns_nullable != 0);
|
||||||
|
if (format_settings.schema_inference_make_columns_nullable == 1)
|
||||||
return getNamesAndRecursivelyNullableTypes(header);
|
return getNamesAndRecursivelyNullableTypes(header);
|
||||||
return header.getNamesAndTypesList();
|
return header.getNamesAndTypesList();
|
||||||
}
|
}
|
||||||
|
@ -866,8 +866,11 @@ NamesAndTypesList ParquetSchemaReader::readSchema()
|
|||||||
THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema));
|
THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema));
|
||||||
|
|
||||||
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
||||||
*schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference);
|
*schema,
|
||||||
if (format_settings.schema_inference_make_columns_nullable)
|
"Parquet",
|
||||||
|
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference,
|
||||||
|
format_settings.schema_inference_make_columns_nullable != 0);
|
||||||
|
if (format_settings.schema_inference_make_columns_nullable == 1)
|
||||||
return getNamesAndRecursivelyNullableTypes(header);
|
return getNamesAndRecursivelyNullableTypes(header);
|
||||||
return header.getNamesAndTypesList();
|
return header.getNamesAndTypesList();
|
||||||
}
|
}
|
||||||
|
@ -18,7 +18,7 @@ desc format(JSONEachRow, '{"x" : [[], [null], [1, 2, 3]]}');
|
|||||||
desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}');
|
desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}');
|
||||||
desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}');
|
desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}');
|
||||||
|
|
||||||
set schema_inference_make_columns_nullable=0;
|
set schema_inference_make_columns_nullable='auto';
|
||||||
desc format(JSONEachRow, '{"x" : [1, 2]}');
|
desc format(JSONEachRow, '{"x" : [1, 2]}');
|
||||||
desc format(JSONEachRow, '{"x" : [null, 1]}');
|
desc format(JSONEachRow, '{"x" : [null, 1]}');
|
||||||
desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}');
|
desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}');
|
||||||
@ -40,7 +40,7 @@ desc format(JSONCompactEachRow, '[[[], [null], [1, 2, 3]]]');
|
|||||||
desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]');
|
desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]');
|
||||||
desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]');
|
desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]');
|
||||||
|
|
||||||
set schema_inference_make_columns_nullable=0;
|
set schema_inference_make_columns_nullable='auto';
|
||||||
desc format(JSONCompactEachRow, '[[1, 2]]');
|
desc format(JSONCompactEachRow, '[[1, 2]]');
|
||||||
desc format(JSONCompactEachRow, '[[null, 1]]');
|
desc format(JSONCompactEachRow, '[[null, 1]]');
|
||||||
desc format(JSONCompactEachRow, '[[1, 2]], [[3]]');
|
desc format(JSONCompactEachRow, '[[1, 2]], [[3]]');
|
||||||
@ -59,7 +59,7 @@ desc format(CSV, '"[[], [null], [1, 2, 3]]"');
|
|||||||
desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"');
|
desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"');
|
||||||
desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"');
|
desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"');
|
||||||
|
|
||||||
set schema_inference_make_columns_nullable=0;
|
set schema_inference_make_columns_nullable='auto';
|
||||||
desc format(CSV, '"[1,2]"');
|
desc format(CSV, '"[1,2]"');
|
||||||
desc format(CSV, '"[NULL, 1]"');
|
desc format(CSV, '"[NULL, 1]"');
|
||||||
desc format(CSV, '"[1, 2]"\n"[3]"');
|
desc format(CSV, '"[1, 2]"\n"[3]"');
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1;
|
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1;
|
||||||
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1;
|
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1;
|
||||||
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0;
|
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable='auto', input_format_null_as_default=0;
|
||||||
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0;
|
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable='auto', input_format_null_as_default=0;
|
||||||
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1;
|
desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1;
|
||||||
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1;
|
select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1;
|
||||||
|
|
||||||
|
@ -1,40 +1,66 @@
|
|||||||
Parquet
|
Parquet
|
||||||
a UInt64
|
a UInt64
|
||||||
a_nullable Nullable(UInt64)
|
a_nullable Nullable(UInt64)
|
||||||
|
a UInt64
|
||||||
|
a_nullable UInt64
|
||||||
Arrow
|
Arrow
|
||||||
a UInt64
|
a UInt64
|
||||||
a_nullable Nullable(UInt64)
|
a_nullable Nullable(UInt64)
|
||||||
|
a UInt64
|
||||||
|
a_nullable UInt64
|
||||||
Parquet
|
Parquet
|
||||||
b Array(UInt64)
|
b Array(UInt64)
|
||||||
b_nullable Array(Nullable(UInt64))
|
b_nullable Array(Nullable(UInt64))
|
||||||
|
b Array(UInt64)
|
||||||
|
b_nullable Array(UInt64)
|
||||||
Arrow
|
Arrow
|
||||||
b Array(Nullable(UInt64))
|
b Array(Nullable(UInt64))
|
||||||
b_nullable Array(Nullable(UInt64))
|
b_nullable Array(Nullable(UInt64))
|
||||||
|
b Array(UInt64)
|
||||||
|
b_nullable Array(UInt64)
|
||||||
Parquet
|
Parquet
|
||||||
c Tuple(\n a UInt64,\n b String)
|
c Tuple(\n a UInt64,\n b String)
|
||||||
c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String))
|
c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String))
|
||||||
|
c Tuple(\n a UInt64,\n b String)
|
||||||
|
c_nullable Tuple(\n a UInt64,\n b String)
|
||||||
Arrow
|
Arrow
|
||||||
c Tuple(\n a UInt64,\n b String)
|
c Tuple(\n a UInt64,\n b String)
|
||||||
c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String))
|
c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String))
|
||||||
|
c Tuple(\n a UInt64,\n b String)
|
||||||
|
c_nullable Tuple(\n a UInt64,\n b String)
|
||||||
Parquet
|
Parquet
|
||||||
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String))))
|
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String))))
|
||||||
|
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String)))
|
||||||
Arrow
|
Arrow
|
||||||
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String))))
|
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String))))
|
||||||
|
d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String)))
|
||||||
Parquet
|
Parquet
|
||||||
e Map(UInt64, String)
|
e Map(UInt64, String)
|
||||||
e_nullable Map(UInt64, Nullable(String))
|
e_nullable Map(UInt64, Nullable(String))
|
||||||
|
e Map(UInt64, String)
|
||||||
|
e_nullable Map(UInt64, String)
|
||||||
Arrow
|
Arrow
|
||||||
e Map(UInt64, Nullable(String))
|
e Map(UInt64, Nullable(String))
|
||||||
e_nullable Map(UInt64, Nullable(String))
|
e_nullable Map(UInt64, Nullable(String))
|
||||||
|
e Map(UInt64, String)
|
||||||
|
e_nullable Map(UInt64, String)
|
||||||
Parquet
|
Parquet
|
||||||
f Map(UInt64, Map(UInt64, String))
|
f Map(UInt64, Map(UInt64, String))
|
||||||
f_nullables Map(UInt64, Map(UInt64, Nullable(String)))
|
f_nullables Map(UInt64, Map(UInt64, Nullable(String)))
|
||||||
|
f Map(UInt64, Map(UInt64, String))
|
||||||
|
f_nullables Map(UInt64, Map(UInt64, String))
|
||||||
Arrow
|
Arrow
|
||||||
f Map(UInt64, Map(UInt64, Nullable(String)))
|
f Map(UInt64, Map(UInt64, Nullable(String)))
|
||||||
f_nullables Map(UInt64, Map(UInt64, Nullable(String)))
|
f_nullables Map(UInt64, Map(UInt64, Nullable(String)))
|
||||||
|
f Map(UInt64, Map(UInt64, String))
|
||||||
|
f_nullables Map(UInt64, Map(UInt64, String))
|
||||||
Parquet
|
Parquet
|
||||||
g String
|
g String
|
||||||
g_nullable Nullable(String)
|
g_nullable Nullable(String)
|
||||||
|
g String
|
||||||
|
g_nullable String
|
||||||
Arrow
|
Arrow
|
||||||
g LowCardinality(String)
|
g LowCardinality(String)
|
||||||
g_nullable LowCardinality(String)
|
g_nullable LowCardinality(String)
|
||||||
|
g LowCardinality(String)
|
||||||
|
g_nullable LowCardinality(String)
|
||||||
|
@ -14,6 +14,7 @@ for format in $formats
|
|||||||
do
|
do
|
||||||
echo $format
|
echo $format
|
||||||
$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE
|
$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE
|
||||||
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
|
||||||
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
||||||
done
|
done
|
||||||
|
|
||||||
@ -21,6 +22,7 @@ for format in $formats
|
|||||||
do
|
do
|
||||||
echo $format
|
echo $format
|
||||||
$CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE
|
$CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE
|
||||||
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
|
||||||
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
||||||
done
|
done
|
||||||
|
|
||||||
@ -28,6 +30,7 @@ for format in $formats
|
|||||||
do
|
do
|
||||||
echo $format
|
echo $format
|
||||||
$CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE
|
$CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE
|
||||||
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
|
||||||
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
||||||
done
|
done
|
||||||
|
|
||||||
@ -35,6 +38,7 @@ for format in $formats
|
|||||||
do
|
do
|
||||||
echo $format
|
echo $format
|
||||||
$CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 format $format" > $DATA_FILE
|
$CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 format $format" > $DATA_FILE
|
||||||
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
|
||||||
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
||||||
done
|
done
|
||||||
|
|
||||||
@ -42,6 +46,7 @@ for format in $formats
|
|||||||
do
|
do
|
||||||
echo $format
|
echo $format
|
||||||
$CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE
|
$CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE
|
||||||
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
|
||||||
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
||||||
done
|
done
|
||||||
|
|
||||||
@ -49,6 +54,7 @@ for format in $formats
|
|||||||
do
|
do
|
||||||
echo $format
|
echo $format
|
||||||
$CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE
|
$CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE
|
||||||
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
|
||||||
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
||||||
done
|
done
|
||||||
|
|
||||||
@ -56,6 +62,7 @@ for format in $formats
|
|||||||
do
|
do
|
||||||
echo $format
|
echo $format
|
||||||
$CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE
|
$CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE
|
||||||
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'"
|
||||||
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
x Nullable(Int64)
|
x Nullable(Int64)
|
||||||
schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_datetimes_only_datetime64=false, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false
|
schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=1, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_datetimes_only_datetime64=false, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false
|
||||||
|
Loading…
Reference in New Issue
Block a user