mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Merge pull request #44446 from Avogar/arrow-nullables
Respect the schema_inference_make_columns_nullable setting in Parquet/ORC/Arrow formats
This commit is contained in:
commit
1c2dc05d6e
@ -173,8 +173,9 @@ NamesAndTypesList ArrowSchemaReader::readSchema()
|
||||
|
||||
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
||||
*schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference);
|
||||
return getNamesAndRecursivelyNullableTypes(header);
|
||||
}
|
||||
if (format_settings.schema_inference_make_columns_nullable)
|
||||
return getNamesAndRecursivelyNullableTypes(header);
|
||||
return header.getNamesAndTypesList();}
|
||||
|
||||
void registerInputFormatArrow(FormatFactory & factory)
|
||||
{
|
||||
@ -208,12 +209,24 @@ void registerArrowSchemaReader(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<ArrowSchemaReader>(buf, false, settings);
|
||||
});
|
||||
|
||||
factory.registerAdditionalInfoForSchemaCacheGetter("Arrow", [](const FormatSettings & settings)
|
||||
{
|
||||
return fmt::format("schema_inference_make_columns_nullable={}", settings.schema_inference_make_columns_nullable);
|
||||
});
|
||||
factory.registerSchemaReader(
|
||||
"ArrowStream",
|
||||
[](ReadBuffer & buf, const FormatSettings & settings)
|
||||
{
|
||||
return std::make_shared<ArrowSchemaReader>(buf, true, settings);
|
||||
});}
|
||||
});
|
||||
|
||||
factory.registerAdditionalInfoForSchemaCacheGetter("ArrowStream", [](const FormatSettings & settings)
|
||||
{
|
||||
return fmt::format("schema_inference_make_columns_nullable={}", settings.schema_inference_make_columns_nullable);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
|
||||
|
@ -189,8 +189,9 @@ NamesAndTypesList ORCSchemaReader::readSchema()
|
||||
getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped);
|
||||
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
||||
*schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference);
|
||||
return getNamesAndRecursivelyNullableTypes(header);
|
||||
}
|
||||
if (format_settings.schema_inference_make_columns_nullable)
|
||||
return getNamesAndRecursivelyNullableTypes(header);
|
||||
return header.getNamesAndTypesList();}
|
||||
|
||||
void registerInputFormatORC(FormatFactory & factory)
|
||||
{
|
||||
@ -216,6 +217,11 @@ void registerORCSchemaReader(FormatFactory & factory)
|
||||
return std::make_shared<ORCSchemaReader>(buf, settings);
|
||||
}
|
||||
);
|
||||
|
||||
factory.registerAdditionalInfoForSchemaCacheGetter("ORC", [](const FormatSettings & settings)
|
||||
{
|
||||
return fmt::format("schema_inference_make_columns_nullable={}", settings.schema_inference_make_columns_nullable);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -187,7 +187,9 @@ NamesAndTypesList ParquetSchemaReader::readSchema()
|
||||
getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped);
|
||||
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
||||
*schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference);
|
||||
return getNamesAndRecursivelyNullableTypes(header);
|
||||
if (format_settings.schema_inference_make_columns_nullable)
|
||||
return getNamesAndRecursivelyNullableTypes(header);
|
||||
return header.getNamesAndTypesList();
|
||||
}
|
||||
|
||||
void registerInputFormatParquet(FormatFactory & factory)
|
||||
@ -214,6 +216,11 @@ void registerParquetSchemaReader(FormatFactory & factory)
|
||||
return std::make_shared<ParquetSchemaReader>(buf, settings);
|
||||
}
|
||||
);
|
||||
|
||||
factory.registerAdditionalInfoForSchemaCacheGetter("Parquet", [](const FormatSettings & settings)
|
||||
{
|
||||
return fmt::format("schema_inference_make_columns_nullable={}", settings.schema_inference_make_columns_nullable);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,6 @@
|
||||
number Nullable(UInt64)
|
||||
number UInt64
|
||||
number Nullable(Int64)
|
||||
number Int64
|
||||
number Nullable(UInt64)
|
||||
number UInt64
|
@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
# Directory containing this script; used to locate shell_config.sh one level up.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
# NOTE(review): presumably this is what defines $CLICKHOUSE_LOCAL - confirm against shell_config.sh.
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Each pair below pipes `numbers(3)` written in a given format into a second
# clickhouse-local that reads it as table `test` and prints `desc test`,
# first with schema_inference_make_columns_nullable=1 and then with =0.
# Parquet:
$CLICKHOUSE_LOCAL -q "select * from numbers(3) format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --table=test -q "desc test" --schema_inference_make_columns_nullable=1;
|
||||
$CLICKHOUSE_LOCAL -q "select * from numbers(3) format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --table=test -q "desc test" --schema_inference_make_columns_nullable=0;
|
||||
|
||||
# ORC: same check as above.
$CLICKHOUSE_LOCAL -q "select * from numbers(3) format ORC" | $CLICKHOUSE_LOCAL --input-format=ORC --table=test -q "desc test" --schema_inference_make_columns_nullable=1;
|
||||
$CLICKHOUSE_LOCAL -q "select * from numbers(3) format ORC" | $CLICKHOUSE_LOCAL --input-format=ORC --table=test -q "desc test" --schema_inference_make_columns_nullable=0;
|
||||
|
||||
# Arrow: same check as above.
$CLICKHOUSE_LOCAL -q "select * from numbers(3) format Arrow" | $CLICKHOUSE_LOCAL --input-format=Arrow --table=test -q "desc test" --schema_inference_make_columns_nullable=1;
|
||||
$CLICKHOUSE_LOCAL -q "select * from numbers(3) format Arrow" | $CLICKHOUSE_LOCAL --input-format=Arrow --table=test -q "desc test" --schema_inference_make_columns_nullable=0;
|
||||
|
Loading…
Reference in New Issue
Block a user