From 1af8066cdaf444f8f76fe36a87b46473e065ba24 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 2 May 2024 21:22:36 +0000 Subject: [PATCH] Fix test --- src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp | 7 +++++-- .../0_stateless/03036_parquet_arrow_nullable.reference | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp index 9b51ca0c295..ce859b38b3c 100644 --- a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -33,7 +33,7 @@ /// * `def` and `rep` arrays can be longer than `primitive_column`, because they include nulls and /// empty arrays; the values in primitive_column correspond to positions where def[i] == max_def. /// -/// If you do want to learn it, dremel paper: https://research.google/pubs/pub36632/ +/// If you do want to learn it, see dremel paper: https://research.google/pubs/pub36632/ /// Instead of reading the whole paper, try staring at figures 2-3 for a while - it might be enough. /// (Why does Parquet do all this instead of just storing array lengths and null masks? I'm not /// really sure.) @@ -430,13 +430,16 @@ void prepareColumnNullable( if (schemas[child_schema_idx].repetition_type == parq::FieldRepetitionType::REQUIRED) { - /// Normal case: we just slap a FieldRepetitionType::OPTIONAL onto the nested column. + /// Normal case: the column inside Nullable is a primitive type (not Nullable/Array/Map). + /// Just slap a FieldRepetitionType::OPTIONAL onto it. schemas[child_schema_idx].repetition_type = parq::FieldRepetitionType::OPTIONAL; } else { /// Weird case: Nullable(Nullable(...)). Or Nullable(Tuple(Nullable(...))), etc. /// This is probably not allowed in ClickHouse, but let's support it just in case. + /// The nested column already has a nontrivial repetition type, so we have to wrap it in a + /// group and assign repetition type OPTIONAL to the group. auto & schema = *schemas.insert(schemas.begin() + child_schema_idx, {}); schema.__set_repetition_type(parq::FieldRepetitionType::OPTIONAL); schema.__set_name("nullable"); diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference index 8820bb7cb9f..985f8192f26 100644 --- a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference @@ -5,7 +5,7 @@ Arrow a UInt64 a_nullable Nullable(UInt64) Parquet -b Array(Nullable(UInt64)) +b Array(UInt64) b_nullable Array(Nullable(UInt64)) Arrow b Array(Nullable(UInt64)) @@ -21,13 +21,13 @@ d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n Arrow d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) Parquet -e Map(UInt64, Nullable(String)) +e Map(UInt64, String) e_nullable Map(UInt64, Nullable(String)) Arrow e Map(UInt64, Nullable(String)) e_nullable Map(UInt64, Nullable(String)) Parquet -f Map(UInt64, Map(UInt64, Nullable(String))) +f Map(UInt64, Map(UInt64, String)) f_nullables Map(UInt64, Map(UInt64, Nullable(String))) Arrow f Map(UInt64, Map(UInt64, Nullable(String)))