diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 043e4f1e724..893059ceae2 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -25,6 +25,8 @@ #include #include +#include + #define FOR_INTERNAL_NUMERIC_TYPES(M) \ M(UInt8, arrow::UInt8Builder) \ M(Int8, arrow::Int8Builder) \ @@ -717,9 +719,7 @@ namespace DB column.type = recursiveRemoveLowCardinality(column.type); column.column = recursiveRemoveLowCardinality(column.column); } - bool is_column_nullable = false; - auto arrow_type = getArrowType(column.type, column.column, column.name, format_name, &is_column_nullable); - arrow_fields.emplace_back(std::make_shared(column.name, arrow_type, is_column_nullable)); + header_columns.emplace_back(std::move(column)); } } @@ -740,6 +740,13 @@ namespace DB if (!low_cardinality_as_dictionary) column = recursiveRemoveLowCardinality(column); + if (!is_arrow_fields_initialized) + { + bool is_column_nullable = false; + auto arrow_type = getArrowType(header_column.type, column, header_column.name, format_name, &is_column_nullable); + arrow_fields.emplace_back(std::make_shared(header_column.name, arrow_type, is_column_nullable)); + } + arrow::MemoryPool* pool = arrow::default_memory_pool(); std::unique_ptr array_builder; arrow::Status status = MakeBuilder(pool, arrow_fields[column_i]->type(), &array_builder); @@ -757,6 +764,7 @@ namespace DB std::shared_ptr arrow_schema = std::make_shared(arrow_fields); res = arrow::Table::Make(arrow_schema, arrow_arrays); + is_arrow_fields_initialized = true; } } diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h index 1fb2a8af65e..50de8045d5f 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h @@ -27,6 +27,11 @@ private: /// To avoid converting dictionary from 
LowCardinality to Arrow /// Dictionary every chunk we save it and reuse. std::unordered_map> dictionary_values; + + /// Arrow fields are initialized on the first call of chChunkToArrowTable rather than in the constructor, + /// because a LowCardinality column from the header always has index type UInt8, so the proper + /// index type has to be taken from the first chunk of data. + bool is_arrow_fields_initialized = false; }; } diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference new file mode 100644 index 00000000000..9ece9606f8b --- /dev/null +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference @@ -0,0 +1,300 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 
+279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql new file mode 100644 index 00000000000..4836b07dd1d --- /dev/null +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql @@ -0,0 +1,2 @@ +insert into function file(02293_data.arrow) select toLowCardinality(toString(number)) from numbers(300) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1; +select * from file(02293_data.arrow);