mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Make ParquetMetadata say whether bloom filter is present
This commit is contained in:
parent
9e2ae7e0c7
commit
e3ebe51968
@ -92,8 +92,9 @@ static NamesAndTypesList getHeaderForParquetMetadata()
|
||||
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()),
|
||||
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
|
||||
Names{"num_values", "null_count", "distinct_count", "min", "max"}),
|
||||
DataTypeFactory::instance().get("Bool"),
|
||||
},
|
||||
Names{"name", "path", "total_compressed_size", "total_uncompressed_size", "have_statistics", "statistics"}))},
|
||||
Names{"name", "path", "total_compressed_size", "total_uncompressed_size", "have_statistics", "statistics", "have_bloom_filter"}))},
|
||||
Names{"num_columns", "num_rows", "total_uncompressed_size", "total_compressed_size", "columns"}))},
|
||||
};
|
||||
return names_and_types;
|
||||
@ -350,6 +351,8 @@ void ParquetMetadataInputFormat::fillColumnChunksMetadata(const std::unique_ptr<
|
||||
fillColumnStatistics(column_chunk_metadata->statistics(), tuple_column.getColumn(5), row_group_metadata->schema()->Column(column_i)->type_length());
|
||||
else
|
||||
tuple_column.getColumn(5).insertDefault();
|
||||
bool have_bloom_filter = column_chunk_metadata->bloom_filter_offset().has_value();
|
||||
assert_cast<ColumnUInt8 &>(tuple_column.getColumn(6)).insertValue(have_bloom_filter);
|
||||
}
|
||||
array_column.getOffsets().push_back(tuple_column.size());
|
||||
}
|
||||
|
@ -78,7 +78,8 @@
|
||||
"distinct_count": null,
|
||||
"min": "0",
|
||||
"max": "999"
|
||||
}
|
||||
},
|
||||
"have_bloom_filter": false
|
||||
},
|
||||
{
|
||||
"name": "str",
|
||||
@ -92,7 +93,8 @@
|
||||
"distinct_count": null,
|
||||
"min": "Hello0",
|
||||
"max": "Hello999"
|
||||
}
|
||||
},
|
||||
"have_bloom_filter": false
|
||||
},
|
||||
{
|
||||
"name": "mod",
|
||||
@ -106,7 +108,8 @@
|
||||
"distinct_count": null,
|
||||
"min": "0",
|
||||
"max": "8"
|
||||
}
|
||||
},
|
||||
"have_bloom_filter": false
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -128,7 +131,8 @@
|
||||
"distinct_count": null,
|
||||
"min": "0",
|
||||
"max": "999"
|
||||
}
|
||||
},
|
||||
"have_bloom_filter": false
|
||||
},
|
||||
{
|
||||
"name": "str",
|
||||
@ -142,7 +146,8 @@
|
||||
"distinct_count": null,
|
||||
"min": "Hello0",
|
||||
"max": "Hello999"
|
||||
}
|
||||
},
|
||||
"have_bloom_filter": false
|
||||
},
|
||||
{
|
||||
"name": "mod",
|
||||
@ -156,7 +161,8 @@
|
||||
"distinct_count": null,
|
||||
"min": "0",
|
||||
"max": "8"
|
||||
}
|
||||
},
|
||||
"have_bloom_filter": false
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -223,3 +229,55 @@
|
||||
}
|
||||
1
|
||||
1
|
||||
{
|
||||
"num_columns": "1",
|
||||
"num_rows": "5",
|
||||
"num_row_groups": "1",
|
||||
"format_version": "1.0",
|
||||
"metadata_size": "267",
|
||||
"total_uncompressed_size": "105",
|
||||
"total_compressed_size": "128",
|
||||
"columns": [
|
||||
{
|
||||
"name": "ipv6",
|
||||
"path": "ipv6",
|
||||
"max_definition_level": "0",
|
||||
"max_repetition_level": "0",
|
||||
"physical_type": "FIXED_LEN_BYTE_ARRAY",
|
||||
"logical_type": "None",
|
||||
"compression": "GZIP",
|
||||
"total_uncompressed_size": "105",
|
||||
"total_compressed_size": "128",
|
||||
"space_saved": "-21.9%",
|
||||
"encodings": [
|
||||
"PLAIN",
|
||||
"BIT_PACKED"
|
||||
]
|
||||
}
|
||||
],
|
||||
"row_groups": [
|
||||
{
|
||||
"num_columns": "1",
|
||||
"num_rows": "5",
|
||||
"total_uncompressed_size": "105",
|
||||
"total_compressed_size": "128",
|
||||
"columns": [
|
||||
{
|
||||
"name": "ipv6",
|
||||
"path": "ipv6",
|
||||
"total_compressed_size": "128",
|
||||
"total_uncompressed_size": "105",
|
||||
"have_statistics": true,
|
||||
"statistics": {
|
||||
"num_values": "5",
|
||||
"null_count": "0",
|
||||
"distinct_count": null,
|
||||
"min": "27 32 150 125 17 250 66 31 157 44 75 218 51 50 19 144 ",
|
||||
"max": "154 31 90 141 15 7 68 47 190 29 121 145 188 162 234 154 "
|
||||
},
|
||||
"have_bloom_filter": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -17,3 +17,4 @@ $CLICKHOUSE_LOCAL -q "select some_column from file('$CURDIR/data_parquet/02718_d
|
||||
$CLICKHOUSE_LOCAL -q "select num_columns from file('$CURDIR/data_parquet/02718_data.parquet', ParquetMetadata, 'num_columns Array(UInt32)')" 2>&1 | grep -c "BAD_ARGUMENTS"
|
||||
|
||||
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/ipv6_bloom_filter.gz.parquet', ParquetMetadata) format JSONEachRow" | python3 -m json.tool
|
||||
|
Loading…
Reference in New Issue
Block a user