From b277a5c943a1cc5e64d78c8dbcc737aad9cf1539 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Tue, 18 Apr 2023 11:07:08 +0000
Subject: [PATCH] Add ParquetMetadata input format to read Parquet file
 metadata

---
 docs/en/interfaces/formats.md                 | 272 +++++++---
 src/Formats/registerFormats.cpp               |   5 +
 .../Impl/ParquetMetadataInputFormat.cpp       | 499 ++++++++++++++++++
 .../Formats/Impl/ParquetMetadataInputFormat.h |  90 ++++
 .../02718_parquet_metadata_format.reference   | 154 ++++++
 .../02718_parquet_metadata_format.sh          |   7 +
 .../data_parquet/02718_data.parquet           | Bin 0 -> 28165 bytes
 7 files changed, 952 insertions(+), 75 deletions(-)
 create mode 100644 src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
 create mode 100644 src/Processors/Formats/Impl/ParquetMetadataInputFormat.h
 create mode 100644 tests/queries/0_stateless/02718_parquet_metadata_format.reference
 create mode 100755 tests/queries/0_stateless/02718_parquet_metadata_format.sh
 create mode 100644 tests/queries/0_stateless/data_parquet/02718_data.parquet

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index b4823d5ebaf..b17c3c14f73 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -10,80 +10,82 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
 
 The supported formats are:
 
-| Format                                                                                    | Input | Output |
-|-------------------------------------------------------------------------------------------|------|--------|
-| [TabSeparated](#tabseparated)                                                             | ✔    | ✔      |
-| [TabSeparatedRaw](#tabseparatedraw)                                                       | ✔    | ✔      |
-| [TabSeparatedWithNames](#tabseparatedwithnames)                                           | ✔    | ✔      |
-| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes)                           | ✔    | ✔      |
-| [TabSeparatedRawWithNames](#tabseparatedrawwithnames)                                     | ✔    | ✔      |
-| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes)                     | ✔    | ✔      |
-| [Template](#format-template)                                                              | ✔    | ✔      |
-| [TemplateIgnoreSpaces](#templateignorespaces)                                             | ✔    | ✗      |
-| [CSV](#csv)                                                                               | ✔    | ✔      |
-| [CSVWithNames](#csvwithnames)                                                             | ✔    | ✔      |
-| [CSVWithNamesAndTypes](#csvwithnamesandtypes)                                             | ✔    | ✔      |
-| [CustomSeparated](#format-customseparated)                                                | ✔    | ✔      |
-| [CustomSeparatedWithNames](#customseparatedwithnames)                                     | ✔    | ✔      |
-| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes)                     | ✔    | ✔      |
-| [SQLInsert](#sqlinsert)                                                                   | ✗    | ✔      |
-| [Values](#data-format-values)                                                             | ✔    | ✔      |
-| [Vertical](#vertical)                                                                     | ✗    | ✔      |
-| [JSON](#json)                                                                             | ✔    | ✔      |
-| [JSONAsString](#jsonasstring)                                                             | ✔    | ✗      |
-| [JSONStrings](#jsonstrings)                                                               | ✔    | ✔      |
-| [JSONColumns](#jsoncolumns)                                                               | ✔    | ✔      |
-| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock))                                         | ✔    | ✔      |
-| [JSONCompact](#jsoncompact)                                                               | ✔    | ✔      |
-| [JSONCompactStrings](#jsoncompactstrings)                                                 | ✗    | ✔      |
-| [JSONCompactColumns](#jsoncompactcolumns)                                                 | ✔    | ✔      |
-| [JSONEachRow](#jsoneachrow)                                                               | ✔    | ✔      |
-| [JSONEachRowWithProgress](#jsoneachrowwithprogress)                                       | ✗    | ✔      |
-| [JSONStringsEachRow](#jsonstringseachrow)                                                 | ✔    | ✔      |
-| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress)                         | ✗    | ✔      |
-| [JSONCompactEachRow](#jsoncompacteachrow)                                                 | ✔    | ✔      |
-| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames)                               | ✔    | ✔      |
-| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes)               | ✔    | ✔      |
-| [JSONCompactStringsEachRow](#jsoncompactstringseachrow)                                   | ✔    | ✔      |
-| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames)                 | ✔    | ✔      |
+| Format                                                                                   | Input | Output |
+|------------------------------------------------------------------------------------------|------|--------|
+| [TabSeparated](#tabseparated)                                                            | ✔    | ✔      |
+| [TabSeparatedRaw](#tabseparatedraw)                                                      | ✔    | ✔      |
+| [TabSeparatedWithNames](#tabseparatedwithnames)                                          | ✔    | ✔      |
+| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes)                          | ✔    | ✔      |
+| [TabSeparatedRawWithNames](#tabseparatedrawwithnames)                                    | ✔    | ✔      |
+| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes)                    | ✔    | ✔      |
+| [Template](#format-template)                                                             | ✔    | ✔      |
+| [TemplateIgnoreSpaces](#templateignorespaces)                                            | ✔    | ✗      |
+| [CSV](#csv)                                                                              | ✔    | ✔      |
+| [CSVWithNames](#csvwithnames)                                                            | ✔    | ✔      |
+| [CSVWithNamesAndTypes](#csvwithnamesandtypes)                                            | ✔    | ✔      |
+| [CustomSeparated](#format-customseparated)                                               | ✔    | ✔      |
+| [CustomSeparatedWithNames](#customseparatedwithnames)                                    | ✔    | ✔      |
+| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes)                    | ✔    | ✔      |
+| [SQLInsert](#sqlinsert)                                                                  | ✗    | ✔      |
+| [Values](#data-format-values)                                                            | ✔    | ✔      |
+| [Vertical](#vertical)                                                                    | ✗    | ✔      |
+| [JSON](#json)                                                                            | ✔    | ✔      |
+| [JSONAsString](#jsonasstring)                                                            | ✔    | ✗      |
+| [JSONStrings](#jsonstrings)                                                              | ✔    | ✔      |
+| [JSONColumns](#jsoncolumns)                                                              | ✔    | ✔      |
+| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)                                         | ✔    | ✔      |
+| [JSONCompact](#jsoncompact)                                                              | ✔    | ✔      |
+| [JSONCompactStrings](#jsoncompactstrings)                                                | ✗    | ✔      |
+| [JSONCompactColumns](#jsoncompactcolumns)                                                | ✔    | ✔      |
+| [JSONEachRow](#jsoneachrow)                                                              | ✔    | ✔      |
+| [PrettyJSONEachRow](#prettyjsoneachrow)                                                  | ✗    | ✔      |
+| [JSONEachRowWithProgress](#jsoneachrowwithprogress)                                      | ✗    | ✔      |
+| [JSONStringsEachRow](#jsonstringseachrow)                                                | ✔    | ✔      |
+| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress)                        | ✗    | ✔      |
+| [JSONCompactEachRow](#jsoncompacteachrow)                                                | ✔    | ✔      |
+| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames)                              | ✔    | ✔      |
+| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes)              | ✔    | ✔      |
+| [JSONCompactStringsEachRow](#jsoncompactstringseachrow)                                  | ✔    | ✔      |
+| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames)                | ✔    | ✔      |
 | [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔    | ✔      |
-| [JSONObjectEachRow](#jsonobjecteachrow)                                                   | ✔    | ✔      |
-| [BSONEachRow](#bsoneachrow)                                                               | ✔    | ✔      |
-| [TSKV](#tskv)                                                                             | ✔    | ✔      |
-| [Pretty](#pretty)                                                                         | ✗    | ✔      |
-| [PrettyNoEscapes](#prettynoescapes)                                                       | ✗    | ✔      |
-| [PrettyMonoBlock](#prettymonoblock)                                                       | ✗    | ✔      |
-| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock)                                     | ✗    | ✔      |
-| [PrettyCompact](#prettycompact)                                                           | ✗    | ✔      |
-| [PrettyCompactNoEscapes](#prettycompactnoescapes)                                         | ✗    | ✔      |
-| [PrettyCompactMonoBlock](#prettycompactmonoblock)                                         | ✗    | ✔      |
-| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock)                       | ✗    | ✔      |
-| [PrettySpace](#prettyspace)                                                               | ✗    | ✔      |
-| [PrettySpaceNoEscapes](#prettyspacenoescapes)                                             | ✗    | ✔      |
-| [PrettySpaceMonoBlock](#prettyspacemonoblock)                                             | ✗    | ✔      |
-| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock)                           | ✗    | ✔      |
-| [Prometheus](#prometheus)                                                                 | ✗    | ✔      |
-| [Protobuf](#protobuf)                                                                     | ✔    | ✔      |
-| [ProtobufSingle](#protobufsingle)                                                         | ✔    | ✔      |
-| [Avro](#data-format-avro)                                                                 | ✔    | ✔      |
-| [AvroConfluent](#data-format-avro-confluent)                                              | ✔    | ✗      |
-| [Parquet](#data-format-parquet)                                                           | ✔    | ✔      |
-| [Arrow](#data-format-arrow)                                                               | ✔    | ✔      |
-| [ArrowStream](#data-format-arrow-stream)                                                  | ✔    | ✔      |
-| [ORC](#data-format-orc)                                                                   | ✔    | ✔      |
-| [RowBinary](#rowbinary)                                                                   | ✔    | ✔      |
-| [RowBinaryWithNames](#rowbinarywithnamesandtypes)                                         | ✔    | ✔      |
-| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes)                                 | ✔    | ✔      |
-| [Native](#native)                                                                         | ✔    | ✔      |
-| [Null](#null)                                                                             | ✗    | ✔      |
-| [XML](#xml)                                                                               | ✗    | ✔      |
-| [CapnProto](#capnproto)                                                                   | ✔    | ✔      |
-| [LineAsString](#lineasstring)                                                             | ✔    | ✔      |
-| [Regexp](#data-format-regexp)                                                             | ✔    | ✗      |
-| [RawBLOB](#rawblob)                                                                       | ✔    | ✔      |
-| [MsgPack](#msgpack)                                                                       | ✔    | ✔      |
-| [MySQLDump](#mysqldump)                                                                   | ✔    | ✗      |
-| [Markdown](#markdown)                                                                     | ✗    | ✔      |
+| [JSONObjectEachRow](#jsonobjecteachrow)                                                  | ✔    | ✔      |
+| [BSONEachRow](#bsoneachrow)                                                              | ✔    | ✔      |
+| [TSKV](#tskv)                                                                            | ✔    | ✔      |
+| [Pretty](#pretty)                                                                        | ✗    | ✔      |
+| [PrettyNoEscapes](#prettynoescapes)                                                      | ✗    | ✔      |
+| [PrettyMonoBlock](#prettymonoblock)                                                      | ✗    | ✔      |
+| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock)                                    | ✗    | ✔      |
+| [PrettyCompact](#prettycompact)                                                          | ✗    | ✔      |
+| [PrettyCompactNoEscapes](#prettycompactnoescapes)                                        | ✗    | ✔      |
+| [PrettyCompactMonoBlock](#prettycompactmonoblock)                                        | ✗    | ✔      |
+| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock)                      | ✗    | ✔      |
+| [PrettySpace](#prettyspace)                                                              | ✗    | ✔      |
+| [PrettySpaceNoEscapes](#prettyspacenoescapes)                                            | ✗    | ✔      |
+| [PrettySpaceMonoBlock](#prettyspacemonoblock)                                            | ✗    | ✔      |
+| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock)                          | ✗    | ✔      |
+| [Prometheus](#prometheus)                                                                | ✗    | ✔      |
+| [Protobuf](#protobuf)                                                                    | ✔    | ✔      |
+| [ProtobufSingle](#protobufsingle)                                                        | ✔    | ✔      |
+| [Avro](#data-format-avro)                                                                | ✔    | ✔      |
+| [AvroConfluent](#data-format-avro-confluent)                                             | ✔    | ✗      |
+| [Parquet](#data-format-parquet)                                                          | ✔    | ✔      |
+| [ParquetMetadata](#data-format-parquet-metadata)                                         | ✔    | ✗      |
+| [Arrow](#data-format-arrow)                                                              | ✔    | ✔      |
+| [ArrowStream](#data-format-arrow-stream)                                                 | ✔    | ✔      |
+| [ORC](#data-format-orc)                                                                  | ✔    | ✔      |
+| [RowBinary](#rowbinary)                                                                  | ✔    | ✔      |
+| [RowBinaryWithNames](#rowbinarywithnamesandtypes)                                        | ✔    | ✔      |
+| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes)                                | ✔    | ✔      |
+| [Native](#native)                                                                        | ✔    | ✔      |
+| [Null](#null)                                                                            | ✗    | ✔      |
+| [XML](#xml)                                                                              | ✗    | ✔      |
+| [CapnProto](#capnproto)                                                                  | ✔    | ✔      |
+| [LineAsString](#lineasstring)                                                            | ✔    | ✔      |
+| [Regexp](#data-format-regexp)                                                            | ✔    | ✗      |
+| [RawBLOB](#rawblob)                                                                      | ✔    | ✔      |
+| [MsgPack](#msgpack)                                                                      | ✔    | ✔      |
+| [MySQLDump](#mysqldump)                                                                  | ✔    | ✗      |
+| [Markdown](#markdown)                                                                    | ✗    | ✔      |
 
 
 You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section.
@@ -915,8 +917,6 @@ Example:
 {"num":44,"str":"hello","arr":[0,1,2,3]}
 ```
 
-While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
-
 ## JSONStringsEachRow {#jsonstringseachrow}
 
 Differs from JSONEachRow only in that data fields are output in strings, not in typed JSON values.
@@ -2003,6 +2003,128 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
 - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
 - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
 
+## ParquetMetadata {#data-format-parquet-metadata}
+
+Special format for reading Parquet file metadata (https://parquet.apache.org/docs/file-format/metadata/). It always outputs one row with the following structure/content:
+- num_columns - the number of columns
+- num_rows - the total number of rows
+- num_row_groups - the total number of row groups
+- format_version - parquet format version, always 1.0 or 2.6
+- total_byte_size - total bytes size of the data, calculated as the sum of total_byte_size from all row groups
+- total_compressed_size - total compressed bytes size of the data, calculated as the sum of total_compressed_size from all row groups
+- columns - the list of columns metadata with the next structure:
+  - name - column name
+  - path - column path (differs from name for nested column)
+  - max_definition_level - maximum definition level
+  - max_repetition_level - maximum repetition level
+  - physical_type - column physical type
+  - logical_type - column logical type
+  - compression - compression used for this column
+  - encodings - the list of encodings used for this column
+- row_groups - the list of row groups metadata with the next structure:
+  - num_columns - the number of columns in the row group
+  - num_rows - the number of rows in the row group
+  - total_byte_size - total bytes size of the row group
+  - total_compressed_size - total compressed bytes size of the row group
+  - columns - the list of column chunks metadata with the next structure:
+     - name - column name
+     - path - column path
+     - total_compressed_size - total compressed bytes size of the column
+     - total_uncompressed_size - total uncompressed bytes size of the column
+     - have_statistics - bool flag that indicates if column chunk metadata contains column statistics
+     - statistics - column chunk statistics (all fields are NULL if have_statistics = false) with the next structure:
+        - num_values - the number of non-null values in the column chunk
+        - null_count - the number of NULL values in the column chunk
+        - distinct_count - the number of distinct values in the column chunk
+        - min - the minimum value of the column chunk
+        - max - the maximum value of the column chunk
+
+Example:
+
+```sql
+SELECT * FROM file(data.parquet, ParquetMetadata) format PrettyJSONEachRow
+```
+
+```json
+{
+    "num_columns": "2",
+    "num_rows": "1000000",
+    "num_row_groups": "16",
+    "format_version": "2.6",
+    "total_byte_size": "10001981",
+    "total_compressed_size": "6011415",
+    "columns": [
+        {
+            "name": "number",
+            "path": "number",
+            "max_definition_level": "0",
+            "max_repetition_level": "0",
+            "physical_type": "INT64",
+            "logical_type": "Int(bitWidth=64, isSigned=false)",
+            "compression": "LZ4",
+            "encodings": [
+                "RLE_DICTIONARY",
+                "PLAIN",
+                "RLE"
+            ]
+        },
+        {
+            "name": "'Hello'",
+            "path": "'Hello'",
+            "max_definition_level": "0",
+            "max_repetition_level": "0",
+            "physical_type": "BYTE_ARRAY",
+            "logical_type": "None",
+            "compression": "LZ4",
+            "encodings": [
+                "RLE_DICTIONARY",
+                "PLAIN",
+                "RLE"
+            ]
+        }
+    ],
+    "row_groups": [
+        {
+            "num_columns": "2",
+            "num_rows": "65409",
+            "total_byte_size": "654367",
+            "total_compressed_size": "393396",
+            "columns": [
+                {
+                    "name": "number",
+                    "path": "number",
+                    "total_compressed_size": "393329",
+                    "total_uncompressed_size": "654302",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "65409",
+                        "null_count": "0",
+                        "distinct_count": null,
+                        "min": "0",
+                        "max": "65408"
+                    }
+                },
+                {
+                    "name": "'Hello'",
+                    "path": "'Hello'",
+                    "total_compressed_size": "67",
+                    "total_uncompressed_size": "65",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "65409",
+                        "null_count": "0",
+                        "distinct_count": null,
+                        "min": "Hello",
+                        "max": "Hello"
+                    }
+                }
+            ]
+        },
+        ...
+    ]
+}
+```
+
 ## Arrow {#data-format-arrow}
 
 [Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats.
diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp
index 285e234167b..29ef46f330f 100644
--- a/src/Formats/registerFormats.cpp
+++ b/src/Formats/registerFormats.cpp
@@ -100,6 +100,7 @@ void registerInputFormatJSONAsString(FormatFactory & factory);
 void registerInputFormatJSONAsObject(FormatFactory & factory);
 void registerInputFormatLineAsString(FormatFactory & factory);
 void registerInputFormatMySQLDump(FormatFactory & factory);
+void registerInputFormatParquetMetadata(FormatFactory & factory);
 
 #if USE_HIVE
 void registerInputFormatHiveText(FormatFactory & factory);
@@ -140,6 +141,7 @@ void registerValuesSchemaReader(FormatFactory & factory);
 void registerTemplateSchemaReader(FormatFactory & factory);
 void registerMySQLSchemaReader(FormatFactory & factory);
 void registerBSONEachRowSchemaReader(FormatFactory & factory);
+void registerParquetMetadataSchemaReader(FormatFactory & factory);
 
 void registerFileExtensions(FormatFactory & factory);
 
@@ -240,6 +242,8 @@ void registerFormats()
     registerInputFormatCapnProto(factory);
     registerInputFormatMySQLDump(factory);
 
+    registerInputFormatParquetMetadata(factory);
+
     registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory);
     registerNonTrivialPrefixAndSuffixCheckerJSONAsString(factory);
     registerNonTrivialPrefixAndSuffixCheckerJSONAsObject(factory);
@@ -274,6 +278,7 @@ void registerFormats()
     registerTemplateSchemaReader(factory);
     registerMySQLSchemaReader(factory);
     registerBSONEachRowSchemaReader(factory);
+    registerParquetMetadataSchemaReader(factory);
 }
 
 }
diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
new file mode 100644
index 00000000000..c384c3811db
--- /dev/null
+++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
@@ -0,0 +1,499 @@
+#include "ParquetMetadataInputFormat.h"
+
+#if USE_PARQUET
+
+#include <Formats/FormatFactory.h>
+#include <IO/ReadBufferFromMemory.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnTuple.h>
+#include <Columns/ColumnNullable.h>
+#include <Core/NamesAndTypes.h>
+#include <arrow/api.h>
+#include <arrow/status.h>
+#include <parquet/file_reader.h>
+#include <parquet/statistics.h>
+#include "ArrowBufferedStreams.h"
+#include <DataTypes/NestedUtils.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+static NamesAndTypesList getHeaderForParquetMetadata()
+{
+    /// Small factories so that the nested structure below stays readable.
+    /// Each call creates a fresh type object, exactly one per field.
+    auto uint64_type = [] { return std::make_shared<DataTypeUInt64>(); };
+    auto string_type = [] { return std::make_shared<DataTypeString>(); };
+    auto nullable = [](DataTypePtr nested) { return std::make_shared<DataTypeNullable>(std::move(nested)); };
+    auto array_of = [](DataTypePtr nested) { return std::make_shared<DataTypeArray>(std::move(nested)); };
+    auto named_tuple = [](DataTypes elems, Names elem_names)
+    {
+        return std::make_shared<DataTypeTuple>(std::move(elems), std::move(elem_names));
+    };
+
+    /// Element type of the top-level "columns" array.
+    auto schema_column_type = named_tuple(
+        DataTypes{
+            string_type(),
+            string_type(),
+            uint64_type(),
+            uint64_type(),
+            string_type(),
+            string_type(),
+            string_type(),
+            array_of(string_type())},
+        Names{
+            "name",
+            "path",
+            "max_definition_level",
+            "max_repetition_level",
+            "physical_type",
+            "logical_type",
+            "compression",
+            "encodings"});
+
+    /// Type of the "statistics" field; every element is Nullable.
+    auto statistics_type = named_tuple(
+        DataTypes{nullable(uint64_type()), nullable(uint64_type()), nullable(uint64_type()), nullable(string_type()), nullable(string_type())},
+        Names{"num_values", "null_count", "distinct_count", "min", "max"});
+
+    /// Element type of the "columns" array nested inside each row group.
+    auto column_chunk_type = named_tuple(
+        DataTypes{string_type(), string_type(), uint64_type(), uint64_type(), DataTypeFactory::instance().get("Bool"), statistics_type},
+        Names{"name", "path", "total_compressed_size", "total_uncompressed_size", "have_statistics", "statistics"});
+
+    /// Element type of the top-level "row_groups" array.
+    auto row_group_type = named_tuple(
+        DataTypes{uint64_type(), uint64_type(), uint64_type(), uint64_type(), array_of(column_chunk_type)},
+        Names{"num_columns", "num_rows", "total_byte_size", "total_compressed_size", "columns"});
+
+    return NamesAndTypesList{
+        {"num_columns", uint64_type()},
+        {"num_rows", uint64_type()},
+        {"num_row_groups", uint64_type()},
+        {"format_version", string_type()},
+        {"total_byte_size", uint64_type()},
+        {"total_compressed_size", uint64_type()},
+        {"columns", array_of(schema_column_type)},
+        {"row_groups", array_of(row_group_type)}};
+}
+
+/// Validates the requested header against the fixed ParquetMetadata structure.
+/// Any subset of the known columns is accepted, but every requested column must have
+/// exactly the expected type. Throws BAD_ARGUMENTS otherwise.
+void checkHeader(const Block & header)
+{
+    std::unordered_map<String, DataTypePtr> name_to_type;
+    for (const auto & [name, type] : getHeaderForParquetMetadata())
+        name_to_type[name] = type;
+
+    for (const auto & [name, type] : header.getNamesAndTypes())
+    {
+        auto it = name_to_type.find(name);
+        if (it == name_to_type.end())
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "Unexpected column: {}. ParquetMetadata format allows only the next columns: num_columns, num_rows, num_row_groups, "
+                "format_version, total_byte_size, total_compressed_size, columns, row_groups", name);
+
+        if (!it->second->equals(*type))
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "Unexpected type {} for column {}. Expected type: {}",
+                type->getName(), name, it->second->getName());
+    }
+}
+
+/// Reads the Parquet file metadata: the buffer is passed through asArrowFile
+/// and the resulting file object is handed to parquet::ReadMetaData.
+static std::shared_ptr<parquet::FileMetaData>
+getFileMetadata(ReadBuffer & in, const FormatSettings & format_settings, std::atomic<int> & is_stopped)
+{
+    return parquet::ReadMetaData(
+        asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES));
+}
+
+ParquetMetadataInputFormat::ParquetMetadataInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_)
+    : IInputFormat(std::move(header_), in_), format_settings(format_settings_)
+{
+    checkHeader(getPort().getHeader()); /// Fail at construction (BAD_ARGUMENTS) if the header has unknown columns or mismatched types.
+}
+
+/// Produces the whole result in one chunk: a single row with the metadata columns requested in the header.
+/// The Parquet metadata is read from `in` on the first call; once `done` is set, every later call
+/// returns an empty chunk.
+Chunk ParquetMetadataInputFormat::generate()
+{
+    Chunk chunk;
+    if (done)
+        return chunk;
+
+    const auto file_metadata = getFileMetadata(*in, format_settings, is_stopped);
+    /// Needed by several of the columns below.
+    const int32_t row_groups_count = file_metadata->num_row_groups();
+
+    /// All columns the format can produce; a requested column is recognised by its position in this list.
+    const auto known_columns = getHeaderForParquetMetadata();
+    const auto known_names = known_columns.getNames();
+    const auto known_types = known_columns.getTypes();
+
+    /// Columns are added to the chunk in the order of the requested header.
+    for (const auto & requested : getPort().getHeader().getNames())
+    {
+        if (requested == known_names[0]) /// num_columns
+        {
+            auto col = known_types[0]->createColumn();
+            assert_cast<ColumnUInt64 &>(*col).insertValue(file_metadata->num_columns());
+            chunk.addColumn(std::move(col));
+        }
+        else if (requested == known_names[1]) /// num_rows
+        {
+            auto col = known_types[1]->createColumn();
+            assert_cast<ColumnUInt64 &>(*col).insertValue(file_metadata->num_rows());
+            chunk.addColumn(std::move(col));
+        }
+        else if (requested == known_names[2]) /// num_row_groups
+        {
+            auto col = known_types[2]->createColumn();
+            assert_cast<ColumnUInt64 &>(*col).insertValue(row_groups_count);
+            chunk.addColumn(std::move(col));
+        }
+        else if (requested == known_names[3]) /// format_version
+        {
+            const bool is_v1 = file_metadata->version() == parquet::ParquetVersion::PARQUET_1_0;
+            const String version = is_v1 ? "1.0" : "2.6";
+            auto col = known_types[3]->createColumn();
+            assert_cast<ColumnString &>(*col).insertData(version.data(), version.size());
+            chunk.addColumn(std::move(col));
+        }
+        else if (requested == known_names[4]) /// total_byte_size
+        {
+            size_t bytes = 0;
+            for (int32_t group = 0; group != row_groups_count; ++group)
+                bytes += file_metadata->RowGroup(group)->total_byte_size();
+
+            auto col = known_types[4]->createColumn();
+            assert_cast<ColumnUInt64 &>(*col).insertValue(bytes);
+            chunk.addColumn(std::move(col));
+        }
+        else if (requested == known_names[5]) /// total_compressed_size
+        {
+            size_t compressed_bytes = 0;
+            for (int32_t group = 0; group != row_groups_count; ++group)
+                compressed_bytes += file_metadata->RowGroup(group)->total_compressed_size();
+
+            auto col = known_types[5]->createColumn();
+            assert_cast<ColumnUInt64 &>(*col).insertValue(compressed_bytes);
+            chunk.addColumn(std::move(col));
+        }
+        else if (requested == known_names[6]) /// columns
+        {
+            auto col = known_types[6]->createColumn();
+            fillColumnsMetadata(file_metadata, col);
+            chunk.addColumn(std::move(col));
+        }
+        else if (requested == known_names[7]) /// row_groups
+        {
+            auto col = known_types[7]->createColumn();
+            fillRowGroupsMetadata(file_metadata, col);
+            chunk.addColumn(std::move(col));
+        }
+    }
+
+    /// The single row has been produced; nothing more to return on later calls.
+    done = true;
+    return chunk;
+}
+
+/// Appends one array element to `column`: a tuple per schema column (name, path, levels, types, compression, encodings).
+void ParquetMetadataInputFormat::fillColumnsMetadata(const std::shared_ptr<parquet::FileMetaData> & metadata, MutableColumnPtr & column)
+{
+    auto & array_column = assert_cast<ColumnArray &>(*column);
+    auto & tuple_column = assert_cast<ColumnTuple &>(array_column.getData());
+    for (int32_t i = 0; i != metadata->num_columns(); ++i)
+    {
+        const auto * column_info = metadata->schema()->Column(i);
+        /// name
+        String column_name = column_info->name();
+        assert_cast<ColumnString &>(tuple_column.getColumn(0)).insertData(column_name.data(), column_name.size());
+        /// path
+        String path = column_info->path()->ToDotString();
+        assert_cast<ColumnString &>(tuple_column.getColumn(1)).insertData(path.data(), path.size());
+        /// max_definition_level
+        assert_cast<ColumnUInt64 &>(tuple_column.getColumn(2)).insertValue(column_info->max_definition_level());
+        /// max_repetition_level
+        assert_cast<ColumnUInt64 &>(tuple_column.getColumn(3)).insertValue(column_info->max_repetition_level());
+        /// physical_type
+        std::string_view physical_type = magic_enum::enum_name(column_info->physical_type());
+        assert_cast<ColumnString &>(tuple_column.getColumn(4)).insertData(physical_type.data(), physical_type.size());
+        /// logical_type
+        String logical_type = column_info->logical_type()->ToString();
+        assert_cast<ColumnString &>(tuple_column.getColumn(5)).insertData(logical_type.data(), logical_type.size());
+        /// compression (element 6) and encodings (element 7) are reported from the first row group only.
+        if (metadata->num_row_groups() > 0)
+        {
+            auto column_chunk_metadata = metadata->RowGroup(0)->ColumnChunk(i);
+            std::string_view compression = magic_enum::enum_name(column_chunk_metadata->compression());
+            assert_cast<ColumnString &>(tuple_column.getColumn(6)).insertData(compression.data(), compression.size());
+            auto & encodings_array_column = assert_cast<ColumnArray &>(tuple_column.getColumn(7));
+            auto & encodings_nested_column = assert_cast<ColumnString &>(encodings_array_column.getData());
+            for (auto codec : column_chunk_metadata->encodings())
+            {
+                auto codec_name = magic_enum::enum_name(codec);
+                encodings_nested_column.insertData(codec_name.data(), codec_name.size());
+            }
+            encodings_array_column.getOffsets().push_back(encodings_nested_column.size());
+        }
+        else
+        {
+            String compression = "NONE"; /// No row groups: there is no column chunk to take compression/encodings from.
+            assert_cast<ColumnString &>(tuple_column.getColumn(6)).insertData(compression.data(), compression.size());
+            tuple_column.getColumn(7).insertDefault(); /// Must be element 7 so every tuple element gets exactly one row.
+        }
+    }
+    array_column.getOffsets().push_back(tuple_column.size());
+}
+
+/// Appends one array element to `column` containing a tuple of metadata for every row group of the file
+/// (num_columns, num_rows, total_byte_size, total_compressed_size and the nested column chunks).
+void ParquetMetadataInputFormat::fillRowGroupsMetadata(const std::shared_ptr<parquet::FileMetaData> & metadata, MutableColumnPtr & column)
+{
+    auto & groups_array = assert_cast<ColumnArray &>(*column);
+    auto & groups_tuple = assert_cast<ColumnTuple &>(groups_array.getData());
+    const int32_t groups_count = metadata->num_row_groups();
+    for (int32_t group = 0; group != groups_count; ++group)
+    {
+        const auto group_metadata = metadata->RowGroup(group);
+        assert_cast<ColumnUInt64 &>(groups_tuple.getColumn(0)).insertValue(group_metadata->num_columns()); /// num_columns
+        assert_cast<ColumnUInt64 &>(groups_tuple.getColumn(1)).insertValue(group_metadata->num_rows()); /// num_rows
+        assert_cast<ColumnUInt64 &>(groups_tuple.getColumn(2)).insertValue(group_metadata->total_byte_size()); /// total_byte_size
+        assert_cast<ColumnUInt64 &>(groups_tuple.getColumn(3)).insertValue(group_metadata->total_compressed_size()); /// total_compressed_size
+        /// columns: nested array with one tuple per column chunk of this row group
+        fillColumnChunksMetadata(group_metadata, groups_tuple.getColumn(4));
+    }
+    /// Close the single array element that holds all row groups.
+    groups_array.getOffsets().push_back(groups_tuple.size());
+}
+
+/// Appends one array element to `column` with a tuple of metadata for every column chunk of the given row group.
+void ParquetMetadataInputFormat::fillColumnChunksMetadata(const std::unique_ptr<parquet::RowGroupMetaData> & row_group_metadata, IColumn & column)
+{
+    auto & chunks_array = assert_cast<ColumnArray &>(column);
+    auto & chunks_tuple = assert_cast<ColumnTuple &>(chunks_array.getData());
+    for (int32_t idx = 0; idx != row_group_metadata->num_columns(); ++idx)
+    {
+        const auto chunk_metadata = row_group_metadata->ColumnChunk(idx);
+        const auto * descriptor = row_group_metadata->schema()->Column(idx);
+        /// name
+        const String & chunk_column_name = descriptor->name();
+        assert_cast<ColumnString &>(chunks_tuple.getColumn(0)).insertData(chunk_column_name.data(), chunk_column_name.size());
+        /// path
+        const String dotted_path = descriptor->path()->ToDotString();
+        assert_cast<ColumnString &>(chunks_tuple.getColumn(1)).insertData(dotted_path.data(), dotted_path.size());
+        assert_cast<ColumnUInt64 &>(chunks_tuple.getColumn(2)).insertValue(chunk_metadata->total_compressed_size()); /// total_compressed_size
+        assert_cast<ColumnUInt64 &>(chunks_tuple.getColumn(3)).insertValue(chunk_metadata->total_uncompressed_size()); /// total_uncompressed_size
+        /// have_statistics, followed by the statistics tuple itself (a default row when there are none)
+        const bool stats_present = chunk_metadata->is_stats_set();
+        assert_cast<ColumnUInt8 &>(chunks_tuple.getColumn(4)).insertValue(stats_present);
+        if (!stats_present)
+            chunks_tuple.getColumn(5).insertDefault();
+        else
+            fillColumnStatistics(chunk_metadata->statistics(), chunks_tuple.getColumn(5), descriptor->type_length());
+    }
+    chunks_array.getOffsets().push_back(chunks_tuple.size());
+}
+
+template <typename T> /// T is a parquet type tag such as parquet::Int32Type; its min/max must be accepted by std::to_string.
+static void getMinMaxNumberStatistics(const std::shared_ptr<parquet::Statistics> & statistics, String & min, String & max)
+{
+    const auto & numeric_statistics = dynamic_cast<const parquet::TypedStatistics<T> &>(*statistics); /// throws std::bad_cast on mismatch
+    min = std::to_string(numeric_statistics.min());
+    max = std::to_string(numeric_statistics.max());
+}
+
+void ParquetMetadataInputFormat::fillColumnStatistics(const std::shared_ptr<parquet::Statistics> & statistics, IColumn & column, int32_t type_length)
+{
+    auto & statistics_column = assert_cast<ColumnTuple &>(column);
+    /// Appends exactly one row to each tuple element: num_values (0), null_count (1), distinct_count (2), min (3), max (4).
+    auto & nullable_num_values = assert_cast<ColumnNullable &>(statistics_column.getColumn(0));
+    assert_cast<ColumnUInt64 &>(nullable_num_values.getNestedColumn()).insertValue(statistics->num_values());
+    nullable_num_values.getNullMapData().push_back(0);
+
+    /// null_count: written only when the statistics carry it, otherwise a default row is inserted.
+    if (statistics->HasNullCount())
+    {
+        auto & nullable_null_count = assert_cast<ColumnNullable &>(statistics_column.getColumn(1));
+        assert_cast<ColumnUInt64 &>(nullable_null_count.getNestedColumn()).insertValue(statistics->null_count());
+        nullable_null_count.getNullMapData().push_back(0);
+    }
+    else
+    {
+        statistics_column.getColumn(1).insertDefault();
+    }
+
+    /// distinct_count: written only when the statistics carry a usable value (see the workaround below).
+    if (statistics->HasDistinctCount())
+    {
+        auto & nullable_distinct_count = assert_cast<ColumnNullable &>(statistics_column.getColumn(2));
+        size_t distinct_count = statistics->distinct_count();
+        /// It can be set but still be 0 because of a bug: https://github.com/apache/arrow/issues/27644
+        /// If we see distinct_count = 0 with non 0 values in chunk, set it to NULL.
+        if (distinct_count == 0 && statistics->num_values() != 0)
+        {
+            nullable_distinct_count.insertDefault();
+        }
+        else
+        {
+            assert_cast<ColumnUInt64 &>(nullable_distinct_count.getNestedColumn()).insertValue(distinct_count);
+            nullable_distinct_count.getNullMapData().push_back(0);
+        }
+    }
+    else
+    {
+        statistics_column.getColumn(2).insertDefault();
+    }
+
+    /// min/max: converted to strings according to the physical type; default rows if absent or the type is UNDEFINED.
+    if (statistics->HasMinMax() && statistics->physical_type() != parquet::Type::type::UNDEFINED)
+    {
+        String min;
+        String max;
+        switch (statistics->physical_type())
+        {
+            case parquet::Type::type::FLOAT:
+            {
+                getMinMaxNumberStatistics<parquet::FloatType>(statistics, min, max);
+                break;
+            }
+            case parquet::Type::type::DOUBLE:
+            {
+                getMinMaxNumberStatistics<parquet::DoubleType>(statistics, min, max);
+                break;
+            }
+            case parquet::Type::type::INT32:
+            {
+                getMinMaxNumberStatistics<parquet::Int32Type>(statistics, min, max);
+                break;
+            }
+            case parquet::Type::type::INT64:
+            {
+                getMinMaxNumberStatistics<parquet::Int64Type>(statistics, min, max);
+                break;
+            }
+            case parquet::Type::type::INT96:
+            {
+                const auto & int96_statistics = dynamic_cast<parquet::TypedStatistics<parquet::Int96Type> &>(*statistics);
+                min = parquet::Int96ToString(int96_statistics.min());
+                max = parquet::Int96ToString(int96_statistics.max());
+                break;
+            }
+            case parquet::Type::type::BOOLEAN:
+            {
+                getMinMaxNumberStatistics<parquet::BooleanType>(statistics, min, max); /// goes through std::to_string like the numeric types
+                break;
+            }
+            case parquet::Type::type::BYTE_ARRAY:
+            {
+                const auto & byte_array_statistics = dynamic_cast<parquet::ByteArrayStatistics &>(*statistics);
+                min = parquet::ByteArrayToString(byte_array_statistics.min());
+                max = parquet::ByteArrayToString(byte_array_statistics.max());
+                break;
+            }
+            case parquet::Type::type::FIXED_LEN_BYTE_ARRAY:
+            {
+                const auto & flba_statistics = dynamic_cast<parquet::FLBAStatistics &>(*statistics);
+                min = parquet::FixedLenByteArrayToString(flba_statistics.min(), type_length); /// type_length is supplied by the caller
+                max = parquet::FixedLenByteArrayToString(flba_statistics.max(), type_length);
+                break;
+            }
+            case parquet::Type::type::UNDEFINED:
+            {
+                break; /// unreachable: UNDEFINED is excluded by the enclosing condition
+            }
+        }
+
+        auto & nullable_min = assert_cast<ColumnNullable &>(statistics_column.getColumn(3));
+        assert_cast<ColumnString &>(nullable_min.getNestedColumn()).insertData(min.data(), min.size());
+        nullable_min.getNullMapData().push_back(0);
+        auto & nullable_max = assert_cast<ColumnNullable &>(statistics_column.getColumn(4));
+        assert_cast<ColumnString &>(nullable_max.getNestedColumn()).insertData(max.data(), max.size());
+        nullable_max.getNullMapData().push_back(0);
+    }
+    else
+    {
+        statistics_column.getColumn(3).insertDefault();
+        statistics_column.getColumn(4).insertDefault();
+    }
+}
+
+void ParquetMetadataInputFormat::resetParser()
+{
+    IInputFormat::resetParser();
+    done = false; /// Let generate() produce the metadata row again.
+}
+
+/// Only forwards the buffer to the base class.
+ParquetMetadataSchemaReader::ParquetMetadataSchemaReader(ReadBuffer & in_) : ISchemaReader(in_)
+{
+}
+
+NamesAndTypesList ParquetMetadataSchemaReader::readSchema()
+{
+    return getHeaderForParquetMetadata(); /// The schema comes from this helper alone; nothing is read from the buffer here.
+}
+
+/// Registers the "ParquetMetadata" input format and marks it as supporting subcolumns and reading a subset of columns.
+void registerInputFormatParquetMetadata(FormatFactory & factory)
+{
+    const String format_name = "ParquetMetadata";
+
+    auto create_format = [](ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, const FormatSettings & settings)
+    {
+        return std::make_shared<ParquetMetadataInputFormat>(buf, sample, settings);
+    };
+
+    factory.registerInputFormat(format_name, create_format);
+    factory.markFormatSupportsSubcolumns(format_name);
+    factory.markFormatSupportsSubsetOfColumns(format_name);
+}
+
+/// Registers the schema reader for "ParquetMetadata"; the format settings argument is ignored by the creator.
+void registerParquetMetadataSchemaReader(FormatFactory & factory)
+{
+    auto create_reader = [](ReadBuffer & buf, const FormatSettings &)
+    {
+        return std::make_shared<ParquetMetadataSchemaReader>(buf);
+    };
+
+    factory.registerSchemaReader("ParquetMetadata", create_reader);
+}
+
+}
+
+#else
+
+namespace DB
+{
+class FormatFactory;
+
+/// Fallback branch of the preprocessor conditional: both registration hooks are intentionally empty.
+void registerInputFormatParquetMetadata(FormatFactory &) {}
+
+void registerParquetMetadataSchemaReader(FormatFactory &) {}
+}
+
+#endif
diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h
new file mode 100644
index 00000000000..3561ec6dae8
--- /dev/null
+++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h
@@ -0,0 +1,90 @@
+#pragma once
+#include "config.h"
+#if USE_PARQUET
+
+#include <Processors/Formats/IInputFormat.h>
+#include <Processors/Formats/ISchemaReader.h>
+#include <Formats/FormatSettings.h>
+#include <parquet/metadata.h>
+
+namespace parquet::arrow { class FileReader; }
+
+namespace arrow { class Buffer; class RecordBatchReader;}
+
+namespace DB
+{
+
+/* Special format that always returns just one row with Parquet file metadata (see https://parquet.apache.org/docs/file-format/metadata/).
+ * The result row has the following structure:
+ * num_columns - the number of columns
+ * num_rows - the total number of rows
+ * num_row_groups - the total number of row groups
+ * format_version - parquet format version, always 1.0 or 2.6
+ * total_byte_size - total bytes size of the data, calculated as the sum of total_byte_size from all row groups
+ * total_compressed_size - total compressed bytes size of the data, calculated as the sum of total_compressed_size from all row groups
+ * columns - the list of columns metadata with the next structure:
+ *     name - column name
+ *     path - column path (differs from name for nested column)
+ *     max_definition_level - maximum definition level
+ *     max_repetition_level - maximum repetition level
+ *     physical_type - column physical type
+ *     logical_type - column logical type
+ *     compression - compression used for this column
+ *     encodings - the list of encodings used for this column
+ * row_groups - the list of row groups metadata with the next structure:
+ *     num_columns - the number of columns in the row group
+ *     num_rows - the number of rows in the row group
+ *     total_byte_size - total bytes size of the row group
+ *     total_compressed_size - total compressed bytes size of the row group
+ *     columns - the list of column chunks metadata with the next structure:
+ *         name - column name
+ *         path - column path
+ *         total_compressed_size - total compressed bytes size of the column
+ *         total_uncompressed_size - total uncompressed bytes size of the column
+ *         have_statistics - bool flag that indicates if column chunk metadata contains column statistics
+ *         statistics - column chunk statistics (all fields are NULL if have_statistics = false) with the next structure:
+ *             num_values - the number of non-null values in the column chunk
+ *             null_count - the number of NULL values in the column chunk
+ *             distinct_count - the number of distinct values in the column chunk
+ *             min - the minimum value of the column chunk
+ *             max - the maximum value of the column chunk
+ * */
+
+class ParquetMetadataInputFormat : public IInputFormat
+{
+public:
+    ParquetMetadataInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_);
+
+    String getName() const override { return "ParquetMetadataInputFormat"; }
+
+    void resetParser() override; /// Also clears `done`, so the metadata row can be produced again.
+
+private:
+    Chunk generate() override; /// Returns the single metadata row once, then empty chunks.
+
+    void onCancel() override
+    {
+        is_stopped = 1; /// The flag is handed to asArrowFile() by reference; presumably it lets a pending read stop early.
+    }
+
+    void fillColumnsMetadata(const std::shared_ptr<parquet::FileMetaData> & metadata, MutableColumnPtr & column); /// top-level `columns`
+    void fillRowGroupsMetadata(const std::shared_ptr<parquet::FileMetaData> & metadata, MutableColumnPtr & column); /// top-level `row_groups`
+    void fillColumnChunksMetadata(const std::unique_ptr<parquet::RowGroupMetaData> & row_group_metadata, IColumn & column); /// `columns` of one row group
+    void fillColumnStatistics(const std::shared_ptr<parquet::Statistics> & statistics, IColumn & column, int32_t type_length); /// `statistics` of one column chunk
+
+    const FormatSettings format_settings;
+    bool done = false; /// Set after the metadata row has been returned.
+    std::atomic<int> is_stopped{0}; /// Becomes 1 in onCancel().
+};
+
+class ParquetMetadataSchemaReader : public ISchemaReader
+{
+public:
+    explicit ParquetMetadataSchemaReader(ReadBuffer & in_); /// explicit: a ReadBuffer must not convert to a schema reader implicitly
+
+    NamesAndTypesList readSchema() override; /// Returns the list of columns the ParquetMetadata format can produce.
+};
+
+}
+
+#endif
diff --git a/tests/queries/0_stateless/02718_parquet_metadata_format.reference b/tests/queries/0_stateless/02718_parquet_metadata_format.reference
new file mode 100644
index 00000000000..5ec8b097cea
--- /dev/null
+++ b/tests/queries/0_stateless/02718_parquet_metadata_format.reference
@@ -0,0 +1,154 @@
+{
+    "num_columns": "3",
+    "num_rows": "100000",
+    "num_row_groups": "2",
+    "format_version": "2.6",
+    "total_byte_size": "314147",
+    "total_compressed_size": "27081",
+    "columns": [
+        {
+            "name": "number",
+            "path": "number",
+            "max_definition_level": "0",
+            "max_repetition_level": "0",
+            "physical_type": "INT32",
+            "logical_type": "Int(bitWidth=16, isSigned=false)",
+            "compression": "LZ4",
+            "encodings": [
+                "RLE_DICTIONARY",
+                "PLAIN",
+                "RLE"
+            ]
+        },
+        {
+            "name": "str",
+            "path": "str",
+            "max_definition_level": "0",
+            "max_repetition_level": "0",
+            "physical_type": "BYTE_ARRAY",
+            "logical_type": "None",
+            "compression": "LZ4",
+            "encodings": [
+                "RLE_DICTIONARY",
+                "PLAIN",
+                "RLE"
+            ]
+        },
+        {
+            "name": "mod",
+            "path": "mod",
+            "max_definition_level": "1",
+            "max_repetition_level": "0",
+            "physical_type": "INT32",
+            "logical_type": "Int(bitWidth=8, isSigned=false)",
+            "compression": "LZ4",
+            "encodings": [
+                "RLE_DICTIONARY",
+                "PLAIN",
+                "RLE"
+            ]
+        }
+    ],
+    "row_groups": [
+        {
+            "num_columns": "3",
+            "num_rows": "65409",
+            "total_byte_size": "200527",
+            "total_compressed_size": "14406",
+            "columns": [
+                {
+                    "name": "number",
+                    "path": "number",
+                    "total_compressed_size": "7070",
+                    "total_uncompressed_size": "85956",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "65409",
+                        "null_count": "0",
+                        "distinct_count": null,
+                        "min": "0",
+                        "max": "999"
+                    }
+                },
+                {
+                    "name": "str",
+                    "path": "str",
+                    "total_compressed_size": "7093",
+                    "total_uncompressed_size": "93853",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "65409",
+                        "null_count": "0",
+                        "distinct_count": null,
+                        "min": "Hello0",
+                        "max": "Hello999"
+                    }
+                },
+                {
+                    "name": "mod",
+                    "path": "mod",
+                    "total_compressed_size": "243",
+                    "total_uncompressed_size": "20718",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "32705",
+                        "null_count": "32704",
+                        "distinct_count": null,
+                        "min": "0",
+                        "max": "8"
+                    }
+                }
+            ]
+        },
+        {
+            "num_columns": "3",
+            "num_rows": "34591",
+            "total_byte_size": "113620",
+            "total_compressed_size": "12675",
+            "columns": [
+                {
+                    "name": "number",
+                    "path": "number",
+                    "total_compressed_size": "6223",
+                    "total_uncompressed_size": "47365",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "34591",
+                        "null_count": "0",
+                        "distinct_count": null,
+                        "min": "0",
+                        "max": "999"
+                    }
+                },
+                {
+                    "name": "str",
+                    "path": "str",
+                    "total_compressed_size": "6247",
+                    "total_uncompressed_size": "55262",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "34591",
+                        "null_count": "0",
+                        "distinct_count": null,
+                        "min": "Hello0",
+                        "max": "Hello999"
+                    }
+                },
+                {
+                    "name": "mod",
+                    "path": "mod",
+                    "total_compressed_size": "205",
+                    "total_uncompressed_size": "10993",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "17295",
+                        "null_count": "17296",
+                        "distinct_count": null,
+                        "min": "0",
+                        "max": "8"
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/queries/0_stateless/02718_parquet_metadata_format.sh b/tests/queries/0_stateless/02718_parquet_metadata_format.sh
new file mode 100755
index 00000000000..f785abde368
--- /dev/null
+++ b/tests/queries/0_stateless/02718_parquet_metadata_format.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/02718_data.parquet', ParquetMetadata) format JSONEachRow" | python3 -m json.tool 
diff --git a/tests/queries/0_stateless/data_parquet/02718_data.parquet b/tests/queries/0_stateless/data_parquet/02718_data.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6a930689c83eefbb100f6210a3bab04c7094bfd7
GIT binary patch
literal 28165
zcmeI*30%$j{>Smt;uNCLNr*z7LejpFJ%kXl%rG;d2q{a}F*1b4V2pieOh`g#X0nbY
zq_QO>A*5YG$iAHa>vv{c^PQP{r~iGp_s%>X=RD?_^ZC8M51oF$&d;gO?{mhtV?PIl
zt|F<O;zBzgMTVh5R$C@}q*AFK$hc`i8#>TM7096neN;s?REGg-pe77qgj(S5lhuJS
z6fi+un4%u)qXEp&5RG6C3p7R(G=(LspoBGS&<xGd0=8%gJG6p59N-8iIKu_5aDzKM
z&>C&viMD8m_UM3)=!DMjLKk#JH+Z8vdY~tK&<nlM2fpZwe&`QB48TAP!eD%W4>1Ho
z@ew}8C-BEG495rrAP^%l3PBi+F&K+rj6(=QF&+~z5t9&x$(Vwvn1<<?flo0LpJ5il
zF&m#_4(1{P^DrL^5Q#6a5MN>u7Gnv%!cu&VWmt|CScz3wjWzfNYq1Vd_!jH&9ip)T
z8?gy7*o;_gK^(SX8@3}JJFpYGkbv*;1Aaszc4H6rVjuS70Di(j9KvB7K@yJQ7>*+u
zCy;_vq~Rn^;WW<REYfif890v%xQI+#LKZF~8&_}@*N}tjxPhC<#VzC^9|b5x5sFcQ
z+bG2yl;JM!;Xcao01xp96?lv%c#3Cuju&`|S9py#P`%cX&H>WxftD8M0O&v$RUn5R
z^idVnP#p%SftoOc5o)0}>cAKZn4m69Q4jUe0A^^2Mlgp38lwrC!V*?c!WuSchURDi
zTeO57TEQL;aD)?_;R08<!5toGjW+N^TeL%abU;URLT7lP3%a5kywM#!&=WrBh2H1`
zU-U&k^oJh?U?2uzFh0PC7=oer2p{7U_+uD`V*~;ah>;kDAdJQsj72cUAq1fqj|rHF
zNeIJaOu<x4!*tBRr<jS)Fbm<Bjn6R$a}j}gn2!aB#1~kIFR=)Vu>@aXDZa)sEXNA0
z#44=D8hnGbScfQli}m;p(b#~E*n}8tMl7}<4qLGe+Yyf)*oj?8!1wq8KOzyku?Ksx
z5BqTdKj9z_;V_OM2}f}Z$B~Q^NI@#na1y6*8fS18={ScBoW})RL?$jF3zw0NE4Yel
z$ia2oz)j@h7V?mf0u-VM#VEmTl;RG`a2NM*ALV#}hj@evJjN3|#WOs|3%tZDyv7@-
zApPU71#Rd+7gZpK9`sQa)leM<sDYX=gb`|?HtN6_3YefSOi>T@(Ew&>h(<7n1sbCX
zn!*xRP{JBEXolu!0b8_$9a_O24se7MoZ$jjxWOGBXpJ`TL|e2&dvriYbV6r%p$odA
z8@$mSJ<t<A=!M?s17Gw-KlFzm24EltVK6?xhZusP_y`~46Zm5ohGPT*5Qvc&g&>T^
z7>q?Q#vufu7>@~<h)D>;WK6+SOv7}{z^9mr&oB$&n2pad2XhgDd6<s{h{P9Ih%d1S
zi?IY>VJW`GGAzdmti&p;#u|KswOEHJe2ew?4$;_vjo5@3Y(^}$AP!rx4cifq9oUIo
zNWk~_0Y4%UyRip*u@C!k06*a%4&gA4APGlt49Ag-6G%ZS(r^-|a2jWD7U?*L44lUW
zTtp@=Aq$t0jVri{YskTM+`vub;ui9dj{+2;2*oJDZIt2;%5WF=a3AG(fQNX53OvRW
zJjF9S#|yl~E4;=Vs2*1@=imROS~@)cp#xo1fgF0!M^#iqbr_%qYQhjksD;|717j#)
zg1Rt8J=8}7n4uvW!5kK7j3#IbOISe(YuKO}nxh45(GqrO1$#Ka5l(Q13tZs_cX*&R
z+Q1WS(GKm=0Ugl^o#BNp=!$OeMtAf;Pxzo0dZQ0~(HH&DAAT5sff$6r_y8Yb2!`S#
ze2h=vk6{>&5ePsaMq(6#FdAbp7Qq;Y5QJhpCSW2aAq<l-1yeB%(=h{|VkSPrEQDh=
zKF1u)MFi$yJ{BMnUtl4=#3C%l5`2ZF_!`Tw94oLAtFRht@D0{t9is3p*5f-wV*@r~
z6JoF#vDkt*Y{fQgM?7|5Cw3tL-{S}Th(zqh9_+<F?8gE8go8MQ!#IK@9K|slM>0+z
z1*u5GNu0uIoWWV7;~X+@9v5&CnYe^3Tt+so;3}>m2iI`}H<625$U{B~P>3QFLm^XK
zRaI5Iva?mpd84N=Qpgo@Ggq07t`wT;@(;Lldm_D#zgjI^rfZQT(>3-_($cq#PSQ4T
z&P~!W^0GLpYwYKLw2Em!^ijE4Sng3hiwKKj`j#vFk5#pai9S}%HZk{Db^BzC;|9)|
z{>N*$7epVg=~<C`+^~ag<76W*<6+6Qye&5**Y<I~m0ZWytMLhAKfhro6oUgcoG=+0
zcI!l4|A@vZrU5I4rPK?G*^p8{IPq3WgYn6YQ_aFMhov^0R<I$p(aeflsphkFo1|Gp
z7!OZt9BH{Rt;r(iytJlEy_%e~T;VtTq}7^$jVF~+VR<L5qa&J}vWZzS{8Y2Jn2o2J
z$0z2UYLSrK<g{&K=J3-k_Z4hBZFjID?{upq-KJ;klZ{85aY(b=bjI<FbN(5p46mkV
zoiqJLoOQ_#*mTx4CoKQ0TW&<tboYW4Bho!eVm76>E=$Z$Z&RM!^qgl!=7@7`pA~F6
z*X~tC{<-$DDwY`?bQJ*^9rdkZGCCQ!6l8QZ>SB4`%XmP*`7Wk`G3UFQO)fa!&0?PA
z1#ioh0T;U4Y>v6m!*+MUg`V~&EHC;vUkbR`%e^q>VsFpK1sD5tsA84r>!k?H?CWi{
zIkTUSOJQbz-!4{{{QL$4UK%hsaPy^sLnjwr8stCEDr<1S%D}7-f;Ml?`Y?ERVb+lG
zC#)_H4Z9S0`J-utn=gMn^Ks$jPi9w9X8T7dMrIF-w2I9hzR0C0d&JT%$}0gY28_HC
zxF#_6%E+k6MOQ{e&r@Cvidi}G>gc%5u~)~$?=HGJHsOTwT5#f}k=MrUD~!Dsa`17{
zwa}z0);Z&o6{B({q*-mrnRv#fIA>Bu7whX`nFB^$pPU`I<@%JI$;H>F=FYReF|A<b
zs2kHuHgCBxqilEajZe!@Sl^skacR`e&%&P-Zn-(@)#Kuu;WD{R?rdF?pxn>(m2tUq
z3|vcc=NfgjxfNkNFzD7i(~)tv=9^6^xwXJzzD-`F<*J~(FKlAt@)p|eDarfNKE)<~
zk#km1{$lr{xcnuaPfGH?>L71cu++<Bbivo&%B=;<d|YoAEcfl&tZ;?jz|n;(2anuZ
zxN7K>+l8zB=Qk@_6R>J@(KkV{TZ`5P@3~#HZhT6!;;68!(Z%0RE81GTe&&<g#ox`A
zH!q2fFd0*_AyT=mWaA>&(vnR}yEeZavtr<w+nd*n+;%%QYD($tEz$Fvm&V1c8dJJ8
zE_Pe#w)j1zrP~uyn%{{}%o=lN$G)O%cXl3pQhH}slDtJ(LbA!&vhUNB+sl49<9es;
z$BeEm?j~jq9D8?n_Q>sb_vB2ub9Zm<{1*526|5S2Z+}Vb_In4)_T0JmQ+Y~@`v)ts
z#@;{ltZ4iF!>^v)xqn2aXIq}6TQ|7;sJ?Z4`7r~xvhw3b-E1Eu8xIP8aKdy{{DTy;
zsbvpREf&~5OtV}a{P3jBmiULKZ1<KuJZ+z9`{<1G<={tW-HYQNrF%XtdvvaYUdxIM
zue#$Z&U;($sJP(ccDLf9Z?~3@GyMjQdwgl|s2z{9hEBcv__F_kmQS(+R*!pfC1}fz
zCs%{_-hFayd}_<5IboN_J-t4yc*oNlGoRjldULj(-Lu??x*^YQMOyECmbb|5-n0Cr
z-RzzhtQZvXyl~B^ozIJ+rrvvA9KFEqMM=!+kQcY(w(NXS8o&46i#rLab}!2kFNeIm
zyRUfX%X<f(-g|jJNw3wb^5nXquO6gX?|Sv{jNAQJk21QodR>t@DD?H?>`}X3KgpST
z|Mk<{1+CsZD_9-+=6SxZd3b@YN;O2DuWKmX8Oj~=@U*HbRbP35t~|BsuL>!Nuc=aX
zW2KgQomefcqf)gctFF$(D$G=+aw1cxGqD=nP^D@{X0Fb}%Cm_|Wlq*ior#r&wMwNR
zb5dtwrE9BFRVQn!&cv$3UZv6^>!!}c>Wqu(2~l5lBoqm)Rb?d7o-z*?iWTiuc_e}A
zL@1_tsj^8z)rn9H?XF5EnXXRW9s%?44!u>$Y@Vy${Ewp`eMZavsskj8)rq*@^#`jG
zh*qm3b+NS0bT<5a9Lk2OVktJLQ=!Nhrivoju1<s^ainS)$!>Kb6l=z+B1w*@6QP(n
zUNwv4q&g7_|H-N_l8fp@D7<E<#*kcBCqiKpuJR`-RwqJX5TP1K@<5#kMR}ykhvcO?
z5sJ*ks!k*-xjGW7ealqtQf8#i-W`gl)hat_dwunG;aJR$Qdvror8*K1M!*JDeJQh3
zXX2B<J62^RW$x-sUGYqiHvRmxu-&ebOL-@CE>=bfFQv#w9SKE6;sYres7{0;dw;Q%
z_^T74IC%KFl#Ed)+)<wZ%)_IPUz9e7sW%H>azxrmDVe2Cq=K(nI&?pu2ZGKXk<v(Y
zDil5!c1y`Jbs`k@m$yqvlsXX#<D3mr601&x;#ux$DM?T#LXlIrSV|746QM{dohv2D
z>O?4F?n}=SRk}Li&M!M5vLaO4oUPt073_#~>0~H94TGNtN@3pb3*R?Gq@+xp{I-w>
zKbJ>V7%r$#<*E0w{NQ0j+wBUucdR^MgM4<BJZiOk-!ggTVtIL_+#tfxYlfEGf3jiO
zP;<HdV8af*wd6y)8xH9{_ubk0*Y|`YB^2WKrdWyJn_?xNpv6i&L5r1mf)*?B1T9wL
z30kbwXW`Q>L(Ie9-ZSLRQw~cv!_=x;=7CgKq~DzWd{ZWem?^aDxfsdh^-Sd<lgAAY
zoT#X2!s~sL$RpKE4u&XSwNvW<_Emp9Te^DE@7nla#2@^upsOg@ucdhSuFbbu&Svkx
z;NVdEYBGfb9N`3KxWE-|aIYq_^w0~CDIM&q%S;^L2q!qh1+H*|dv%#Sgt(HNSXUX&
ztg8$c)>Vcp>ng*Ib(P`Hy2|ii-Jfvv@X)`ZQ`g==R@VWJaDp>j;0iam8_4vgv80k5
zSyvfOtg8%X)>VcJ>ng*Qb(P`9y2@~8U1fN%?oa3?=qerUr^)^wAIjO;3A+C)){f4(
zT0H9i)#v|bpV!*a&0FVvKQ1S}PL2QQK2SQj+f=FaPIl&iX#6GifzsK71Lwj4(fI4^
z16PiR#$RV2xN$r*{yO`>o#UbL*VzXi9FM;NrH6AulfT0Q{dXUuf`i~29O~iW@z2_R
z`syF%Szs#v`Nw*GzSlJWG|fLv^H0<K(=`7y&A)e@K%w6!6hgmGD1?5WP-vQeQY%I1
z_X&m2?-L54-zOA8zfUN{cAr>jnt$*5UYh0~-;|o>pLw|0?h`Aq-6vLJyHBjdcAr>@
z?LM)3+w!Yy^Y8EtEi;9|FWo<#NnvkWe+Ld!Hz^HKmhRM2W`;`5zu&+1|HBrbjl$MP
zQB6JNS6)SGFiNdK=|VxN7pM<@wn#gstw`4vE-9R?(y`H%;(uIJ7&^&<YeWZV$yyk8
zkVzL9{;OLJ^T75NjZ`{XI^mIBy_#;Ubv`o4%ehJYT8j;3>&(M<%QjR=l$nQ5*V@vD
z&Ea<1>#I!H>chJ5huSgP$uh$|I{)gg|4o)>J}J_6xo>0;Uzai|pB?zNeX27qG~k!s
zY0hzNlOi4E##}As)tg#ouD|^X|8VayCYkfDfBJSrN~LP88Dh~4vDD|O)Bt8^h(<7n
z1sbCXn!*xRP{JBEXolu!0b8_$9a_O24se7MoZ$jjxWOGBXpJ`TL|e2&dvriYbV6r%
zp$odA8@$mSJ<t<A=!M?s17Gw-KlFzm24EltVK6?xhZusP_y`~46Zm5ohGPT*5Qvc&
zg&>T^7>q?Q#vufu7>@~<h)D>;WK6+SOv7}{z^9mr&oB$&n2pad2XhgDd6<s{h{P9I
zh%d1Si?IY>VJW`GGAzdmti&p;#u|KswOEHJe2ew?4$;_vjo5@3Y(^}$AP!rx4cifq
z9oUIoNWk~_0Y4%UyRip*u@C!k06*a%4&gA4APGlt49Ag-6G%ZS(r^-|a2jWD7U?*L
z44lUWTtp@=Aq$t0jVri{YskTM+`vub;ui9dj{+2;2*oJDZIt2;%5WF=a3AG(fQNX5
z3OvRWJjF9S#|yl~E4;=Vs37B7cr9o{2fC;NIrN~9s;GwQFhC8|gdvPj3$;-P#!$cn
zbzzEnsE-CPLqjxzIV{i^P0$pUu!0iSut764M+?}ZCG5}&_HckBoZt)>xWWzY@IY&{
zfhXFc9onM<I-(Og!wX%|72V*C?&yJ@@If#1Mj!a1FZ!WB{4f9mF$jb40Y1bK48=$I
z7@xo&!!R5p5P(38#3%$|G{#^of-w#u2*r3zz(h<!7$##1reYeVV+KCOOnin}2*+%E
zjyaf%2+YHLEI=f_z(Rb9MOcg__zFw$HI`vHR$wJoVKvs^8?41TMB!Vk$9IUv25iJ8
z#9%XGu?2D1if!1Ac<jJV>_P&*#}D`siP()j*o%GGj|2D#2XP38aRf;?ieosAWSl?>
zQjvy}IEB+VgR@A-Ib`5GF5n_EaS2(tjBH%NRa`?3uHy!7A{V!ihkO*E5Jf14blajQ
z&6;uZl}THq7o^gwQ|U#i9;8>ERZ$JqVSpN_2}2m67HXpojG=%D>cSNDP#+CohK6Va
zb6B7;nxH8xVFe|uVS{F9jux;*OW2_m?BM`MIKde%aD^M(;epm@15dO?JG4g!bVMg~
zh8Mb^E4slO-O&R*;e%f2jXv;2U-UzN_+bDBVh{%71AK@f7>bYZF+PDmhG95HAOL|F
ziBSl`XpF&F1Y;aR5Q_1bfQgudFigf2OvN-z#|(UmnfMH|5RTdS9CI)i5txVhSb#`;
zfra=Ii?A3=@D-NgYb?WZtiVdF!fLF+H&}~xh{Cs6kM9tT4cLfHh{0yWVhiH172B{K
z@z{Z#*o6dqk00<O60sY5uowHV9|!Og4&o3F;|P*)6vuEJ$vA-&q#_L`aSEq#24|6u
zbI8DXT);(S;u5lO8QHjktGI?7T*nREL@sV25BVrSA&O9p65K{9?w|~JaS!)Vjt6*%
zN2tJKJi${u!*jgAOT5BsynzZbE&hWxbfAkWkV6mpsETT+4g=IcO&G!mwNM*%U<?IJ
zP#31Chx%v$Gc-gan8N~%(F9Fl2`eaJ4I4B=bF_dhTEY&kU=Ig4!U@iBfh*kL4iB_O
z8+f8E+Mzu<pd&h=GrZ6RUC|BR=#C!f2_N)AZ}fpL`l28D!w>vuu6dsc_Q7hb0l(PD
ziK(K<Y;3DI9Hpz7mDJ2iYGx%hvyz%wNzJU}+sR4w@8<v8vywvZR2Y{O3SnGQD1JXJ
z`F3<t7>*PQVK`DKgyBe`5QZa#LKuz|3Sl@>D1`o~Pze1|p%D6`LLu}=g+gqPij~+b
z6)SOCQmn*rNwE^gC55W*FXNH}eHm1@lqoAtXus=Byc_J|#fiznf+{BKhbc2RX)Ch|
zXikXl=<tWF32C<L&AZ-2iaxKZ)fa&|+j|(utQ{&jPgI#1A!$uzW`v|Qm6;Ke)>LLj
zNLs^;kkZvbSNl(gg(@>7B(15;kdU<IPlkk~+yqJ&X;P??AKqS;Z|(A@V_fAjX>dk!
z8UMe(4>XtD|8*|8myU(zlKa2bKG0lp|JS+XUOE<kgG=tE$H-;(HpYLK2m9|nPA{&h
zmnSw*Xy)Yq$=rNpX5^(cm6?&3*8Itgyp-!b)UWHH@#}q{ba1WKNd4j0TmO6aIcxn7
z$KL<$pIhnZ#<cGH-hrCg`2Xl^yfha7-sa;K%*`|5<!tZ%&zcp#zsgfH;iY+dN6p(i
zYTn*a^Y)IKw|D&Ydp!z6T|yxYbqR$q)Fl)`mqI9nE`?ACT?(Plv@FcSg)W6qh)oKy
z5}OoaB{nI<N^DYymC&RJ7pmX3EUw1s{B9aW>Q@}RDK|+UrrbGSN4Y3f`#q1Nq;Jzw
z%wZhmS66LBFVX((I7+ixTuQd>kH=9CnfFy08LM=fzvuCjNb_c1CYHYHi5BIu9qgOm
z44O#$CU3iz$yyy{(l#CCv#mO2uD^Zkf7H#X=i?$Xl}{QG6gX~}LeAUxSyBG{)_+!}
zKJj&oYSZ0O7_e0u36gScQ)#xv%7jo~3QdyM8q~9`=lQ<gJy7{x2a%NTZz{)R{UKB7
z%@CFPCW~%q8FQVN=lkreDc|q6{CoT^F^Rih!-Q`zsY7ToKSj$}TV~T_{IH1=CI^PK
w95H^p#gxE_lSYSzSUA|Z*g3T9-@Bu4fA2ni^*qhg*T3>nJ$2#B{{Zg)0C2T2@c;k-

literal 0
HcmV?d00001