Merge branch 'master' into in-memory-compression

2024-11-21 23:21:59 +00:00 · 2021-02-19 12:01:33 +03:00 · 2021-02-19 12:01:33 +03:00 · a9d9a6d56f
commit a9d9a6d56f
parent 8e8a81883e 4650dcdbb0
86 changed files with 4685 additions and 3087 deletions
--- a/cmake/find/ccache.cmake
+++ b/cmake/find/ccache.cmake
@ -37,15 +37,13 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
      #
      # - 4.0+ ccache always includes this environment variable into the hash
      #   of the manifest, which do not allow to use previous cache,
-      # - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
+      # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
      #
      # So for:
-      # - 4.2+ time_macros sloppiness is used,
+      # - 4.2+ does not require any sloppiness
      # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
      if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
-         message(STATUS "Use time_macros sloppiness for ccache")
-         set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
-         set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
+         message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
      elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
         message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
         set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@ -342,9 +342,10 @@ function run_tests

        # JSON functions
        01666_blns
+        01674_htm_xml_coarse_parse
    )

-    time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
+    (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

    # substr is to remove semicolon after test name
    readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@ -13,6 +13,7 @@ RUN apt-get update -y \
            ncdu \
            netcat-openbsd \
            openssl \
+            protobuf-compiler \
            python3 \
            python3-lxml \
            python3-requests \
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@ -25,6 +25,7 @@ The following actions are supported:
 -   [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
 -   [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
 -   [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
+-   [RENAME COLUMN](#alter_rename-column) — Renames an existing column.

 These actions are described in detail below.

@ -183,6 +184,22 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;

 - [REMOVE TTL](ttl.md).

+## RENAME COLUMN {#alter_rename-column}
+
+Renames an existing column.
+
+Syntax:
+
+```sql
+ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name;
+```
+
+**Example**
+
+```sql
+ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
+```
+
 ## Limitations {#alter-query-limitations}

 The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@ -474,4 +474,19 @@ ColumnPtr ColumnFixedString::compress() const
        });
 }

+
+void ColumnFixedString::alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size)
+{
+    size_t length = data.size() - old_size;
+    if (length < n)
+    {
+        data.resize_fill(old_size + n);
+    }
+    else if (length > n)
+    {
+        data.resize_assume_reserved(old_size);
+        throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
+    }
+}
+
 }
--- a/src/Columns/ColumnFixedString.h
+++ b/src/Columns/ColumnFixedString.h
@ -184,7 +184,8 @@ public:
    const Chars & getChars() const { return chars; }

    size_t getN() const { return n; }
+
+    static void alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size);
 };

-
 }
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@ -404,7 +404,7 @@
    M(432, UNKNOWN_CODEC) \
    M(433, ILLEGAL_CODEC_PARAMETER) \
    M(434, CANNOT_PARSE_PROTOBUF_SCHEMA) \
-    M(435, NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD) \
+    M(435, NO_COLUMN_SERIALIZED_TO_REQUIRED_PROTOBUF_FIELD) \
    M(436, PROTOBUF_BAD_CAST) \
    M(437, PROTOBUF_FIELD_NOT_REPEATED) \
    M(438, DATA_TYPE_CANNOT_BE_PROMOTED) \
@ -412,7 +412,7 @@
    M(440, INVALID_LIMIT_EXPRESSION) \
    M(441, CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING) \
    M(442, BAD_DATABASE_FOR_TEMPORARY_TABLE) \
-    M(443, NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA) \
+    M(443, NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS) \
    M(444, UNKNOWN_PROTOBUF_FORMAT) \
    M(445, CANNOT_MPROTECT) \
    M(446, FUNCTION_NOT_ALLOWED) \
@ -535,6 +535,8 @@
    M(566, CANNOT_RMDIR) \
    M(567, DUPLICATED_PART_UUIDS) \
    M(568, RAFT_ERROR) \
+    M(569, MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD) \
+    M(570, DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD) \
    \
    M(999, KEEPER_EXCEPTION) \
    M(1000, POCO_EXCEPTION) \
--- a/src/DataTypes/DataTypeAggregateFunction.cpp
+++ b/src/DataTypes/DataTypeAggregateFunction.cpp
@ -10,8 +10,6 @@
 #include <Common/AlignedBuffer.h>

 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <DataTypes/DataTypeAggregateFunction.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <IO/WriteBufferFromString.h>
@ -261,45 +259,6 @@ void DataTypeAggregateFunction::deserializeTextCSV(IColumn & column, ReadBuffer
 }


-void DataTypeAggregateFunction::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(
-        protobuf.writeAggregateFunction(function, assert_cast<const ColumnAggregateFunction &>(column).getData()[row_num]));
-}
-
-void DataTypeAggregateFunction::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    ColumnAggregateFunction & column_concrete = assert_cast<ColumnAggregateFunction &>(column);
-    Arena & arena = column_concrete.createOrGetArena();
-    size_t size_of_state = function->sizeOfData();
-    AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData());
-    function->create(place);
-    try
-    {
-        if (!protobuf.readAggregateFunction(function, place, arena))
-        {
-            function->destroy(place);
-            return;
-        }
-        auto & container = column_concrete.getData();
-        if (allow_add_row)
-        {
-            container.emplace_back(place);
-            row_added = true;
-        }
-        else
-            container.back() = place;
-    }
-    catch (...)
-    {
-        function->destroy(place);
-        throw;
-    }
-}
-
 MutableColumnPtr DataTypeAggregateFunction::createColumn() const
 {
    return ColumnAggregateFunction::create(function);
--- a/src/DataTypes/DataTypeAggregateFunction.h
+++ b/src/DataTypes/DataTypeAggregateFunction.h
@ -59,8 +59,6 @@ public:
    void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;

    MutableColumnPtr createColumn() const override;

--- a/src/DataTypes/DataTypeArray.cpp
+++ b/src/DataTypes/DataTypeArray.cpp
@ -6,7 +6,6 @@
 #include <IO/WriteBufferFromString.h>

 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeFactory.h>
@ -522,55 +521,6 @@ void DataTypeArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, cons
 }


-void DataTypeArray::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
-    const ColumnArray::Offsets & offsets = column_array.getOffsets();
-    size_t offset = offsets[row_num - 1] + value_index;
-    size_t next_offset = offsets[row_num];
-    const IColumn & nested_column = column_array.getData();
-    size_t i;
-    for (i = offset; i < next_offset; ++i)
-    {
-        size_t element_stored = 0;
-        nested->serializeProtobuf(nested_column, i, protobuf, element_stored);
-        if (!element_stored)
-            break;
-    }
-    value_index += i - offset;
-}
-
-
-void DataTypeArray::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    ColumnArray & column_array = assert_cast<ColumnArray &>(column);
-    IColumn & nested_column = column_array.getData();
-    ColumnArray::Offsets & offsets = column_array.getOffsets();
-    size_t old_size = offsets.size();
-    try
-    {
-        bool nested_row_added;
-        do
-            nested->deserializeProtobuf(nested_column, protobuf, true, nested_row_added);
-        while (nested_row_added && protobuf.canReadMoreValues());
-        if (allow_add_row)
-        {
-            offsets.emplace_back(nested_column.size());
-            row_added = true;
-        }
-        else
-            offsets.back() = nested_column.size();
-    }
-    catch (...)
-    {
-        offsets.resize_assume_reserved(old_size);
-        nested_column.popBack(nested_column.size() - offsets.back());
-        throw;
-    }
-}
-
-
 MutableColumnPtr DataTypeArray::createColumn() const
 {
    return ColumnArray::create(nested->createColumn(), ColumnArray::ColumnOffsets::create());
--- a/src/DataTypes/DataTypeArray.h
+++ b/src/DataTypes/DataTypeArray.h
@ -85,15 +85,6 @@ public:
            DeserializeBinaryBulkStatePtr & state,
            SubstreamsCache * cache) const override;

-    void serializeProtobuf(const IColumn & column,
-                           size_t row_num,
-                           ProtobufWriter & protobuf,
-                           size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column,
-                             ProtobufReader & protobuf,
-                             bool allow_add_row,
-                             bool & row_added) const override;
-
    MutableColumnPtr createColumn() const override;

    Field getDefault() const override;
--- a/src/DataTypes/DataTypeDate.cpp
+++ b/src/DataTypes/DataTypeDate.cpp
@ -4,8 +4,6 @@
 #include <Columns/ColumnsNumber.h>
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>

 #include <Common/assert_cast.h>

@ -81,30 +79,6 @@ void DataTypeDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
    assert_cast<ColumnUInt16 &>(column).getData().push_back(value.getDayNum());
 }

-void DataTypeDate::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeDate(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num])));
-}
-
-void DataTypeDate::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    DayNum d;
-    if (!protobuf.readDate(d))
-        return;
-
-    auto & container = assert_cast<ColumnUInt16 &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(d);
-        row_added = true;
-    }
-    else
-        container.back() = d;
-}
-
 bool DataTypeDate::equals(const IDataType & rhs) const
 {
    return typeid(rhs) == typeid(*this);
--- a/src/DataTypes/DataTypeDate.h
+++ b/src/DataTypes/DataTypeDate.h
@ -24,8 +24,6 @@ public:
    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;

    bool canBeUsedAsVersion() const override { return true; }
    bool canBeInsideNullable() const override { return true; }
--- a/src/DataTypes/DataTypeDateTime.cpp
+++ b/src/DataTypes/DataTypeDateTime.cpp
@ -5,8 +5,6 @@
 #include <common/DateLUT.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/Operators.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromString.h>
@ -164,32 +162,6 @@ void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c
    assert_cast<ColumnType &>(column).getData().push_back(x);
 }

-void DataTypeDateTime::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-
-    // On some platforms `time_t` is `long` but not `unsigned int` (UInt32 that we store in column), hence static_cast.
-    value_index = static_cast<bool>(protobuf.writeDateTime(static_cast<time_t>(assert_cast<const ColumnType &>(column).getData()[row_num])));
-}
-
-void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    time_t t;
-    if (!protobuf.readDateTime(t))
-        return;
-
-    auto & container = assert_cast<ColumnType &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(t);
-        row_added = true;
-    }
-    else
-        container.back() = t;
-}
-
 bool DataTypeDateTime::equals(const IDataType & rhs) const
 {
    /// DateTime with different timezones are equal, because:
--- a/src/DataTypes/DataTypeDateTime.h
+++ b/src/DataTypes/DataTypeDateTime.h
@ -68,8 +68,6 @@ public:
    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;

    bool canBeUsedAsVersion() const override { return true; }
    bool canBeInsideNullable() const override { return true; }
--- a/src/DataTypes/DataTypeDateTime64.cpp
+++ b/src/DataTypes/DataTypeDateTime64.cpp
@ -6,8 +6,6 @@
 #include <common/DateLUT.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/Operators.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromString.h>
@ -182,30 +180,6 @@ void DataTypeDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
    assert_cast<ColumnType &>(column).getData().push_back(x);
 }

-void DataTypeDateTime64::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeDateTime64(assert_cast<const ColumnType &>(column).getData()[row_num], scale));
-}
-
-void DataTypeDateTime64::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    DateTime64 t = 0;
-    if (!protobuf.readDateTime64(t, scale))
-        return;
-
-    auto & container = assert_cast<ColumnType &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(t);
-        row_added = true;
-    }
-    else
-        container.back() = t;
-}
-
 bool DataTypeDateTime64::equals(const IDataType & rhs) const
 {
    if (const auto * ptype = typeid_cast<const DataTypeDateTime64 *>(&rhs))
--- a/src/DataTypes/DataTypeDateTime64.h
+++ b/src/DataTypes/DataTypeDateTime64.h
@ -42,8 +42,6 @@ public:
    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;

    bool equals(const IDataType & rhs) const override;

--- a/src/DataTypes/DataTypeDecimalBase.cpp
+++ b/src/DataTypes/DataTypeDecimalBase.cpp
@ -4,8 +4,6 @@
 #include <Common/typeid_cast.h>
 #include <Core/DecimalFunctions.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
--- a/src/DataTypes/DataTypeEnum.cpp
+++ b/src/DataTypes/DataTypeEnum.cpp
@ -1,7 +1,5 @@
 #include <IO/WriteBufferFromString.h>
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <DataTypes/DataTypeEnum.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Parsers/IAST.h>
@ -254,34 +252,6 @@ void DataTypeEnum<Type>::deserializeBinaryBulk(
    x.resize(initial_size + size / sizeof(FieldType));
 }

-template <typename Type>
-void DataTypeEnum<Type>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    protobuf.prepareEnumMapping(values);
-    value_index = static_cast<bool>(protobuf.writeEnum(assert_cast<const ColumnType &>(column).getData()[row_num]));
-}
-
-template<typename Type>
-void DataTypeEnum<Type>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    protobuf.prepareEnumMapping(values);
-    row_added = false;
-    Type value;
-    if (!protobuf.readEnum(value))
-        return;
-
-    auto & container = assert_cast<ColumnType &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(value);
-        row_added = true;
-    }
-    else
-        container.back() = value;
-}
-
 template <typename Type>
 Field DataTypeEnum<Type>::getDefault() const
 {
--- a/src/DataTypes/DataTypeEnum.h
+++ b/src/DataTypes/DataTypeEnum.h
@ -132,9 +132,6 @@ public:
    void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const override;
    void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, const size_t limit, const double avg_value_size_hint) const override;

-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
    MutableColumnPtr createColumn() const override { return ColumnType::create(); }

    Field getDefault() const override;
--- a/src/DataTypes/DataTypeFixedString.cpp
+++ b/src/DataTypes/DataTypeFixedString.cpp
@ -2,8 +2,6 @@
 #include <Columns/ColumnConst.h>

 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <DataTypes/DataTypeFixedString.h>
 #include <DataTypes/DataTypeFactory.h>

@ -25,7 +23,6 @@ namespace DB
 namespace ErrorCodes
 {
    extern const int CANNOT_READ_ALL_DATA;
-    extern const int TOO_LARGE_STRING_SIZE;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int UNEXPECTED_AST_STRUCTURE;
 }
@ -127,16 +124,7 @@ static inline void alignStringLength(const DataTypeFixedString & type,
                                     ColumnFixedString::Chars & data,
                                     size_t string_start)
 {
-    size_t length = data.size() - string_start;
-    if (length < type.getN())
-    {
-        data.resize_fill(string_start + type.getN());
-    }
-    else if (length > type.getN())
-    {
-        data.resize_assume_reserved(string_start);
-        throw Exception("Too large value for " + type.getName(), ErrorCodes::TOO_LARGE_STRING_SIZE);
-    }
+    ColumnFixedString::alignStringLength(data, type.getN(), string_start);
 }

 template <typename Reader>
@ -215,53 +203,6 @@ void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr
 }


-void DataTypeFixedString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
-    value_index = static_cast<bool>(protobuf.writeString(StringRef(pos, n)));
-}
-
-
-void DataTypeFixedString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    auto & column_string = assert_cast<ColumnFixedString &>(column);
-    ColumnFixedString::Chars & data = column_string.getChars();
-    size_t old_size = data.size();
-    try
-    {
-        if (allow_add_row)
-        {
-            if (protobuf.readStringInto(data))
-            {
-                alignStringLength(*this, data, old_size);
-                row_added = true;
-            }
-            else
-                data.resize_assume_reserved(old_size);
-        }
-        else
-        {
-            ColumnFixedString::Chars temp_data;
-            if (protobuf.readStringInto(temp_data))
-            {
-                alignStringLength(*this, temp_data, 0);
-                column_string.popBack(1);
-                old_size = data.size();
-                data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end());
-            }
-        }
-    }
-    catch (...)
-    {
-        data.resize_assume_reserved(old_size);
-        throw;
-    }
-}
-
-
 MutableColumnPtr DataTypeFixedString::createColumn() const
 {
    return ColumnFixedString::create(n);
--- a/src/DataTypes/DataTypeFixedString.h
+++ b/src/DataTypes/DataTypeFixedString.h
@ -66,9 +66,6 @@ public:
    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;

-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
    MutableColumnPtr createColumn() const override;

    Field getDefault() const override;
--- a/src/DataTypes/DataTypeLowCardinality.cpp
+++ b/src/DataTypes/DataTypeLowCardinality.cpp
@ -808,31 +808,6 @@ void DataTypeLowCardinality::serializeTextXML(const IColumn & column, size_t row
    serializeImpl(column, row_num, &IDataType::serializeAsTextXML, ostr, settings);
 }

-void DataTypeLowCardinality::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    serializeImpl(column, row_num, &IDataType::serializeProtobuf, protobuf, value_index);
-}
-
-void DataTypeLowCardinality::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    if (allow_add_row)
-    {
-        deserializeImpl(column, &IDataType::deserializeProtobuf, protobuf, true, row_added);
-        return;
-    }
-
-    row_added = false;
-    auto & low_cardinality_column= getColumnLowCardinality(column);
-    auto  nested_column = low_cardinality_column.getDictionary().getNestedColumn();
-    auto temp_column = nested_column->cloneEmpty();
-    size_t unique_row_number = low_cardinality_column.getIndexes().getUInt(low_cardinality_column.size() - 1);
-    temp_column->insertFrom(*nested_column, unique_row_number);
-    bool dummy;
-    dictionary_type.get()->deserializeProtobuf(*temp_column, protobuf, false, dummy);
-    low_cardinality_column.popBack(1);
-    low_cardinality_column.insertFromFullColumn(*temp_column, 0);
-}
-
 template <typename... Params, typename... Args>
 void DataTypeLowCardinality::serializeImpl(
    const IColumn & column, size_t row_num, DataTypeLowCardinality::SerializeFunctionPtr<Params...> func, Args &&... args) const
--- a/src/DataTypes/DataTypeLowCardinality.h
+++ b/src/DataTypes/DataTypeLowCardinality.h
@ -67,8 +67,6 @@ public:
    void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
    void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;

    MutableColumnPtr createColumn() const override;

--- a/src/DataTypes/DataTypeMap.cpp
+++ b/src/DataTypes/DataTypeMap.cpp
@ -336,16 +336,6 @@ void DataTypeMap::deserializeBinaryBulkWithMultipleStreamsImpl(
    nested->deserializeBinaryBulkWithMultipleStreams(column_map.getNestedColumnPtr(), limit, settings, state, cache);
 }

-void DataTypeMap::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    nested->serializeProtobuf(extractNestedColumn(column), row_num, protobuf, value_index);
-}
-
-void DataTypeMap::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    nested->deserializeProtobuf(extractNestedColumn(column), protobuf, allow_add_row, row_added);
-}
-
 MutableColumnPtr DataTypeMap::createColumn() const
 {
    return ColumnMap::create(nested->createColumn());
--- a/src/DataTypes/DataTypeMap.h
+++ b/src/DataTypes/DataTypeMap.h
@ -76,9 +76,6 @@ public:
           DeserializeBinaryBulkStatePtr & state,
           SubstreamsCache * cache) const override;

-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
    MutableColumnPtr createColumn() const override;

    Field getDefault() const override;
@ -92,6 +89,8 @@ public:
    const DataTypePtr & getValueType() const { return value_type; }
    DataTypes getKeyValueTypes() const { return {key_type, value_type}; }

+    const DataTypePtr & getNestedType() const { return nested; }
+
 private:
    template <typename Writer>
    void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const;
--- a/src/DataTypes/DataTypeNullable.cpp
+++ b/src/DataTypes/DataTypeNullable.cpp
@ -486,33 +486,6 @@ void DataTypeNullable::serializeTextXML(const IColumn & column, size_t row_num,
        nested_data_type->serializeAsTextXML(col.getNestedColumn(), row_num, ostr, settings);
 }

-void DataTypeNullable::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
-    if (!col.isNullAt(row_num))
-        nested_data_type->serializeProtobuf(col.getNestedColumn(), row_num, protobuf, value_index);
-}
-
-void DataTypeNullable::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    ColumnNullable & col = assert_cast<ColumnNullable &>(column);
-    IColumn & nested_column = col.getNestedColumn();
-    size_t old_size = nested_column.size();
-    try
-    {
-        nested_data_type->deserializeProtobuf(nested_column, protobuf, allow_add_row, row_added);
-        if (row_added)
-            col.getNullMapData().push_back(0);
-    }
-    catch (...)
-    {
-        nested_column.popBack(nested_column.size() - old_size);
-        col.getNullMapData().resize_assume_reserved(old_size);
-        row_added = false;
-        throw;
-    }
-}
-
 MutableColumnPtr DataTypeNullable::createColumn() const
 {
    return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create());
--- a/src/DataTypes/DataTypeNullable.h
+++ b/src/DataTypes/DataTypeNullable.h
@ -73,9 +73,6 @@ public:
    void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;

-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
    MutableColumnPtr createColumn() const override;

    Field getDefault() const override;
--- a/src/DataTypes/DataTypeNumberBase.cpp
+++ b/src/DataTypes/DataTypeNumberBase.cpp
@ -8,8 +8,6 @@
 #include <Common/typeid_cast.h>
 #include <Common/assert_cast.h>
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>


 namespace DB
@ -205,34 +203,6 @@ void DataTypeNumberBase<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer &
 }


-template <typename T>
-void DataTypeNumberBase<T>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeNumber(assert_cast<const ColumnVector<T> &>(column).getData()[row_num]));
-}
-
-
-template <typename T>
-void DataTypeNumberBase<T>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    T value;
-    if (!protobuf.readNumber(value))
-        return;
-
-    auto & container = typeid_cast<ColumnVector<T> &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(value);
-        row_added = true;
-    }
-    else
-        container.back() = value;
-}
-
-
 template <typename T>
 MutableColumnPtr DataTypeNumberBase<T>::createColumn() const
 {
--- a/src/DataTypes/DataTypeNumberBase.h
+++ b/src/DataTypes/DataTypeNumberBase.h
@ -45,9 +45,6 @@ public:
    void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
    void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;

-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
    MutableColumnPtr createColumn() const override;

    bool isParametric() const override { return false; }
--- a/src/DataTypes/DataTypeString.cpp
+++ b/src/DataTypes/DataTypeString.cpp
@ -9,8 +9,6 @@
 #include <Core/Field.h>

 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeFactory.h>

@ -311,55 +309,6 @@ void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, con
 }


-void DataTypeString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeString(assert_cast<const ColumnString &>(column).getDataAt(row_num)));
-}
-
-
-void DataTypeString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    auto & column_string = assert_cast<ColumnString &>(column);
-    ColumnString::Chars & data = column_string.getChars();
-    ColumnString::Offsets & offsets = column_string.getOffsets();
-    size_t old_size = offsets.size();
-    try
-    {
-        if (allow_add_row)
-        {
-            if (protobuf.readStringInto(data))
-            {
-                data.emplace_back(0);
-                offsets.emplace_back(data.size());
-                row_added = true;
-            }
-            else
-                data.resize_assume_reserved(offsets.back());
-        }
-        else
-        {
-            ColumnString::Chars temp_data;
-            if (protobuf.readStringInto(temp_data))
-            {
-                temp_data.emplace_back(0);
-                column_string.popBack(1);
-                old_size = offsets.size();
-                data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end());
-                offsets.emplace_back(data.size());
-            }
-        }
-    }
-    catch (...)
-    {
-        offsets.resize_assume_reserved(old_size);
-        data.resize_assume_reserved(offsets.back());
-        throw;
-    }
-}
-
 Field DataTypeString::getDefault() const
 {
    return String();
--- a/src/DataTypes/DataTypeString.h
+++ b/src/DataTypes/DataTypeString.h
@ -47,9 +47,6 @@ public:
    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;

-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
    MutableColumnPtr createColumn() const override;

    Field getDefault() const override;
--- a/src/DataTypes/DataTypeTuple.cpp
+++ b/src/DataTypes/DataTypeTuple.cpp
@ -504,33 +504,6 @@ void DataTypeTuple::deserializeBinaryBulkWithMultipleStreamsImpl(
    settings.path.pop_back();
 }

-void DataTypeTuple::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    for (; value_index < elems.size(); ++value_index)
-    {
-        size_t stored = 0;
-        elems[value_index]->serializeProtobuf(extractElementColumn(column, value_index), row_num, protobuf, stored);
-        if (!stored)
-            break;
-    }
-}
-
-void DataTypeTuple::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    bool all_elements_get_row = true;
-    addElementSafe(elems, column, [&]
-    {
-        for (const auto & i : ext::range(0, ext::size(elems)))
-        {
-            bool element_row_added;
-            elems[i]->deserializeProtobuf(extractElementColumn(column, i), protobuf, allow_add_row, element_row_added);
-            all_elements_get_row &= element_row_added;
-        }
-    });
-    row_added = all_elements_get_row;
-}
-
 MutableColumnPtr DataTypeTuple::createColumn() const
 {
    size_t size = elems.size();
--- a/src/DataTypes/DataTypeTuple.h
+++ b/src/DataTypes/DataTypeTuple.h
@ -81,9 +81,6 @@ public:
            DeserializeBinaryBulkStatePtr & state,
            SubstreamsCache * cache) const override;

-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
    MutableColumnPtr createColumn() const override;

    Field getDefault() const override;
--- a/src/DataTypes/DataTypeUUID.cpp
+++ b/src/DataTypes/DataTypeUUID.cpp
@ -1,8 +1,6 @@
 #include <DataTypes/DataTypeUUID.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Columns/ColumnsNumber.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
 #include <Common/assert_cast.h>
@ -79,30 +77,6 @@ void DataTypeUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
    assert_cast<ColumnUInt128 &>(column).getData().push_back(value);
 }

-void DataTypeUUID::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeUUID(UUID(assert_cast<const ColumnUInt128 &>(column).getData()[row_num])));
-}
-
-void DataTypeUUID::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    UUID uuid;
-    if (!protobuf.readUUID(uuid))
-        return;
-
-    auto & container = assert_cast<ColumnUInt128 &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(uuid);
-        row_added = true;
-    }
-    else
-        container.back() = uuid;
-}
-
 bool DataTypeUUID::equals(const IDataType & rhs) const
 {
    return typeid(rhs) == typeid(*this);
--- a/src/DataTypes/DataTypeUUID.h
+++ b/src/DataTypes/DataTypeUUID.h
@ -26,8 +26,6 @@ public:
    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;

    bool canBeUsedInBitOperations() const override { return true; }
    bool canBeInsideNullable() const override { return true; }
--- a/src/DataTypes/DataTypesDecimal.cpp
+++ b/src/DataTypes/DataTypesDecimal.cpp
@ -4,8 +4,6 @@
 #include <Common/typeid_cast.h>
 #include <Core/DecimalFunctions.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <IO/readDecimalText.h>
@ -111,33 +109,6 @@ T DataTypeDecimal<T>::parseFromString(const String & str) const
    return x;
 }

-template <typename T>
-void DataTypeDecimal<T>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeDecimal(assert_cast<const ColumnType &>(column).getData()[row_num], this->scale));
-}
-
-
-template <typename T>
-void DataTypeDecimal<T>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    T decimal;
-    if (!protobuf.readDecimal(decimal, this->precision, this->scale))
-        return;
-
-    auto & container = assert_cast<ColumnType &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(decimal);
-        row_added = true;
-    }
-    else
-        container.back() = decimal;
-}
-

 static DataTypePtr create(const ASTPtr & arguments)
 {
--- a/src/DataTypes/DataTypesDecimal.h
+++ b/src/DataTypes/DataTypesDecimal.h
@ -46,9 +46,6 @@ public:
    void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;

-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
    bool equals(const IDataType & rhs) const override;

    T parseFromString(const String & str) const;
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@ -26,9 +26,6 @@ class Field;
 using DataTypePtr = std::shared_ptr<const IDataType>;
 using DataTypes = std::vector<DataTypePtr>;

-class ProtobufReader;
-class ProtobufWriter;
-
 struct NameAndTypePair;


@ -235,10 +232,6 @@ public:
    /// If method will throw an exception, then column will be in same state as before call to method.
    virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0;

-    /** Serialize to a protobuf. */
-    virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0;
-    virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0;
-
    /** Text serialization with escaping but without quoting.
      */
    void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const;
--- a/src/DataTypes/IDataTypeDummy.h
+++ b/src/DataTypes/IDataTypeDummy.h
@ -34,8 +34,6 @@ public:
    void deserializeBinaryBulk(IColumn &, ReadBuffer &, size_t, double) const override      { throwNoSerialization(); }
    void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
    void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &) const override    { throwNoSerialization(); }
-    void serializeProtobuf(const IColumn &, size_t, ProtobufWriter &, size_t &) const override { throwNoSerialization(); }
-    void deserializeProtobuf(IColumn &, ProtobufReader &, bool, bool &) const override      { throwNoSerialization(); }

    MutableColumnPtr createColumn() const override
    {
--- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
+++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
@ -401,10 +401,16 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
        {
            auto builder = FunctionFactory::instance().tryGet(func->name, context);
            auto function = builder->build({});
-            auto result = function->execute({}, {}, 0);
+            function->prepare({});
+
+            /// We assume that function will not take arguments and will return constant value like tcpPort or hostName
+            /// Such functions will return column with size equal to input_rows_count.
+            size_t input_rows_count = 1;
+            auto result = function->execute({}, function->getResultType(), input_rows_count);

            Field value;
            result->get(0, value);
+
            AutoPtr<Text> text_value(doc->createTextNode(getFieldAsString(value)));
            current_xml_element->appendChild(text_value);
        }
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@ -120,7 +120,6 @@ struct FormatSettings

    struct
    {
-        bool write_row_delimiters = true;
        /**
         * Some buffers (kafka / rabbit) split the rows internally using callback,
         * and always send one row per message, so we can push there formats
@ -128,7 +127,7 @@ struct FormatSettings
         * we have to enforce exporting at most one row in the format output,
         * because Protobuf without delimiters is not generally useful.
         */
-        bool allow_many_rows_no_delimiters = false;
+        bool allow_multiple_rows_without_delimiter = false;
    } protobuf;

    struct
--- a/src/Formats/ProtobufColumnMatcher.cpp
+++ b/src/Formats/ProtobufColumnMatcher.cpp
@ -1,55 +0,0 @@
-#include "ProtobufColumnMatcher.h"
-#if USE_PROTOBUF
-#include <Common/Exception.h>
-#include <google/protobuf/descriptor.pb.h>
-#include <Poco/String.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA;
-}
-
-
-namespace
-{
-    String columnNameToSearchableForm(const String & str)
-    {
-        return Poco::replace(Poco::toUpper(str), ".", "_");
-    }
-}
-
-namespace ProtobufColumnMatcher
-{
-    namespace details
-    {
-        ColumnNameMatcher::ColumnNameMatcher(const std::vector<String> & column_names) : column_usage(column_names.size())
-        {
-            column_usage.resize(column_names.size(), false);
-            for (size_t i = 0; i != column_names.size(); ++i)
-                column_name_to_index_map.emplace(columnNameToSearchableForm(column_names[i]), i);
-        }
-
-        size_t ColumnNameMatcher::findColumn(const String & field_name)
-        {
-            auto it = column_name_to_index_map.find(columnNameToSearchableForm(field_name));
-            if (it == column_name_to_index_map.end())
-                return -1;
-            size_t column_index = it->second;
-            if (column_usage[column_index])
-                return -1;
-            column_usage[column_index] = true;
-            return column_index;
-        }
-
-        void throwNoCommonColumns()
-        {
-            throw Exception("No common columns with provided protobuf schema", ErrorCodes::NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA);
-        }
-    }
-}
-
-}
-#endif
--- a/src/Formats/ProtobufColumnMatcher.h
+++ b/src/Formats/ProtobufColumnMatcher.h
@ -1,196 +0,0 @@
-#pragma once
-
-#if !defined(ARCADIA_BUILD)
-#    include "config_formats.h"
-#endif
-
-#if USE_PROTOBUF
-#    include <memory>
-#    include <unordered_map>
-#    include <vector>
-#    include <common/types.h>
-#    include <boost/blank.hpp>
-#    include <google/protobuf/descriptor.h>
-#    include <google/protobuf/descriptor.pb.h>
-
-namespace google
-{
-namespace protobuf
-{
-    class Descriptor;
-    class FieldDescriptor;
-}
-}
-
-
-namespace DB
-{
-namespace ProtobufColumnMatcher
-{
-    struct DefaultTraits
-    {
-        using MessageData = boost::blank;
-        using FieldData = boost::blank;
-    };
-
-    template <typename Traits = DefaultTraits>
-    struct Message;
-
-    /// Represents a field in a protobuf message.
-    template <typename Traits = DefaultTraits>
-    struct Field
-    {
-        const google::protobuf::FieldDescriptor * field_descriptor = nullptr;
-
-        /// Same as field_descriptor->number().
-        UInt32 field_number = 0;
-
-        /// Index of a column; either 'column_index' or 'nested_message' is set.
-        size_t column_index = -1;
-        std::unique_ptr<Message<Traits>> nested_message;
-
-        typename Traits::FieldData data;
-    };
-
-    /// Represents a protobuf message.
-    template <typename Traits>
-    struct Message
-    {
-        std::vector<Field<Traits>> fields;
-
-        /// Points to the parent message if this is a nested message.
-        Message * parent = nullptr;
-        size_t index_in_parent = -1;
-
-        typename Traits::MessageData data;
-    };
-
-    /// Utility function finding matching columns for each protobuf field.
-    template <typename Traits = DefaultTraits>
-    static std::unique_ptr<Message<Traits>> matchColumns(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type);
-
-    template <typename Traits = DefaultTraits>
-    static std::unique_ptr<Message<Traits>> matchColumns(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type,
-        std::vector<const google::protobuf::FieldDescriptor *> & field_descriptors_without_match);
-
-    namespace details
-    {
-        [[noreturn]] void throwNoCommonColumns();
-
-        class ColumnNameMatcher
-        {
-        public:
-            ColumnNameMatcher(const std::vector<String> & column_names);
-            size_t findColumn(const String & field_name);
-
-        private:
-            std::unordered_map<String, size_t> column_name_to_index_map;
-            std::vector<bool> column_usage;
-        };
-
-        template <typename Traits>
-        std::unique_ptr<Message<Traits>> matchColumnsRecursive(
-            ColumnNameMatcher & name_matcher,
-            const google::protobuf::Descriptor * message_type,
-            const String & field_name_prefix,
-            std::vector<const google::protobuf::FieldDescriptor *> * field_descriptors_without_match)
-        {
-            auto message = std::make_unique<Message<Traits>>();
-            for (int i = 0; i != message_type->field_count(); ++i)
-            {
-                const google::protobuf::FieldDescriptor * field_descriptor = message_type->field(i);
-                if ((field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_MESSAGE)
-                    || (field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP))
-                {
-                    auto nested_message = matchColumnsRecursive<Traits>(
-                        name_matcher,
-                        field_descriptor->message_type(),
-                        field_name_prefix + field_descriptor->name() + ".",
-                        field_descriptors_without_match);
-                    if (nested_message)
-                    {
-                        message->fields.emplace_back();
-                        auto & current_field = message->fields.back();
-                        current_field.field_number = field_descriptor->number();
-                        current_field.field_descriptor = field_descriptor;
-                        current_field.nested_message = std::move(nested_message);
-                        current_field.nested_message->parent = message.get();
-                    }
-                }
-                else
-                {
-                    size_t column_index = name_matcher.findColumn(field_name_prefix + field_descriptor->name());
-                    if (column_index == static_cast<size_t>(-1))
-                    {
-                        if (field_descriptors_without_match)
-                            field_descriptors_without_match->emplace_back(field_descriptor);
-                    }
-                    else
-                    {
-                        message->fields.emplace_back();
-                        auto & current_field = message->fields.back();
-                        current_field.field_number = field_descriptor->number();
-                        current_field.field_descriptor = field_descriptor;
-                        current_field.column_index = column_index;
-                    }
-                }
-            }
-
-            if (message->fields.empty())
-                return nullptr;
-
-            // Columns should be sorted by field_number, it's necessary for writing protobufs and useful reading protobufs.
-            std::sort(message->fields.begin(), message->fields.end(), [](const Field<Traits> & left, const Field<Traits> & right)
-            {
-                return left.field_number < right.field_number;
-            });
-
-            for (size_t i = 0; i != message->fields.size(); ++i)
-            {
-                auto & field = message->fields[i];
-                if (field.nested_message)
-                    field.nested_message->index_in_parent = i;
-            }
-
-            return message;
-        }
-    }
-
-    template <typename Data>
-    static std::unique_ptr<Message<Data>> matchColumnsImpl(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type,
-        std::vector<const google::protobuf::FieldDescriptor *> * field_descriptors_without_match)
-    {
-        details::ColumnNameMatcher name_matcher(column_names);
-        auto message = details::matchColumnsRecursive<Data>(name_matcher, message_type, "", field_descriptors_without_match);
-        if (!message)
-            details::throwNoCommonColumns();
-        return message;
-    }
-
-    template <typename Data>
-    static std::unique_ptr<Message<Data>> matchColumns(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type)
-    {
-        return matchColumnsImpl<Data>(column_names, message_type, nullptr);
-    }
-
-    template <typename Data>
-    static std::unique_ptr<Message<Data>> matchColumns(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type,
-        std::vector<const google::protobuf::FieldDescriptor *> & field_descriptors_without_match)
-    {
-        return matchColumnsImpl<Data>(column_names, message_type, &field_descriptors_without_match);
-    }
-}
-
-}
-
-#endif
--- a/src/Formats/ProtobufReader.cpp
+++ b/src/Formats/ProtobufReader.cpp
--- a/src/Formats/ProtobufReader.h
+++ b/src/Formats/ProtobufReader.h
@ -1,258 +1,72 @@
 #pragma once

-#include <common/DayNum.h>
-#include <Common/PODArray.h>
-#include <Common/UInt128.h>
-#include <Core/UUID.h>
-
 #if !defined(ARCADIA_BUILD)
-#    include "config_formats.h"
+#   include "config_formats.h"
 #endif

 #if USE_PROTOBUF
-#    include <memory>
-#    include <IO/ReadBuffer.h>
-#    include <boost/noncopyable.hpp>
-#    include "ProtobufColumnMatcher.h"
+#   include <Common/PODArray.h>
+#   include <IO/ReadBuffer.h>

-namespace google
-{
-namespace protobuf
-{
-    class Descriptor;
-}
-}

 namespace DB
 {
-class Arena;
-class IAggregateFunction;
 class ReadBuffer;
-using AggregateDataPtr = char *;
-using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
-
-
-/** Deserializes a protobuf, tries to cast data types if necessarily.
-  */
-class ProtobufReader : private boost::noncopyable
-{
-public:
-    ProtobufReader(ReadBuffer & in_, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_);
-    ~ProtobufReader();
-
-    /// Should be called when we start reading a new message.
-    bool startMessage();
-
-    /// Ends reading a message.
-    void endMessage(bool ignore_errors = false);
-
-    /// Reads the column index.
-    /// The function returns false if there are no more columns to read (call endMessage() in this case).
-    bool readColumnIndex(size_t & column_index);
-
-    /// Reads a value which should be put to column at index received with readColumnIndex().
-    /// The function returns false if there are no more values to read now (call readColumnIndex() in this case).
-    bool readNumber(Int8 & value) { return current_converter->readInt8(value); }
-    bool readNumber(UInt8 & value) { return current_converter->readUInt8(value); }
-    bool readNumber(Int16 & value) { return current_converter->readInt16(value); }
-    bool readNumber(UInt16 & value) { return current_converter->readUInt16(value); }
-    bool readNumber(Int32 & value) { return current_converter->readInt32(value); }
-    bool readNumber(UInt32 & value) { return current_converter->readUInt32(value); }
-    bool readNumber(Int64 & value) { return current_converter->readInt64(value); }
-    bool readNumber(UInt64 & value) { return current_converter->readUInt64(value); }
-    bool readNumber(Int128 & value) { return current_converter->readInt128(value); }
-    bool readNumber(UInt128 & value) { return current_converter->readUInt128(value); }
-    bool readNumber(Int256 & value) { return current_converter->readInt256(value); }
-    bool readNumber(UInt256 & value) { return current_converter->readUInt256(value); }
-    bool readNumber(Float32 & value) { return current_converter->readFloat32(value); }
-    bool readNumber(Float64 & value) { return current_converter->readFloat64(value); }
-
-    bool readStringInto(PaddedPODArray<UInt8> & str) { return current_converter->readStringInto(str); }
-
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & name_value_pairs) { current_converter->prepareEnumMapping8(name_value_pairs); }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & name_value_pairs) { current_converter->prepareEnumMapping16(name_value_pairs); }
-    bool readEnum(Int8 & value) { return current_converter->readEnum8(value); }
-    bool readEnum(Int16 & value) { return current_converter->readEnum16(value); }
-
-    bool readUUID(UUID & uuid) { return current_converter->readUUID(uuid); }
-    bool readDate(DayNum & date) { return current_converter->readDate(date); }
-    bool readDateTime(time_t & tm) { return current_converter->readDateTime(tm); }
-    bool readDateTime64(DateTime64 & tm, UInt32 scale) { return current_converter->readDateTime64(tm, scale); }
-
-    bool readDecimal(Decimal32 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal32(decimal, precision, scale); }
-    bool readDecimal(Decimal64 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal64(decimal, precision, scale); }
-    bool readDecimal(Decimal128 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal128(decimal, precision, scale); }
-    bool readDecimal(Decimal256 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal256(decimal, precision, scale); }
-
-    bool readAggregateFunction(const AggregateFunctionPtr & function, AggregateDataPtr place, Arena & arena) { return current_converter->readAggregateFunction(function, place, arena); }
-
-    /// Call it after calling one of the read*() function to determine if there are more values available for reading.
-    bool ALWAYS_INLINE canReadMoreValues() const { return simple_reader.canReadMoreValues(); }
-
-private:
-    class SimpleReader
-    {
-    public:
-        SimpleReader(ReadBuffer & in_, const bool use_length_delimiters_);
-        bool startMessage();
-        void endMessage(bool ignore_errors);
-        void startNestedMessage();
-        void endNestedMessage();
-        bool readFieldNumber(UInt32 & field_number);
-        bool readInt(Int64 & value);
-        bool readSInt(Int64 & value);
-        bool readUInt(UInt64 & value);
-        template<typename T> bool readFixed(T & value);
-        bool readStringInto(PaddedPODArray<UInt8> & str);
-
-        bool ALWAYS_INLINE canReadMoreValues() const { return cursor < field_end; }
-
-    private:
-        void readBinary(void * data, size_t size);
-        void ignore(UInt64 num_bytes);
-        void moveCursorBackward(UInt64 num_bytes);
-
-        UInt64 ALWAYS_INLINE readVarint()
-        {
-            char c;
-            in.readStrict(c);
-            UInt64 first_byte = static_cast<UInt8>(c);
-            ++cursor;
-            if (likely(!(c & 0x80)))
-                return first_byte;
-            return continueReadingVarint(first_byte);
-        }
-
-        UInt64 continueReadingVarint(UInt64 first_byte);
-        void ignoreVarint();
-        void ignoreGroup();
-        [[noreturn]] void throwUnknownFormat() const;
-
-        ReadBuffer & in;
-        Int64 cursor;
-        size_t current_message_level;
-        Int64 current_message_end;
-        std::vector<Int64> parent_message_ends;
-        Int64 field_end;
-        Int64 last_string_pos;
-        const bool use_length_delimiters;
-    };
-
-    class IConverter
-    {
-    public:
-       virtual ~IConverter() = default;
-       virtual bool readStringInto(PaddedPODArray<UInt8> &) = 0;
-       virtual bool readInt8(Int8&) = 0;
-       virtual bool readUInt8(UInt8 &) = 0;
-       virtual bool readInt16(Int16 &) = 0;
-       virtual bool readUInt16(UInt16 &) = 0;
-       virtual bool readInt32(Int32 &) = 0;
-       virtual bool readUInt32(UInt32 &) = 0;
-       virtual bool readInt64(Int64 &) = 0;
-       virtual bool readUInt64(UInt64 &) = 0;
-       virtual bool readInt128(Int128 &) = 0;
-       virtual bool readUInt128(UInt128 &) = 0;
-
-       virtual bool readInt256(Int256 &) = 0;
-       virtual bool readUInt256(UInt256 &) = 0;
-
-       virtual bool readFloat32(Float32 &) = 0;
-       virtual bool readFloat64(Float64 &) = 0;
-       virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) = 0;
-       virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) = 0;
-       virtual bool readEnum8(Int8 &) = 0;
-       virtual bool readEnum16(Int16 &) = 0;
-       virtual bool readUUID(UUID &) = 0;
-       virtual bool readDate(DayNum &) = 0;
-       virtual bool readDateTime(time_t &) = 0;
-       virtual bool readDateTime64(DateTime64 &, UInt32) = 0;
-       virtual bool readDecimal32(Decimal32 &, UInt32, UInt32) = 0;
-       virtual bool readDecimal64(Decimal64 &, UInt32, UInt32) = 0;
-       virtual bool readDecimal128(Decimal128 &, UInt32, UInt32) = 0;
-       virtual bool readDecimal256(Decimal256 &, UInt32, UInt32) = 0;
-       virtual bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) = 0;
-    };
-
-    class ConverterBaseImpl;
-    class ConverterFromString;
-    template<int field_type_id, typename FromType> class ConverterFromNumber;
-    class ConverterFromBool;
-    class ConverterFromEnum;
-
-    struct ColumnMatcherTraits
-    {
-        struct FieldData
-        {
-            std::unique_ptr<IConverter> converter;
-        };
-        struct MessageData
-        {
-            std::unordered_map<UInt32, const ProtobufColumnMatcher::Field<ColumnMatcherTraits>*> field_number_to_field_map;
-        };
-    };
-    using Message = ProtobufColumnMatcher::Message<ColumnMatcherTraits>;
-    using Field = ProtobufColumnMatcher::Field<ColumnMatcherTraits>;
-
-    void setTraitsDataAfterMatchingColumns(Message * message);
-
-    template <int field_type_id>
-    std::unique_ptr<IConverter> createConverter(const google::protobuf::FieldDescriptor * field);
-
-    SimpleReader simple_reader;
-    std::unique_ptr<Message> root_message;
-    Message* current_message = nullptr;
-    size_t current_field_index = 0;
-    IConverter* current_converter = nullptr;
-};
-
-}
-
-#else
-
-namespace DB
-{
-class Arena;
-class IAggregateFunction;
-class ReadBuffer;
-using AggregateDataPtr = char *;
-using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;

+/// Utility class for reading in the Protobuf format.
+/// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
 class ProtobufReader
 {
 public:
-    bool startMessage() { return false; }
-    void endMessage() {}
-    bool readColumnIndex(size_t &) { return false; }
-    bool readNumber(Int8 &) { return false; }
-    bool readNumber(UInt8 &) { return false; }
-    bool readNumber(Int16 &) { return false; }
-    bool readNumber(UInt16 &) { return false; }
-    bool readNumber(Int32 &) { return false; }
-    bool readNumber(UInt32 &) { return false; }
-    bool readNumber(Int64 &) { return false; }
-    bool readNumber(UInt64 &) { return false; }
-    bool readNumber(Int128 &) { return false; }
-    bool readNumber(UInt128 &) { return false; }
-    bool readNumber(Int256 &) { return false; }
-    bool readNumber(UInt256 &) { return false; }
-    bool readNumber(Float32 &) { return false; }
-    bool readNumber(Float64 &) { return false; }
-    bool readStringInto(PaddedPODArray<UInt8> &) { return false; }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> &) {}
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> &) {}
-    bool readEnum(Int8 &) { return false; }
-    bool readEnum(Int16 &) { return false; }
-    bool readUUID(UUID &) { return false; }
-    bool readDate(DayNum &) { return false; }
-    bool readDateTime(time_t &) { return false; }
-    bool readDateTime64(DateTime64 & /*tm*/, UInt32 /*scale*/) { return false; }
-    bool readDecimal(Decimal32 &, UInt32, UInt32) { return false; }
-    bool readDecimal(Decimal64 &, UInt32, UInt32) { return false; }
-    bool readDecimal(Decimal128 &, UInt32, UInt32) { return false; }
-    bool readDecimal(Decimal256 &, UInt32, UInt32) { return false; }
-    bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) { return false; }
-    bool canReadMoreValues() const { return false; }
+    ProtobufReader(ReadBuffer & in_);
+
+    void startMessage(bool with_length_delimiter_);
+    void endMessage(bool ignore_errors);
+    void startNestedMessage();
+    void endNestedMessage();
+
+    bool readFieldNumber(int & field_number);
+    Int64 readInt();
+    Int64 readSInt();
+    UInt64 readUInt();
+    template<typename T> T readFixed();
+
+    void readString(String & str);
+    void readStringAndAppend(PaddedPODArray<UInt8> & str);
+
+    bool eof() const { return in.eof(); }
+
+private:
+    void readBinary(void * data, size_t size);
+    void ignore(UInt64 num_bytes);
+    void ignoreAll();
+    void moveCursorBackward(UInt64 num_bytes);
+
+    UInt64 ALWAYS_INLINE readVarint()
+    {
+        char c;
+        in.readStrict(c);
+        UInt64 first_byte = static_cast<UInt8>(c);
+        ++cursor;
+        if (likely(!(c & 0x80)))
+            return first_byte;
+        return continueReadingVarint(first_byte);
+    }
+
+    UInt64 continueReadingVarint(UInt64 first_byte);
+    void ignoreVarint();
+    void ignoreGroup();
+    [[noreturn]] void throwUnknownFormat() const;
+
+    ReadBuffer & in;
+    Int64 cursor = 0;
+    bool root_message_has_length_delimiter = false;
+    size_t current_message_level = 0;
+    Int64 current_message_end = 0;
+    std::vector<Int64> parent_message_ends;
+    int field_number = 0;
+    int next_field_number = 0;
+    Int64 field_end = 0;
 };

 }
--- a/src/Formats/ProtobufSerializer.cpp
+++ b/src/Formats/ProtobufSerializer.cpp
--- a/src/Formats/ProtobufSerializer.h
+++ b/src/Formats/ProtobufSerializer.h
@ -0,0 +1,52 @@
+#pragma once
+
+#if !defined(ARCADIA_BUILD)
+#    include "config_formats.h"
+#endif
+
+#if USE_PROTOBUF
+#   include <Columns/IColumn.h>
+
+
+namespace google::protobuf { class Descriptor; }
+
+namespace DB
+{
+class ProtobufReader;
+class ProtobufWriter;
+class IDataType;
+using DataTypePtr = std::shared_ptr<const IDataType>;
+using DataTypes = std::vector<DataTypePtr>;
+
+
+/// Utility class, does all the work for serialization in the Protobuf format.
+class ProtobufSerializer
+{
+public:
+    virtual ~ProtobufSerializer() = default;
+
+    virtual void setColumns(const ColumnPtr * columns, size_t num_columns) = 0;
+    virtual void writeRow(size_t row_num) = 0;
+
+    virtual void setColumns(const MutableColumnPtr * columns, size_t num_columns) = 0;
+    virtual void readRow(size_t row_num) = 0;
+    virtual void insertDefaults(size_t row_num) = 0;
+
+    static std::unique_ptr<ProtobufSerializer> create(
+        const Strings & column_names,
+        const DataTypes & data_types,
+        std::vector<size_t> & missing_column_indices,
+        const google::protobuf::Descriptor & message_descriptor,
+        bool with_length_delimiter,
+        ProtobufReader & reader);
+
+    static std::unique_ptr<ProtobufSerializer> create(
+        const Strings & column_names,
+        const DataTypes & data_types,
+        const google::protobuf::Descriptor & message_descriptor,
+        bool with_length_delimiter,
+        ProtobufWriter & writer);
+};
+
+}
+#endif
--- a/src/Formats/ProtobufWriter.cpp
+++ b/src/Formats/ProtobufWriter.cpp
@ -1,29 +1,11 @@
 #include "ProtobufWriter.h"

 #if USE_PROTOBUF
-#    include <cassert>
-#    include <optional>
-#    include <math.h>
-#    include <AggregateFunctions/IAggregateFunction.h>
-#    include <DataTypes/DataTypesDecimal.h>
-#    include <IO/ReadHelpers.h>
-#    include <IO/WriteHelpers.h>
-#    include <boost/numeric/conversion/cast.hpp>
-#    include <google/protobuf/descriptor.h>
-#    include <google/protobuf/descriptor.pb.h>
+#   include <IO/WriteHelpers.h>


 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int NOT_IMPLEMENTED;
-    extern const int NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD;
-    extern const int PROTOBUF_BAD_CAST;
-    extern const int PROTOBUF_FIELD_NOT_REPEATED;
-}
-
-
 namespace
 {
    constexpr size_t MAX_VARINT_SIZE = 10;
@ -81,66 +63,24 @@ namespace
    }

    void writeFieldNumber(UInt32 field_number, WireType wire_type, PODArray<UInt8> & buf) { writeVarint((field_number << 3) | wire_type, buf); }
-
-    // Should we pack repeated values while storing them.
-    // It depends on type of the field in the protobuf schema and the syntax of that schema.
-    bool shouldPackRepeated(const google::protobuf::FieldDescriptor * field)
-    {
-        if (!field->is_repeated())
-            return false;
-        switch (field->type())
-        {
-            case google::protobuf::FieldDescriptor::TYPE_INT32:
-            case google::protobuf::FieldDescriptor::TYPE_UINT32:
-            case google::protobuf::FieldDescriptor::TYPE_SINT32:
-            case google::protobuf::FieldDescriptor::TYPE_INT64:
-            case google::protobuf::FieldDescriptor::TYPE_UINT64:
-            case google::protobuf::FieldDescriptor::TYPE_SINT64:
-            case google::protobuf::FieldDescriptor::TYPE_FIXED32:
-            case google::protobuf::FieldDescriptor::TYPE_SFIXED32:
-            case google::protobuf::FieldDescriptor::TYPE_FIXED64:
-            case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
-            case google::protobuf::FieldDescriptor::TYPE_FLOAT:
-            case google::protobuf::FieldDescriptor::TYPE_DOUBLE:
-            case google::protobuf::FieldDescriptor::TYPE_BOOL:
-            case google::protobuf::FieldDescriptor::TYPE_ENUM:
-                break;
-            default:
-                return false;
-        }
-        if (field->options().has_packed())
-            return field->options().packed();
-        return field->file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3;
-    }
-
-    // Should we omit null values (zero for numbers / empty string for strings) while storing them.
-    bool shouldSkipNullValue(const google::protobuf::FieldDescriptor * field)
-    {
-        return field->is_optional() && (field->file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3);
-    }
 }


-// SimpleWriter is an utility class to serialize protobufs.
-// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
-ProtobufWriter::SimpleWriter::SimpleWriter(WriteBuffer & out_, const bool use_length_delimiters_)
+ProtobufWriter::ProtobufWriter(WriteBuffer & out_)
    : out(out_)
-    , current_piece_start(0)
-    , num_bytes_skipped(0)
-    , use_length_delimiters(use_length_delimiters_)
 {
 }

-ProtobufWriter::SimpleWriter::~SimpleWriter() = default;
+ProtobufWriter::~ProtobufWriter() = default;

-void ProtobufWriter::SimpleWriter::startMessage()
+void ProtobufWriter::startMessage()
 {
 }

-void ProtobufWriter::SimpleWriter::endMessage()
+void ProtobufWriter::endMessage(bool with_length_delimiter)
 {
    pieces.emplace_back(current_piece_start, buffer.size());
-    if (use_length_delimiters)
+    if (with_length_delimiter)
    {
        size_t size_of_message = buffer.size() - num_bytes_skipped;
        writeVarint(size_of_message, out);
@ -154,7 +94,7 @@ void ProtobufWriter::SimpleWriter::endMessage()
    current_piece_start = 0;
 }

-void ProtobufWriter::SimpleWriter::startNestedMessage()
+void ProtobufWriter::startNestedMessage()
 {
    nested_infos.emplace_back(pieces.size(), num_bytes_skipped);
    pieces.emplace_back(current_piece_start, buffer.size());
@ -167,7 +107,7 @@ void ProtobufWriter::SimpleWriter::startNestedMessage()
    num_bytes_skipped = NESTED_MESSAGE_PADDING;
 }

-void ProtobufWriter::SimpleWriter::endNestedMessage(UInt32 field_number, bool is_group, bool skip_if_empty)
+void ProtobufWriter::endNestedMessage(int field_number, bool is_group, bool skip_if_empty)
 {
    const auto & nested_info = nested_infos.back();
    size_t num_pieces_at_start = nested_info.num_pieces_at_start;
@ -203,8 +143,13 @@ void ProtobufWriter::SimpleWriter::endNestedMessage(UInt32 field_number, bool is
    num_bytes_skipped += num_bytes_skipped_at_start - num_bytes_inserted;
 }

-void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
+void ProtobufWriter::writeUInt(int field_number, UInt64 value)
 {
+    if (in_repeated_pack)
+    {
+        writeVarint(value, buffer);
+        return;
+    }
    size_t old_size = buffer.size();
    buffer.reserve(old_size + 2 * MAX_VARINT_SIZE);
    UInt8 * ptr = buffer.data() + old_size;
@ -213,20 +158,27 @@ void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
    buffer.resize_assume_reserved(ptr - buffer.data());
 }

-void ProtobufWriter::SimpleWriter::writeInt(UInt32 field_number, Int64 value)
+void ProtobufWriter::writeInt(int field_number, Int64 value)
 {
    writeUInt(field_number, static_cast<UInt64>(value));
 }

-void ProtobufWriter::SimpleWriter::writeSInt(UInt32 field_number, Int64 value)
+void ProtobufWriter::writeSInt(int field_number, Int64 value)
 {
    writeUInt(field_number, encodeZigZag(value));
 }

 template <typename T>
-void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
+void ProtobufWriter::writeFixed(int field_number, T value)
 {
    static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
+    if (in_repeated_pack)
+    {
+        size_t old_size = buffer.size();
+        buffer.resize(old_size + sizeof(T));
+        memcpy(buffer.data() + old_size, &value, sizeof(T));
+        return;
+    }
    constexpr WireType wire_type = (sizeof(T) == 4) ? BITS32 : BITS64;
    size_t old_size = buffer.size();
    buffer.reserve(old_size + MAX_VARINT_SIZE + sizeof(T));
@ -237,19 +189,27 @@ void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
    buffer.resize_assume_reserved(ptr - buffer.data());
 }

-void ProtobufWriter::SimpleWriter::writeString(UInt32 field_number, const StringRef & str)
+template void ProtobufWriter::writeFixed<Int32>(int field_number, Int32 value);
+template void ProtobufWriter::writeFixed<UInt32>(int field_number, UInt32 value);
+template void ProtobufWriter::writeFixed<Int64>(int field_number, Int64 value);
+template void ProtobufWriter::writeFixed<UInt64>(int field_number, UInt64 value);
+template void ProtobufWriter::writeFixed<Float32>(int field_number, Float32 value);
+template void ProtobufWriter::writeFixed<Float64>(int field_number, Float64 value);
+
+void ProtobufWriter::writeString(int field_number, const std::string_view & str)
 {
+    size_t length = str.length();
    size_t old_size = buffer.size();
-    buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + str.size);
+    buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + length);
    UInt8 * ptr = buffer.data() + old_size;
    ptr = writeFieldNumber(field_number, LENGTH_DELIMITED, ptr);
-    ptr = writeVarint(str.size, ptr);
-    memcpy(ptr, str.data, str.size);
-    ptr += str.size;
+    ptr = writeVarint(length, ptr);
+    memcpy(ptr, str.data(), length);
+    ptr += length;
    buffer.resize_assume_reserved(ptr - buffer.data());
 }

-void ProtobufWriter::SimpleWriter::startRepeatedPack()
+void ProtobufWriter::startRepeatedPack()
 {
    pieces.emplace_back(current_piece_start, buffer.size());

@ -259,17 +219,19 @@ void ProtobufWriter::SimpleWriter::startRepeatedPack()
    current_piece_start = buffer.size() + REPEATED_PACK_PADDING;
    buffer.resize(current_piece_start);
    num_bytes_skipped += REPEATED_PACK_PADDING;
+    in_repeated_pack = true;
 }

-void ProtobufWriter::SimpleWriter::endRepeatedPack(UInt32 field_number)
+void ProtobufWriter::endRepeatedPack(int field_number, bool skip_if_empty)
 {
    size_t size = buffer.size() - current_piece_start;
-    if (!size)
+    if (!size && skip_if_empty)
    {
        current_piece_start = pieces.back().start;
        buffer.resize(pieces.back().end);
        pieces.pop_back();
        num_bytes_skipped -= REPEATED_PACK_PADDING;
+        in_repeated_pack = false;
        return;
    }
    UInt8 * ptr = &buffer[pieces.back().end];
@ -278,726 +240,7 @@ void ProtobufWriter::SimpleWriter::endRepeatedPack(UInt32 field_number)
    size_t num_bytes_inserted = endptr - ptr;
    pieces.back().end += num_bytes_inserted;
    num_bytes_skipped -= num_bytes_inserted;
-}
-
-void ProtobufWriter::SimpleWriter::addUIntToRepeatedPack(UInt64 value)
-{
-    writeVarint(value, buffer);
-}
-
-void ProtobufWriter::SimpleWriter::addIntToRepeatedPack(Int64 value)
-{
-    writeVarint(static_cast<UInt64>(value), buffer);
-}
-
-void ProtobufWriter::SimpleWriter::addSIntToRepeatedPack(Int64 value)
-{
-    writeVarint(encodeZigZag(value), buffer);
-}
-
-template <typename T>
-void ProtobufWriter::SimpleWriter::addFixedToRepeatedPack(T value)
-{
-    static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
-    size_t old_size = buffer.size();
-    buffer.resize(old_size + sizeof(T));
-    memcpy(buffer.data() + old_size, &value, sizeof(T));
-}
-
-
-// Implementation for a converter from any DB data type to any protobuf field type.
-class ProtobufWriter::ConverterBaseImpl : public IConverter
-{
-public:
-    ConverterBaseImpl(SimpleWriter & simple_writer_, const google::protobuf::FieldDescriptor * field_)
-        : simple_writer(simple_writer_), field(field_)
-    {
-        field_number = field->number();
-    }
-
-    virtual void writeString(const StringRef &) override { cannotConvertType("String"); }
-    virtual void writeInt8(Int8) override { cannotConvertType("Int8"); }
-    virtual void writeUInt8(UInt8) override { cannotConvertType("UInt8"); }
-    virtual void writeInt16(Int16) override { cannotConvertType("Int16"); }
-    virtual void writeUInt16(UInt16) override { cannotConvertType("UInt16"); }
-    virtual void writeInt32(Int32) override { cannotConvertType("Int32"); }
-    virtual void writeUInt32(UInt32) override { cannotConvertType("UInt32"); }
-    virtual void writeInt64(Int64) override { cannotConvertType("Int64"); }
-    virtual void writeUInt64(UInt64) override { cannotConvertType("UInt64"); }
-    virtual void writeInt128(Int128) override { cannotConvertType("Int128"); }
-    virtual void writeUInt128(const UInt128 &) override { cannotConvertType("UInt128"); }
-    virtual void writeInt256(const Int256 &) override { cannotConvertType("Int256"); }
-    virtual void writeUInt256(const UInt256 &) override { cannotConvertType("UInt256"); }
-    virtual void writeFloat32(Float32) override { cannotConvertType("Float32"); }
-    virtual void writeFloat64(Float64) override { cannotConvertType("Float64"); }
-    virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) override {}
-    virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) override {}
-    virtual void writeEnum8(Int8) override { cannotConvertType("Enum"); }
-    virtual void writeEnum16(Int16) override { cannotConvertType("Enum"); }
-    virtual void writeUUID(const UUID &) override { cannotConvertType("UUID"); }
-    virtual void writeDate(DayNum) override { cannotConvertType("Date"); }
-    virtual void writeDateTime(time_t) override { cannotConvertType("DateTime"); }
-    virtual void writeDateTime64(DateTime64, UInt32) override { cannotConvertType("DateTime64"); }
-    virtual void writeDecimal32(Decimal32, UInt32) override { cannotConvertType("Decimal32"); }
-    virtual void writeDecimal64(Decimal64, UInt32) override { cannotConvertType("Decimal64"); }
-    virtual void writeDecimal128(const Decimal128 &, UInt32) override { cannotConvertType("Decimal128"); }
-    virtual void writeDecimal256(const Decimal256 &, UInt32) override { cannotConvertType("Decimal256"); }
-
-    virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) override { cannotConvertType("AggregateFunction"); }
-
-protected:
-    [[noreturn]] void cannotConvertType(const String & type_name)
-    {
-        throw Exception(
-            "Could not convert data type '" + type_name + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
-            ErrorCodes::PROTOBUF_BAD_CAST);
-    }
-
-    [[noreturn]] void cannotConvertValue(const String & value)
-    {
-        throw Exception(
-            "Could not convert value '" + value + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
-            ErrorCodes::PROTOBUF_BAD_CAST);
-    }
-
-    template <typename To, typename From>
-    To numericCast(From value)
-    {
-        if constexpr (std::is_same_v<To, From>)
-            return value;
-        To result;
-        try
-        {
-            result = boost::numeric_cast<To>(value);
-        }
-        catch (boost::numeric::bad_numeric_cast &)
-        {
-            cannotConvertValue(toString(value));
-        }
-        return result;
-    }
-
-    template <typename To>
-    To parseFromString(const StringRef & str)
-    {
-        To result;
-        try
-        {
-            result = ::DB::parse<To>(str.data, str.size);
-        }
-        catch (...)
-        {
-            cannotConvertValue(str.toString());
-        }
-        return result;
-    }
-
-    SimpleWriter & simple_writer;
-    const google::protobuf::FieldDescriptor * field;
-    UInt32 field_number;
-};
-
-
-template <bool skip_null_value>
-class ProtobufWriter::ConverterToString : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    void writeString(const StringRef & str) override { writeField(str); }
-
-    void writeInt8(Int8 value) override { convertToStringAndWriteField(value); }
-    void writeUInt8(UInt8 value) override { convertToStringAndWriteField(value); }
-    void writeInt16(Int16 value) override { convertToStringAndWriteField(value); }
-    void writeUInt16(UInt16 value) override { convertToStringAndWriteField(value); }
-    void writeInt32(Int32 value) override { convertToStringAndWriteField(value); }
-    void writeUInt32(UInt32 value) override { convertToStringAndWriteField(value); }
-    void writeInt64(Int64 value) override { convertToStringAndWriteField(value); }
-    void writeUInt64(UInt64 value) override { convertToStringAndWriteField(value); }
-    void writeFloat32(Float32 value) override { convertToStringAndWriteField(value); }
-    void writeFloat64(Float64 value) override { convertToStringAndWriteField(value); }
-
-    void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
-    {
-        prepareEnumValueToNameMap(name_value_pairs);
-    }
-    void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
-    {
-        prepareEnumValueToNameMap(name_value_pairs);
-    }
-
-    void writeEnum8(Int8 value) override { writeEnum16(value); }
-
-    void writeEnum16(Int16 value) override
-    {
-        auto it = enum_value_to_name_map->find(value);
-        if (it == enum_value_to_name_map->end())
-            cannotConvertValue(toString(value));
-        writeField(it->second);
-    }
-
-    void writeUUID(const UUID & uuid) override { convertToStringAndWriteField(uuid); }
-    void writeDate(DayNum date) override { convertToStringAndWriteField(date); }
-
-    void writeDateTime(time_t tm) override
-    {
-        writeDateTimeText(tm, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-    void writeDateTime64(DateTime64 date_time, UInt32 scale) override
-    {
-        writeDateTimeText(date_time, scale, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-    void writeDecimal32(Decimal32 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-    void writeDecimal64(Decimal64 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-    void writeDecimal128(const Decimal128 & decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-
-    void writeAggregateFunction(const AggregateFunctionPtr & function, ConstAggregateDataPtr place) override
-    {
-        function->serialize(place, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-private:
-    template <typename T>
-    void convertToStringAndWriteField(T value)
-    {
-        writeText(value, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-    template <typename T>
-    void writeDecimal(const Decimal<T> & decimal, UInt32 scale)
-    {
-        writeText(decimal, scale, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-    template <typename T>
-    void prepareEnumValueToNameMap(const std::vector<std::pair<String, T>> & name_value_pairs)
-    {
-        if (enum_value_to_name_map.has_value())
-            return;
-        enum_value_to_name_map.emplace();
-        for (const auto & name_value_pair : name_value_pairs)
-            enum_value_to_name_map->emplace(name_value_pair.second, name_value_pair.first);
-    }
-
-    void writeField(const StringRef & str)
-    {
-        if constexpr (skip_null_value)
-        {
-            if (!str.size)
-                return;
-        }
-        simple_writer.writeString(field_number, str);
-    }
-
-    WriteBufferFromOwnString text_buffer;
-    std::optional<std::unordered_map<Int16, String>> enum_value_to_name_map;
-};
-
-#    define PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(field_type_id) \
-        template <> \
-        std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<field_type_id>( \
-            const google::protobuf::FieldDescriptor * field) \
-        { \
-            if (shouldSkipNullValue(field)) \
-                return std::make_unique<ConverterToString<true>>(simple_writer, field); \
-            else \
-                return std::make_unique<ConverterToString<false>>(simple_writer, field); \
-        }
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_STRING)
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_BYTES)
-#    undef PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS
-
-
-template <int field_type_id, typename ToType, bool skip_null_value, bool pack_repeated>
-class ProtobufWriter::ConverterToNumber : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    void writeString(const StringRef & str) override { writeField(parseFromString<ToType>(str)); }
-
-    void writeInt8(Int8 value) override { castNumericAndWriteField(value); }
-    void writeUInt8(UInt8 value) override { castNumericAndWriteField(value); }
-    void writeInt16(Int16 value) override { castNumericAndWriteField(value); }
-    void writeUInt16(UInt16 value) override { castNumericAndWriteField(value); }
-    void writeInt32(Int32 value) override { castNumericAndWriteField(value); }
-    void writeUInt32(UInt32 value) override { castNumericAndWriteField(value); }
-    void writeInt64(Int64 value) override { castNumericAndWriteField(value); }
-    void writeUInt64(UInt64 value) override { castNumericAndWriteField(value); }
-    void writeFloat32(Float32 value) override { castNumericAndWriteField(value); }
-    void writeFloat64(Float64 value) override { castNumericAndWriteField(value); }
-
-    void writeEnum8(Int8 value) override { writeEnum16(value); }
-
-    void writeEnum16(Int16 value) override
-    {
-        if constexpr (!is_integer_v<ToType>)
-            cannotConvertType("Enum"); // It's not correct to convert enum to floating point.
-        castNumericAndWriteField(value);
-    }
-
-    void writeDate(DayNum date) override { castNumericAndWriteField(static_cast<UInt16>(date)); }
-    void writeDateTime(time_t tm) override { castNumericAndWriteField(tm); }
-    void writeDateTime64(DateTime64 date_time, UInt32 scale) override { writeDecimal(date_time, scale); }
-    void writeDecimal32(Decimal32 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-    void writeDecimal64(Decimal64 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-    void writeDecimal128(const Decimal128 & decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-
-private:
-    template <typename FromType>
-    void castNumericAndWriteField(FromType value)
-    {
-        writeField(numericCast<ToType>(value));
-    }
-
-    template <typename S>
-    void writeDecimal(const Decimal<S> & decimal, UInt32 scale)
-    {
-        castNumericAndWriteField(DecimalUtils::convertTo<ToType>(decimal, scale));
-    }
-
-    void writeField(ToType value)
-    {
-        if constexpr (skip_null_value)
-        {
-            if (value == 0)
-                return;
-        }
-        if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT32) && std::is_same_v<ToType, Int32>)
-                   || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT64) && std::is_same_v<ToType, Int64>))
-        {
-            if constexpr (pack_repeated)
-                simple_writer.addIntToRepeatedPack(value);
-            else
-                simple_writer.writeInt(field_number, value);
-        }
-        else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT32) && std::is_same_v<ToType, Int32>)
-                        || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT64) && std::is_same_v<ToType, Int64>))
-        {
-            if constexpr (pack_repeated)
-                simple_writer.addSIntToRepeatedPack(value);
-            else
-                simple_writer.writeSInt(field_number, value);
-        }
-        else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT32) && std::is_same_v<ToType, UInt32>)
-                        || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT64) && std::is_same_v<ToType, UInt64>))
-        {
-            if constexpr (pack_repeated)
-                simple_writer.addUIntToRepeatedPack(value);
-            else
-                simple_writer.writeUInt(field_number, value);
-        }
-        else
-        {
-            static_assert(((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED32) && std::is_same_v<ToType, UInt32>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED32) && std::is_same_v<ToType, Int32>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED64) && std::is_same_v<ToType, UInt64>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED64) && std::is_same_v<ToType, Int64>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FLOAT) && std::is_same_v<ToType, float>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_DOUBLE) && std::is_same_v<ToType, double>));
-            if constexpr (pack_repeated)
-                simple_writer.addFixedToRepeatedPack(value);
-            else
-                simple_writer.writeFixed(field_number, value);
-        }
-    }
-};
-
-#    define PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(field_type_id, field_type) \
-        template <> \
-        std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<field_type_id>( \
-            const google::protobuf::FieldDescriptor * field) \
-        { \
-            if (shouldSkipNullValue(field)) \
-                return std::make_unique<ConverterToNumber<field_type_id, field_type, true, false>>(simple_writer, field); \
-            else if (shouldPackRepeated(field)) \
-                return std::make_unique<ConverterToNumber<field_type_id, field_type, false, true>>(simple_writer, field); \
-            else \
-                return std::make_unique<ConverterToNumber<field_type_id, field_type, false, false>>(simple_writer, field); \
-        }
-
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT32, Int32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT32, Int32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT32, UInt32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT64, Int64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT64, Int64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT64, UInt64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED32, UInt32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED32, Int32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED64, UInt64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED64, Int64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FLOAT, float);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_DOUBLE, double);
-#    undef PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS
-
-
-template <bool skip_null_value, bool pack_repeated>
-class ProtobufWriter::ConverterToBool : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    void writeString(const StringRef & str) override
-    {
-        if (str == "true")
-            writeField(true);
-        else if (str == "false")
-            writeField(false);
-        else
-            cannotConvertValue(str.toString());
-    }
-
-    void writeInt8(Int8 value) override { convertToBoolAndWriteField(value); }
-    void writeUInt8(UInt8 value) override { convertToBoolAndWriteField(value); }
-    void writeInt16(Int16 value) override { convertToBoolAndWriteField(value); }
-    void writeUInt16(UInt16 value) override { convertToBoolAndWriteField(value); }
-    void writeInt32(Int32 value) override { convertToBoolAndWriteField(value); }
-    void writeUInt32(UInt32 value) override { convertToBoolAndWriteField(value); }
-    void writeInt64(Int64 value) override { convertToBoolAndWriteField(value); }
-    void writeUInt64(UInt64 value) override { convertToBoolAndWriteField(value); }
-    void writeFloat32(Float32 value) override { convertToBoolAndWriteField(value); }
-    void writeFloat64(Float64 value) override { convertToBoolAndWriteField(value); }
-    void writeDecimal32(Decimal32 decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
-    void writeDecimal64(Decimal64 decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
-    void writeDecimal128(const Decimal128 & decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
-
-private:
-    template <typename T>
-    void convertToBoolAndWriteField(T value)
-    {
-        writeField(static_cast<bool>(value));
-    }
-
-    void writeField(bool b)
-    {
-        if constexpr (skip_null_value)
-        {
-            if (!b)
-                return;
-        }
-        if constexpr (pack_repeated)
-            simple_writer.addUIntToRepeatedPack(b);
-        else
-            simple_writer.writeUInt(field_number, b);
-    }
-};
-
-template <>
-std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<google::protobuf::FieldDescriptor::TYPE_BOOL>(
-    const google::protobuf::FieldDescriptor * field)
-{
-    if (shouldSkipNullValue(field))
-        return std::make_unique<ConverterToBool<true, false>>(simple_writer, field);
-    else if (shouldPackRepeated(field))
-        return std::make_unique<ConverterToBool<false, true>>(simple_writer, field);
-    else
-        return std::make_unique<ConverterToBool<false, false>>(simple_writer, field);
-}
-
-
-template <bool skip_null_value, bool pack_repeated>
-class ProtobufWriter::ConverterToEnum : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    void writeString(const StringRef & str) override
-    {
-        prepareEnumNameToPbNumberMap();
-        auto it = enum_name_to_pbnumber_map->find(str);
-        if (it == enum_name_to_pbnumber_map->end())
-            cannotConvertValue(str.toString());
-        writeField(it->second);
-    }
-
-    void writeInt8(Int8 value) override { convertToEnumAndWriteField(value); }
-    void writeUInt8(UInt8 value) override { convertToEnumAndWriteField(value); }
-    void writeInt16(Int16 value) override { convertToEnumAndWriteField(value); }
-    void writeUInt16(UInt16 value) override { convertToEnumAndWriteField(value); }
-    void writeInt32(Int32 value) override { convertToEnumAndWriteField(value); }
-    void writeUInt32(UInt32 value) override { convertToEnumAndWriteField(value); }
-    void writeInt64(Int64 value) override { convertToEnumAndWriteField(value); }
-    void writeUInt64(UInt64 value) override { convertToEnumAndWriteField(value); }
-
-    void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
-    {
-        prepareEnumValueToPbNumberMap(name_value_pairs);
-    }
-    void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
-    {
-        prepareEnumValueToPbNumberMap(name_value_pairs);
-    }
-
-    void writeEnum8(Int8 value) override { writeEnum16(value); }
-
-    void writeEnum16(Int16 value) override
-    {
-        int pbnumber;
-        if (enum_value_always_equals_pbnumber)
-            pbnumber = value;
-        else
-        {
-            auto it = enum_value_to_pbnumber_map->find(value);
-            if (it == enum_value_to_pbnumber_map->end())
-                cannotConvertValue(toString(value));
-            pbnumber = it->second;
-        }
-        writeField(pbnumber);
-    }
-
-private:
-    template <typename T>
-    void convertToEnumAndWriteField(T value)
-    {
-        const auto * enum_descriptor = field->enum_type()->FindValueByNumber(numericCast<int>(value));
-        if (!enum_descriptor)
-            cannotConvertValue(toString(value));
-        writeField(enum_descriptor->number());
-    }
-
-    void prepareEnumNameToPbNumberMap()
-    {
-        if (enum_name_to_pbnumber_map.has_value())
-            return;
-        enum_name_to_pbnumber_map.emplace();
-        const auto * enum_type = field->enum_type();
-        for (int i = 0; i != enum_type->value_count(); ++i)
-        {
-            const auto * enum_value = enum_type->value(i);
-            enum_name_to_pbnumber_map->emplace(enum_value->name(), enum_value->number());
-        }
-    }
-
-    template <typename T>
-    void prepareEnumValueToPbNumberMap(const std::vector<std::pair<String, T>> & name_value_pairs)
-    {
-        if (enum_value_to_pbnumber_map.has_value())
-            return;
-        enum_value_to_pbnumber_map.emplace();
-        enum_value_always_equals_pbnumber = true;
-        for (const auto & name_value_pair : name_value_pairs)
-        {
-            Int16 value = name_value_pair.second; // NOLINT
-            const auto * enum_descriptor = field->enum_type()->FindValueByName(name_value_pair.first);
-            if (enum_descriptor)
-            {
-                enum_value_to_pbnumber_map->emplace(value, enum_descriptor->number());
-                if (value != enum_descriptor->number())
-                    enum_value_always_equals_pbnumber = false;
-            }
-            else
-                enum_value_always_equals_pbnumber = false;
-        }
-    }
-
-    void writeField(int enum_pbnumber)
-    {
-        if constexpr (skip_null_value)
-        {
-            if (!enum_pbnumber)
-                return;
-        }
-        if constexpr (pack_repeated)
-            simple_writer.addUIntToRepeatedPack(enum_pbnumber);
-        else
-            simple_writer.writeUInt(field_number, enum_pbnumber);
-    }
-
-    std::optional<std::unordered_map<StringRef, int>> enum_name_to_pbnumber_map;
-    std::optional<std::unordered_map<Int16, int>> enum_value_to_pbnumber_map;
-    bool enum_value_always_equals_pbnumber;
-};
-
-template <>
-std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<google::protobuf::FieldDescriptor::TYPE_ENUM>(
-    const google::protobuf::FieldDescriptor * field)
-{
-    if (shouldSkipNullValue(field))
-        return std::make_unique<ConverterToEnum<true, false>>(simple_writer, field);
-    else if (shouldPackRepeated(field))
-        return std::make_unique<ConverterToEnum<false, true>>(simple_writer, field);
-    else
-        return std::make_unique<ConverterToEnum<false, false>>(simple_writer, field);
-}
-
-
-ProtobufWriter::ProtobufWriter(
-    WriteBuffer & out, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_)
-    : simple_writer(out, use_length_delimiters_)
-{
-    std::vector<const google::protobuf::FieldDescriptor *> field_descriptors_without_match;
-    root_message = ProtobufColumnMatcher::matchColumns<ColumnMatcherTraits>(column_names, message_type, field_descriptors_without_match);
-    for (const auto * field_descriptor_without_match : field_descriptors_without_match)
-    {
-        if (field_descriptor_without_match->is_required())
-            throw Exception(
-                "Output doesn't have a column named '" + field_descriptor_without_match->name()
-                    + "' which is required to write the output in the protobuf format.",
-                ErrorCodes::NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD);
-    }
-    setTraitsDataAfterMatchingColumns(root_message.get());
-}
-
-ProtobufWriter::~ProtobufWriter() = default;
-
-void ProtobufWriter::setTraitsDataAfterMatchingColumns(Message * message)
-{
-    Field * parent_field = message->parent ? &message->parent->fields[message->index_in_parent] : nullptr;
-    message->data.parent_field_number = parent_field ? parent_field->field_number : 0;
-    message->data.is_required = parent_field && parent_field->data.is_required;
-
-    if (parent_field && parent_field->data.is_repeatable)
-        message->data.repeatable_container_message = message;
-    else if (message->parent)
-        message->data.repeatable_container_message = message->parent->data.repeatable_container_message;
-    else
-        message->data.repeatable_container_message = nullptr;
-
-    message->data.is_group = parent_field && (parent_field->field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP);
-
-    for (auto & field : message->fields)
-    {
-        field.data.is_repeatable = field.field_descriptor->is_repeated();
-        field.data.is_required = field.field_descriptor->is_required();
-        field.data.repeatable_container_message = message->data.repeatable_container_message;
-        field.data.should_pack_repeated = shouldPackRepeated(field.field_descriptor);
-
-        if (field.nested_message)
-        {
-            setTraitsDataAfterMatchingColumns(field.nested_message.get());
-            continue;
-        }
-        switch (field.field_descriptor->type())
-        {
-#    define PROTOBUF_WRITER_CONVERTER_CREATING_CASE(field_type_id) \
-        case field_type_id: \
-            field.data.converter = createConverter<field_type_id>(field.field_descriptor); \
-            break
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_STRING);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BYTES);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FLOAT);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_DOUBLE);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BOOL);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_ENUM);
-#    undef PROTOBUF_WRITER_CONVERTER_CREATING_CASE
-            default:
-                throw Exception(
-                    String("Protobuf type '") + field.field_descriptor->type_name() + "' isn't supported", ErrorCodes::NOT_IMPLEMENTED);
-        }
-    }
-}
-
-void ProtobufWriter::startMessage()
-{
-    current_message = root_message.get();
-    current_field_index = 0;
-    simple_writer.startMessage();
-}
-
-void ProtobufWriter::endMessage()
-{
-    if (!current_message)
-        return;
-    endWritingField();
-    while (current_message->parent)
-    {
-        simple_writer.endNestedMessage(
-            current_message->data.parent_field_number, current_message->data.is_group, !current_message->data.is_required);
-        current_message = current_message->parent;
-    }
-    simple_writer.endMessage();
-    current_message = nullptr;
-}
-
-bool ProtobufWriter::writeField(size_t & column_index)
-{
-    endWritingField();
-    while (true)
-    {
-        if (current_field_index < current_message->fields.size())
-        {
-            Field & field = current_message->fields[current_field_index];
-            if (!field.nested_message)
-            {
-                current_field = &current_message->fields[current_field_index];
-                current_converter = current_field->data.converter.get();
-                column_index = current_field->column_index;
-                if (current_field->data.should_pack_repeated)
-                    simple_writer.startRepeatedPack();
-                return true;
-            }
-            simple_writer.startNestedMessage();
-            current_message = field.nested_message.get();
-            current_message->data.need_repeat = false;
-            current_field_index = 0;
-            continue;
-        }
-        if (current_message->parent)
-        {
-            simple_writer.endNestedMessage(
-                current_message->data.parent_field_number, current_message->data.is_group, !current_message->data.is_required);
-            if (current_message->data.need_repeat)
-            {
-                simple_writer.startNestedMessage();
-                current_message->data.need_repeat = false;
-                current_field_index = 0;
-                continue;
-            }
-            current_field_index = current_message->index_in_parent + 1;
-            current_message = current_message->parent;
-            continue;
-        }
-        return false;
-    }
-}
-
-void ProtobufWriter::endWritingField()
-{
-    if (!current_field)
-        return;
-    if (current_field->data.should_pack_repeated)
-        simple_writer.endRepeatedPack(current_field->field_number);
-    else if ((num_values == 0) && current_field->data.is_required)
-        throw Exception(
-            "No data for the required field '" + current_field->field_descriptor->name() + "'",
-            ErrorCodes::NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD);
-
-    current_field = nullptr;
-    current_converter = nullptr;
-    num_values = 0;
-    ++current_field_index;
-}
-
-void ProtobufWriter::setNestedMessageNeedsRepeat()
-{
-    if (current_field->data.repeatable_container_message)
-        current_field->data.repeatable_container_message->data.need_repeat = true;
-    else
-        throw Exception(
-            "Cannot write more than single value to the non-repeated field '" + current_field->field_descriptor->name() + "'",
-            ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
+    in_repeated_pack = false;
 }

 }
--- a/src/Formats/ProtobufWriter.h
+++ b/src/Formats/ProtobufWriter.h
@ -1,290 +1,68 @@
 #pragma once

-#include <Core/UUID.h>
-#include <common/DayNum.h>
-#include <memory>
-
 #if !defined(ARCADIA_BUILD)
 #    include "config_formats.h"
 #endif

 #if USE_PROTOBUF
-#    include <IO/WriteBufferFromString.h>
-#    include <boost/noncopyable.hpp>
-#    include <Common/PODArray.h>
-#    include "ProtobufColumnMatcher.h"
-
-
-namespace google
-{
-namespace protobuf
-{
-    class Descriptor;
-    class FieldDescriptor;
-}
-}
-
-namespace DB
-{
-class IAggregateFunction;
-using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
-using ConstAggregateDataPtr = const char *;
-
-
-/** Serializes a protobuf, tries to cast types if necessarily.
-  */
-class ProtobufWriter : private boost::noncopyable
-{
-public:
-    ProtobufWriter(WriteBuffer & out, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_);
-    ~ProtobufWriter();
-
-    /// Should be called at the beginning of writing a message.
-    void startMessage();
-
-    /// Should be called at the end of writing a message.
-    void endMessage();
-
-    /// Prepares for writing values of a field.
-    /// Returns true and sets 'column_index' to the corresponding column's index.
-    /// Returns false if there are no more fields to write in the message type (call endMessage() in this case).
-    bool writeField(size_t & column_index);
-
-    /// Writes a value. This function should be called one or multiple times after writeField().
-    /// Returns false if there are no more place for the values in the protobuf's field.
-    /// This can happen if the protobuf's field is not declared as repeated in the protobuf schema.
-    bool writeNumber(Int8 value) { return writeValueIfPossible(&IConverter::writeInt8, value); }
-    bool writeNumber(UInt8 value) { return writeValueIfPossible(&IConverter::writeUInt8, value); }
-    bool writeNumber(Int16 value) { return writeValueIfPossible(&IConverter::writeInt16, value); }
-    bool writeNumber(UInt16 value) { return writeValueIfPossible(&IConverter::writeUInt16, value); }
-    bool writeNumber(Int32 value) { return writeValueIfPossible(&IConverter::writeInt32, value); }
-    bool writeNumber(UInt32 value) { return writeValueIfPossible(&IConverter::writeUInt32, value); }
-    bool writeNumber(Int64 value) { return writeValueIfPossible(&IConverter::writeInt64, value); }
-    bool writeNumber(UInt64 value) { return writeValueIfPossible(&IConverter::writeUInt64, value); }
-    bool writeNumber(Int128 value) { return writeValueIfPossible(&IConverter::writeInt128, value); }
-    bool writeNumber(UInt128 value) { return writeValueIfPossible(&IConverter::writeUInt128, value); }
-
-    bool writeNumber(Int256 value) { return writeValueIfPossible(&IConverter::writeInt256, value); }
-    bool writeNumber(UInt256 value) { return writeValueIfPossible(&IConverter::writeUInt256, value); }
-
-    bool writeNumber(Float32 value) { return writeValueIfPossible(&IConverter::writeFloat32, value); }
-    bool writeNumber(Float64 value) { return writeValueIfPossible(&IConverter::writeFloat64, value); }
-    bool writeString(const StringRef & str) { return writeValueIfPossible(&IConverter::writeString, str); }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & enum_values) { current_converter->prepareEnumMapping8(enum_values); }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & enum_values) { current_converter->prepareEnumMapping16(enum_values); }
-    bool writeEnum(Int8 value) { return writeValueIfPossible(&IConverter::writeEnum8, value); }
-    bool writeEnum(Int16 value) { return writeValueIfPossible(&IConverter::writeEnum16, value); }
-    bool writeUUID(const UUID & uuid) { return writeValueIfPossible(&IConverter::writeUUID, uuid); }
-    bool writeDate(DayNum date) { return writeValueIfPossible(&IConverter::writeDate, date); }
-    bool writeDateTime(time_t tm) { return writeValueIfPossible(&IConverter::writeDateTime, tm); }
-    bool writeDateTime64(DateTime64 tm, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDateTime64, tm, scale); }
-    bool writeDecimal(Decimal32 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal32, decimal, scale); }
-    bool writeDecimal(Decimal64 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal64, decimal, scale); }
-    bool writeDecimal(const Decimal128 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal128, decimal, scale); }
-    bool writeDecimal(const Decimal256 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal256, decimal, scale); }
-    bool writeAggregateFunction(const AggregateFunctionPtr & function, ConstAggregateDataPtr place) { return writeValueIfPossible(&IConverter::writeAggregateFunction, function, place); }
-
-private:
-    class SimpleWriter
-    {
-    public:
-        SimpleWriter(WriteBuffer & out_, const bool use_length_delimiters_);
-        ~SimpleWriter();
-
-        void startMessage();
-        void endMessage();
-
-        void startNestedMessage();
-        void endNestedMessage(UInt32 field_number, bool is_group, bool skip_if_empty);
-
-        void writeInt(UInt32 field_number, Int64 value);
-        void writeUInt(UInt32 field_number, UInt64 value);
-        void writeSInt(UInt32 field_number, Int64 value);
-        template <typename T>
-        void writeFixed(UInt32 field_number, T value);
-        void writeString(UInt32 field_number, const StringRef & str);
-
-        void startRepeatedPack();
-        void addIntToRepeatedPack(Int64 value);
-        void addUIntToRepeatedPack(UInt64 value);
-        void addSIntToRepeatedPack(Int64 value);
-        template <typename T>
-        void addFixedToRepeatedPack(T value);
-        void endRepeatedPack(UInt32 field_number);
-
-    private:
-        struct Piece
-        {
-            size_t start;
-            size_t end;
-            Piece(size_t start_, size_t end_) : start(start_), end(end_) {}
-            Piece() = default;
-        };
-
-        struct NestedInfo
-        {
-            size_t num_pieces_at_start;
-            size_t num_bytes_skipped_at_start;
-            NestedInfo(size_t num_pieces_at_start_, size_t num_bytes_skipped_at_start_)
-                : num_pieces_at_start(num_pieces_at_start_), num_bytes_skipped_at_start(num_bytes_skipped_at_start_)
-            {
-            }
-        };
-
-        WriteBuffer & out;
-        PODArray<UInt8> buffer;
-        std::vector<Piece> pieces;
-        size_t current_piece_start;
-        size_t num_bytes_skipped;
-        std::vector<NestedInfo> nested_infos;
-        const bool use_length_delimiters;
-    };
-
-    class IConverter
-    {
-    public:
-        virtual ~IConverter() = default;
-        virtual void writeString(const StringRef &) = 0;
-        virtual void writeInt8(Int8) = 0;
-        virtual void writeUInt8(UInt8) = 0;
-        virtual void writeInt16(Int16) = 0;
-        virtual void writeUInt16(UInt16) = 0;
-        virtual void writeInt32(Int32) = 0;
-        virtual void writeUInt32(UInt32) = 0;
-        virtual void writeInt64(Int64) = 0;
-        virtual void writeUInt64(UInt64) = 0;
-        virtual void writeInt128(Int128) = 0;
-        virtual void writeUInt128(const UInt128 &) = 0;
-
-        virtual void writeInt256(const Int256 &) = 0;
-        virtual void writeUInt256(const UInt256 &) = 0;
-
-        virtual void writeFloat32(Float32) = 0;
-        virtual void writeFloat64(Float64) = 0;
-        virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) = 0;
-        virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) = 0;
-        virtual void writeEnum8(Int8) = 0;
-        virtual void writeEnum16(Int16) = 0;
-        virtual void writeUUID(const UUID &) = 0;
-        virtual void writeDate(DayNum) = 0;
-        virtual void writeDateTime(time_t) = 0;
-        virtual void writeDateTime64(DateTime64, UInt32 scale) = 0;
-        virtual void writeDecimal32(Decimal32, UInt32) = 0;
-        virtual void writeDecimal64(Decimal64, UInt32) = 0;
-        virtual void writeDecimal128(const Decimal128 &, UInt32) = 0;
-        virtual void writeDecimal256(const Decimal256 &, UInt32) = 0;
-        virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) = 0;
-    };
-
-    class ConverterBaseImpl;
-    template <bool skip_null_value>
-    class ConverterToString;
-    template <int field_type_id, typename ToType, bool skip_null_value, bool pack_repeated>
-    class ConverterToNumber;
-    template <bool skip_null_value, bool pack_repeated>
-    class ConverterToBool;
-    template <bool skip_null_value, bool pack_repeated>
-    class ConverterToEnum;
-
-    struct ColumnMatcherTraits
-    {
-        struct FieldData
-        {
-            std::unique_ptr<IConverter> converter;
-            bool is_required;
-            bool is_repeatable;
-            bool should_pack_repeated;
-            ProtobufColumnMatcher::Message<ColumnMatcherTraits> * repeatable_container_message;
-        };
-        struct MessageData
-        {
-            UInt32 parent_field_number;
-            bool is_group;
-            bool is_required;
-            ProtobufColumnMatcher::Message<ColumnMatcherTraits> * repeatable_container_message;
-            bool need_repeat;
-        };
-    };
-    using Message = ProtobufColumnMatcher::Message<ColumnMatcherTraits>;
-    using Field = ProtobufColumnMatcher::Field<ColumnMatcherTraits>;
-
-    void setTraitsDataAfterMatchingColumns(Message * message);
-
-    template <int field_type_id>
-    std::unique_ptr<IConverter> createConverter(const google::protobuf::FieldDescriptor * field);
-
-    template <typename... Params>
-    using WriteValueFunctionPtr = void (IConverter::*)(Params...);
-
-    template <typename... Params, typename... Args>
-    bool writeValueIfPossible(WriteValueFunctionPtr<Params...> func, Args &&... args)
-    {
-        if (num_values && !current_field->data.is_repeatable)
-        {
-            setNestedMessageNeedsRepeat();
-            return false;
-        }
-        (current_converter->*func)(std::forward<Args>(args)...);
-        ++num_values;
-        return true;
-    }
-
-    void setNestedMessageNeedsRepeat();
-    void endWritingField();
-
-    SimpleWriter simple_writer;
-    std::unique_ptr<Message> root_message;
-
-    Message * current_message;
-    size_t current_field_index = 0;
-    const Field * current_field = nullptr;
-    IConverter * current_converter = nullptr;
-    size_t num_values = 0;
-};
-
-}
-
-#else
-#    include <common/StringRef.h>
+#   include <Core/Types.h>
+#   include <Common/PODArray.h>


 namespace DB
 {
-class IAggregateFunction;
-using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
-using ConstAggregateDataPtr = const char *;
+class WriteBuffer;

+/// Utility class for writing in the Protobuf format.
+/// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
 class ProtobufWriter
 {
 public:
-    bool writeNumber(Int8 /* value */) { return false; }
-    bool writeNumber(UInt8 /* value */) { return false; }
-    bool writeNumber(Int16 /* value */) { return false; }
-    bool writeNumber(UInt16 /* value */) { return false; }
-    bool writeNumber(Int32 /* value */) { return false; }
-    bool writeNumber(UInt32 /* value */) { return false; }
-    bool writeNumber(Int64 /* value */) { return false; }
-    bool writeNumber(UInt64 /* value */) { return false; }
-    bool writeNumber(Int128 /* value */) { return false; }
-    bool writeNumber(UInt128 /* value */) { return false; }
-    bool writeNumber(Int256 /* value */) { return false; }
-    bool writeNumber(UInt256 /* value */) { return false; }
-    bool writeNumber(Float32 /* value */) { return false; }
-    bool writeNumber(Float64 /* value */) { return false; }
-    bool writeString(const StringRef & /* value */) { return false; }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & /* name_value_pairs */) {}
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & /* name_value_pairs */) {}
-    bool writeEnum(Int8 /* value */) { return false; }
-    bool writeEnum(Int16 /* value */) { return false; }
-    bool writeUUID(const UUID & /* value */) { return false; }
-    bool writeDate(DayNum /* date */) { return false; }
-    bool writeDateTime(time_t /* tm */) { return false; }
-    bool writeDateTime64(DateTime64 /*tm*/, UInt32 /*scale*/) { return false; }
-    bool writeDecimal(Decimal32 /* decimal */, UInt32 /* scale */) { return false; }
-    bool writeDecimal(Decimal64 /* decimal */, UInt32 /* scale */) { return false; }
-    bool writeDecimal(const Decimal128 & /* decimal */, UInt32 /* scale */) { return false; }
-    bool writeDecimal(const Decimal256 & /* decimal */, UInt32 /* scale */) { return false; }
-    bool writeAggregateFunction(const AggregateFunctionPtr & /* function */, ConstAggregateDataPtr /* place */) { return false; }
+    ProtobufWriter(WriteBuffer & out_);
+    ~ProtobufWriter();
+
+    void startMessage();
+    void endMessage(bool with_length_delimiter);
+
+    void startNestedMessage();
+    void endNestedMessage(int field_number, bool is_group, bool skip_if_empty);
+
+    void writeInt(int field_number, Int64 value);
+    void writeUInt(int field_number, UInt64 value);
+    void writeSInt(int field_number, Int64 value);
+    template <typename T>
+    void writeFixed(int field_number, T value);
+    void writeString(int field_number, const std::string_view & str);
+
+    void startRepeatedPack();
+    void endRepeatedPack(int field_number, bool skip_if_empty);
+
+private:
+    struct Piece
+    {
+        size_t start;
+        size_t end;
+        Piece(size_t start_, size_t end_) : start(start_), end(end_) {}
+        Piece() = default;
+    };
+
+    struct NestedInfo
+    {
+        size_t num_pieces_at_start;
+        size_t num_bytes_skipped_at_start;
+        NestedInfo(size_t num_pieces_at_start_, size_t num_bytes_skipped_at_start_)
+            : num_pieces_at_start(num_pieces_at_start_), num_bytes_skipped_at_start(num_bytes_skipped_at_start_)
+        {
+        }
+    };
+
+    WriteBuffer & out;
+    PODArray<UInt8> buffer;
+    std::vector<Piece> pieces;
+    size_t current_piece_start = 0;
+    size_t num_bytes_skipped = 0;
+    std::vector<NestedInfo> nested_infos;
+    bool in_repeated_pack = false;
 };

 }
--- a/src/Formats/ya.make
+++ b/src/Formats/ya.make
@ -20,9 +20,9 @@ SRCS(
    NativeFormat.cpp
    NullFormat.cpp
    ParsedTemplateFormatString.cpp
-    ProtobufColumnMatcher.cpp
    ProtobufReader.cpp
    ProtobufSchemas.cpp
+    ProtobufSerializer.cpp
    ProtobufWriter.cpp
    registerFormats.cpp
    verbosePrintString.cpp
--- a/src/Functions/htmlOrXmlCoarseParse.cpp
+++ b/src/Functions/htmlOrXmlCoarseParse.cpp
@ -0,0 +1,582 @@
+#include <Columns/ColumnString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunctionImpl.h>
+
+#include <utility>
+#include <vector>
+#include <algorithm>
+
+#if USE_HYPERSCAN
+#   include <hs.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int CANNOT_ALLOCATE_MEMORY;
+    extern const int NOT_IMPLEMENTED;
+}
+
+namespace
+{
+struct HxCoarseParseImpl
+{
+private:
+    struct SpanInfo
+    {
+        SpanInfo(): id(0), match_space(std::pair<unsigned long long, unsigned long long>(0, 0)) {}  // NOLINT
+        SpanInfo(unsigned int matchId, std::pair<unsigned long long, unsigned long long> matchSpan): id(matchId), match_space(matchSpan){} // NOLINT
+        SpanInfo(const SpanInfo& obj)
+        {
+            id = obj.id;
+            match_space = obj.match_space;
+        }
+        SpanInfo& operator=(const SpanInfo& obj) = default;
+
+        unsigned int id;
+        std::pair<unsigned long long, unsigned long long> match_space;  // NOLINT
+    };
+    using SpanElement = std::vector<SpanInfo>;
+    struct Span
+    {
+        Span(): set_script(false), set_style(false), set_semi(false), is_finding_cdata(false) {}
+
+        SpanElement copy_stack;         // copy area
+        SpanElement tag_stack;          // regexp area
+        SpanInfo script_ptr;            // script pointer
+        bool set_script;                // whether set script
+        SpanInfo style_ptr;             // style pointer
+        bool set_style;                 // whether set style
+        SpanInfo semi_ptr;              // tag ptr
+        bool set_semi;                  // whether set semi
+
+        bool is_finding_cdata;
+    };
+
+    static inline void copyZone(
+        ColumnString::Offset& current_dst_string_offset,
+        ColumnString::Offset& current_copy_loc,
+        ColumnString::Chars& dst_chars,
+        const ColumnString::Chars& src_chars,
+        size_t bytes_to_copy,
+        unsigned is_space
+    )
+    {
+        bool is_last_space = false;
+        if (current_dst_string_offset == 0 || dst_chars[current_dst_string_offset - 1] == 0 || dst_chars[current_dst_string_offset - 1] == ' ')
+        {
+            is_last_space = true;
+        }
+        if (bytes_to_copy == 0)
+        {
+            if (is_space && !is_last_space)
+            {
+                dst_chars[current_dst_string_offset++] = ' ';
+            }
+        }
+        else
+        {
+            if (is_last_space && src_chars[current_copy_loc] == ' ')
+            {
+                --bytes_to_copy;
+                ++current_copy_loc;
+            }
+            if (bytes_to_copy > 0)
+            {
+                memcpySmallAllowReadWriteOverflow15(
+                    &dst_chars[current_dst_string_offset], &src_chars[current_copy_loc], bytes_to_copy);
+                current_dst_string_offset += bytes_to_copy;
+            }
+
+            // separator is space and last character is not space.
+            if (is_space && !(current_dst_string_offset == 0 || dst_chars[current_dst_string_offset - 1] == 0 || dst_chars[current_dst_string_offset - 1] == ' '))
+            {
+                dst_chars[current_dst_string_offset++] = ' ';
+            }
+        }
+        // return;
+    }
+    static inline void popArea(SpanElement& stack, unsigned long long from, unsigned long long to)  //NOLINT
+    {
+        while (!stack.empty())
+        {
+            if (to > stack.back().match_space.second && from < stack.back().match_space.second)
+            {
+                stack.pop_back();
+            }
+            else
+            {
+                break;
+            }
+        }
+        // return;
+    }
+
+    static void dealCommonTag(Span* matches)
+    {
+        while (!matches->copy_stack.empty() && matches->copy_stack.back().id != 10)
+        {
+            matches->copy_stack.pop_back();
+        }
+        if (!matches->copy_stack.empty())
+        {
+            matches->copy_stack.pop_back();
+        }
+        unsigned long long from;    // NOLINT
+        unsigned long long to;      // NOLINT
+        unsigned id;
+        for (auto begin = matches->tag_stack.begin(); begin != matches->tag_stack.end(); ++begin)
+        {
+            from = begin->match_space.first;
+            to = begin->match_space.second;
+            id = begin->id;
+            switch (id)
+            {
+                case 12:
+                case 13:
+                {
+                    popArea(matches->copy_stack, from, to);
+                    if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
+                        matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                    break;
+                }
+                case 0:
+                case 2:
+                case 3:
+                case 4:
+                case 5:
+                case 6:
+                case 7:
+                case 8:
+                case 9:
+                case 10:
+                {
+                    if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
+                    {
+                        matches->set_semi = true;
+                        matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
+                    }
+                    break;
+                }
+                case 1:
+                {
+                    if (matches->set_semi)
+                    {
+                        switch (matches->semi_ptr.id)
+                        {
+                            case 0:
+                            case 2:
+                            case 3:
+                            case 6:
+                            case 7:
+                            case 10:
+                            {
+                                if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
+                                {
+                                    if (!matches->set_script)
+                                    {
+                                        matches->set_script = true;
+                                        matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
+                                    }
+                                }
+                                else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
+                                {
+                                    if (!matches->set_style)
+                                    {
+                                        matches->set_style = true;
+                                        matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
+                                    }
+                                }
+                                popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
+                                matches->copy_stack.push_back(SpanInfo(0, std::make_pair(matches->semi_ptr.match_space.first, to)));
+                                matches->set_semi = false;
+                                break;
+                            }
+                            case 4:
+                            case 5:
+                            case 8:
+                            case 9:
+                            {
+                                SpanInfo complete_zone;
+
+                                complete_zone.match_space.second = to;
+                                if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
+                                {
+                                    complete_zone.id = matches->script_ptr.id;
+                                    complete_zone.match_space.first = matches->script_ptr.match_space.first;
+                                    matches->set_script = false;
+                                }
+                                else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
+                                {
+                                    complete_zone.id = matches->style_ptr.id;
+                                    complete_zone.match_space.first = matches->style_ptr.match_space.first;
+                                    matches->set_style = false;
+                                }
+                                else
+                                {
+                                    complete_zone.id = matches->semi_ptr.id;
+                                    complete_zone.match_space.first = matches->semi_ptr.match_space.first;
+                                }
+                                popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
+                                matches->copy_stack.push_back(complete_zone);
+                                matches->set_semi = false;
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+                default:
+                {
+                    break;
+                }
+            }
+        }
+        // return;
+    }
+    static int spanCollect(unsigned int id,
+                          unsigned long long from,  // NOLINT
+                          unsigned long long to,    // NOLINT
+                          unsigned int , void * ctx)
+    {
+        Span* matches = static_cast<Span*>(ctx);
+        from = id == 12 ? from : to - patterns_length[id];
+
+        if (matches->is_finding_cdata)
+        {
+            if (id == 11)
+            {
+                matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                matches->is_finding_cdata = false;
+                matches->tag_stack.clear();
+                if (matches->semi_ptr.id == 10)
+                {
+                    matches->set_semi = false;
+                }
+            }
+            else if (id == 12 || id == 13)
+            {
+                popArea(matches->copy_stack, from, to);
+                if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
+                    matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+
+                popArea(matches->tag_stack, from, to);
+                if (matches->tag_stack.empty() || from >= matches->tag_stack.back().match_space.second)
+                    matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+            }
+            else
+            {
+                popArea(matches->tag_stack, from, to);
+                matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+            }
+        }
+        else
+        {
+            switch (id)
+            {
+                case 12:
+                case 13:
+                {
+                    popArea(matches->copy_stack, from, to);
+                    if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
+                        matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                    break;
+                }
+                case 0:
+                case 2:
+                case 3:
+                case 4:
+                case 5:
+                case 6:
+                case 7:
+                case 8:
+                case 9:
+                {
+                    if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
+                    {
+                        matches->set_semi = true;
+                        matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
+                    }
+                    break;
+                }
+                case 10:
+                {
+                    if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
+                    {
+                        matches->set_semi = true;
+                        matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
+                    }
+                    matches->is_finding_cdata = true;
+                    matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                    matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                    break;
+                }
+                case 1:
+                {
+                    if (matches->set_semi)
+                    {
+                        switch (matches->semi_ptr.id)
+                        {
+                            case 0:
+                            case 2:
+                            case 3:
+                            case 6:
+                            case 7:
+                            case 10:
+                            {
+                                if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
+                                {
+                                    if (!matches->set_script)
+                                    {
+                                        matches->set_script = true;
+                                        matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
+                                    }
+                                }
+                                else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
+                                {
+                                    if (!matches->set_style)
+                                    {
+                                        matches->set_style = true;
+                                        matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
+                                    }
+                                }
+                                popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
+                                matches->copy_stack.push_back(SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to)));
+                                matches->set_semi = false;
+                                break;
+                            }
+                            case 4:
+                            case 5:
+                            case 8:
+                            case 9:
+                            {
+                                SpanInfo complete_zone;
+                                complete_zone.match_space.second = to;
+                                if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
+                                {
+                                    complete_zone.id = matches->script_ptr.id;
+                                    complete_zone.match_space.first = matches->script_ptr.match_space.first;
+                                    matches->set_script = false;
+                                }
+                                else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
+                                {
+                                    complete_zone.id = matches->style_ptr.id;
+                                    complete_zone.match_space.first = matches->style_ptr.match_space.first;
+                                    matches->set_style = false;
+                                }
+                                else
+                                {
+                                    complete_zone.id = matches->semi_ptr.id;
+                                    complete_zone.match_space.first = matches->semi_ptr.match_space.first;
+                                }
+                                popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
+                                matches->copy_stack.push_back(complete_zone);
+                                matches->set_semi = false;
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+                default:
+                {
+                    break;
+                }
+            }
+        }
+        return 0;
+    }
+    #if USE_HYPERSCAN
+    static hs_database_t* buildDatabase(const std::vector<const char* > &expressions,
+                                        const std::vector<unsigned> &flags,
+                                        const std::vector<unsigned> &id,
+                                        unsigned int mode)
+    {
+        hs_database_t *db;
+        hs_compile_error_t *compile_err;
+        hs_error_t err;
+        err = hs_compile_multi(expressions.data(), flags.data(), id.data(),
+                            expressions.size(), mode, nullptr, &db, &compile_err);
+
+        if (err != HS_SUCCESS)
+        {
+            hs_free_compile_error(compile_err);
+            throw Exception("Hyper scan database cannot be compiled.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+        }
+        return db;
+    }
+    #endif
+    static std::vector<const char*> patterns;
+    static std::vector<std::size_t> patterns_length;
+    static std::vector<unsigned> patterns_flag;
+    static std::vector<unsigned> ids;
+
+public:
+    static void executeInternal(
+        const ColumnString::Chars & src_chars,
+        const ColumnString::Offsets & src_offsets,
+        ColumnString::Chars & dst_chars,
+        ColumnString::Offsets & dst_offsets)
+    {
+    #if USE_HYPERSCAN
+        hs_database_t * db = buildDatabase(patterns, patterns_flag, ids, HS_MODE_BLOCK);
+        hs_scratch_t* scratch = nullptr;
+        if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS)
+        {
+            hs_free_database(db);
+            throw Exception("Unable to allocate scratch space.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+        }
+        dst_chars.resize(src_chars.size());
+        dst_offsets.resize(src_offsets.size());
+
+        ColumnString::Offset current_src_string_offset = 0;
+        ColumnString::Offset current_dst_string_offset = 0;
+        ColumnString::Offset current_copy_loc;
+        ColumnString::Offset current_copy_end;
+        unsigned is_space;
+        size_t bytes_to_copy;
+        Span match_zoneall;
+
+        for (size_t off = 0; off < src_offsets.size(); ++off)
+        {
+            hs_scan(db, reinterpret_cast<const char *>(&src_chars[current_src_string_offset]), src_offsets[off] - current_src_string_offset, 0, scratch, spanCollect, &match_zoneall);
+            if (match_zoneall.is_finding_cdata)
+            {
+                dealCommonTag(&match_zoneall);
+            }
+            SpanElement& match_zone = match_zoneall.copy_stack;
+            current_copy_loc = current_src_string_offset;
+            if (match_zone.empty())
+            {
+                current_copy_end = src_offsets[off];
+                is_space = 0;
+            }
+            else
+            {
+                current_copy_end = current_src_string_offset + match_zone.begin()->match_space.first;
+                is_space = (match_zone.begin()->id == 12 || match_zone.begin()->id == 13)?1:0;
+            }
+
+            bytes_to_copy = current_copy_end - current_copy_loc;
+            copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
+            for (auto begin = match_zone.begin(); begin != match_zone.end(); ++begin)
+            {
+                current_copy_loc = current_src_string_offset + begin->match_space.second;
+                if (begin + 1 >= match_zone.end())
+                {
+                    current_copy_end = src_offsets[off];
+                    is_space = 0;
+                }
+                else
+                {
+                    current_copy_end = current_src_string_offset + (begin+1)->match_space.first;
+                    is_space = ((begin+1)->id == 12 || (begin+1)->id == 13)?1:0;
+                }
+                bytes_to_copy = current_copy_end - current_copy_loc;
+                copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
+            }
+            if (current_dst_string_offset > 1 && dst_chars[current_dst_string_offset - 2] == ' ')
+            {
+                dst_chars[current_dst_string_offset - 2] = 0;
+                --current_dst_string_offset;
+            }
+            dst_offsets[off] = current_dst_string_offset;
+            current_src_string_offset = src_offsets[off];
+            match_zoneall.copy_stack.clear();
+            match_zoneall.tag_stack.clear();
+        }
+            dst_chars.resize(dst_chars.size());
+            hs_free_scratch(scratch);
+            hs_free_database(db);
+    #else
+        (void)src_chars;
+        (void)src_offsets;
+        (void)dst_chars;
+        (void)dst_offsets;
+        throw Exception(
+            "htmlOrXmlCoarseParse is not implemented when hyperscan is off (is it x86 processor?)",
+            ErrorCodes::NOT_IMPLEMENTED);
+    #endif
+    }
+};
+
+std::vector<const char*> HxCoarseParseImpl::patterns =
+    {
+        "<[^\\s<>]",       // 0  "<", except "< ", "<<", "<>"
+        ">",               // 1  ">"
+        "<script\\s",      // 2  <script xxxxx>
+        "<script",         // 3  <script>
+        "</script\\s",     // 4  </script xxxx>
+        "</script",        // 5  </script>
+        "<style\\s",       // 6  <style xxxxxx>
+        "<style",          // 7  <style>
+        "</style\\s",      // 8  </style xxxxx>
+        "</style",         // 9  </style>
+        "<!\\[CDATA\\[",   // 10 <![CDATA[xxxxxx]]>
+        "\\]\\]>",         // 11 ]]>
+        "\\s{2,}",         // 12 "   ", continuous blanks
+        "[^\\S ]"          // 13 "\n", "\t" and other white space, it does not include single ' '.
+    };
+std::vector<std::size_t> HxCoarseParseImpl::patterns_length =
+    {
+        2, 1, 8, 7, 9, 8, 7, 6, 8, 7, 9, 3, 0, 1
+    };
+#if USE_HYPERSCAN
+std::vector<unsigned> HxCoarseParseImpl::patterns_flag =
+    {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, HS_FLAG_SOM_LEFTMOST, 0
+    };
+#endif
+std::vector<unsigned> HxCoarseParseImpl::ids =
+    {
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
+    };
+
+class FunctionHtmlOrXmlCoarseParse : public IFunction
+{
+public:
+    static constexpr auto name = "htmlOrXmlCoarseParse";
+
+    static FunctionPtr create(const Context &) {return std::make_shared<FunctionHtmlOrXmlCoarseParse>(); }
+
+    String getName() const override {return name;}
+
+    size_t getNumberOfArguments() const override {return 1;}
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (!isString(arguments[0]))
+            throw Exception(
+                "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        return arguments[0];
+    }
+
+    bool useDefaultImplementationForConstants() const override {return true;}
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & , size_t) const override
+    {
+        const auto & strcolumn = arguments[0].column;
+        if (const ColumnString* html_sentence = checkAndGetColumn<ColumnString>(strcolumn.get()))
+        {
+            auto col_res = ColumnString::create();
+            HxCoarseParseImpl::executeInternal(html_sentence->getChars(), html_sentence->getOffsets(), col_res->getChars(), col_res->getOffsets());
+            return col_res;
+        }
+        else
+        {
+            throw Exception("First argument for function " + getName() + " must be string.", ErrorCodes::ILLEGAL_COLUMN);
+        }
+    }
+};
+}
+
+void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionHtmlOrXmlCoarseParse>();
+}
+
+}
+#endif
--- a/src/Functions/registerFunctionsString.cpp
+++ b/src/Functions/registerFunctionsString.cpp
@ -6,7 +6,9 @@ namespace DB
 {

 class FunctionFactory;
-
+#if USE_HYPERSCAN
+void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory &);
+#endif
 void registerFunctionRepeat(FunctionFactory &);
 void registerFunctionEmpty(FunctionFactory &);
 void registerFunctionNotEmpty(FunctionFactory &);
@ -45,6 +47,9 @@ void registerFunctionTryBase64Decode(FunctionFactory &);

 void registerFunctionsString(FunctionFactory & factory)
 {
+#if USE_HYPERSCAN
+    registerFunctionHtmlOrXmlCoarseParse(factory);
+#endif
    registerFunctionRepeat(factory);
    registerFunctionEmpty(factory);
    registerFunctionNotEmpty(factory);
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@ -291,6 +291,7 @@ SRCS(
    hasToken.cpp
    hasTokenCaseInsensitive.cpp
    hostName.cpp
+    htmlOrXmlCoarseParse.cpp
    hypot.cpp
    identity.cpp
    if.cpp
--- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
@ -1,57 +1,48 @@
 #include "ProtobufRowInputFormat.h"

 #if USE_PROTOBUF
-#include <Core/Block.h>
-#include <Formats/FormatFactory.h>
-#include <Formats/FormatSchemaInfo.h>
-#include <Formats/ProtobufSchemas.h>
-#include <Interpreters/Context.h>
+#   include <Core/Block.h>
+#   include <Formats/FormatFactory.h>
+#   include <Formats/FormatSchemaInfo.h>
+#   include <Formats/ProtobufReader.h>
+#   include <Formats/ProtobufSchemas.h>
+#   include <Formats/ProtobufSerializer.h>
+#   include <Interpreters/Context.h>
+#   include <ext/range.h>


 namespace DB
 {
-
-ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_, const bool use_length_delimiters_)
+ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_)
    : IRowInputFormat(header_, in_, params_)
-    , data_types(header_.getDataTypes())
-    , reader(in, ProtobufSchemas::instance().getMessageTypeForFormatSchema(info_), header_.getNames(), use_length_delimiters_)
+    , reader(std::make_unique<ProtobufReader>(in_))
+    , serializer(ProtobufSerializer::create(
+          header_.getNames(),
+          header_.getDataTypes(),
+          missing_column_indices,
+          *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
+          with_length_delimiter_,
+         *reader))
 {
 }

 ProtobufRowInputFormat::~ProtobufRowInputFormat() = default;

-bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & extra)
+bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & row_read_extension)
 {
-    if (!reader.startMessage())
-        return false; // EOF reached, no more messages.
+    if (reader->eof())
+        return false;

-    // Set of columns for which the values were read. The rest will be filled with default values.
-    auto & read_columns = extra.read_columns;
-    read_columns.assign(columns.size(), false);
+    size_t row_num = columns.empty() ? 0 : columns[0]->size();
+    if (!row_num)
+        serializer->setColumns(columns.data(), columns.size());

-    // Read values from this message and put them to the columns while it's possible.
-    size_t column_index;
-    while (reader.readColumnIndex(column_index))
-    {
-        bool allow_add_row = !static_cast<bool>(read_columns[column_index]);
-        do
-        {
-            bool row_added;
-            data_types[column_index]->deserializeProtobuf(*columns[column_index], reader, allow_add_row, row_added);
-            if (row_added)
-            {
-                read_columns[column_index] = true;
-                allow_add_row = false;
-            }
-        } while (reader.canReadMoreValues());
-    }
+    serializer->readRow(row_num);

-    // Fill non-visited columns with the default values.
-    for (column_index = 0; column_index < read_columns.size(); ++column_index)
-        if (!read_columns[column_index])
-            data_types[column_index]->insertDefaultInto(*columns[column_index]);
-
-    reader.endMessage();
+    row_read_extension.read_columns.clear();
+    row_read_extension.read_columns.resize(columns.size(), true);
+    for (size_t column_idx : missing_column_indices)
+        row_read_extension.read_columns[column_idx] = false;
    return true;
 }

@ -62,14 +53,14 @@ bool ProtobufRowInputFormat::allowSyncAfterError() const

 void ProtobufRowInputFormat::syncAfterError()
 {
-    reader.endMessage(true);
+    reader->endMessage(true);
 }

 void registerInputFormatProcessorProtobuf(FormatFactory & factory)
 {
-    for (bool use_length_delimiters : {false, true})
+    for (bool with_length_delimiter : {false, true})
    {
-        factory.registerInputFormatProcessor(use_length_delimiters ? "Protobuf" : "ProtobufSingle", [use_length_delimiters](
+        factory.registerInputFormatProcessor(with_length_delimiter ? "Protobuf" : "ProtobufSingle", [with_length_delimiter](
            ReadBuffer & buf,
            const Block & sample,
            IRowInputFormat::Params params,
@ -78,7 +69,7 @@ void registerInputFormatProcessorProtobuf(FormatFactory & factory)
            return std::make_shared<ProtobufRowInputFormat>(buf, sample, std::move(params),
                FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true,
                                settings.schema.is_server, settings.schema.format_schema_path),
-                use_length_delimiters);
+                with_length_delimiter);
        });
    }
 }
--- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
+++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
@ -5,14 +5,14 @@
 #endif

 #if USE_PROTOBUF
-#    include <DataTypes/IDataType.h>
-#    include <Formats/ProtobufReader.h>
 #    include <Processors/Formats/IRowInputFormat.h>

 namespace DB
 {
 class Block;
 class FormatSchemaInfo;
+class ProtobufReader;
+class ProtobufSerializer;


 /** Stream designed to deserialize data from the google protobuf format.
@ -29,18 +29,19 @@ class FormatSchemaInfo;
 class ProtobufRowInputFormat : public IRowInputFormat
 {
 public:
-    ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_, const bool use_length_delimiters_);
+    ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_);
    ~ProtobufRowInputFormat() override;

    String getName() const override { return "ProtobufRowInputFormat"; }

-    bool readRow(MutableColumns & columns, RowReadExtension & extra) override;
+    bool readRow(MutableColumns & columns, RowReadExtension &) override;
    bool allowSyncAfterError() const override;
    void syncAfterError() override;

 private:
-    DataTypes data_types;
-    ProtobufReader reader;
+    std::unique_ptr<ProtobufReader> reader;
+    std::vector<size_t> missing_column_indices;
+    std::unique_ptr<ProtobufSerializer> serializer;
 };

 }
--- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp
@ -1,13 +1,13 @@
-#include <Formats/FormatFactory.h>
 #include "ProtobufRowOutputFormat.h"

 #if USE_PROTOBUF
-
-#include <Core/Block.h>
-#include <Formats/FormatSchemaInfo.h>
-#include <Formats/ProtobufSchemas.h>
-#include <Interpreters/Context.h>
-#include <google/protobuf/descriptor.h>
+#   include <Formats/FormatFactory.h>
+#   include <Core/Block.h>
+#   include <Formats/FormatSchemaInfo.h>
+#   include <Formats/ProtobufSchemas.h>
+#   include <Formats/ProtobufSerializer.h>
+#   include <Formats/ProtobufWriter.h>
+#   include <google/protobuf/descriptor.h>


 namespace DB
@ -20,58 +20,55 @@ namespace ErrorCodes

 ProtobufRowOutputFormat::ProtobufRowOutputFormat(
    WriteBuffer & out_,
-    const Block & header,
+    const Block & header_,
    const RowOutputFormatParams & params_,
-    const FormatSchemaInfo & format_schema,
-    const FormatSettings & settings)
-    : IRowOutputFormat(header, out_, params_)
-    , data_types(header.getDataTypes())
-    , writer(out,
-        ProtobufSchemas::instance().getMessageTypeForFormatSchema(format_schema),
-        header.getNames(), settings.protobuf.write_row_delimiters)
-    , allow_only_one_row(
-        !settings.protobuf.write_row_delimiters
-            && !settings.protobuf.allow_many_rows_no_delimiters)
+    const FormatSchemaInfo & schema_info_,
+    const FormatSettings & settings_,
+    bool with_length_delimiter_)
+    : IRowOutputFormat(header_, out_, params_)
+    , writer(std::make_unique<ProtobufWriter>(out))
+    , serializer(ProtobufSerializer::create(
+          header_.getNames(),
+          header_.getDataTypes(),
+          *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
+          with_length_delimiter_,
+          *writer))
+    , allow_multiple_rows(with_length_delimiter_ || settings_.protobuf.allow_multiple_rows_without_delimiter)
 {
-    value_indices.resize(header.columns());
 }

 void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num)
 {
-    if (allow_only_one_row && !first_row)
-    {
-        throw Exception("The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.", ErrorCodes::NO_ROW_DELIMITER);
-    }
+    if (!allow_multiple_rows && !first_row)
+        throw Exception(
+            "The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.",
+            ErrorCodes::NO_ROW_DELIMITER);

-    writer.startMessage();
-    std::fill(value_indices.begin(), value_indices.end(), 0);
-    size_t column_index;
-    while (writer.writeField(column_index))
-        data_types[column_index]->serializeProtobuf(
-                *columns[column_index], row_num, writer, value_indices[column_index]);
-    writer.endMessage();
+    if (!row_num)
+        serializer->setColumns(columns.data(), columns.size());
+
+    serializer->writeRow(row_num);
 }


 void registerOutputFormatProcessorProtobuf(FormatFactory & factory)
 {
-    for (bool write_row_delimiters : {false, true})
+    for (bool with_length_delimiter : {false, true})
    {
        factory.registerOutputFormatProcessor(
-            write_row_delimiters ? "Protobuf" : "ProtobufSingle",
-            [write_row_delimiters](WriteBuffer & buf,
+            with_length_delimiter ? "Protobuf" : "ProtobufSingle",
+            [with_length_delimiter](WriteBuffer & buf,
               const Block & header,
               const RowOutputFormatParams & params,
-               const FormatSettings & _settings)
+               const FormatSettings & settings)
            {
-                FormatSettings settings = _settings;
-                settings.protobuf.write_row_delimiters = write_row_delimiters;
                return std::make_shared<ProtobufRowOutputFormat>(
                    buf, header, params,
                    FormatSchemaInfo(settings.schema.format_schema, "Protobuf",
                        true, settings.schema.is_server,
                        settings.schema.format_schema_path),
-                    settings);
+                    settings,
+                    with_length_delimiter);
            });
    }
 }
--- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h
+++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h
@ -8,21 +8,16 @@
 #    include <Core/Block.h>
 #    include <Formats/FormatSchemaInfo.h>
 #    include <Formats/FormatSettings.h>
-#    include <Formats/ProtobufWriter.h>
 #    include <Processors/Formats/IRowOutputFormat.h>


-namespace google
-{
-namespace protobuf
-{
-    class Message;
-}
-}
-
-
 namespace DB
 {
+class ProtobufWriter;
+class ProtobufSerializer;
+class FormatSchemaInfo;
+struct FormatSettings;
+
 /** Stream designed to serialize data in the google protobuf format.
  * Each row is written as a separated message.
  *
@ -38,10 +33,11 @@ class ProtobufRowOutputFormat : public IRowOutputFormat
 public:
    ProtobufRowOutputFormat(
        WriteBuffer & out_,
-        const Block & header,
+        const Block & header_,
        const RowOutputFormatParams & params_,
-        const FormatSchemaInfo & format_schema,
-        const FormatSettings & settings);
+        const FormatSchemaInfo & schema_info_,
+        const FormatSettings & settings_,
+        bool with_length_delimiter_);

    String getName() const override { return "ProtobufRowOutputFormat"; }

@ -50,10 +46,9 @@ public:
    std::string getContentType() const override { return "application/octet-stream"; }

 private:
-    DataTypes data_types;
-    ProtobufWriter writer;
-    std::vector<size_t> value_indices;
-    const bool allow_only_one_row;
+    std::unique_ptr<ProtobufWriter> writer;
+    std::unique_ptr<ProtobufSerializer> serializer;
+    const bool allow_multiple_rows;
 };

 }
--- a/src/Storages/Kafka/KafkaBlockOutputStream.cpp
+++ b/src/Storages/Kafka/KafkaBlockOutputStream.cpp
@ -26,7 +26,7 @@ void KafkaBlockOutputStream::writePrefix()
    buffer = storage.createWriteBuffer(getHeader());

    auto format_settings = getFormatSettings(*context);
-    format_settings.protobuf.allow_many_rows_no_delimiters = true;
+    format_settings.protobuf.allow_multiple_rows_without_delimiter = true;

    child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
        getHeader(), *context,
--- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp
+++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp
@ -34,7 +34,7 @@ void RabbitMQBlockOutputStream::writePrefix()
    buffer->activateWriting();

    auto format_settings = getFormatSettings(context);
-    format_settings.protobuf.allow_many_rows_no_delimiters = true;
+    format_settings.protobuf.allow_multiple_rows_without_delimiter = true;

    child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
        getHeader(), context,
--- a/tests/integration/test_odbc_interaction/test.py
+++ b/tests/integration/test_odbc_interaction/test.py
@ -342,3 +342,25 @@ def test_bridge_dies_with_parent(started_cluster):

    assert clickhouse_pid is None
    assert bridge_pid is None
+
+
+def test_odbc_postgres_date_data_type(started_cluster):
+    conn = get_postgres_conn();
+    cursor = conn.cursor()
+    cursor.execute("CREATE TABLE IF NOT EXISTS clickhouse.test_date (column1 integer, column2 date)")
+
+    cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, '2020-12-01')")
+    cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, '2020-12-02')")
+    cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, '2020-12-03')")
+    conn.commit()
+
+    node1.query(
+        '''
+        CREATE TABLE test_date (column1 UInt64, column2 Date)
+        ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')''')
+
+    expected = '1\t2020-12-01\n2\t2020-12-02\n3\t2020-12-03\n'
+    result = node1.query('SELECT * FROM test_date');
+    assert(result == expected)
+
+
--- a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto
@ -0,0 +1,14 @@
+syntax = "proto3";
+
+message ABC
+{
+    message nested
+    {
+        message nested
+        {
+            repeated int32 c = 1;
+        }
+        repeated nested b = 1;
+    }
+    repeated nested a = 1;
+}
--- a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference
@ -0,0 +1,52 @@
+[[],[[]],[[1]],[[2,3],[4]]]
+[[[5,6,7]],[[8,9,10]]]
+
+Binary representation:
+00000000  1a 0a 00 0a 02 0a 00 0a  05 0a 03 0a 01 01 0a 0b  |................|
+00000010  0a 04 0a 02 02 03 0a 03  0a 01 04 12 0a 07 0a 05  |................|
+00000020  0a 03 05 06 07 0a 07 0a  05 0a 03 08 09 0a        |..............|
+0000002e
+
+MESSAGE #1 AT 0x00000001
+a {
+}
+a {
+  b {
+  }
+}
+a {
+  b {
+    c: 1
+  }
+}
+a {
+  b {
+    c: 2
+    c: 3
+  }
+  b {
+    c: 4
+  }
+}
+MESSAGE #2 AT 0x0000001C
+a {
+  b {
+    c: 5
+    c: 6
+    c: 7
+  }
+}
+a {
+  b {
+    c: 8
+    c: 9
+    c: 10
+  }
+}
+
+Binary representation is as expected
+
+[[],[[]],[[1]],[[2,3],[4]]]
+[[[5,6,7]],[[8,9,10]]]
+[[],[[]],[[1]],[[2,3],[4]]]
+[[[5,6,7]],[[8,9,10]]]
--- a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+DROP TABLE IF EXISTS array_3dim_protobuf_00825;
+
+CREATE TABLE array_3dim_protobuf_00825
+(
+    `a_b_c` Array(Array(Array(Int32)))
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO array_3dim_protobuf_00825 VALUES ([[], [[]], [[1]], [[2,3],[4]]]), ([[[5, 6, 7]], [[8, 9, 10]]]);
+
+SELECT * FROM array_3dim_protobuf_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_3dim.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825"
+
+rm "$BINARY_FILE_PATH"
--- a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto
@ -0,0 +1,9 @@
+syntax = "proto3";
+
+message AA {
+    message nested_array {
+	    repeated double c = 2;
+    }
+    string a = 1;
+    repeated nested_array b = 2;
+}
--- a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference
@ -0,0 +1,41 @@
+one	[[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
+
+Binary representation:
+00000000  6b 0a 03 6f 6e 65 12 1a  12 18 00 00 00 00 00 00  |k..one..........|
+00000010  f0 3f 00 00 00 00 00 00  00 40 00 00 00 00 00 00  |.?.......@......|
+00000020  08 40 12 12 12 10 00 00  00 00 00 00 e0 3f 00 00  |.@...........?..|
+00000030  00 00 00 00 d0 3f 12 00  12 12 12 10 00 00 00 00  |.....?..........|
+00000040  00 00 10 40 00 00 00 00  00 00 14 40 12 12 12 10  |...@.......@....|
+00000050  00 00 00 00 00 00 c0 3f  00 00 00 00 00 00 b0 3f  |.......?.......?|
+00000060  12 0a 12 08 00 00 00 00  00 00 18 40              |...........@|
+0000006c
+
+MESSAGE #1 AT 0x00000001
+a: "one"
+b {
+  c: 1
+  c: 2
+  c: 3
+}
+b {
+  c: 0.5
+  c: 0.25
+}
+b {
+}
+b {
+  c: 4
+  c: 5
+}
+b {
+  c: 0.125
+  c: 0.0625
+}
+b {
+  c: 6
+}
+
+Binary representation is as expected
+
+one	[[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
+one	[[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
--- a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# https://github.com/ClickHouse/ClickHouse/issues/9069
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+CREATE TABLE array_of_arrays_protobuf_00825
+(
+    `a` String,
+    `b` Nested (
+        `c` Array(Float64)
+    )
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO array_of_arrays_protobuf_00825 VALUES ('one', [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]);
+
+SELECT * FROM array_of_arrays_protobuf_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_of_arrays.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825"
+
+rm "$BINARY_FILE_PATH"
--- a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto
+++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto
@ -0,0 +1,13 @@
+syntax = "proto3";
+
+message Message
+{
+  enum Enum
+  {
+    FIRST = 0;
+    SECOND = 1;
+    TEN = 10;
+    HUNDRED = 100;
+  };
+  Enum x = 1;
+};
--- a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference
+++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference
@ -0,0 +1,31 @@
+Second
+Third
+First
+First
+Second
+
+Binary representation:
+00000000  02 08 01 02 08 64 00 00  02 08 01                 |.....d.....|
+0000000b
+
+MESSAGE #1 AT 0x00000001
+x: SECOND
+MESSAGE #2 AT 0x00000004
+x: HUNDRED
+MESSAGE #3 AT 0x00000007
+MESSAGE #4 AT 0x00000008
+MESSAGE #5 AT 0x00000009
+x: SECOND
+
+Binary representation is as expected
+
+Second
+Third
+First
+First
+Second
+Second
+Third
+First
+First
+Second
--- a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
+++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# https://github.com/ClickHouse/ClickHouse/issues/7438
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+DROP TABLE IF EXISTS enum_mapping_protobuf_00825;
+
+CREATE TABLE enum_mapping_protobuf_00825
+(
+  x Enum16('First'=-100, 'Second'=0, 'Third'=100)
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO enum_mapping_protobuf_00825 VALUES ('Second'), ('Third'), ('First'), ('First'), ('Second');
+
+SELECT * FROM enum_mapping_protobuf_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_enum_mapping.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825"
+
+rm "$BINARY_FILE_PATH"
--- a/tests/queries/0_stateless/00825_protobuf_format_map.proto
+++ b/tests/queries/0_stateless/00825_protobuf_format_map.proto
@ -0,0 +1,5 @@
+syntax = "proto3";
+
+message Message {
+  map<string, uint32> a = 1;
+};
--- a/tests/queries/0_stateless/00825_protobuf_format_map.reference
+++ b/tests/queries/0_stateless/00825_protobuf_format_map.reference
@ -0,0 +1,19 @@
+{'x':5,'y':7}
+{'z':11}
+{'temp':0}
+{'':0}
+
+Binary representation:
+00000000  0e 0a 05 0a 01 78 10 05  0a 05 0a 01 79 10 07 07  |.....x......y...|
+00000010  0a 05 0a 01 7a 10 0b 0a  0a 08 0a 04 74 65 6d 70  |....z.......temp|
+00000020  10 00 06 0a 04 0a 00 10  00                       |.........|
+00000029
+
+{'x':5,'y':7}
+{'z':11}
+{'temp':0}
+{'':0}
+{'x':5,'y':7}
+{'z':11}
+{'temp':0}
+{'':0}
--- a/tests/queries/0_stateless/00825_protobuf_format_map.sh
+++ b/tests/queries/0_stateless/00825_protobuf_format_map.sh
@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# https://github.com/ClickHouse/ClickHouse/issues/6497
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+SET allow_experimental_map_type = 1;
+
+DROP TABLE IF EXISTS map_00825;
+
+CREATE TABLE map_00825
+(
+  a Map(String, UInt32)
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO map_00825 VALUES ({'x':5, 'y':7}), ({'z':11}), ({'temp':0}), ({'':0});
+
+SELECT * FROM map_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_map.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+echo "Binary representation:"
+hexdump -C $BINARY_FILE_PATH
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO map_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825"
+
+rm "$BINARY_FILE_PATH"
--- a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto
+++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto
@ -0,0 +1,10 @@
+syntax = "proto3";
+
+message Repeated {
+  string foo = 1;
+  int64 bar = 2;
+}
+
+message Message {
+  repeated Repeated messages = 1;
+};
--- a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference
+++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference
@ -0,0 +1,25 @@
+['1']	[0]
+['1','']	[0,1]
+
+Binary representation:
+00000000  05 0a 03 0a 01 31 09 0a  03 0a 01 31 0a 02 10 01  |.....1.....1....|
+00000010
+
+MESSAGE #1 AT 0x00000001
+messages {
+  foo: "1"
+}
+MESSAGE #2 AT 0x00000007
+messages {
+  foo: "1"
+}
+messages {
+  bar: 1
+}
+
+Binary representation is as expected
+
+['1']	[0]
+['1','']	[0,1]
+['1']	[0]
+['1','']	[0,1]
--- a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
+++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# https://github.com/ClickHouse/ClickHouse/issues/6497
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+DROP TABLE IF EXISTS nested_optional_protobuf_00825;
+
+CREATE TABLE nested_optional_protobuf_00825
+(
+  messages Nested
+  (
+    foo String,
+    bar Int64
+  )
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO nested_optional_protobuf_00825 VALUES (['1'], [0]), (['1', ''], [0, 1]);
+
+SELECT * FROM nested_optional_protobuf_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_optional.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825"
+
+rm "$BINARY_FILE_PATH"
--- a/tests/queries/0_stateless/00825_protobuf_format_table_default.proto
+++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.proto
@ -0,0 +1,6 @@
+syntax = "proto3";
+
+message Message {
+  sint32 x = 1;
+  sint32 z = 2;
+};
--- a/tests/queries/0_stateless/00825_protobuf_format_table_default.reference
+++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.reference
@ -0,0 +1,37 @@
+0	0	0
+2	4	8
+3	9	27
+5	25	125
+101	102	103
+
+Binary representation:
+00000000  00 04 08 04 10 10 04 08  06 10 36 05 08 0a 10 fa  |..........6.....|
+00000010  01 06 08 ca 01 10 ce 01                           |........|
+00000018
+
+MESSAGE #1 AT 0x00000001
+MESSAGE #2 AT 0x00000002
+x: 2
+z: 8
+MESSAGE #3 AT 0x00000007
+x: 3
+z: 27
+MESSAGE #4 AT 0x0000000C
+x: 5
+z: 125
+MESSAGE #5 AT 0x00000012
+x: 101
+z: 103
+
+Binary representation is as expected
+
+0	0	0
+0	0	0
+2	4	8
+2	4	8
+3	9	27
+3	9	27
+5	25	125
+5	25	125
+101	102	103
+101	10201	103
--- a/tests/queries/0_stateless/00825_protobuf_format_table_default.sh
+++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh
@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+DROP TABLE IF EXISTS table_default_protobuf_00825;
+
+CREATE TABLE table_default_protobuf_00825
+(
+  x Int64,
+  y Int64 DEFAULT x * x,
+  z Int64 DEFAULT x * x * x
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO table_default_protobuf_00825 (x) VALUES (0), (2), (3), (5);
+INSERT INTO table_default_protobuf_00825 VALUES (101, 102, 103);
+
+SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_table_default.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z"
+
+rm "$BINARY_FILE_PATH"
--- a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference
+++ b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference
@ -0,0 +1,9 @@
+
+
+Here is CDTATA.
+This is a white space test.
+This is a complex test. <script type="text/javascript">Hello, world</script> world <style> hello
+hello, world
+
+hello, world
+white space collapse
--- a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
+++ b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
@ -0,0 +1,15 @@
+SELECT htmlOrXmlCoarseParse('<script>Here is script.</script>');
+SELECT htmlOrXmlCoarseParse('<style>Here is style.</style>');
+SELECT htmlOrXmlCoarseParse('<![CDATA[Here is CDTATA.]]>');
+SELECT htmlOrXmlCoarseParse('This is a     white   space test.');
+SELECT htmlOrXmlCoarseParse('This is a complex test. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><![CDATA[<script type="text/javascript">Hello, world</script> ]]><hello />world<![CDATA[ <style> ]]> hello</style>\n<script><![CDATA[</script>]]>hello</script>\n</html>');
+DROP TABLE IF EXISTS defaults;
+CREATE TABLE defaults
+(
+    stringColumn String
+) ENGINE = Memory();
+
+INSERT INTO defaults values ('<common tag>hello, world<tag>'), ('<script desc=content> some content </script>'), ('<![CDATA[hello, world]]>'), ('white space    collapse');
+
+SELECT htmlOrXmlCoarseParse(stringColumn) FROM defaults;
+DROP table defaults;
--- a/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference
+++ b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference
@ -0,0 +1 @@
+1	First
--- a/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql
+++ b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql
@ -0,0 +1,28 @@
+DROP DATABASE IF EXISTS 01720_dictionary_db;
+CREATE DATABASE 01720_dictionary_db;
+
+CREATE TABLE 01720_dictionary_db.dictionary_source_table
+(
+	key UInt8,
+    value String
+)
+ENGINE = TinyLog;
+
+INSERT INTO 01720_dictionary_db.dictionary_source_table VALUES (1, 'First');
+
+CREATE DICTIONARY 01720_dictionary_db.dictionary
+(
+    key UInt64,
+    value String
+)
+PRIMARY KEY key
+SOURCE(CLICKHOUSE(DB '01720_dictionary_db' TABLE 'dictionary_source_table' HOST hostName() PORT tcpPort()))
+LIFETIME(0)
+LAYOUT(FLAT());
+
+SELECT * FROM 01720_dictionary_db.dictionary;
+
+DROP DICTIONARY 01720_dictionary_db.dictionary;
+DROP TABLE 01720_dictionary_db.dictionary_source_table;
+
+DROP DATABASE 01720_dictionary_db;
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@ -197,6 +197,7 @@
 01181_db_atomic_drop_on_cluster
 01658_test_base64Encode_mysql_compatibility
 01659_test_base64Decode_mysql_compatibility
+01674_htm_xml_coarse_parse
 01675_data_type_coroutine
 01676_clickhouse_client_autocomplete
 01671_aggregate_function_group_bitmap_data
--- a/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py
+++ b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py
@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+
+# The protobuf compiler protoc doesn't support encoding or decoding length-delimited protobuf message.
+# To do that this script has been written. 
+
+import argparse
+import os.path
+import struct
+import subprocess
+import sys
+import tempfile
+
+def read_varint(input):
+    res = 0
+    shift = 0
+    while True:
+        c = input.read(1)
+        if len(c) == 0:
+            return None
+        b = c[0]
+        if b < 0x80:
+            res += b << shift
+            break
+        b -= 0x80
+        res += b << shift
+        shift = shift << 7
+    return res
+
+def write_varint(output, value):
+    while True:
+        if value < 0x80:
+            b = value
+            output.write(b.to_bytes(1, byteorder='little'))
+            break
+        b = (value & 0x7F) + 0x80
+        output.write(b.to_bytes(1, byteorder='little'))
+        value = value >> 7
+
+def write_hexdump(output, data):
+    with subprocess.Popen(["hexdump", "-C"], stdin=subprocess.PIPE, stdout=output, shell=False) as proc:
+        proc.communicate(data)
+        if proc.returncode != 0:
+            raise RuntimeError("hexdump returned code " + str(proc.returncode))
+    output.flush()
+
+class FormatSchemaSplitted:
+    def __init__(self, format_schema):
+        self.format_schema = format_schema
+        splitted = self.format_schema.split(':')
+        if len(splitted) < 2:
+            raise RuntimeError('The format schema must have the format "schemafile:MessageType"')
+        path = splitted[0]
+        self.schemadir = os.path.dirname(path)
+        self.schemaname = os.path.basename(path)
+        if not self.schemaname.endswith(".proto"):
+            self.schemaname = self.schemaname + ".proto"
+        self.message_type = splitted[1]
+
+def decode(input, output, format_schema):
+    if not type(format_schema) is FormatSchemaSplitted:
+        format_schema = FormatSchemaSplitted(format_schema)
+    msgindex = 1
+    while True:
+        sz = read_varint(input)
+        if sz is None:
+            break
+        output.write("MESSAGE #{msgindex} AT 0x{msgoffset:08X}\n".format(msgindex=msgindex, msgoffset=input.tell()).encode())
+        output.flush()
+        msg = input.read(sz)
+        if len(msg) < sz:
+            raise EOFError('Unexpected end of file')
+        with subprocess.Popen(["protoc",
+                                "--decode", format_schema.message_type, format_schema.schemaname],
+                              cwd=format_schema.schemadir,
+                              stdin=subprocess.PIPE,
+                              stdout=output,
+                              shell=False) as proc:
+            proc.communicate(msg)
+            if proc.returncode != 0:
+                raise RuntimeError("protoc returned code " + str(proc.returncode))
+        output.flush()
+        msgindex = msgindex + 1
+
+def encode(input, output, format_schema):
+    if not type(format_schema) is FormatSchemaSplitted:
+        format_schema = FormatSchemaSplitted(format_schema)
+    line_offset = input.tell()
+    line = input.readline()
+    while True:
+        if len(line) == 0:
+            break
+        if not line.startswith(b"MESSAGE #"):
+            raise RuntimeError("The line at 0x{line_offset:08X} must start with the text 'MESSAGE #'".format(line_offset=line_offset))
+        msg = b""
+        while True:
+            line_offset = input.tell()
+            line = input.readline()
+            if line.startswith(b"MESSAGE #") or len(line) == 0:
+                break
+            msg += line
+        with subprocess.Popen(["protoc",
+                                "--encode", format_schema.message_type, format_schema.schemaname],
+                              cwd=format_schema.schemadir,
+                              stdin=subprocess.PIPE,
+                              stdout=subprocess.PIPE,
+                              shell=False) as proc:
+            msgbin = proc.communicate(msg)[0]
+            if proc.returncode != 0:
+                raise RuntimeError("protoc returned code " + str(proc.returncode))
+        write_varint(output, len(msgbin))
+        output.write(msgbin)
+        output.flush()
+
+def decode_and_check(input, output, format_schema):
+    input_data = input.read()
+    output.write(b"Binary representation:\n")
+    output.flush()
+    write_hexdump(output, input_data)
+    output.write(b"\n")
+    output.flush()
+
+    with tempfile.TemporaryFile() as tmp_input, tempfile.TemporaryFile() as tmp_decoded, tempfile.TemporaryFile() as tmp_encoded:
+        tmp_input.write(input_data)
+        tmp_input.flush()
+        tmp_input.seek(0)
+        decode(tmp_input, tmp_decoded, format_schema)
+        tmp_decoded.seek(0)
+        decoded_text = tmp_decoded.read()
+        output.write(decoded_text)
+        output.flush()
+        tmp_decoded.seek(0)
+        encode(tmp_decoded, tmp_encoded, format_schema)
+        tmp_encoded.seek(0)
+        encoded_data = tmp_encoded.read()
+
+    if encoded_data == input_data:
+        output.write(b"\nBinary representation is as expected\n")
+        output.flush()
+    else:
+        output.write(b"\nBinary representation differs from the expected one (listed below):\n")
+        output.flush()
+        write_hexdump(output, encoded_data)
+        sys.exit(1)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Encodes or decodes length-delimited protobuf messages.')
+    parser.add_argument('--input', help='The input file, the standard input will be used if not specified.')
+    parser.add_argument('--output', help='The output file, the standard output will be used if not specified')
+    parser.add_argument('--format_schema', required=True, help='Format schema in the format "schemafile:MessageType"')
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--encode', action='store_true', help='Specify to encode length-delimited messages.'
+                       'The utility will read text-format messages of the given type from the input and write it in binary to the output.')
+    group.add_argument('--decode', action='store_true', help='Specify to decode length-delimited messages.'
+                       'The utility will read messages in binary from the input and write text-format messages to the output.')
+    group.add_argument('--decode_and_check', action='store_true', help='The same as --decode, and the utility will then encode '
+                       ' the decoded data back to the binary form to check that the result of that encoding is the same as the input was.')
+    args = parser.parse_args()
+    
+    custom_input_file = None
+    custom_output_file = None
+    try:
+        if args.input:
+            custom_input_file = open(args.input, "rb")
+        if args.output:
+            custom_output_file = open(args.output, "wb")
+        input = custom_input_file if custom_input_file else sys.stdin.buffer
+        output = custom_output_file if custom_output_file else sys.stdout.buffer
+
+        if args.encode:
+            encode(input, output, args.format_schema)
+        elif args.decode:
+            decode(input, output, args.format_schema)
+        elif args.decode_and_check:
+            decode_and_check(input, output, args.format_schema)
+
+    finally:
+        if custom_input_file:
+            custom_input_file.close()
+        if custom_output_file:
+            custom_output_file.close()
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@ -122,6 +122,12 @@
        "00763_create_query_as_table_engine_bug",
        "00765_sql_compatibility_aliases",
        "00825_protobuf_format_input",
+        "00825_protobuf_format_nested_optional",
+        "00825_protobuf_format_array_3dim",
+        "00825_protobuf_format_map",
+        "00825_protobuf_format_array_of_arrays",
+        "00825_protobuf_format_table_default",
+        "00825_protobuf_format_enum_mapping",
        "00826_cross_to_inner_join",
        "00834_not_between",
        "00909_kill_not_initialized_query",