From 61212635d829c056402893510e8e50e044154992 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 30 Mar 2022 16:03:36 +0200 Subject: [PATCH 01/22] MergeTree multiple ORDER BY columns improve insert performance --- src/Columns/ColumnVector.cpp | 54 +++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index dded5ff6c99..82e2bba04d7 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -275,7 +275,59 @@ template void ColumnVector::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const { - auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); }; + bool reverse = direction == IColumn::PermutationSortDirection::Descending; + bool ascending = direction == IColumn::PermutationSortDirection::Ascending; + bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable; + + auto sort = [&](auto begin, auto end, auto pred) + { + /// A case for radix sort + if constexpr (is_arithmetic_v && !is_big_int_v) + { + /// TODO: LSD RadixSort is currently not stable if direction is descending, or value is floating point + bool use_radix_sort = (sort_is_stable && ascending && !std::is_floating_point_v) || !sort_is_stable; + size_t size = end - begin; + + /// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters. + if (size >= 256 && size <= std::numeric_limits::max() && use_radix_sort) + { + PaddedPODArray> pairs(size); + size_t index = 0; + + for (auto it = begin; it != end; ++it) + { + pairs[index] = {data[*it], static_cast(*it)}; + ++index; + } + + RadixSort>::executeLSD(pairs.data(), size, reverse, begin); + + /// Radix sort treats all NaNs to be greater than all numbers. + /// If the user needs the opposite, we must move them accordingly. + if (std::is_floating_point_v && nan_direction_hint < 0) + { + size_t nans_to_move = 0; + + for (size_t i = 0; i < size; ++i) + { + if (isNaN(data[begin[reverse ? i : size - 1 - i]])) + ++nans_to_move; + else + break; + } + + if (nans_to_move) + { + std::rotate(begin, begin + (reverse ? nans_to_move : size - nans_to_move), end); + } + } + + return; + } + } + + ::sort(begin, end, pred); + }; auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); }; if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) From 196ff48f3446529bb018ca34d5f1354187ee126a Mon Sep 17 00:00:00 2001 From: Kuz Le Date: Wed, 25 May 2022 12:55:55 +0300 Subject: [PATCH 02/22] Update delete-old-data.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Изменение смысла предложения в переводе на русский язык в статье по удалению старых данных --- docs/ru/faq/operations/delete-old-data.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/faq/operations/delete-old-data.md b/docs/ru/faq/operations/delete-old-data.md index 92736ef5205..ab221f8303b 100644 --- a/docs/ru/faq/operations/delete-old-data.md +++ b/docs/ru/faq/operations/delete-old-data.md @@ -22,9 +22,9 @@ ClickHouse позволяет автоматически удалять данн ClickHouse не удаляет данные в реальном времени, как СУБД [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing). 
Больше всего на такое удаление похожи мутации. Они выполняются с помощью запросов `ALTER ... DELETE` или `ALTER ... UPDATE`. В отличие от обычных запросов `DELETE` и `UPDATE`, мутации выполняются асинхронно, в пакетном режиме, не в реальном времени. В остальном после слов `ALTER TABLE` синтаксис обычных запросов и мутаций одинаковый. -`ALTER DELETE` можно использовать для гибкого удаления устаревших данных. Если вам нужно делать это регулярно, единственный недостаток такого подхода будет заключаться в том, что потребуется внешняя система для запуска запроса. Кроме того, могут возникнуть некоторые проблемы с производительностью, поскольку мутации перезаписывают целые куски данных если в них содержится хотя бы одна строка, которую нужно удалить. +`ALTER DELETE` можно использовать для гибкого удаления устаревших данных. Если вам нужно делать это регулярно, основной недостаток такого подхода будет заключаться в том, что потребуется внешняя система для запуска запроса. Кроме того, могут возникнуть некоторые проблемы с производительностью, поскольку мутации перезаписывают целые куски данных если в них содержится хотя бы одна строка, которую нужно удалить. -Это самый распространенный подход к тому, чтобы обеспечить соблюдение принципов [GDPR](https://gdpr-info.eu) в вашей системе на ClickHouse. +Это - самый распространенный подход к тому, чтобы обеспечить соблюдение принципов [GDPR](https://gdpr-info.eu) в вашей системе на ClickHouse. Подробнее смотрите в разделе [Мутации](../../sql-reference/statements/alter/index.md#alter-mutations). From 7817d6aea37bb6150e96d92b1a8abb32c115d40c Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 25 May 2022 11:20:28 +0000 Subject: [PATCH 03/22] Support Maps and Records in Avro format --- .../Formats/Impl/AvroRowInputFormat.cpp | 113 ++++++++++++++---- .../Formats/Impl/AvroRowInputFormat.h | 8 +- .../Formats/Impl/AvroRowOutputFormat.cpp | 90 +++++++++++--- .../Formats/Impl/AvroRowOutputFormat.h | 6 +- .../02313_avro_records_and_maps.reference | 25 ++++ .../02313_avro_records_and_maps.sql | 24 ++++ 6 files changed, 216 insertions(+), 50 deletions(-) create mode 100644 tests/queries/0_stateless/02313_avro_records_and_maps.reference create mode 100644 tests/queries/0_stateless/02313_avro_records_and_maps.sql diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 24de8c65eeb..40dc49a8bd4 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -4,7 +4,6 @@ #include -#include #include #include @@ -13,12 +12,9 @@ #include #include -#include #include #include -#include -#include #include #include #include @@ -28,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -36,33 +32,24 @@ #include #include #include +#include +#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include -#include #include #include #include #include -#include -#include #include #include -#include #include @@ -386,8 +373,69 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node } case avro::AVRO_SYMBOLIC: return createDeserializeFn(avro::resolveSymbol(root_node), target_type); - case avro::AVRO_MAP: case avro::AVRO_RECORD: + { + if (target.isTuple()) + { + const DataTypeTuple & tuple_type = assert_cast(*target_type); + const auto & 
nested_types = tuple_type.getElements(); + std::vector> nested_deserializers; + nested_deserializers.reserve(root_node->leaves()); + if (root_node->leaves() != nested_types.size()) + throw Exception(ErrorCodes::INCORRECT_DATA, "The number of leaves in record doesn't match the number of elements in tuple"); + + for (size_t i = 0; i != root_node->leaves(); ++i) + { + const auto & name = root_node->nameAt(i); + size_t pos = tuple_type.getPositionByName(name); + auto nested_deserializer = createDeserializeFn(root_node->leafAt(i), nested_types[pos]); + nested_deserializers.emplace_back(nested_deserializer, pos); + } + + return [nested_deserializers](IColumn & column, avro::Decoder & decoder) + { + ColumnTuple & column_tuple = assert_cast(column); + auto nested_columns = column_tuple.getColumns(); + for (const auto & [nested_deserializer, pos] : nested_deserializers) + nested_deserializer(*nested_columns[pos], decoder); + }; + } + break; + } + case avro::AVRO_MAP: + { + if (target.isMap()) + { + const auto & map_type = assert_cast(*target_type); + const auto & keys_type = map_type.getKeyType(); + const auto & values_type = map_type.getValueType(); + auto keys_source_type = root_node->leafAt(0); + auto values_source_type = root_node->leafAt(1); + auto keys_deserializer = createDeserializeFn(keys_source_type, keys_type); + auto values_deserializer = createDeserializeFn(values_source_type, values_type); + return [keys_deserializer, values_deserializer](IColumn & column, avro::Decoder & decoder) + { + ColumnMap & column_map = assert_cast(column); + ColumnArray & column_array = column_map.getNestedColumn(); + ColumnArray::Offsets & offsets = column_array.getOffsets(); + ColumnTuple & nested_columns = column_map.getNestedData(); + IColumn & keys_column = nested_columns.getColumn(0); + IColumn & values_column = nested_columns.getColumn(1); + size_t total = 0; + for (size_t n = decoder.mapStart(); n != 0; n = decoder.mapNext()) + { + total += n; + for (size_t i = 0; i < n; ++i) + { + keys_deserializer(keys_column, decoder); + values_deserializer(values_column, decoder); + } + } + offsets.push_back(offsets.back() + total); + }; + } + break; + } default: break; } @@ -896,6 +944,21 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting."); case avro::Type::AVRO_SYMBOLIC: return avroNodeToDataType(avro::resolveSymbol(node)); + case avro::Type::AVRO_RECORD: + { + DataTypes nested_types; + nested_types.reserve(node->leaves()); + Names nested_names; + nested_names.reserve(node->leaves()); + for (size_t i = 0; i != node->leaves(); ++i) + { + nested_types.push_back(avroNodeToDataType(node->leafAt(i))); + nested_names.push_back(node->nameAt(i)); + } + return std::make_shared(nested_types, nested_names); + } + case avro::Type::AVRO_MAP: + return std::make_shared(avroNodeToDataType(node->leafAt(0)), avroNodeToDataType(node->leafAt(1))); default: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro column {} is not supported for inserting."); } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 7a598de1f6a..3fde0e14ede 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -15,10 +15,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include namespace DB diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp 
b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 7b86dcd4a64..ddee20c187b 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -1,23 +1,21 @@ #include "AvroRowOutputFormat.h" #if USE_AVRO -#include #include -#include #include #include -#include #include #include #include -#include #include #include #include #include #include +#include +#include #include #include @@ -25,21 +23,13 @@ #include #include #include +#include +#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include @@ -321,6 +311,70 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF } case TypeIndex::Nothing: return {avro::NullSchema(), [](const IColumn &, size_t, avro::Encoder & encoder) { encoder.encodeNull(); }}; + case TypeIndex::Tuple: + { + const auto & tuple_type = assert_cast(*data_type); + const auto & nested_types = tuple_type.getElements(); + const auto & nested_names = tuple_type.getElementNames(); + std::vector nested_serializers; + nested_serializers.reserve(nested_types.size()); + auto schema = avro::RecordSchema(column_name); + for (size_t i = 0; i != nested_types.size(); ++i) + { + auto nested_mapping = createSchemaWithSerializeFn(nested_types[i], type_name_increment, nested_names[i]); + schema.addField(nested_names[i], nested_mapping.schema); + nested_serializers.push_back(nested_mapping.serialize); + } + + return {schema, [nested_serializers](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const ColumnTuple & column_tuple = assert_cast(column); + const auto & nested_columns = column_tuple.getColumns(); + for (size_t i = 0; i != nested_serializers.size(); ++i) + nested_serializers[i](*nested_columns[i], row_num, encoder); + }}; + } + case TypeIndex::Map: + { + const auto & map_type = assert_cast(*data_type); + const auto & keys_type = map_type.getKeyType(); + if (!isStringOrFixedString(keys_type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro Maps support only keys with type String, got {}", keys_type->getName()); + + auto keys_serializer = [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = column.getDataAt(row_num); + encoder.encodeString(s.toString()); + }; + + const auto & values_type = map_type.getValueType(); + auto values_mapping = createSchemaWithSerializeFn(values_type, type_name_increment, column_name + ".value"); + auto schema = avro::MapSchema(values_mapping.schema); + + return {schema, [keys_serializer, values_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const ColumnMap & column_map = assert_cast(column); + const ColumnArray & column_array = column_map.getNestedColumn(); + const ColumnArray::Offsets & offsets = column_array.getOffsets(); + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; + size_t row_count = next_offset - offset; + const ColumnTuple & nested_columns = column_map.getNestedData(); + const IColumn & keys_column = nested_columns.getColumn(0); + const IColumn & values_column = nested_columns.getColumn(1); + + encoder.mapStart(); + if (row_count > 0) + encoder.setItemCount(row_count); + + for (size_t i = offset; i < next_offset; ++i) + { + keys_serializer(keys_column, i, encoder); + values_mapping.serialize(values_column, i, encoder); + } + encoder.mapEnd(); + }}; + } default: break; } diff --git 
a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index 0a2acc93688..a36b36286c3 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -9,9 +9,9 @@ #include #include -#include -#include -#include +#include +#include +#include namespace DB diff --git a/tests/queries/0_stateless/02313_avro_records_and_maps.reference b/tests/queries/0_stateless/02313_avro_records_and_maps.reference new file mode 100644 index 00000000000..24fc635cdce --- /dev/null +++ b/tests/queries/0_stateless/02313_avro_records_and_maps.reference @@ -0,0 +1,25 @@ +t Tuple(a Int32, b String) +(0,'String') +(1,'String') +(2,'String') +t Tuple(a Int32, b Tuple(c Int32, d Int32), e Array(Int32)) +(0,(1,2),[]) +(1,(2,3),[0]) +(2,(3,4),[0,1]) +a.b Array(Int32) +a.c Array(Int32) +[0,1] [2,3] +[1,2] [3,4] +[2,3] [4,5] +a.b Array(Array(Tuple(c Int32, d Int32))) +[[(0,1),(2,3)]] +[[(1,2),(3,4)]] +[[(2,3),(4,5)]] +m Map(String, Int64) +{'key_0':0} +{'key_1':1} +{'key_2':2} +m Map(String, Tuple(`1` Int64, `2` Array(Int64))) +{'key_0':(0,[])} +{'key_1':(1,[0])} +{'key_2':(2,[0,1])} diff --git a/tests/queries/0_stateless/02313_avro_records_and_maps.sql b/tests/queries/0_stateless/02313_avro_records_and_maps.sql new file mode 100644 index 00000000000..bb487b3de07 --- /dev/null +++ b/tests/queries/0_stateless/02313_avro_records_and_maps.sql @@ -0,0 +1,24 @@ +insert into function file(data_02313.avro) select tuple(number, 'String')::Tuple(a UInt32, b String) as t from numbers(3) settings engine_file_truncate_on_insert=1; +desc file(data_02313.avro); +select * from file(data_02313.avro); + +insert into function file(data_02313.avro) select tuple(number, tuple(number + 1, number + 2), range(number))::Tuple(a UInt32, b Tuple(c UInt32, d UInt32), e Array(UInt32)) as t from numbers(3) settings engine_file_truncate_on_insert=1; +desc file(data_02313.avro); +select * from file(data_02313.avro); + +insert into function file(data_02313.avro, auto, 'a Nested(b UInt32, c UInt32)') select [number, number + 1], [number + 2, number + 3] from numbers(3) settings engine_file_truncate_on_insert=1; +desc file(data_02313.avro); +select * from file(data_02313.avro); + +insert into function file(data_02313.avro, auto, 'a Nested(b Nested(c UInt32, d UInt32))') select [[(number, number + 1), (number + 2, number + 3)]] from numbers(3) settings engine_file_truncate_on_insert=1; +desc file(data_02313.avro); +select * from file(data_02313.avro); + +insert into function file(data_02313.avro) select map(concat('key_', toString(number)), number) as m from numbers(3) settings engine_file_truncate_on_insert=1; +desc file(data_02313.avro); +select * from file(data_02313.avro); + +insert into function file(data_02313.avro) select map(concat('key_', toString(number)), tuple(number, range(number))) as m from numbers(3) settings engine_file_truncate_on_insert=1; +desc file(data_02313.avro); +select * from file(data_02313.avro); + From 12effa4c2ad7d08eb95432ef22979aae1a9b63d6 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 25 May 2022 11:22:04 +0000 Subject: [PATCH 04/22] Add tags to test --- tests/queries/0_stateless/02313_avro_records_and_maps.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02313_avro_records_and_maps.sql b/tests/queries/0_stateless/02313_avro_records_and_maps.sql index bb487b3de07..83ab342be9e 100644 --- a/tests/queries/0_stateless/02313_avro_records_and_maps.sql +++ 
b/tests/queries/0_stateless/02313_avro_records_and_maps.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel, no-fasttest + insert into function file(data_02313.avro) select tuple(number, 'String')::Tuple(a UInt32, b String) as t from numbers(3) settings engine_file_truncate_on_insert=1; desc file(data_02313.avro); select * from file(data_02313.avro); From 038a422aeb8f15cc6ffedda12bb0ddeedaef744f Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 25 May 2022 12:56:59 +0000 Subject: [PATCH 05/22] Add setting to insert null as default --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Formats/Impl/AvroRowInputFormat.cpp | 67 +++++++++++------- .../Formats/Impl/AvroRowInputFormat.h | 8 ++- .../02314_avro_null_as_default.reference | 6 ++ .../0_stateless/02314_avro_null_as_default.sh | 17 +++++ .../0_stateless/data_avro/nullable_array.avro | Bin 0 -> 373 bytes 8 files changed, 72 insertions(+), 29 deletions(-) create mode 100644 tests/queries/0_stateless/02314_avro_null_as_default.reference create mode 100755 tests/queries/0_stateless/02314_avro_null_as_default.sh create mode 100644 tests/queries/0_stateless/data_avro/nullable_array.avro diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bf9785fcc00..42483d87fbb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -686,6 +686,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \ + M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 644e4d3ecfd..f7e18083cb3 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -56,6 +56,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString(); format_settings.avro.string_column_pattern = settings.output_format_avro_string_column_pattern.toString(); format_settings.avro.output_rows_in_file = settings.output_format_avro_rows_in_file; + format_settings.avro.null_as_default = settings.input_format_avro_null_as_default; format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index e6f0a7d229e..64482f32381 100644 --- a/src/Formats/FormatSettings.h +++ 
b/src/Formats/FormatSettings.h @@ -92,6 +92,7 @@ struct FormatSettings bool allow_missing_fields = false; String string_column_pattern; UInt64 output_rows_in_file = 1; + bool null_as_default = false; } avro; String bool_true_representation = "true"; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 40dc49a8bd4..6b056e64d41 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -279,30 +279,42 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node break; case avro::AVRO_UNION: { - auto nullable_deserializer = [root_node, target_type](size_t non_null_union_index) + if (root_node->leaves() == 2 + && (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL)) { - auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type)); - return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) + size_t non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0; + if (target.isNullable()) { - ColumnNullable & col = assert_cast(column); - size_t union_index = decoder.decodeUnionIndex(); - if (union_index == non_null_union_index) + auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type)); + return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { - nested_deserialize(col.getNestedColumn(), decoder); - col.getNullMapData().push_back(0); - } - else + ColumnNullable & col = assert_cast(column); + size_t union_index = decoder.decodeUnionIndex(); + if (union_index == non_null_union_index) + { + nested_deserialize(col.getNestedColumn(), decoder); + col.getNullMapData().push_back(0); + } + else + { + col.insertDefault(); + } + }; + } + + if (null_as_default) + { + auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); + return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { - col.insertDefault(); - } - }; - }; - if (root_node->leaves() == 2 && target.isNullable()) - { - if (root_node->leafAt(0)->type() == avro::AVRO_NULL) - return nullable_deserializer(1); - if (root_node->leafAt(1)->type() == avro::AVRO_NULL) - return nullable_deserializer(0); + size_t union_index = decoder.decodeUnionIndex(); + if (union_index == non_null_union_index) + nested_deserialize(column, decoder); + else + column.insertDefault(); + }; + } + } break; } @@ -625,7 +637,8 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co } } -AvroDeserializer::AvroDeserializer(const Block & header, avro::ValidSchema schema, bool allow_missing_fields) +AvroDeserializer::AvroDeserializer(const Block & header, avro::ValidSchema schema, bool allow_missing_fields, bool null_as_default_) + : null_as_default(null_as_default_) { const auto & schema_root = schema.root(); if (schema_root->type() != avro::AVRO_RECORD) @@ -663,15 +676,15 @@ void AvroDeserializer::deserializeRow(MutableColumns & columns, avro::Decoder & AvroRowInputFormat::AvroRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) - : IRowInputFormat(header_, in_, params_), - allow_missing_fields(format_settings_.avro.allow_missing_fields) + : IRowInputFormat(header_, in_, params_), 
format_settings(format_settings_) { } void AvroRowInputFormat::readPrefix() { file_reader_ptr = std::make_unique(std::make_unique(*in)); - deserializer_ptr = std::make_unique(output.getHeader(), file_reader_ptr->dataSchema(), allow_missing_fields); + deserializer_ptr = std::make_unique( + output.getHeader(), file_reader_ptr->dataSchema(), format_settings.avro.allow_missing_fields, format_settings.avro.null_as_default); file_reader_ptr->init(); } @@ -857,7 +870,8 @@ const AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(Sc if (it == deserializer_cache.end()) { auto schema = schema_registry->getSchema(schema_id); - AvroDeserializer deserializer(output.getHeader(), schema, format_settings.avro.allow_missing_fields); + AvroDeserializer deserializer( + output.getHeader(), schema, format_settings.avro.allow_missing_fields, format_settings.avro.null_as_default); it = deserializer_cache.emplace(schema_id, deserializer).first; } return it->second; @@ -939,7 +953,8 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL)) { size_t nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0; - return makeNullable(avroNodeToDataType(node->leafAt(nested_leaf_index))); + auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index)); + return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type; } throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting."); case avro::Type::AVRO_SYMBOLIC: diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 3fde0e14ede..13afa06b089 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -32,13 +32,13 @@ namespace ErrorCodes class AvroDeserializer { public: - AvroDeserializer(const Block & header, avro::ValidSchema schema, bool allow_missing_fields); + AvroDeserializer(const Block & header, avro::ValidSchema schema, bool allow_missing_fields, bool null_as_default_); void deserializeRow(MutableColumns & columns, avro::Decoder & decoder, RowReadExtension & ext) const; private: using DeserializeFn = std::function; using SkipFn = std::function; - static DeserializeFn createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type); + DeserializeFn createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type); SkipFn createSkipFn(avro::NodePtr root_node); struct Action @@ -113,6 +113,8 @@ private: /// Map from name of named Avro type (record, enum, fixed) to SkipFn. /// This is to avoid infinite recursion when Avro schema contains self-references. e.g. LinkedList std::map symbolic_skip_fn_map; + + bool null_as_default = false; }; class AvroRowInputFormat final : public IRowInputFormat @@ -128,7 +130,7 @@ private: std::unique_ptr file_reader_ptr; std::unique_ptr deserializer_ptr; - bool allow_missing_fields; + FormatSettings format_settings; }; /// Confluent framing + Avro binary datum encoding. Mainly used for Kafka. 
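For orientation, a minimal usage sketch of the new setting (the file name 'example.avro' and the column name 'x' are illustrative, not part of this patch): by default, reading an Avro union such as ["null", "long"] into a non-Nullable column is rejected; with the setting enabled, the deserializer falls back to insertDefault() on the target column, so Avro nulls arrive as the column type's default value.

SET input_format_avro_null_as_default = 1;
-- nulls from the Avro union become 0 in the non-Nullable UInt32 column
SELECT * FROM file('example.avro', 'Avro', 'x UInt32');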
diff --git a/tests/queries/0_stateless/02314_avro_null_as_default.reference b/tests/queries/0_stateless/02314_avro_null_as_default.reference new file mode 100644 index 00000000000..ba38a15f924 --- /dev/null +++ b/tests/queries/0_stateless/02314_avro_null_as_default.reference @@ -0,0 +1,6 @@ +a Nullable(Int64) +b Array(Tuple(c Nullable(Int64), d Nullable(String))) +1 [(100,'Q'),(200,'W')] +0 +0 +0 diff --git a/tests/queries/0_stateless/02314_avro_null_as_default.sh b/tests/queries/0_stateless/02314_avro_null_as_default.sh new file mode 100755 index 00000000000..207f814be10 --- /dev/null +++ b/tests/queries/0_stateless/02314_avro_null_as_default.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +DATA_DIR=$CUR_DIR/data_avro + +$CLICKHOUSE_LOCAL -q "desc file('$DATA_DIR/nullable_array.avro') settings input_format_avro_null_as_default=1" +$CLICKHOUSE_LOCAL -q "select * from file('$DATA_DIR/nullable_array.avro') settings input_format_avro_null_as_default=1" + +$CLICKHOUSE_CLIENT -q "insert into function file(data_02314.avro) select NULL::Nullable(UInt32) as x from numbers(3) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(data_02314.avro, auto, 'x UInt32') settings input_format_avro_null_as_default=1" + diff --git a/tests/queries/0_stateless/data_avro/nullable_array.avro b/tests/queries/0_stateless/data_avro/nullable_array.avro new file mode 100644 index 0000000000000000000000000000000000000000..915e319c8e4f376f36b38e82155154489051a080 GIT binary patch literal 373 zcmeZI%3@>@ODrqO*DFrWNX<>$!&0qOQdy9yWTjM;nw(#hqNJmgmzWFU=Vhj41|f?T z7bGTwB=U>W^%8;Xj8r|48laA}%+#EeVkN8SYMACkpeV%LXr;W;oE#uGCqFM;DYjMz zt|kdX4csk>MMa5~K Date: Wed, 25 May 2022 22:04:39 +0300 Subject: [PATCH 06/22] FPC codec --- src/Compression/CompressionCodecFPC.cpp | 498 ++++++++++++++++++++++++ src/Compression/CompressionFactory.cpp | 2 + src/Compression/CompressionInfo.h | 3 +- 3 files changed, 502 insertions(+), 1 deletion(-) create mode 100644 src/Compression/CompressionCodecFPC.cpp diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp new file mode 100644 index 00000000000..3c7aac794c0 --- /dev/null +++ b/src/Compression/CompressionCodecFPC.cpp @@ -0,0 +1,498 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +class CompressionCodecFPC : public ICompressionCodec +{ +public: + explicit CompressionCodecFPC(UInt8 float_size, UInt8 compression_level); + + uint8_t getMethodByte() const override; + + void updateHash(SipHash & hash) const override; + +protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; + + void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + + UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; + + bool isCompression() const override { return true; } + bool isGenericCompression() const override { return false; } + + static constexpr UInt32 HEADER_SIZE{3}; + +private: + UInt8 float_width; + UInt8 level; +}; + + +namespace ErrorCodes +{ + extern const int CANNOT_COMPRESS; + extern const int CANNOT_DECOMPRESS; + extern const int ILLEGAL_CODEC_PARAMETER; + extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; + extern const int BAD_ARGUMENTS; +} + +uint8_t 
CompressionCodecFPC::getMethodByte() const +{ + return static_cast(CompressionMethodByte::FPC); +} + +void CompressionCodecFPC::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + +CompressionCodecFPC::CompressionCodecFPC(UInt8 float_size, UInt8 compression_level) + : float_width{float_size}, level{compression_level} +{ + setCodecDescription("FPC", {std::make_shared(static_cast(level))}); +} + +UInt32 CompressionCodecFPC::getMaxCompressedDataSize(UInt32 uncompressed_size) const +{ + auto float_count = (uncompressed_size + float_width - 1) / float_width; + if (float_count % 2 != 0) { + ++float_count; + } + return HEADER_SIZE + (float_count + float_count / 2) * float_width; +} + +namespace +{ + +UInt8 getFloatBytesSize(const IDataType & column_type) +{ + if (!WhichDataType(column_type).isFloat()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "FPC codec is not applicable for {} because the data type is not float", + column_type.getName()); + } + + if (auto float_size = column_type.getSizeOfValueInMemory(); float_size >= 4) + { + return static_cast(float_size); + } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "FPC codec is not applicable for floats of size less than 4 bytes. Given type {}", + column_type.getName()); +} + +UInt8 encodeEndianness(std::endian endian) +{ + switch (endian) + { + case std::endian::little: + return 0; + case std::endian::big: + return 1; + } + throw Exception("Unsupported endianness", ErrorCodes::BAD_ARGUMENTS); +} + +std::endian decodeEndianness(UInt8 endian) { + switch (endian) + { + case 0: + return std::endian::little; + case 1: + return std::endian::big; + } + throw Exception("Unsupported endianness", ErrorCodes::BAD_ARGUMENTS); +} + +} + +void registerCodecFPC(CompressionCodecFactory & factory) +{ + auto method_code = static_cast(CompressionMethodByte::FPC); + auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr + { + if (!column_type) + { + throw Exception("FPC codec must have associated column", ErrorCodes::BAD_ARGUMENTS); + } + UInt8 level{0}; + if (arguments && !arguments->children.empty()) + { + if (arguments->children.size() > 1) + { + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, + "FPC codec must have 1 parameter, given {}", arguments->children.size()); + } + + const auto * literal = arguments->children.front()->as(); + if (!literal) + { + throw Exception("FPC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); + } + + level = literal->value.safeGet(); + } + return std::make_shared(getFloatBytesSize(*column_type), level); + }; + factory.registerCompressionCodecWithType("FPC", method_code, codec_builder); +} + +namespace +{ + +template requires (sizeof(TUint) >= 4) +class DfcmPredictor { +public: + explicit DfcmPredictor(std::size_t table_size): table(table_size, 0), prev_value{0}, hash{0} + { + } + + [[nodiscard]] + TUint predict() const noexcept + { + return table[hash] + prev_value; + } + + void add(TUint value) noexcept + { + table[hash] = value - prev_value; + recalculateHash(); + prev_value = value; + } + +private: + void recalculateHash() noexcept + { + auto value = table[hash]; + if constexpr (sizeof(TUint) >= 8) + { + hash = ((hash << 2) ^ static_cast(value >> 40)) & (table.size() - 1); + } + else + { + hash = ((hash << 4) ^ static_cast(value >> 23)) & (table.size() - 1); + } + } + + std::vector table; + TUint prev_value{0}; + std::size_t hash{0}; +}; + +template requires (sizeof(TUint) >= 4) +class FcmPredictor { +public: + 
explicit FcmPredictor(std::size_t table_size): table(table_size, 0), hash{0} + { + } + + [[nodiscard]] + TUint predict() const noexcept + { + return table[hash]; + } + + void add(TUint value) noexcept + { + table[hash] = value; + recalculateHash(); + } + +private: + void recalculateHash() noexcept + { + auto value = table[hash]; + if constexpr (sizeof(TUint) >= 8) + { + hash = ((hash << 6) ^ static_cast(value >> 48)) & (table.size() - 1); + } + else + { + hash = ((hash << 1) ^ static_cast(value >> 22)) & (table.size() - 1); + } + } + + std::vector table; + std::size_t hash{0}; +}; + +template + requires (Endian == std::endian::little || Endian == std::endian::big) +class FPCOperation +{ + static constexpr std::size_t CHUNK_SIZE{64}; + + static constexpr auto VALUE_SIZE = sizeof(TUint); + static constexpr std::byte DFCM_BIT_1{1u << 7}; + static constexpr std::byte DFCM_BIT_2{1u << 3}; + static constexpr unsigned MAX_COMPRESSED_SIZE{0b111u}; + +public: + explicit FPCOperation(std::span destination, UInt8 compression_level) + : dfcm_predictor(1 << compression_level), fcm_predictor(1 << compression_level), chunk{}, result{destination} + { + } + + std::size_t encode(std::span data) && + { + auto initial_size = result.size(); + + std::span chunk_view(chunk); + for (std::size_t i = 0; i < data.size(); i += chunk_view.size_bytes()) + { + auto written_values = importChunk(data.subspan(i), chunk_view); + encodeChunk(chunk_view.subspan(0, written_values)); + } + + return initial_size - result.size(); + } + + void decode(std::span values, std::size_t decoded_size) && + { + std::size_t read_bytes{0}; + + std::span chunk_view(chunk); + for (std::size_t i = 0; i < decoded_size; i += chunk_view.size_bytes()) + { + if (i + chunk_view.size_bytes() > decoded_size) + chunk_view = chunk_view.first(ceilBytesToEvenValues(decoded_size - i)); + read_bytes += decodeChunk(values.subspan(read_bytes), chunk_view); + exportChunk(chunk_view); + } + } + +private: + static std::size_t ceilBytesToEvenValues(std::size_t bytes_count) + { + auto values_count = (bytes_count + VALUE_SIZE - 1) / VALUE_SIZE; + return values_count % 2 == 0 ? 
values_count : values_count + 1; + } + + std::size_t importChunk(std::span values, std::span chnk) + { + if (auto chunk_view = std::as_writable_bytes(chnk); chunk_view.size() <= values.size()) + { + std::memcpy(chunk_view.data(), values.data(), chunk_view.size()); + return chunk_view.size() / VALUE_SIZE; + } + else + { + std::memset(chunk_view.data(), 0, chunk_view.size()); + std::memcpy(chunk_view.data(), values.data(), values.size()); + return ceilBytesToEvenValues(values.size()); + } + } + + void exportChunk(std::span chnk) + { + auto chunk_view = std::as_bytes(chnk).first(std::min(result.size(), chnk.size_bytes())); + std::memcpy(result.data(), chunk_view.data(), chunk_view.size()); + result = result.subspan(chunk_view.size()); + } + + void encodeChunk(std::span seq) + { + for (std::size_t i = 0; i < seq.size(); i += 2) + { + encodePair(seq[i], seq[i + 1]); + } + } + + struct CompressedValue + { + TUint value; + unsigned compressed_size; + bool is_dfcm_predictor; + }; + + unsigned encodeCompressedSize(int compressed) + { + if constexpr (VALUE_SIZE > MAX_COMPRESSED_SIZE) + { + if (compressed >= 4) + --compressed; + } + return std::min(static_cast(compressed), MAX_COMPRESSED_SIZE); + } + + unsigned decodeCompressedSize(unsigned encoded_size) + { + if constexpr (VALUE_SIZE > MAX_COMPRESSED_SIZE) + { + if (encoded_size > 3) + ++encoded_size; + } + return encoded_size; + } + + CompressedValue compressValue(TUint value) noexcept + { + TUint compressed_dfcm = dfcm_predictor.predict() ^ value; + TUint compressed_fcm = fcm_predictor.predict() ^ value; + dfcm_predictor.add(value); + fcm_predictor.add(value); + auto zeroes_dfcm = std::countl_zero(compressed_dfcm); + auto zeroes_fcm = std::countl_zero(compressed_fcm); + if (zeroes_dfcm > zeroes_fcm) + return {compressed_dfcm, encodeCompressedSize(zeroes_dfcm / CHAR_BIT), true}; + return {compressed_fcm, encodeCompressedSize(zeroes_fcm / CHAR_BIT), false}; + } + + void encodePair(TUint first, TUint second) + { + auto [value1, compressed_size1, is_dfcm_predictor1] = compressValue(first); + auto [value2, compressed_size2, is_dfcm_predictor2] = compressValue(second); + std::byte header{0x0}; + if (is_dfcm_predictor1) + header |= DFCM_BIT_1; + if (is_dfcm_predictor2) + header |= DFCM_BIT_2; + header |= static_cast((compressed_size1 << 4) | compressed_size2); + result.front() = header; + + compressed_size1 = decodeCompressedSize(compressed_size1); + compressed_size2 = decodeCompressedSize(compressed_size2); + auto tail_size1 = VALUE_SIZE - compressed_size1; + auto tail_size2 = VALUE_SIZE - compressed_size2; + + std::memcpy(result.data() + 1, valueTail(value1, compressed_size1), tail_size1); + std::memcpy(result.data() + 1 + tail_size1, valueTail(value2, compressed_size2), tail_size2); + result = result.subspan(1 + tail_size1 + tail_size2); + } + + std::size_t decodeChunk(std::span values, std::span seq) + { + std::size_t read_bytes{0}; + for (std::size_t i = 0; i < seq.size(); i += 2) + { + read_bytes += decodePair(values.subspan(read_bytes), seq[i], seq[i + 1]); + } + return read_bytes; + } + + TUint decompressValue(TUint value, bool isDfcmPredictor) + { + TUint decompressed; + if (isDfcmPredictor) + { + decompressed = dfcm_predictor.predict() ^ value; + } + else + { + decompressed = fcm_predictor.predict() ^ value; + } + dfcm_predictor.add(decompressed); + fcm_predictor.add(decompressed); + return decompressed; + } + + std::size_t decodePair(std::span bytes, TUint& first, TUint& second) + { + if (bytes.empty()) + throw 
Exception(ErrorCodes::CANNOT_DECOMPRESS, "Unexpected end of encoded sequence"); + + auto compressed_size1 = decodeCompressedSize(static_cast(bytes.front() >> 4) & MAX_COMPRESSED_SIZE); + auto compressed_size2 = decodeCompressedSize(static_cast(bytes.front()) & MAX_COMPRESSED_SIZE); + + auto tail_size1 = VALUE_SIZE - compressed_size1; + auto tail_size2 = VALUE_SIZE - compressed_size2; + + if (bytes.size() < 1 + tail_size1 + tail_size2) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Unexpected end of encoded sequence"); + + TUint value1{0}; + TUint value2{0}; + + std::memcpy(valueTail(value1, compressed_size1), bytes.data() + 1, tail_size1); + std::memcpy(valueTail(value2, compressed_size2), bytes.data() + 1 + tail_size1, tail_size2); + + auto is_dfcm_predictor1 = static_cast(bytes.front() & DFCM_BIT_1); + auto is_dfcm_predictor2 = static_cast(bytes.front() & DFCM_BIT_2); + first = decompressValue(value1, is_dfcm_predictor1 != 0); + second = decompressValue(value2, is_dfcm_predictor2 != 0); + + return 1 + tail_size1 + tail_size2; + } + + static void* valueTail(TUint& value, unsigned compressed_size) + { + if constexpr (Endian == std::endian::little) + { + return &value; + } + else + { + return reinterpret_cast(&value) + compressed_size; + } + } + + DfcmPredictor dfcm_predictor; + FcmPredictor fcm_predictor; + std::array chunk{}; + std::span result{}; +}; + +} + +UInt32 CompressionCodecFPC::doCompressData(const char * source, UInt32 source_size, char * dest) const +{ + dest[0] = static_cast(float_width); + dest[1] = static_cast(level); + dest[2] = static_cast(encodeEndianness(std::endian::native)); + + auto destination = std::as_writable_bytes(std::span(dest, source_size).subspan(HEADER_SIZE)); + auto src = std::as_bytes(std::span(source, source_size)); + switch (float_width) + { + case sizeof(Float64): + return HEADER_SIZE + FPCOperation(destination, level).encode(src); + case sizeof(Float32): + return HEADER_SIZE + FPCOperation(destination, level).encode(src); + default: + break; + } + throw Exception("Cannot compress. File has incorrect float width", ErrorCodes::CANNOT_COMPRESS); +} + +void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +{ + if (source_size < HEADER_SIZE) + throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); + + auto compressed_data = std::span(source, source_size); + if (static_cast(compressed_data[0]) != float_width) + throw Exception("Cannot decompress. File has incorrect float width", ErrorCodes::CANNOT_DECOMPRESS); + if (static_cast(compressed_data[1]) != level) + throw Exception("Cannot decompress. File has incorrect compression level", ErrorCodes::CANNOT_DECOMPRESS); + if (decodeEndianness(static_cast(compressed_data[2])) != std::endian::native) + throw Exception("Cannot decompress. 
File has incorrect endianness", ErrorCodes::CANNOT_DECOMPRESS); + + auto destination = std::as_writable_bytes(std::span(dest, uncompressed_size)); + auto src = std::as_bytes(compressed_data.subspan(HEADER_SIZE)); + switch (float_width) + { + case sizeof(Float64): + FPCOperation(destination, level).decode(src, uncompressed_size); + break; + case sizeof(Float32): + FPCOperation(destination, level).decode(src, uncompressed_size); + break; + default: + break; + } +} + +} diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index abf5e38a8c3..b8a1c5877a4 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -177,6 +177,7 @@ void registerCodecT64(CompressionCodecFactory & factory); void registerCodecDoubleDelta(CompressionCodecFactory & factory); void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); +void registerCodecFPC(CompressionCodecFactory & factory); #endif @@ -194,6 +195,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecDoubleDelta(*this); registerCodecGorilla(*this); registerCodecEncrypted(*this); + registerCodecFPC(*this); #endif default_codec = get("LZ4", {}); diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index bbe8315f3ea..839fb68e8c3 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -44,7 +44,8 @@ enum class CompressionMethodByte : uint8_t DoubleDelta = 0x94, Gorilla = 0x95, AES_128_GCM_SIV = 0x96, - AES_256_GCM_SIV = 0x97 + AES_256_GCM_SIV = 0x97, + FPC = 0x98 }; } From adf888811cf036612ceb33ac245a47eb6a24e689 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Thu, 26 May 2022 04:53:07 +0300 Subject: [PATCH 07/22] fixed max size computation --- src/Compression/CompressionCodecFPC.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp index 3c7aac794c0..0e50d62893c 100644 --- a/src/Compression/CompressionCodecFPC.cpp +++ b/src/Compression/CompressionCodecFPC.cpp @@ -72,7 +72,7 @@ UInt32 CompressionCodecFPC::getMaxCompressedDataSize(UInt32 uncompressed_size) c if (float_count % 2 != 0) { ++float_count; } - return HEADER_SIZE + (float_count + float_count / 2) * float_width; + return HEADER_SIZE + float_count * float_width + float_count / 2; } namespace @@ -453,7 +453,8 @@ UInt32 CompressionCodecFPC::doCompressData(const char * source, UInt32 source_si dest[1] = static_cast(level); dest[2] = static_cast(encodeEndianness(std::endian::native)); - auto destination = std::as_writable_bytes(std::span(dest, source_size).subspan(HEADER_SIZE)); + auto dest_size = getMaxCompressedDataSize(source_size); + auto destination = std::as_writable_bytes(std::span(dest, dest_size).subspan(HEADER_SIZE)); auto src = std::as_bytes(std::span(source, source_size)); switch (float_width) { From cd3e28e3b1e4c8881c642c29d3fcf59bbf25e621 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Thu, 26 May 2022 06:53:48 +0300 Subject: [PATCH 08/22] fixed decoding parameters --- src/Compression/CompressionCodecFPC.cpp | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp index 0e50d62893c..d6b96f8a2f2 100644 --- a/src/Compression/CompressionCodecFPC.cpp +++ b/src/Compression/CompressionCodecFPC.cpp @@ -124,11 +124,11 @@ void 
registerCodecFPC(CompressionCodecFactory & factory) auto method_code = static_cast(CompressionMethodByte::FPC); auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr { - if (!column_type) - { - throw Exception("FPC codec must have associated column", ErrorCodes::BAD_ARGUMENTS); - } - UInt8 level{0}; + UInt8 float_width{0}; + if (column_type != nullptr) + float_width = getFloatBytesSize(*column_type); + + UInt8 level{12}; if (arguments && !arguments->children.empty()) { if (arguments->children.size() > 1) @@ -139,13 +139,11 @@ void registerCodecFPC(CompressionCodecFactory & factory) const auto * literal = arguments->children.front()->as(); if (!literal) - { throw Exception("FPC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); - } level = literal->value.safeGet(); } - return std::make_shared(getFloatBytesSize(*column_type), level); + return std::make_shared(float_width, level); }; factory.registerCompressionCodecWithType("FPC", method_code, codec_builder); } @@ -474,26 +472,28 @@ void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_si throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); auto compressed_data = std::span(source, source_size); - if (static_cast(compressed_data[0]) != float_width) - throw Exception("Cannot decompress. File has incorrect float width", ErrorCodes::CANNOT_DECOMPRESS); - if (static_cast(compressed_data[1]) != level) - throw Exception("Cannot decompress. File has incorrect compression level", ErrorCodes::CANNOT_DECOMPRESS); if (decodeEndianness(static_cast(compressed_data[2])) != std::endian::native) throw Exception("Cannot decompress. File has incorrect endianness", ErrorCodes::CANNOT_DECOMPRESS); + auto compressed_float_width = static_cast(compressed_data[0]); + auto compressed_level = static_cast(compressed_data[1]); + if (compressed_level == 0) + throw Exception("Cannot decompress. File has incorrect level", ErrorCodes::CANNOT_DECOMPRESS); + auto destination = std::as_writable_bytes(std::span(dest, uncompressed_size)); auto src = std::as_bytes(compressed_data.subspan(HEADER_SIZE)); - switch (float_width) + switch (compressed_float_width) { case sizeof(Float64): - FPCOperation(destination, level).decode(src, uncompressed_size); + FPCOperation(destination, compressed_level).decode(src, uncompressed_size); break; case sizeof(Float32): - FPCOperation(destination, level).decode(src, uncompressed_size); + FPCOperation(destination, compressed_level).decode(src, uncompressed_size); break; default: break; } + throw Exception("Cannot decompress. File has incorrect float width", ErrorCodes::CANNOT_DECOMPRESS); } } From 1bfeb982af9bffea3c42ff2f87daa99ea016f83e Mon Sep 17 00:00:00 2001 From: koloshmet Date: Thu, 26 May 2022 06:58:34 +0300 Subject: [PATCH 09/22] fixed decoding parameters --- src/Compression/CompressionCodecFPC.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp index d6b96f8a2f2..269a935da4b 100644 --- a/src/Compression/CompressionCodecFPC.cpp +++ b/src/Compression/CompressionCodecFPC.cpp @@ -491,9 +491,8 @@ void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_si FPCOperation(destination, compressed_level).decode(src, uncompressed_size); break; default: - break; + throw Exception("Cannot decompress. 
File has incorrect float width", ErrorCodes::CANNOT_DECOMPRESS); } - throw Exception("Cannot decompress. File has incorrect float width", ErrorCodes::CANNOT_DECOMPRESS); } } From 4d41121c096918493b4f0ac05dbab4546b8c9331 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Thu, 26 May 2022 10:58:00 +0300 Subject: [PATCH 10/22] added test query --- .../02313_test_fpc_codec.reference | 2 + .../0_stateless/02313_test_fpc_codec.sql | 61 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 tests/queries/0_stateless/02313_test_fpc_codec.reference create mode 100644 tests/queries/0_stateless/02313_test_fpc_codec.sql diff --git a/tests/queries/0_stateless/02313_test_fpc_codec.reference b/tests/queries/0_stateless/02313_test_fpc_codec.reference new file mode 100644 index 00000000000..5e871ea0329 --- /dev/null +++ b/tests/queries/0_stateless/02313_test_fpc_codec.reference @@ -0,0 +1,2 @@ +F64 +F32 diff --git a/tests/queries/0_stateless/02313_test_fpc_codec.sql b/tests/queries/0_stateless/02313_test_fpc_codec.sql new file mode 100644 index 00000000000..e077e59b07b --- /dev/null +++ b/tests/queries/0_stateless/02313_test_fpc_codec.sql @@ -0,0 +1,61 @@ +DROP TABLE IF EXISTS codecTest; + +CREATE TABLE codecTest ( + key UInt64, + name String, + ref_valueF64 Float64, + ref_valueF32 Float32, + valueF64 Float64 CODEC(FPC), + valueF32 Float32 CODEC(FPC) +) Engine = MergeTree ORDER BY key; + +-- best case - same value +INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) + SELECT number AS n, 'e()', e() AS v, v, v, v FROM system.numbers LIMIT 1, 100; + +-- good case - values that grow insignificantly +INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) + SELECT number AS n, 'log2(n)', log2(n) AS v, v, v, v FROM system.numbers LIMIT 101, 100; + +-- bad case - values differ significantly +INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) + SELECT number AS n, 'n*sqrt(n)', n*sqrt(n) AS v, v, v, v FROM system.numbers LIMIT 201, 100; + +-- worst case - almost like a random values +INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) + SELECT number AS n, 'sin(n*n*n)*n', sin(n * n * n * n* n) AS v, v, v, v FROM system.numbers LIMIT 301, 100; + + +-- These floating-point values are expected to be BINARY equal, so comparing by-value is Ok here. + +-- referencing previous row key, value, and case name to simplify debugging. 
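+-- Each check joins every row with its predecessor and returns rows only where
+-- the value decoded through CODEC(FPC) differs from the reference column, so a
+-- correct codec produces no output beyond the 'F64' and 'F32' markers.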
+SELECT 'F64'; +SELECT + c1.key, c1.name, + c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64, + 'prev:', + c2.key, c2.ref_valueF64 +FROM + codecTest as c1, codecTest as c2 +WHERE + dF64 != 0 +AND + c2.key = c1.key - 1 +LIMIT 10; + + +SELECT 'F32'; +SELECT + c1.key, c1.name, + c1.ref_valueF32, c1.valueF32, c1.ref_valueF32 - c1.valueF32 AS dF32, + 'prev:', + c2.key, c2.ref_valueF32 +FROM + codecTest as c1, codecTest as c2 +WHERE + dF32 != 0 +AND + c2.key = c1.key - 1 +LIMIT 10; + +DROP TABLE IF EXISTS codecTest; From 821100f145cf75c8ef3a3c30d16c4bafcc05378e Mon Sep 17 00:00:00 2001 From: koloshmet Date: Thu, 26 May 2022 11:09:36 +0300 Subject: [PATCH 11/22] code style fixes --- src/Compression/CompressionCodecFPC.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp index 269a935da4b..0f68f512ad8 100644 --- a/src/Compression/CompressionCodecFPC.cpp +++ b/src/Compression/CompressionCodecFPC.cpp @@ -69,9 +69,8 @@ CompressionCodecFPC::CompressionCodecFPC(UInt8 float_size, UInt8 compression_lev UInt32 CompressionCodecFPC::getMaxCompressedDataSize(UInt32 uncompressed_size) const { auto float_count = (uncompressed_size + float_width - 1) / float_width; - if (float_count % 2 != 0) { + if (float_count % 2 != 0) ++float_count; - } return HEADER_SIZE + float_count * float_width + float_count / 2; } @@ -106,7 +105,8 @@ UInt8 encodeEndianness(std::endian endian) throw Exception("Unsupported endianness", ErrorCodes::BAD_ARGUMENTS); } -std::endian decodeEndianness(UInt8 endian) { +std::endian decodeEndianness(UInt8 endian) +{ switch (endian) { case 0: @@ -152,7 +152,8 @@ namespace { template requires (sizeof(TUint) >= 4) -class DfcmPredictor { +class DfcmPredictor +{ public: explicit DfcmPredictor(std::size_t table_size): table(table_size, 0), prev_value{0}, hash{0} { @@ -191,7 +192,8 @@ private: }; template requires (sizeof(TUint) >= 4) -class FcmPredictor { +class FcmPredictor +{ public: explicit FcmPredictor(std::size_t table_size): table(table_size, 0), hash{0} { From 7e69779575b712aae530c71f82f9e33f10e48d76 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Thu, 26 May 2022 22:32:56 +0300 Subject: [PATCH 12/22] added fpc codec to float perftest --- src/Compression/CompressionCodecFPC.cpp | 2 + tests/performance/codecs_float_insert.xml | 1 + tests/performance/codecs_float_select.xml | 1 + .../02313_test_fpc_codec.reference | 2 + .../0_stateless/02313_test_fpc_codec.sql | 60 +++++++++++++++++++ 5 files changed, 66 insertions(+) diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp index 0f68f512ad8..f3106204e01 100644 --- a/src/Compression/CompressionCodecFPC.cpp +++ b/src/Compression/CompressionCodecFPC.cpp @@ -142,6 +142,8 @@ void registerCodecFPC(CompressionCodecFactory & factory) throw Exception("FPC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); level = literal->value.safeGet(); + if (level == 0) + throw Exception("FPC codec level must be at least 1", ErrorCodes::ILLEGAL_CODEC_PARAMETER); } return std::make_shared(float_width, level); }; diff --git a/tests/performance/codecs_float_insert.xml b/tests/performance/codecs_float_insert.xml index b31e0eafdd7..64325d30189 100644 --- a/tests/performance/codecs_float_insert.xml +++ b/tests/performance/codecs_float_insert.xml @@ -12,6 +12,7 @@ ZSTD DoubleDelta Gorilla + FPC diff --git a/tests/performance/codecs_float_select.xml 
b/tests/performance/codecs_float_select.xml index 82489daf524..4743a756ac3 100644 --- a/tests/performance/codecs_float_select.xml +++ b/tests/performance/codecs_float_select.xml @@ -12,6 +12,7 @@ ZSTD DoubleDelta Gorilla + FPC diff --git a/tests/queries/0_stateless/02313_test_fpc_codec.reference b/tests/queries/0_stateless/02313_test_fpc_codec.reference index 5e871ea0329..23c75ed1ac0 100644 --- a/tests/queries/0_stateless/02313_test_fpc_codec.reference +++ b/tests/queries/0_stateless/02313_test_fpc_codec.reference @@ -1,2 +1,4 @@ F64 F32 +F64 +F32 diff --git a/tests/queries/0_stateless/02313_test_fpc_codec.sql b/tests/queries/0_stateless/02313_test_fpc_codec.sql index e077e59b07b..3b1127350f0 100644 --- a/tests/queries/0_stateless/02313_test_fpc_codec.sql +++ b/tests/queries/0_stateless/02313_test_fpc_codec.sql @@ -44,6 +44,66 @@ AND LIMIT 10; +SELECT 'F32'; +SELECT + c1.key, c1.name, + c1.ref_valueF32, c1.valueF32, c1.ref_valueF32 - c1.valueF32 AS dF32, + 'prev:', + c2.key, c2.ref_valueF32 +FROM + codecTest as c1, codecTest as c2 +WHERE + dF32 != 0 +AND + c2.key = c1.key - 1 +LIMIT 10; + +DROP TABLE IF EXISTS codecTest; + +CREATE TABLE codecTest ( + key UInt64, + name String, + ref_valueF64 Float64, + ref_valueF32 Float32, + valueF64 Float64 CODEC(FPC(4)), + valueF32 Float32 CODEC(FPC(4)) +) Engine = MergeTree ORDER BY key; + +-- best case - same value +INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) + SELECT number AS n, 'e()', e() AS v, v, v, v FROM system.numbers LIMIT 1, 100; + +-- good case - values that grow insignificantly +INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) + SELECT number AS n, 'log2(n)', log2(n) AS v, v, v, v FROM system.numbers LIMIT 101, 100; + +-- bad case - values differ significantly +INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) + SELECT number AS n, 'n*sqrt(n)', n*sqrt(n) AS v, v, v, v FROM system.numbers LIMIT 201, 100; + +-- worst case - almost like a random values +INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) + SELECT number AS n, 'sin(n*n*n)*n', sin(n * n * n * n* n) AS v, v, v, v FROM system.numbers LIMIT 301, 100; + + +-- These floating-point values are expected to be BINARY equal, so comparing by-value is Ok here. + +-- referencing previous row key, value, and case name to simplify debugging. 
+SELECT 'F64'; +SELECT + c1.key, c1.name, + c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64, + 'prev:', + c2.key, c2.ref_valueF64 +FROM + codecTest as c1, codecTest as c2 +WHERE + dF64 != 0 +AND + c2.key = c1.key - 1 +LIMIT 10; + + SELECT 'F32'; SELECT c1.key, c1.name, From 0e63583b8f100a04af554aa086dd83167ae64aa6 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 31 May 2022 00:10:47 +0000 Subject: [PATCH 13/22] Support types with non-standard defaults in ROLLUP, CUBE, GROUPING SETS --- src/Columns/IColumn.h | 2 +- src/DataTypes/IDataType.cpp | 7 ++ src/DataTypes/IDataType.h | 2 + src/Processors/QueryPlan/AggregatingStep.cpp | 4 +- src/Processors/Transforms/CubeTransform.cpp | 5 +- src/Processors/Transforms/RollupTransform.cpp | 10 +- ...modifiers_with_non-default_types.reference | 113 ++++++++++++++++++ ...up_by_modifiers_with_non-default_types.sql | 39 ++++++ 8 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.reference create mode 100644 tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.sql diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index f62f6c444b3..a99d4172e5b 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -90,7 +90,7 @@ public: /// Creates column with the same type and specified size. /// If size is less current size, then data is cut. /// If size is greater, than default values are appended. - [[nodiscard]] virtual MutablePtr cloneResized(size_t /*size*/) const { throw Exception("Cannot cloneResized() column " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + [[nodiscard]] virtual MutablePtr cloneResized(size_t /*size*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cloneResized() column {}", getName()); } /// Returns number of values in column. 
[[nodiscard]] virtual size_t size() const = 0; diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 0976233c031..f2bb878a533 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -162,6 +163,12 @@ void IDataType::insertDefaultInto(IColumn & column) const column.insertDefault(); } +void IDataType::insertManyDefaultsInto(IColumn & column, size_t n) const +{ + for (size_t i = 0; i < n; ++i) + insertDefaultInto(column); +} + void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const { /// replace only if not null diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index fc9e50dc55b..420ef61a13f 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -159,6 +159,8 @@ public: */ virtual void insertDefaultInto(IColumn & column) const; + void insertManyDefaultsInto(IColumn & column, size_t n) const; + /// Checks that two instances belong to the same type virtual bool equals(const IDataType & rhs) const = 0; diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 4114eff5c56..17a0498fb7e 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -241,7 +241,9 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B if (missign_column_index < missing_columns.size() && missing_columns[missign_column_index] == i) { ++missign_column_index; - auto column = ColumnConst::create(col.column->cloneResized(1), 0); + auto column_with_default = col.column->cloneEmpty(); + col.type->insertDefaultInto(*column_with_default); + auto column = ColumnConst::create(std::move(column_with_default), 0); const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); node = &dag->materializeNode(*node); index.push_back(node); diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp index c699e724ffc..83ed346dabe 100644 --- a/src/Processors/Transforms/CubeTransform.cpp +++ b/src/Processors/Transforms/CubeTransform.cpp @@ -35,6 +35,8 @@ void CubeTransform::consume(Chunk chunk) consumed_chunks.emplace_back(std::move(chunk)); } +MutableColumnPtr getColumnWithDefaults(Block const & header, size_t key, size_t n); + Chunk CubeTransform::generate() { if (!consumed_chunks.empty()) @@ -53,8 +55,9 @@ Chunk CubeTransform::generate() current_zero_columns.clear(); current_zero_columns.reserve(keys.size()); + auto const & input_header = getInputPort().getHeader(); for (auto key : keys) - current_zero_columns.emplace_back(current_columns[key]->cloneEmpty()->cloneResized(num_rows)); + current_zero_columns.emplace_back(getColumnWithDefaults(input_header, key, num_rows)); } auto gen_chunk = std::move(cube_chunk); diff --git a/src/Processors/Transforms/RollupTransform.cpp b/src/Processors/Transforms/RollupTransform.cpp index db60b99102d..b69a691323c 100644 --- a/src/Processors/Transforms/RollupTransform.cpp +++ b/src/Processors/Transforms/RollupTransform.cpp @@ -29,6 +29,14 @@ Chunk RollupTransform::merge(Chunks && chunks, bool final) return Chunk(rollup_block.getColumns(), num_rows); } +MutableColumnPtr getColumnWithDefaults(Block const & header, size_t key, size_t n) +{ + auto const & col = header.getByPosition(key); + auto result_column = col.column->cloneEmpty(); + col.type->insertManyDefaultsInto(*result_column, n); + return result_column; +} + Chunk RollupTransform::generate() { if 
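getColumnWithDefaults, defined just above, is the heart of this patch: cloneResized pads with the column's raw default, while subtotal rows produced by ROLLUP and CUBE must carry the type's default. A minimal standalone sketch of the difference, assuming ClickHouse's DataTypeEnum headers (enumSubtotalDefaults is an illustrative name, not from the patch):

#include <DataTypes/DataTypeEnum.h>

/// For Enum8('one' = 1, 'two' = 2) the underlying ColumnInt8 default is the
/// raw byte 0, which names no value of this enum; the type default is the
/// first enum value, 'one' = 1.
ColumnPtr enumSubtotalDefaults(size_t n)
{
    DataTypePtr type = std::make_shared<DataTypeEnum8>(
        DataTypeEnum8::Values{{"one", 1}, {"two", 2}});

    /// Old path: pads with the column default, n raw zeros (invalid here).
    [[maybe_unused]] auto wrong = type->createColumn()->cloneResized(n);

    /// New path, as in getColumnWithDefaults above: n copies of 'one'.
    auto right = type->createColumn();
    type->insertManyDefaultsInto(*right, n);
    return right;
}

This is exactly what the 02313 test below checks: every rolled-up Enum cell must read 'one' or 'default', never a bare 0.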
(!consumed_chunks.empty()) @@ -51,7 +59,7 @@ Chunk RollupTransform::generate() auto num_rows = gen_chunk.getNumRows(); auto columns = gen_chunk.getColumns(); - columns[key] = columns[key]->cloneEmpty()->cloneResized(num_rows); + columns[key] = getColumnWithDefaults(getInputPort().getHeader(), key, num_rows); Chunks chunks; chunks.emplace_back(std::move(columns), num_rows); diff --git a/tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.reference b/tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.reference new file mode 100644 index 00000000000..183c63d1222 --- /dev/null +++ b/tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.reference @@ -0,0 +1,113 @@ +-- { echoOn } +SELECT + count() as d, a, b, c +FROM test02313 +GROUP BY ROLLUP(a, b, c) +ORDER BY d, a, b, c; +1 one default 0 +1 one default 2 +1 one default 4 +1 one default 6 +1 one default 8 +1 two non-default 1 +1 two non-default 3 +1 two non-default 5 +1 two non-default 7 +1 two non-default 9 +5 one default 0 +5 one default 0 +5 two default 0 +5 two non-default 0 +10 one default 0 +SELECT + count() as d, a, b, c +FROM test02313 +GROUP BY CUBE(a, b, c) +ORDER BY d, a, b, c; +1 one default 0 +1 one default 0 +1 one default 0 +1 one default 0 +1 one default 1 +1 one default 2 +1 one default 2 +1 one default 2 +1 one default 2 +1 one default 3 +1 one default 4 +1 one default 4 +1 one default 4 +1 one default 4 +1 one default 5 +1 one default 6 +1 one default 6 +1 one default 6 +1 one default 6 +1 one default 7 +1 one default 8 +1 one default 8 +1 one default 8 +1 one default 8 +1 one default 9 +1 one non-default 1 +1 one non-default 3 +1 one non-default 5 +1 one non-default 7 +1 one non-default 9 +1 two default 1 +1 two default 3 +1 two default 5 +1 two default 7 +1 two default 9 +1 two non-default 1 +1 two non-default 3 +1 two non-default 5 +1 two non-default 7 +1 two non-default 9 +5 one default 0 +5 one default 0 +5 one default 0 +5 one non-default 0 +5 two default 0 +5 two non-default 0 +10 one default 0 +SELECT + count() as d, a, b, c +FROM test02313 +GROUP BY GROUPING SETS + ( + (c), + (a, c), + (b, c) + ) +ORDER BY d, a, b, c; +1 one default 0 +1 one default 0 +1 one default 0 +1 one default 1 +1 one default 2 +1 one default 2 +1 one default 2 +1 one default 3 +1 one default 4 +1 one default 4 +1 one default 4 +1 one default 5 +1 one default 6 +1 one default 6 +1 one default 6 +1 one default 7 +1 one default 8 +1 one default 8 +1 one default 8 +1 one default 9 +1 one non-default 1 +1 one non-default 3 +1 one non-default 5 +1 one non-default 7 +1 one non-default 9 +1 two default 1 +1 two default 3 +1 two default 5 +1 two default 7 +1 two default 9 diff --git a/tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.sql b/tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.sql new file mode 100644 index 00000000000..d30cc930429 --- /dev/null +++ b/tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS test02313; + +CREATE TABLE test02313 +( + a Enum('one' = 1, 'two' = 2), + b Enum('default' = 0, 'non-default' = 1), + c UInt8 +) +ENGINE = MergeTree() +ORDER BY (a, b, c); + +INSERT INTO test02313 SELECT number % 2 + 1 AS a, number % 2 AS b, number FROM numbers(10); + +-- { echoOn } +SELECT + count() as d, a, b, c +FROM test02313 +GROUP BY ROLLUP(a, b, c) +ORDER BY d, a, b, c; + +SELECT + count() as d, a, b, c +FROM test02313 +GROUP BY CUBE(a, b, c) +ORDER BY d, a, 
b, c; + +SELECT + count() as d, a, b, c +FROM test02313 +GROUP BY GROUPING SETS + ( + (c), + (a, c), + (b, c) + ) +ORDER BY d, a, b, c; + +-- { echoOff } +DROP TABLE test02313; From d5064dd5b5480f93966f847e4752d620eebc6656 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Wed, 8 Jun 2022 02:17:34 +0300 Subject: [PATCH 14/22] so many improvements --- src/Compression/CompressionCodecFPC.cpp | 129 +++++++++++++----------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp index f3106204e01..3b66060b6fc 100644 --- a/src/Compression/CompressionCodecFPC.cpp +++ b/src/Compression/CompressionCodecFPC.cpp @@ -17,12 +17,15 @@ namespace DB class CompressionCodecFPC : public ICompressionCodec { public: - explicit CompressionCodecFPC(UInt8 float_size, UInt8 compression_level); + CompressionCodecFPC(UInt8 float_size, UInt8 compression_level); uint8_t getMethodByte() const override; void updateHash(SipHash & hash) const override; + static constexpr UInt8 MAX_COMPRESSION_LEVEL{28}; + static constexpr UInt8 DEFAULT_COMPRESSION_LEVEL{12}; + protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; @@ -33,11 +36,11 @@ protected: bool isCompression() const override { return true; } bool isGenericCompression() const override { return false; } +private: static constexpr UInt32 HEADER_SIZE{3}; -private: - UInt8 float_width; - UInt8 level; + UInt8 float_width; // size of uncompressed float in bytes + UInt8 level; // compression level, 2^level * float_width is the size of predictors table in bytes }; @@ -93,21 +96,21 @@ UInt8 getFloatBytesSize(const IDataType & column_type) column_type.getName()); } -UInt8 encodeEndianness(std::endian endian) +std::byte encodeEndianness(std::endian endian) { switch (endian) { case std::endian::little: - return 0; + return std::byte{0}; case std::endian::big: - return 1; + return std::byte{1}; } throw Exception("Unsupported endianness", ErrorCodes::BAD_ARGUMENTS); } -std::endian decodeEndianness(UInt8 endian) +std::endian decodeEndianness(std::byte endian) { - switch (endian) + switch (std::to_integer(endian)) { case 0: return std::endian::little; @@ -128,7 +131,7 @@ void registerCodecFPC(CompressionCodecFactory & factory) if (column_type != nullptr) float_width = getFloatBytesSize(*column_type); - UInt8 level{12}; + UInt8 level = CompressionCodecFPC::DEFAULT_COMPRESSION_LEVEL; if (arguments && !arguments->children.empty()) { if (arguments->children.size() > 1) @@ -144,6 +147,8 @@ void registerCodecFPC(CompressionCodecFactory & factory) level = literal->value.safeGet(); if (level == 0) throw Exception("FPC codec level must be at least 1", ErrorCodes::ILLEGAL_CODEC_PARAMETER); + if (level > CompressionCodecFPC::MAX_COMPRESSION_LEVEL) + throw Exception("FPC codec level must be at most 28", ErrorCodes::ILLEGAL_CODEC_PARAMETER); } return std::make_shared(float_width, level); }; @@ -153,7 +158,8 @@ void registerCodecFPC(CompressionCodecFactory & factory) namespace { -template requires (sizeof(TUint) >= 4) +template + requires (sizeof(TUint) >= 4) class DfcmPredictor { public: @@ -189,11 +195,12 @@ private: } std::vector table; - TUint prev_value{0}; - std::size_t hash{0}; + TUint prev_value; + std::size_t hash; }; -template requires (sizeof(TUint) >= 4) +template + requires (sizeof(TUint) >= 4) class FcmPredictor { public: @@ -228,7 +235,7 @@ private: } std::vector table; - std::size_t hash{0}; + std::size_t hash; }; template @@ -238,13 +245,15 @@ class 
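For readers new to FPC (Burtscher and Ratanaworabhan's floating-point compressor), the two predictor classes above implement its prediction step: guess the next value from a hash-indexed table of history, XOR the guess with the actual value, and keep only the bytes after the run of leading zeros. A simplified, self-contained sketch of the FCM half (the hash mixing and the power-of-two table mirror FcmPredictor above; TinyFcm and fcmResidual are illustrative, not the patch's code):

#include <bit>
#include <cstdint>
#include <utility>
#include <vector>

struct TinyFcm
{
    explicit TinyFcm(std::size_t level) : table(std::size_t{1} << level, 0) {}

    uint64_t predict() const { return table[hash]; }

    void add(uint64_t value)
    {
        table[hash] = value;
        /// Mix the high bits of the new value into the table index;
        /// the table size is a power of two, so masking wraps the hash.
        hash = ((hash << 6) ^ (value >> 48)) & (table.size() - 1);
    }

    std::vector<uint64_t> table;
    std::size_t hash = 0;
};

/// Residual plus the number of leading zero bytes that need not be stored.
std::pair<uint64_t, int> fcmResidual(TinyFcm & predictor, uint64_t value)
{
    uint64_t diff = predictor.predict() ^ value;
    predictor.add(value);
    return {diff, std::countl_zero(diff) / 8};
}

When prediction is good the residual is mostly leading zeros, which is why the encoder below only needs a 3-bit zero-byte count per value.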
FPCOperation static constexpr std::size_t CHUNK_SIZE{64}; static constexpr auto VALUE_SIZE = sizeof(TUint); - static constexpr std::byte DFCM_BIT_1{1u << 7}; - static constexpr std::byte DFCM_BIT_2{1u << 3}; - static constexpr unsigned MAX_COMPRESSED_SIZE{0b111u}; + static constexpr std::byte FCM_BIT{0}; + static constexpr std::byte DFCM_BIT{1u << 3}; + static constexpr auto DFCM_BIT_1 = DFCM_BIT << 4; + static constexpr auto DFCM_BIT_2 = DFCM_BIT; + static constexpr unsigned MAX_ZERO_BYTE_COUNT{0b111u}; public: - explicit FPCOperation(std::span destination, UInt8 compression_level) - : dfcm_predictor(1 << compression_level), fcm_predictor(1 << compression_level), chunk{}, result{destination} + FPCOperation(std::span destination, UInt8 compression_level) + : dfcm_predictor(1u << compression_level), fcm_predictor(1u << compression_level), chunk{}, result{destination} { } @@ -317,22 +326,22 @@ private: { TUint value; unsigned compressed_size; - bool is_dfcm_predictor; + std::byte predictor; }; - unsigned encodeCompressedSize(int compressed) + unsigned encodeCompressedZeroByteCount(int compressed) { - if constexpr (VALUE_SIZE > MAX_COMPRESSED_SIZE) + if constexpr (VALUE_SIZE == MAX_ZERO_BYTE_COUNT + 1) { if (compressed >= 4) --compressed; } - return std::min(static_cast(compressed), MAX_COMPRESSED_SIZE); + return std::min(static_cast(compressed), MAX_ZERO_BYTE_COUNT); } - unsigned decodeCompressedSize(unsigned encoded_size) + unsigned decodeCompressedZeroByteCount(unsigned encoded_size) { - if constexpr (VALUE_SIZE > MAX_COMPRESSED_SIZE) + if constexpr (VALUE_SIZE == MAX_ZERO_BYTE_COUNT + 1) { if (encoded_size > 3) ++encoded_size; @@ -342,6 +351,8 @@ private: CompressedValue compressValue(TUint value) noexcept { + static constexpr auto BITS_PER_BYTE = std::numeric_limits::digits; + TUint compressed_dfcm = dfcm_predictor.predict() ^ value; TUint compressed_fcm = fcm_predictor.predict() ^ value; dfcm_predictor.add(value); @@ -349,29 +360,26 @@ private: auto zeroes_dfcm = std::countl_zero(compressed_dfcm); auto zeroes_fcm = std::countl_zero(compressed_fcm); if (zeroes_dfcm > zeroes_fcm) - return {compressed_dfcm, encodeCompressedSize(zeroes_dfcm / CHAR_BIT), true}; - return {compressed_fcm, encodeCompressedSize(zeroes_fcm / CHAR_BIT), false}; + return {compressed_dfcm, encodeCompressedZeroByteCount(zeroes_dfcm / BITS_PER_BYTE), DFCM_BIT}; + return {compressed_fcm, encodeCompressedZeroByteCount(zeroes_fcm / BITS_PER_BYTE), FCM_BIT}; } void encodePair(TUint first, TUint second) { - auto [value1, compressed_size1, is_dfcm_predictor1] = compressValue(first); - auto [value2, compressed_size2, is_dfcm_predictor2] = compressValue(second); + auto [value1, zero_byte_count1, predictor1] = compressValue(first); + auto [value2, zero_byte_count2, predictor2] = compressValue(second); std::byte header{0x0}; - if (is_dfcm_predictor1) - header |= DFCM_BIT_1; - if (is_dfcm_predictor2) - header |= DFCM_BIT_2; - header |= static_cast((compressed_size1 << 4) | compressed_size2); + header |= (predictor1 << 4) | predictor2; + header |= static_cast((zero_byte_count1 << 4) | zero_byte_count2); result.front() = header; - compressed_size1 = decodeCompressedSize(compressed_size1); - compressed_size2 = decodeCompressedSize(compressed_size2); - auto tail_size1 = VALUE_SIZE - compressed_size1; - auto tail_size2 = VALUE_SIZE - compressed_size2; + zero_byte_count1 = decodeCompressedZeroByteCount(zero_byte_count1); + zero_byte_count2 = decodeCompressedZeroByteCount(zero_byte_count2); + auto tail_size1 = VALUE_SIZE - 
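Since the bit packing in encodePair above and decodePair below is easy to misread, here is the layout of the shared header byte and a sketch of parsing it (bit positions follow DFCM_BIT_1, DFCM_BIT_2 and MAX_ZERO_BYTE_COUNT above; PairHeader is illustrative, not the patch's code):

#include <cstddef>
#include <cstdint>

/// Header byte written once per pair of values:
///   bit 7       predictor for value 1 (0 = FCM, 1 = DFCM)
///   bits 6..4   encoded count of leading zero bytes dropped from value 1
///   bit 3       predictor for value 2
///   bits 2..0   encoded count of leading zero bytes dropped from value 2
/// (For 8-byte floats the encode/decode helpers above squeeze counts 0..8
/// into 3 bits by never emitting a count of exactly 4.)
struct PairHeader
{
    bool first_is_dfcm;
    bool second_is_dfcm;
    unsigned zero_bytes1;
    unsigned zero_bytes2;
};

PairHeader parsePairHeader(std::byte header)
{
    auto bits = std::to_integer<uint8_t>(header);
    return {
        (bits & (1u << 7)) != 0,
        (bits & (1u << 3)) != 0,
        (bits >> 4) & 0b111u,
        bits & 0b111u,
    };
}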
zero_byte_count1; + auto tail_size2 = VALUE_SIZE - zero_byte_count2; - std::memcpy(result.data() + 1, valueTail(value1, compressed_size1), tail_size1); - std::memcpy(result.data() + 1 + tail_size1, valueTail(value2, compressed_size2), tail_size2); + std::memcpy(result.data() + 1, valueTail(value1, zero_byte_count1), tail_size1); + std::memcpy(result.data() + 1 + tail_size1, valueTail(value2, zero_byte_count2), tail_size2); result = result.subspan(1 + tail_size1 + tail_size2); } @@ -406,11 +414,13 @@ private: if (bytes.empty()) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Unexpected end of encoded sequence"); - auto compressed_size1 = decodeCompressedSize(static_cast(bytes.front() >> 4) & MAX_COMPRESSED_SIZE); - auto compressed_size2 = decodeCompressedSize(static_cast(bytes.front()) & MAX_COMPRESSED_SIZE); + auto zero_byte_count1 = decodeCompressedZeroByteCount( + std::to_integer(bytes.front() >> 4) & MAX_ZERO_BYTE_COUNT); + auto zero_byte_count2 = decodeCompressedZeroByteCount( + std::to_integer(bytes.front()) & MAX_ZERO_BYTE_COUNT); - auto tail_size1 = VALUE_SIZE - compressed_size1; - auto tail_size2 = VALUE_SIZE - compressed_size2; + auto tail_size1 = VALUE_SIZE - zero_byte_count1; + auto tail_size2 = VALUE_SIZE - zero_byte_count2; if (bytes.size() < 1 + tail_size1 + tail_size2) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Unexpected end of encoded sequence"); @@ -418,13 +428,13 @@ private: TUint value1{0}; TUint value2{0}; - std::memcpy(valueTail(value1, compressed_size1), bytes.data() + 1, tail_size1); - std::memcpy(valueTail(value2, compressed_size2), bytes.data() + 1 + tail_size1, tail_size2); + std::memcpy(valueTail(value1, zero_byte_count1), bytes.data() + 1, tail_size1); + std::memcpy(valueTail(value2, zero_byte_count2), bytes.data() + 1 + tail_size1, tail_size2); - auto is_dfcm_predictor1 = static_cast(bytes.front() & DFCM_BIT_1); - auto is_dfcm_predictor2 = static_cast(bytes.front() & DFCM_BIT_2); - first = decompressValue(value1, is_dfcm_predictor1 != 0); - second = decompressValue(value2, is_dfcm_predictor2 != 0); + auto is_dfcm_predictor1 = std::to_integer(bytes.front() & DFCM_BIT_1) != 0; + auto is_dfcm_predictor2 = std::to_integer(bytes.front() & DFCM_BIT_2) != 0; + first = decompressValue(value1, is_dfcm_predictor1); + second = decompressValue(value2, is_dfcm_predictor2); return 1 + tail_size1 + tail_size2; } @@ -453,7 +463,7 @@ UInt32 CompressionCodecFPC::doCompressData(const char * source, UInt32 source_si { dest[0] = static_cast(float_width); dest[1] = static_cast(level); - dest[2] = static_cast(encodeEndianness(std::endian::native)); + dest[2] = std::to_integer(encodeEndianness(std::endian::native)); auto dest_size = getMaxCompressedDataSize(source_size); auto destination = std::as_writable_bytes(std::span(dest, dest_size).subspan(HEADER_SIZE)); @@ -475,17 +485,16 @@ void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_si if (source_size < HEADER_SIZE) throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); - auto compressed_data = std::span(source, source_size); - if (decodeEndianness(static_cast(compressed_data[2])) != std::endian::native) + auto compressed_data = std::as_bytes(std::span(source, source_size)); + auto compressed_float_width = std::to_integer(compressed_data[0]); + auto compressed_level = std::to_integer(compressed_data[1]); + if (compressed_level == 0 || compressed_level > MAX_COMPRESSION_LEVEL) + throw Exception("Cannot decompress. 
File has incorrect level", ErrorCodes::CANNOT_DECOMPRESS); + if (decodeEndianness(compressed_data[2]) != std::endian::native) throw Exception("Cannot decompress. File has incorrect endianness", ErrorCodes::CANNOT_DECOMPRESS); - auto compressed_float_width = static_cast(compressed_data[0]); - auto compressed_level = static_cast(compressed_data[1]); - if (compressed_level == 0) - throw Exception("Cannot decompress. File has incorrect level", ErrorCodes::CANNOT_DECOMPRESS); - auto destination = std::as_writable_bytes(std::span(dest, uncompressed_size)); - auto src = std::as_bytes(compressed_data.subspan(HEADER_SIZE)); + auto src = compressed_data.subspan(HEADER_SIZE); switch (compressed_float_width) { case sizeof(Float64): From e701adbd2828895763c6d2d6cbb735a1a74e1922 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 14 Jun 2022 16:22:41 +0200 Subject: [PATCH 15/22] Use workflow URL as "link" ref --- tests/ci/build_report_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index b2d54eadd60..1ba91a38a60 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -93,7 +93,7 @@ def get_failed_report( elapsed_seconds=0, with_coverage=False, ) - return [build_result], [[""]], [""] + return [build_result], [[""]], [GITHUB_RUN_URL] def process_report( From dc2e117cce48323e8c7da31278c5c68f75f63f6f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 14 Jun 2022 17:30:11 +0200 Subject: [PATCH 16/22] UnaryLogicalFunctions improve performance using dynamic dispatch --- src/Functions/FunctionsLogical.cpp | 69 ++++++++----------- tests/performance/unary_logical_functions.xml | 31 +++++++++ 2 files changed, 59 insertions(+), 41 deletions(-) create mode 100644 tests/performance/unary_logical_functions.xml diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index b615f52652c..be295186943 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -50,17 +51,15 @@ MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, const size_t rows_count = ternary_data.size(); auto new_column = ColumnUInt8::create(rows_count); - std::transform( - ternary_data.cbegin(), ternary_data.cend(), new_column->getData().begin(), - [](const auto x) { return x == Ternary::True; }); + for (size_t i = 0; i < rows_count; ++i) + new_column->getData()[i] = (ternary_data[i] == Ternary::True); if (!make_nullable) return new_column; auto null_column = ColumnUInt8::create(rows_count); - std::transform( - ternary_data.cbegin(), ternary_data.cend(), null_column->getData().begin(), - [](const auto x) { return x == Ternary::Null; }); + for (size_t i = 0; i < rows_count; ++i) + null_column->getData()[i] = (ternary_data[i] == Ternary::Null); return ColumnNullable::create(std::move(new_column), std::move(null_column)); } @@ -68,13 +67,14 @@ MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, template bool tryConvertColumnToBool(const IColumn * column, UInt8Container & res) { - const auto col = checkAndGetColumn>(column); - if (!col) + const auto column_typed = checkAndGetColumn>(column); + if (!column_typed) return false; - std::transform( - col->getData().cbegin(), col->getData().cend(), res.begin(), - [](const auto x) { return !!x; }); + auto & data = column_typed->getData(); + size_t data_size = data.size(); + for (size_t i = 0; i 
< data_size; ++i)
+        res[i] = static_cast<bool>(data[i]);
 
     return true;
 }
@@ -99,7 +99,7 @@ bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func)
 {
     bool has_res = false;
 
-    for (int i = static_cast<int>(in.size()) - 1; i >= 0; --i)
+    for (Int64 i = static_cast<Int64>(in.size()) - 1; i >= 0; --i)
     {
         UInt8 x;
@@ -458,7 +458,9 @@ ColumnPtr basicExecuteImpl(ColumnRawPtrs arguments, size_t input_rows_count)
     for (const IColumn * column : arguments)
     {
         if (const auto * uint8_column = checkAndGetColumn<ColumnUInt8>(column))
+        {
             uint8_args.push_back(uint8_column);
+        }
         else
         {
             auto converted_column = ColumnUInt8::create(input_rows_count);
@@ -596,14 +598,14 @@ ColumnPtr FunctionAnyArityLogical<Impl, Name>::executeShortCircuit(ColumnsWithTy
     if (nulls)
         applyTernaryLogic(mask, *nulls);
 
-    MutableColumnPtr res = ColumnUInt8::create();
-    typeid_cast<ColumnUInt8 *>(res.get())->getData() = std::move(mask);
+    auto res = ColumnUInt8::create();
+    res->getData() = std::move(mask);
 
     if (!nulls)
         return res;
 
-    MutableColumnPtr bytemap = ColumnUInt8::create();
-    typeid_cast<ColumnUInt8 *>(bytemap.get())->getData() = std::move(*nulls);
+    auto bytemap = ColumnUInt8::create();
+    bytemap->getData() = std::move(*nulls);
 
     return ColumnNullable::create(std::move(res), std::move(bytemap));
 }
@@ -692,29 +694,14 @@ ColumnPtr FunctionAnyArityLogical<Impl, Name>::getConstantResultForNonConstArgum
     return result_column;
 }
 
-template <typename A, typename Op>
-struct UnaryOperationImpl
-{
-    using ResultType = typename Op::ResultType;
-    using ArrayA = typename ColumnVector<A>::Container;
-    using ArrayC = typename ColumnVector<ResultType>::Container;
-
-    static void NO_INLINE vector(const ArrayA & a, ArrayC & c)
-    {
-        std::transform(
-            a.cbegin(), a.cend(), c.begin(),
-            [](const auto x) { return Op::apply(x); });
-    }
-};
-
 template
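The commit subject mentions dynamic dispatch: the idea is to compile simple indexed loops like the ones introduced above for several instruction sets and pick the widest one the CPU supports at run time, something the removed std::transform-based UnaryOperationImpl tended to defeat. A sketch of the pattern using the DECLARE_MULTITARGET_CODE machinery from Common/TargetSpecific.h (the exact wiring inside FunctionsLogical.cpp may differ from this; applyUnary and applyUnaryDispatch are illustrative names):

#include <Common/TargetSpecific.h>
#include <base/types.h>

DECLARE_MULTITARGET_CODE(

template <typename Op>
void applyUnary(const UInt8 * __restrict src, UInt8 * __restrict dst, size_t size)
{
    /// A plain indexed loop over contiguous data auto-vectorizes reliably.
    for (size_t i = 0; i < size; ++i)
        dst[i] = Op::apply(src[i]);
}

) // DECLARE_MULTITARGET_CODE

template <typename Op>
void applyUnaryDispatch(const UInt8 * src, UInt8 * dst, size_t size)
{
    if (isArchSupported(TargetArch::AVX2))
        TargetSpecific::AVX2::applyUnary<Op>(src, dst, size);
    else
        TargetSpecific::Default::applyUnary<Op>(src, dst, size);
}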