Merge branch 'ClickHouse:master' into fix_datetime64_in_datetime64

commit fd3723a727
Author: Yarik Briukhovetskyi, 2024-12-02 12:19:09 +01:00 (committed by GitHub)
40 changed files with 334 additions and 99 deletions

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit c2b0811f164a7948208489562dab4f186eb305ce
Subproject commit ce6de271811899d587fc28b500041ebcf720014f


@ -192,7 +192,7 @@ struct AggregateFunctionGroupArrayIntersectGenericData
* For such columns GroupArrayIntersect() can be implemented more efficiently (especially for small numeric arrays).
*/
template <bool is_plain_column = false>
class AggregateFunctionGroupArrayIntersectGeneric
class AggregateFunctionGroupArrayIntersectGeneric final
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData,
AggregateFunctionGroupArrayIntersectGeneric<is_plain_column>>
{
@ -353,7 +353,7 @@ namespace
{
/// Substitute return type for Date and DateTime
class AggregateFunctionGroupArrayIntersectDate : public AggregateFunctionGroupArrayIntersect<DataTypeDate::FieldType>
class AggregateFunctionGroupArrayIntersectDate final : public AggregateFunctionGroupArrayIntersect<DataTypeDate::FieldType>
{
public:
explicit AggregateFunctionGroupArrayIntersectDate(const DataTypePtr & argument_type, const Array & parameters_)
@ -361,7 +361,7 @@ public:
static DataTypePtr createResultType() { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()); }
};
class AggregateFunctionGroupArrayIntersectDateTime : public AggregateFunctionGroupArrayIntersect<DataTypeDateTime::FieldType>
class AggregateFunctionGroupArrayIntersectDateTime final : public AggregateFunctionGroupArrayIntersect<DataTypeDateTime::FieldType>
{
public:
explicit AggregateFunctionGroupArrayIntersectDateTime(const DataTypePtr & argument_type, const Array & parameters_)
@ -369,7 +369,7 @@ public:
static DataTypePtr createResultType() { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()); }
};
class AggregateFunctionGroupArrayIntersectDate32 : public AggregateFunctionGroupArrayIntersect<DataTypeDate32::FieldType>
class AggregateFunctionGroupArrayIntersectDate32 final : public AggregateFunctionGroupArrayIntersect<DataTypeDate32::FieldType>
{
public:
explicit AggregateFunctionGroupArrayIntersectDate32(const DataTypePtr & argument_type, const Array & parameters_)


@ -153,7 +153,7 @@ static void deserializeAndInsertImpl(StringRef str, IColumn & data_to);
* For such columns groupUniqArray() can be implemented more efficiently (especially for small numeric arrays).
*/
template <bool is_plain_column = false, typename LimitNumElems = std::false_type>
class AggregateFunctionGroupUniqArrayGeneric
class AggregateFunctionGroupUniqArrayGeneric final
: public IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayGenericData,
AggregateFunctionGroupUniqArrayGeneric<is_plain_column, LimitNumElems>>
{
@ -245,7 +245,7 @@ public:
/// Substitute return type for Date and DateTime
template <typename HasLimit>
class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArray<DataTypeDate::FieldType, HasLimit>
class AggregateFunctionGroupUniqArrayDate final : public AggregateFunctionGroupUniqArray<DataTypeDate::FieldType, HasLimit>
{
public:
explicit AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
@ -254,7 +254,7 @@ public:
};
template <typename HasLimit>
class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUniqArray<DataTypeDateTime::FieldType, HasLimit>
class AggregateFunctionGroupUniqArrayDateTime final : public AggregateFunctionGroupUniqArray<DataTypeDateTime::FieldType, HasLimit>
{
public:
explicit AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
@ -263,7 +263,7 @@ public:
};
template <typename HasLimit>
class AggregateFunctionGroupUniqArrayIPv4 : public AggregateFunctionGroupUniqArray<DataTypeIPv4::FieldType, HasLimit>
class AggregateFunctionGroupUniqArrayIPv4 final : public AggregateFunctionGroupUniqArray<DataTypeIPv4::FieldType, HasLimit>
{
public:
explicit AggregateFunctionGroupUniqArrayIPv4(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())


@ -28,7 +28,7 @@ namespace
/// Returns tuple of (z-statistic, p-value, confidence-interval-low, confidence-interval-high)
template <typename Data>
class AggregateFunctionMeanZTest :
class AggregateFunctionMeanZTest final :
public IAggregateFunctionDataHelper<Data, AggregateFunctionMeanZTest<Data>>
{
private:


@ -47,7 +47,7 @@ struct RankCorrelationData : public StatisticalSample<Float64, Float64>
}
};
class AggregateFunctionRankCorrelation :
class AggregateFunctionRankCorrelation final :
public IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation>
{
public:


@ -38,7 +38,7 @@ namespace ErrorCodes
/// Returns tuple of (t-statistic, p-value)
/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
template <typename Data>
class AggregateFunctionTTest :
class AggregateFunctionTTest final :
public IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>
{
private:


@ -203,7 +203,7 @@ struct AggregateFunctionTopKGenericData
* For such columns topK() can be implemented more efficiently (especially for small numeric arrays).
*/
template <bool is_plain_column, bool is_weighted>
class AggregateFunctionTopKGeneric
class AggregateFunctionTopKGeneric final
: public IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>
{
private:
@ -367,7 +367,7 @@ public:
/// Substitute return type for Date and DateTime
template <bool is_weighted>
class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>
class AggregateFunctionTopKDate final : public AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>
{
public:
using AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>::AggregateFunctionTopK;
@ -384,7 +384,7 @@ public:
};
template <bool is_weighted>
class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>
class AggregateFunctionTopKDateTime final : public AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>
{
public:
using AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>::AggregateFunctionTopK;
@ -401,7 +401,7 @@ public:
};
template <bool is_weighted>
class AggregateFunctionTopKIPv4 : public AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>
class AggregateFunctionTopKIPv4 final : public AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>
{
public:
using AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>::AggregateFunctionTopK;


@ -150,7 +150,7 @@ struct AggregateFunctionDistinctMultipleGenericData : public AggregateFunctionDi
* Adding -Distinct suffix to aggregate function
**/
template <typename Data>
class AggregateFunctionDistinct : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct<Data>>
class AggregateFunctionDistinct final : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct<Data>>
{
private:
AggregateFunctionPtr nested_func;


@ -114,7 +114,7 @@ struct CrossTabData
template <typename Data>
class AggregateFunctionCrossTab : public IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>
class AggregateFunctionCrossTab final : public IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>
{
public:
explicit AggregateFunctionCrossTab(const DataTypes & arguments)


@ -176,6 +176,8 @@ public:
/// Serializes state (to transmit it over the network, for example).
virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
virtual void serializeBatch(const PaddedPODArray<AggregateDataPtr> & data, size_t start, size_t size, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
/// Deserializes state. This function is called only for empty (just created) states.
virtual void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> version = std::nullopt, Arena * arena = nullptr) const = 0; /// NOLINT
@ -471,6 +473,12 @@ public:
}
}
void serializeBatch(const PaddedPODArray<AggregateDataPtr> & data, size_t start, size_t size, WriteBuffer & buf, std::optional<size_t> version) const override // NOLINT
{
for (size_t i = start; i < size; ++i)
static_cast<const Derived *>(this)->serialize(data[i], buf, version);
}
void addBatchSparse(
size_t row_begin,
size_t row_end,


@ -418,9 +418,10 @@ WeakHash32 ColumnAggregateFunction::getWeakHash32() const
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
{
/// Fallback to per-element hashing, as there is no faster way
for (size_t i = 0; i < size(); ++i)
updateHashWithValue(i, hash);
WriteBufferFromOwnString wbuf;
const ColumnAggregateFunction::Container & vec = getData();
func->serializeBatch(vec, 0, size(), wbuf);
hash.update(wbuf.str().c_str(), wbuf.str().size());
}
/// The returned size is less than real size. The reason is that some parts of


@ -266,7 +266,7 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
}
const size_t voting_members = get_num_voting_members();
const auto not_responding_peers = get_not_responding_peers();
const auto not_responding_peers = get_not_responding_peers_count();
const auto quorum_size = voting_members / 2 + 1;
const auto max_not_responding_peers = voting_members - quorum_size;
@ -303,6 +303,11 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
return std::unique_lock(lock_);
}
std::unique_lock<std::mutex> lockCommit()
{
return std::unique_lock(commit_lock_);
}
bool isCommitInProgress() const
{
return sm_commit_exec_in_progress_;
@ -1228,6 +1233,7 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const
uint64_t KeeperServer::createSnapshot()
{
auto commit_lock = raft_instance->lockCommit();
uint64_t log_idx = raft_instance->create_snapshot();
if (log_idx != 0)
LOG_INFO(log, "Snapshot creation scheduled with last committed log index {}.", log_idx);


@ -142,13 +142,14 @@ void KeeperStateMachine<Storage>::init()
}
catch (...)
{
tryLogCurrentException(
LOG_FATAL(
log,
fmt::format(
"Aborting because of failure to load from latest snapshot with index {}. Problematic snapshot can be removed but it will "
"lead to data loss",
latest_log_index));
std::abort();
"Failure to load from latest snapshot with index {}: {}",
latest_log_index,
getCurrentExceptionMessage(true, true, false));
LOG_FATAL(
log, "Manual intervention is necessary for recovery. Problematic snapshot can be removed but it will lead to data loss");
abort();
}
}
@ -427,8 +428,13 @@ bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestFor
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to preprocess stored log at index {}, aborting to avoid inconsistent state", request_for_session.log_idx));
std::abort();
LOG_FATAL(
log,
"Failed to preprocess stored log at index {}: {}",
request_for_session.log_idx,
getCurrentExceptionMessage(true, true, false));
LOG_FATAL(log, "Aborting to avoid inconsistent state");
abort();
}
if (keeper_context->digestEnabled() && request_for_session.digest)


@ -8,6 +8,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
template<typename V>
struct ListNode
{
@ -292,7 +297,8 @@ public:
{
size_t hash_value = map.hash(key);
auto it = map.find(key, hash_value);
chassert(it != map.end());
if (it == map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not find key: '{}'", key);
auto list_itr = it->getMapped();
uint64_t old_value_size = list_itr->value.sizeInBytes();
@ -348,7 +354,8 @@ public:
const V & getValue(StringRef key) const
{
auto it = map.find(key);
chassert(it);
if (it == map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not find key: '{}'", key);
return it->getMapped()->value;
}
@ -356,7 +363,8 @@ public:
{
for (auto & itr : snapshot_invalid_iters)
{
chassert(!itr->isActiveInMap());
if (itr->isActiveInMap())
throw Exception(ErrorCodes::LOGICAL_ERROR, "{} is not active in map", itr->key);
updateDataSize(ERASE, itr->key.size, 0, itr->value.sizeInBytes(), /*remove_old=*/true);
if (itr->getFreeKey())
arena.free(const_cast<char *>(itr->key.data), itr->key.size);


@ -1242,6 +1242,9 @@ Set the quoting rule for identifiers in SHOW CREATE query
)", 0) \
DECLARE(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"(
Set the quoting style for identifiers in SHOW CREATE query
)", 0) \
DECLARE(String, composed_data_type_output_format_mode, "default", R"(
Sets the text output format for composed data types (Array, Map, Tuple): 'default' or 'spark'.
)", 0) \
// End of FORMAT_FACTORY_SETTINGS

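For orientation, a small, hedged sketch of what the two modes produce. The spark-mode result matches the reference output added later in this commit; the default-mode result is the usual quoted ClickHouse rendering, shown here only for contrast.

SELECT toString(map('1343', 'fe', 'afe', 'fefe'));
-- default mode: {'1343':'fe','afe':'fefe'}

SELECT toString(map('1343', 'fe', 'afe', 'fefe'))
SETTINGS composed_data_type_output_format_mode = 'spark';
-- spark mode: {1343 -> fe, afe -> fefe}
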

@ -1816,6 +1816,22 @@ Possible values:
- 0 Disabled.
- 1 Enabled.
)", 0) \
DECLARE(Map, http_response_headers, "", R"(
Allows adding or overriding HTTP headers which the server will return in the response to a successful query result.
This only affects the HTTP interface.
If the header is already set by default, the provided value overrides it.
If the header was not set by default, it is added to the list of headers.
Headers that are set by the server by default and not overridden by this setting remain as they are.
The setting lets you set a header to a constant value; there is currently no way to set a header to a dynamically calculated value.
Neither names nor values can contain ASCII control characters.
If you implement a UI application that lets users modify settings but also makes decisions based on the returned headers, it is recommended to restrict this setting to readonly.
Example: `SET http_response_headers = '{"Content-Type": "image/png"}'`
)", 0) \
\
DECLARE(String, count_distinct_implementation, "uniqExact", R"(

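A hedged usage sketch for the new setting, mirroring the shell test added later in this commit; the headers are only applied on the HTTP interface, and the Map value can be supplied per query or via SET.

-- Over HTTP, the response to this query would also carry "My-New-Header: Hello, world."
SELECT 1 SETTINGS http_response_headers = $${'My-New-Header':'Hello, world.'}$$;

-- Overriding a default header, as in the setting's documentation above
SET http_response_headers = '{"Content-Type": "image/png"}';
SELECT 1;
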

@ -67,6 +67,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"max_bytes_ratio_before_external_group_by", 0., 0., "New setting."},
{"max_bytes_ratio_before_external_sort", 0., 0., "New setting."},
{"use_async_executor_for_materialized_views", false, false, "New setting."},
{"composed_data_type_output_format_mode", "default", "default", "New setting"},
{"http_response_headers", "", "", "New setting."},
}
},
{"24.11",


@ -60,12 +60,11 @@ void SerializationAggregateFunction::serializeBinaryBulk(const IColumn & column,
const ColumnAggregateFunction & real_column = typeid_cast<const ColumnAggregateFunction &>(column);
const ColumnAggregateFunction::Container & vec = real_column.getData();
ColumnAggregateFunction::Container::const_iterator it = vec.begin() + offset;
ColumnAggregateFunction::Container::const_iterator end = limit ? it + limit : vec.end();
size_t end = vec.size();
if (limit)
end = std::min(end, offset + limit);
end = std::min(end, vec.end());
for (; it != end; ++it)
function->serialize(*it, ostr, version);
function->serializeBatch(vec, offset, end, ostr, version);
}
void SerializationAggregateFunction::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const


@ -401,7 +401,7 @@ void SerializationArray::deserializeBinaryBulkWithMultipleStreams(
template <typename Writer>
static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && write_nested)
static void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, Writer && write_nested)
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
@ -412,10 +412,14 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe
const IColumn & nested_column = column_array.getData();
writeChar('[', ostr);
for (size_t i = offset; i < next_offset; ++i)
if (next_offset != offset)
write_nested(nested_column, offset);
for (size_t i = offset + 1; i < next_offset; ++i)
{
if (i != offset)
writeChar(',', ostr);
writeChar(',', ostr);
if (settings.composed_data_type_output_format_mode == "spark")
writeChar(' ', ostr);
write_nested(nested_column, i);
}
writeChar(']', ostr);
@ -520,10 +524,13 @@ static ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reade
void SerializationArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
serializeTextImpl(column, row_num, settings, ostr,
[&](const IColumn & nested_column, size_t i)
{
nested->serializeTextQuoted(nested_column, i, ostr, settings);
if (settings.composed_data_type_output_format_mode == "spark")
nested->serializeText(nested_column, i, ostr, settings);
else
nested->serializeTextQuoted(nested_column, i, ostr, settings);
});
}


@ -90,6 +90,7 @@ template <typename KeyWriter, typename ValueWriter>
void SerializationMap::serializeTextImpl(
const IColumn & column,
size_t row_num,
const FormatSettings & settings,
WriteBuffer & ostr,
KeyWriter && key_writer,
ValueWriter && value_writer) const
@ -104,15 +105,31 @@ void SerializationMap::serializeTextImpl(
size_t next_offset = offsets[row_num];
writeChar('{', ostr);
for (size_t i = offset; i < next_offset; ++i)
if (offset != next_offset)
{
if (i != offset)
writeChar(',', ostr);
key_writer(ostr, key, nested_tuple.getColumn(0), i);
writeChar(':', ostr);
value_writer(ostr, value, nested_tuple.getColumn(1), i);
key_writer(ostr, key, nested_tuple.getColumn(0), offset);
if (settings.composed_data_type_output_format_mode == "spark")
writeString(std::string_view(" -> "), ostr);
else
writeChar(':', ostr);
value_writer(ostr, value, nested_tuple.getColumn(1), offset);
}
if (settings.composed_data_type_output_format_mode == "spark")
for (size_t i = offset + 1; i < next_offset; ++i)
{
writeString(std::string_view(", "), ostr);
key_writer(ostr, key, nested_tuple.getColumn(0), i);
writeString(std::string_view(" -> "), ostr);
value_writer(ostr, value, nested_tuple.getColumn(1), i);
}
else
for (size_t i = offset + 1; i < next_offset; ++i)
{
writeChar(',', ostr);
key_writer(ostr, key, nested_tuple.getColumn(0), i);
writeChar(':', ostr);
value_writer(ostr, value, nested_tuple.getColumn(1), i);
}
writeChar('}', ostr);
}
@ -221,10 +238,13 @@ void SerializationMap::serializeText(const IColumn & column, size_t row_num, Wri
{
auto writer = [&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
{
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings);
if (settings.composed_data_type_output_format_mode == "spark")
subcolumn_serialization->serializeText(subcolumn, pos, buf, settings);
else
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings);
};
serializeTextImpl(column, row_num, ostr, writer, writer);
serializeTextImpl(column, row_num, settings, ostr, writer, writer);
}
void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
@ -266,7 +286,7 @@ bool SerializationMap::tryDeserializeText(IColumn & column, ReadBuffer & istr, c
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
serializeTextImpl(column, row_num, settings, ostr,
[&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
{
/// We need to double-quote all keys (including integers) to produce valid JSON.


@ -70,7 +70,7 @@ public:
private:
template <typename KeyWriter, typename ValueWriter>
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
template <typename ReturnType = void, typename Reader>
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;


@ -137,12 +137,25 @@ void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr,
void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('(', ostr);
for (size_t i = 0; i < elems.size(); ++i)
if (!elems.empty())
{
if (i != 0)
writeChar(',', ostr);
elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
if (settings.composed_data_type_output_format_mode == "spark")
elems[0]->serializeText(extractElementColumn(column, 0), row_num, ostr, settings);
else
elems[0]->serializeTextQuoted(extractElementColumn(column, 0), row_num, ostr, settings);
}
if (settings.composed_data_type_output_format_mode == "spark")
for (size_t i = 1; i < elems.size(); ++i)
{
writeString(std::string_view(", "), ostr);
elems[i]->serializeText(extractElementColumn(column, i), row_num, ostr, settings);
}
else
for (size_t i = 1; i < elems.size(); ++i)
{
writeChar(',', ostr);
elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
}
writeChar(')', ostr);
}


@ -251,6 +251,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.values.deduce_templates_of_expressions = settings[Setting::input_format_values_deduce_templates_of_expressions];
format_settings.values.interpret_expressions = settings[Setting::input_format_values_interpret_expressions];
format_settings.values.escape_quote_with_quote = settings[Setting::output_format_values_escape_quote_with_quote];
format_settings.composed_data_type_output_format_mode = settings[Setting::composed_data_type_output_format_mode];
format_settings.with_names_use_header = settings[Setting::input_format_with_names_use_header];
format_settings.with_types_use_header = settings[Setting::input_format_with_types_use_header];
format_settings.write_statistics = settings[Setting::output_format_write_statistics];


@ -38,6 +38,7 @@ struct FormatSettings
bool try_infer_variant = false;
bool seekable_read = true;
String composed_data_type_output_format_mode = "default";
UInt64 max_rows_to_read_for_schema_inference = 25000;
UInt64 max_bytes_to_read_for_schema_inference = 32 * 1024 * 1024;


@ -63,7 +63,7 @@ MarksInCompressedFile::MarksInCompressedFile(const PlainArray & marks)
// Overallocate by +1 element to let the bit packing/unpacking do less bounds checking.
size_t packed_length = (packed_bits + 63) / 64 + 1;
packed.reserve(packed_length);
packed.reserve_exact(packed_length);
packed.resize_fill(packed_length);
// Second pass: write out the packed marks.
@ -97,7 +97,7 @@ std::tuple<const MarksInCompressedFile::BlockInfo *, size_t> MarksInCompressedFi
size_t MarksInCompressedFile::approximateMemoryUsage() const
{
return sizeof(*this) + blocks.size() * sizeof(blocks[0]) + packed.size() * sizeof(packed[0]);
return sizeof(*this) + blocks.allocated_bytes() + packed.allocated_bytes();
}
}


@ -35,18 +35,29 @@ TEST(Marks, Compression)
EXPECT_LE((marks.approximateMemoryUsage() - sizeof(MarksInCompressedFile)) * 8, plain.size() * max_bits_per_mark);
};
// Typical.
test(gen(10000, 1'000'000, 0), 30);
{
SCOPED_TRACE("Typical");
test(gen(10000, 1'000'000, 0), 30);
}
// Completely random 64-bit values.
test(gen(10000, UINT64_MAX - 1, UINT64_MAX - 1), 130);
// All zeros.
test(gen(10000, 0, 0), 2);
{
SCOPED_TRACE("Completely random 64-bit values");
test(gen(10000, UINT64_MAX - 1, UINT64_MAX - 1), 130);
}
// Short.
test(gen(10, 1000, 1000), 65);
{
SCOPED_TRACE("All zeros");
test(gen(10000, 0, 0), 2);
}
// Empty.
test(gen(0, 0, 0), 0);
{
SCOPED_TRACE("Short");
test(gen(10, 1000, 1000), 65);
}
{
SCOPED_TRACE("Empty");
test(gen(0, 0, 0), 0);
}
}


@ -1633,29 +1633,29 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (isReplicated(*inner_table_engine))
is_storage_replicated = true;
}
}
}
bool allow_heavy_populate = getContext()->getSettingsRef()[Setting::database_replicated_allow_heavy_create] && create.is_populate;
if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate))
bool allow_heavy_populate = getContext()->getSettingsRef()[Setting::database_replicated_allow_heavy_create] && create.is_populate;
if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate))
{
const bool allow_create_select_for_replicated
= (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated;
if (!allow_create_select_for_replicated)
{
const bool allow_create_select_for_replicated
= (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated;
if (!allow_create_select_for_replicated)
{
/// POPULATE can be enabled with setting, provide hint in error message
if (create.is_populate)
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT "
"queries. "
"Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with "
"caution");
/// POPULATE can be enabled with setting, provide hint in error message
if (create.is_populate)
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"CREATE AS SELECT is not supported with Replicated databases. Consider using separate CREATE and INSERT queries.");
}
"CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT "
"queries. "
"Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with "
"caution");
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"CREATE AS SELECT is not supported with Replicated databases. Consider using separate CREATE and INSERT queries.");
}
}
if (create.is_clone_as)
{


@ -163,6 +163,7 @@ namespace Setting
extern const SettingsSeconds wait_for_async_insert_timeout;
extern const SettingsBool implicit_select;
extern const SettingsBool enforce_strict_identifier_format;
extern const SettingsMap http_response_headers;
}
namespace ErrorCodes
@ -179,6 +180,7 @@ namespace ErrorCodes
extern const int SYNTAX_ERROR;
extern const int SUPPORT_IS_DISABLED;
extern const int INCORRECT_QUERY;
extern const int BAD_ARGUMENTS;
}
namespace FailPoints
@ -1682,6 +1684,33 @@ void executeQuery(
/// But `session_timezone` setting could be modified in the query itself, so we update the value.
result_details.timezone = DateLUT::instance().getTimeZone();
const Map & additional_http_headers = context->getSettingsRef()[Setting::http_response_headers].value;
if (!additional_http_headers.empty())
{
for (const auto & key_value : additional_http_headers)
{
if (key_value.getType() != Field::Types::Tuple
|| key_value.safeGet<Tuple>().size() != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the `http_response_headers` setting must be a Map");
if (key_value.safeGet<Tuple>().at(0).getType() != Field::Types::String)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The keys of the `http_response_headers` setting must be Strings");
if (key_value.safeGet<Tuple>().at(1).getType() != Field::Types::String)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The values of the `http_response_headers` setting must be Strings");
String key = key_value.safeGet<Tuple>().at(0).safeGet<String>();
String value = key_value.safeGet<Tuple>().at(1).safeGet<String>();
if (std::find_if(key.begin(), key.end(), isControlASCII) != key.end()
|| std::find_if(value.begin(), value.end(), isControlASCII) != value.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The keys and values of the `http_response_headers` setting cannot contain ASCII control characters");
if (!result_details.additional_headers.emplace(key, value).second)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are duplicate entries in the `http_response_headers` setting");
}
}
auto & pipeline = streams.pipeline;
std::unique_ptr<WriteBuffer> compressed_buffer;


@ -24,6 +24,7 @@ struct QueryResultDetails
std::optional<String> content_type = {};
std::optional<String> format = {};
std::optional<String> timezone = {};
std::unordered_map<String, String> additional_headers = {};
};
using SetResultDetailsFunc = std::function<void(const QueryResultDetails &)>;
@ -42,7 +43,7 @@ void executeQuery(
WriteBuffer & ostr, /// Where to write query output to.
bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file.
ContextMutablePtr context, /// DB, tables, data types, storage engines, functions, aggregate functions...
SetResultDetailsFunc set_result_details, /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone.
SetResultDetailsFunc set_result_details, /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone, as well as additional headers.
QueryFlags flags = {},
const std::optional<FormatSettings> & output_format_settings = std::nullopt, /// Format settings for output format, will be calculated from the context if not set.
HandleExceptionInOutputFormatFunc handle_exception_in_output_format = {} /// If a non-empty callback is passed, it will be called on exception with created output format.


@ -524,6 +524,9 @@ void HTTPHandler::processQuery(
if (details.timezone)
response.add("X-ClickHouse-Timezone", *details.timezone);
for (const auto & [name, value] : details.additional_headers)
response.set(name, value);
};
auto handle_exception_in_output_format = [&](IOutputFormat & current_output_format,


@ -44,8 +44,6 @@ MergeTreeSink::~MergeTreeSink()
if (!delayed_chunk)
return;
chassert(isCancelled());
for (auto & partition : delayed_chunk->partitions)
{
partition.temp_part.cancel();


@ -118,7 +118,7 @@ struct MergedBlockOutputStream::Finalizer::Impl
}
void finish();
void cancel() noexcept;
void cancel();
};
void MergedBlockOutputStream::Finalizer::finish()
@ -129,7 +129,7 @@ void MergedBlockOutputStream::Finalizer::finish()
to_finish->finish();
}
void MergedBlockOutputStream::Finalizer::cancel() noexcept
void MergedBlockOutputStream::Finalizer::cancel()
{
std::unique_ptr<Impl> to_cancel = std::move(impl);
impl.reset();
@ -166,7 +166,7 @@ void MergedBlockOutputStream::Finalizer::Impl::finish()
part->getDataPartStorage().removeFile(file_name);
}
void MergedBlockOutputStream::Finalizer::Impl::cancel() noexcept
void MergedBlockOutputStream::Finalizer::Impl::cancel()
{
writer.cancel();
@ -182,8 +182,15 @@ MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr<Impl> impl_) : imp
MergedBlockOutputStream::Finalizer::~Finalizer()
{
if (impl)
cancel();
try
{
if (impl)
finish();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}


@ -54,7 +54,7 @@ public:
~Finalizer();
void finish();
void cancel() noexcept;
void cancel();
};
/// Finalize writing part and fill inner structures


@ -44,10 +44,19 @@
<election_timeout_upper_bound_ms>5000</election_timeout_upper_bound_ms>
<raft_logs_level>information</raft_logs_level>
<force_sync>false</force_sync>
<async_replication>1</async_replication>
<!-- we want all logs for complex problems investigation -->
<reserved_log_items>1000000000000000</reserved_log_items>
</coordination_settings>
<feature_flags>
<filtered_list>1</filtered_list>
<multi_read>1</multi_read>
<check_not_exists>1</check_not_exists>
<create_if_not_exists>1</create_if_not_exists>
<remove_recursive>1</remove_recursive>
</feature_flags>
<raft_configuration>
<server>
<id>1</id>


@ -191,8 +191,9 @@ def test_invalid_snapshot(started_cluster):
]
)
node.start_clickhouse(start_wait_sec=120, expected_to_fail=True)
assert node.contains_in_log("Failure to load from latest snapshot with index")
assert node.contains_in_log(
"Aborting because of failure to load from latest snapshot with index"
"Manual intervention is necessary for recovery. Problematic snapshot can be removed but it will lead to data loss"
)
node.stop_clickhouse()


@ -0,0 +1,10 @@
<test>
<create_query>DROP TABLE IF EXISTS test_avg_insert</create_query>
<create_query>
CREATE TABLE test_avg_insert (key UInt64, value AggregateFunction(avg, UInt8)) ENGINE = Memory()
</create_query>
<query>INSERT INTO test_avg_insert with initializeAggregation('avgState', 1) as s select number, s AS value FROM numbers(200000000)</query>
<drop_query>DROP TABLE IF EXISTS test_avg_insert</drop_query>
</test>


@ -0,0 +1,16 @@
-- array format --
[\'1\']
[1, 2, abc, \'1\']
[1, 2, abc, \'1\']
[1, 2, abc, \'1\']
[1, 2, abc, \'1\']
-- map format --
{1343 -> fe, afe -> fefe}
{1343 -> fe, afe -> fefe}
{1343 -> fe, afe -> fefe}
{1343 -> fe, afe -> fefe}
-- tuple format --
(1, 3, abc)
(1, 3, abc)
(1, 3, abc)
(1, 3, abc)


@ -0,0 +1,18 @@
SELECT '-- array format --';
SELECT CAST(array('\'1\'') , 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT '-- map format --';
SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT '-- tuple format --';
SELECT toString(('1', '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString((materialize('1'), '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS composed_data_type_output_format_mode = 'spark';


@ -0,0 +1,17 @@
We can add a new header:
> POST /?http_response_headers={'My-New-Header':'Hello,+world.'} HTTP/1.1
< My-New-Header: Hello, world.
It works even with the settings clause:
< My-New-Header: Hello, world.
Check the default header value:
> Content-Type: application/x-www-form-urlencoded
< Content-Type: text/tab-separated-values; charset=UTF-8
Check that we can override it:
> POST /?http_response_headers={'Content-Type':'image/png'} HTTP/1.1
> Content-Type: application/x-www-form-urlencoded
< Content-Type: image/png
It does not allow bad characters:
BAD_ARGUMENTS
BAD_ARGUMENTS
It does not let duplicate entries:
BAD_ARGUMENTS


@ -0,0 +1,24 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
echo "We can add a new header:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/?http_response_headers={'My-New-Header':'Hello,+world.'}" -d "SELECT 1" 2>&1 | grep -i 'My-New'
echo "It works even with the settings clause:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1 SETTINGS http_response_headers = \$\${'My-New-Header':'Hello, world.'}\$\$" 2>&1 | grep -i 'My-New'
echo "Check the default header value:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1" 2>&1 | grep -i 'Content-Type'
echo "Check that we can override it:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/?http_response_headers={'Content-Type':'image/png'}" -d "SELECT 1" 2>&1 | grep -i 'Content-Type'
echo "It does not allow bad characters:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1 SETTINGS http_response_headers = \$\${'My-New-Header':'Hello,\n\nworld.'}\$\$" 2>&1 | grep -o -F 'BAD_ARGUMENTS'
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1 SETTINGS http_response_headers = \$\${'My\rNew-Header':'Hello, world.'}\$\$" 2>&1 | grep -o -F 'BAD_ARGUMENTS'
echo "It does not let duplicate entries:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1 SETTINGS http_response_headers = \$\${'a':'b','a':'c'}\$\$" 2>&1 | grep -o -F 'BAD_ARGUMENTS'