Merge branch 'master' into dictionary-invalidate-query

Nikolai Kochetov 2017-05-23 13:27:31 +03:00
commit f0ec7901e9
54 changed files with 2239 additions and 223 deletions

View File

@ -606,5 +606,13 @@ void Block::unshareColumns()
}
}
void Block::updateHash(SipHash & hash) const
{
for (size_t row_no = 0, num_rows = rows(); row_no < num_rows; ++row_no)
{
for (auto & col : getColumns())
col.column->updateHashWithValue(row_no, hash);
}
}
}

View File

@ -119,6 +119,12 @@ public:
*/
void unshareColumns();
/** Updates the SipHash of the Block, using the update method of its columns.
* The resulting hash can be used to differentiate blocks
* with the same structure but different data.
*/
void updateHash(SipHash & hash) const;
private:
void eraseImpl(size_t position);
void initializeIndexByName();
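As a usage sketch (not part of this diff), the hash filled in by updateHash can serve as a content fingerprint for blocks of identical structure; this is the same pattern StorageTrivialBuffer::addBlock applies later in this commit. The helper name blocksEqualByHash is hypothetical.

```cpp
#include <Core/Block.h>
#include <Common/SipHash.h>

namespace DB
{

/// Hypothetical helper: two blocks with the same structure compare equal
/// if their SipHash digests match (hash collisions are possible but negligibly rare).
static bool blocksEqualByHash(const Block & lhs, const Block & rhs)
{
    SipHash lhs_hash;
    lhs.updateHash(lhs_hash);

    SipHash rhs_hash;
    rhs.updateHash(rhs_hash);

    return lhs_hash.get64() == rhs_hash.get64();
}

}
```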

View File

@ -177,7 +177,7 @@ namespace ErrorCodes
extern const int TOO_BIG_AST = 168;
extern const int BAD_TYPE_OF_FIELD = 169;
extern const int BAD_GET = 170;
extern const int BLOCKS_HAS_DIFFERENT_STRUCTURE = 171;
extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE = 171;
extern const int CANNOT_CREATE_DIRECTORY = 172;
extern const int CANNOT_ALLOCATE_MEMORY = 173;
extern const int CYCLIC_ALIASES = 174;

View File

@ -12,6 +12,7 @@ namespace ErrorCodes
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_READ_ARRAY_FROM_TEXT;
extern const int CANNOT_PARSE_NUMBER;
}
@ -33,7 +34,8 @@ static bool isParseError(int code)
|| code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING
|| code == ErrorCodes::CANNOT_PARSE_DATE
|| code == ErrorCodes::CANNOT_PARSE_DATETIME
|| code == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT;
|| code == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT
|| code == ErrorCodes::CANNOT_PARSE_NUMBER;
}

View File

@ -55,17 +55,17 @@ Block CastTypeBlockInputStream::readImpl()
if (it == cast_description.end())
{
// Leave the same column
res.insert(src_column);
}
else
{
CastElement & cast_element = it->second;
size_t tmp_col = cast_element.tmp_col_offset;
ColumnNumbers arguments{tmp_col, tmp_col + 1};
tmp_conversion_block.getByPosition(tmp_col).column = src_column.column;
cast_element.function->execute(tmp_conversion_block, arguments, tmp_col + 2);
tmp_conversion_block.getByPosition(tmp_col).column = src_column.column;
cast_element.function->execute(tmp_conversion_block, ColumnNumbers{tmp_col, tmp_col + 1}, tmp_col + 2);
res.insert(tmp_conversion_block.getByPosition(tmp_col + 2));
}
}
@ -93,22 +93,24 @@ void CastTypeBlockInputStream::initialize(const Block & src_block)
/// Force conversion if source and destination types are different.
if (!ref_column.type->equals(*src_column.type))
{
ColumnWithTypeAndName src_columnn_copy = src_column.cloneEmpty();
ColumnWithTypeAndName alias_column(std::make_shared<ColumnConstString>(1, ref_column.type->getName()), std::make_shared<DataTypeString>(), "");
ColumnWithTypeAndName result_column(nullptr, ref_column.type->clone(), src_column.name);
ColumnWithTypeAndName res_type_name_column(std::make_shared<ColumnConstString>(1, ref_column.type->getName()), std::make_shared<DataTypeString>(), "");
ColumnWithTypeAndName res_blank_column(nullptr, ref_column.type->clone(), src_column.name);
DataTypePtr unused_return_type;
std::vector<ExpressionAction> unused_prerequisites;
ColumnsWithTypeAndName arguments{src_columnn_copy, alias_column};
/// Prepares function to execution. TODO It is not obvious.
/// Prepares function to execution
auto cast_function = FunctionFactory::instance().get("CAST", context);
cast_function->getReturnTypeAndPrerequisites(arguments, unused_return_type, unused_prerequisites);
{
DataTypePtr unused_return_type;
std::vector<ExpressionAction> unused_prerequisites;
ColumnsWithTypeAndName arguments{src_column, res_type_name_column};
cast_function->getReturnTypeAndPrerequisites(arguments, unused_return_type, unused_prerequisites);
}
/// Prefill arguments and result column for current CAST
tmp_conversion_block.insert(src_column);
tmp_conversion_block.insert(alias_column);
tmp_conversion_block.insert(result_column);
tmp_conversion_block.insert(res_type_name_column);
tmp_conversion_block.insert(res_blank_column);
/// Index of src_column blank in tmp_conversion_block
size_t tmp_col_offset = cast_description.size() * 3;
cast_description.emplace(src_col, CastElement(std::move(cast_function), tmp_col_offset));
}

View File

@ -27,20 +27,26 @@ private:
const Context & context;
Block ref_defenition;
/// Initializes cast_description and prepares tmp_conversion_block
void initialize(const Block & src_block);
bool initialized = false;
struct CastElement
{
/// Prepared function to do conversion
std::shared_ptr<IFunction> function;
/// Position of first function argument in tmp_conversion_block
size_t tmp_col_offset;
CastElement(std::shared_ptr<IFunction> && function_, size_t tmp_col_offset_);
};
/// Describes required conversions on source block
/// Contains column numbers in source block that should be converted
std::map<size_t, CastElement> cast_description;
/// Auxiliary block, stores arguments and results of required CAST calls
/// Auxiliary block, stores prefilled arguments and result for each CAST function in cast_description
/// 3 columns are allocated for each conversion: [blank of source column, column with res type name, blank of res column]
Block tmp_conversion_block;
};
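As an orientation aid, the slot arithmetic behind tmp_conversion_block is sketched below; this is illustrative only, and the names castSlotOffset and offset are not from the diff.

```cpp
#include <cstddef>

/// Illustrative sketch: the i-th conversion registered in cast_description owns
/// three consecutive positions in tmp_conversion_block:
///   [offset]     blank of the source column (filled from each incoming block),
///   [offset + 1] constant string column with the destination type name,
///   [offset + 2] blank column of the destination type, where CAST writes its result.
inline size_t castSlotOffset(size_t conversion_index)
{
    return conversion_index * 3;
}

/// Execution for one column then reads slots {offset, offset + 1} and writes slot offset + 2:
///   cast_element.function->execute(tmp_conversion_block, ColumnNumbers{offset, offset + 1}, offset + 2);
```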

View File

@ -155,9 +155,11 @@ static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf
else if (name == "PrettySpaceNoEscapes")
return std::make_shared<PrettySpaceBlockOutputStream>(buf, true, settings.output_format_pretty_max_rows, context);
else if (name == "Vertical")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<VerticalRowOutputStream>(buf, sample, context));
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<VerticalRowOutputStream>(
buf, sample, settings.output_format_pretty_max_rows, context));
else if (name == "VerticalRaw")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<VerticalRawRowOutputStream>(buf, sample, context));
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<VerticalRawRowOutputStream>(
buf, sample, settings.output_format_pretty_max_rows, context));
else if (name == "Values")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<ValuesRowOutputStream>(buf));
else if (name == "JSON")

View File

@ -124,7 +124,7 @@ bool JSONEachRowRowInputStream::read(Block & block)
}
skipWhitespaceIfAny(istr);
if (!istr.eof() && *istr.position() == ',')
if (!istr.eof() && (*istr.position() == ',' || *istr.position() == ';')) /// Semicolon is added for convenience as it could be used at end of INSERT query.
++istr.position();
/// Fill non-visited columns with the default values.

View File

@ -11,7 +11,7 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int BLOCKS_HAS_DIFFERENT_STRUCTURE;
extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE;
}
@ -130,7 +130,7 @@ void MergingSortedBlockInputStream::init(Block & merged_block, ColumnPlainPtrs &
{
throw Exception("Merging blocks has different names or types of columns:\n"
+ shared_block_ptr->dumpStructure() + "\nand\n" + merged_block.dumpStructure(),
ErrorCodes::BLOCKS_HAS_DIFFERENT_STRUCTURE);
ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
}
}
}

View File

@ -10,6 +10,7 @@ namespace ErrorCodes
extern const int INCORRECT_DATA;
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
extern const int CANNOT_READ_ALL_DATA;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
}
@ -108,6 +109,7 @@ bool TSKVRowInputStream::read(Block & block)
{
StringRef name_ref;
bool has_value = readName(istr, name_ref, name_buf);
ssize_t index = -1;
if (has_value)
{
@ -126,7 +128,7 @@ bool TSKVRowInputStream::read(Block & block)
}
else
{
size_t index = it->second;
index = it->second;
if (read_columns[index])
throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
@ -159,7 +161,16 @@ bool TSKVRowInputStream::read(Block & block)
break;
}
else
throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);
{
/// Possibly garbage was written into the column; remove it
if (index >= 0)
{
block.getByPosition(index).column->popBack(1);
read_columns[index] = false;
}
throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
}
}
}
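The rollback above relies on IColumn::popBack to drop a partially written value before the exception propagates. Below is a generic sketch of that pattern, under the assumption that parse_value is some callback that appends one value to the column; the helper name appendOrRollback is hypothetical.

```cpp
#include <Columns/IColumn.h>

namespace DB
{

/// Sketch only: append one value and undo the append if anything after it fails,
/// so the column stays consistent with the read_columns bookkeeping.
template <typename ParseFn>
void appendOrRollback(IColumn & column, ParseFn && parse_value)
{
    const size_t old_size = column.size();
    try
    {
        parse_value(column);
    }
    catch (...)
    {
        if (column.size() > old_size)
            column.popBack(column.size() - old_size);
        throw;
    }
}

}
```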

View File

@ -10,8 +10,9 @@
namespace DB
{
VerticalRowOutputStream::VerticalRowOutputStream(WriteBuffer & ostr_, const Block & sample_, const Context & context)
: ostr(ostr_), sample(sample_)
VerticalRowOutputStream::VerticalRowOutputStream(
WriteBuffer & ostr_, const Block & sample_, size_t max_rows_, const Context & context)
: ostr(ostr_), sample(sample_), max_rows(max_rows_)
{
size_t columns = sample.columns();
@ -60,6 +61,9 @@ void VerticalRowOutputStream::flush()
void VerticalRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
if (row_number > max_rows)
return;
writeString(names_and_paddings[field_number], ostr);
writeValue(column, type, row_num);
writeChar('\n', ostr);
@ -82,6 +86,10 @@ void VerticalRawRowOutputStream::writeValue(const IColumn & column, const IDataT
void VerticalRowOutputStream::writeRowStartDelimiter()
{
++row_number;
if (row_number > max_rows)
return;
writeCString("Row ", ostr);
writeIntText(row_number, ostr);
writeCString(":\n", ostr);
@ -95,9 +103,77 @@ void VerticalRowOutputStream::writeRowStartDelimiter()
void VerticalRowOutputStream::writeRowBetweenDelimiter()
{
if (row_number > max_rows)
return;
writeCString("\n", ostr);
field_number = 0;
}
void VerticalRowOutputStream::writeSuffix()
{
if (row_number > max_rows)
{
writeCString("Showed first ", ostr);
writeIntText(max_rows, ostr);
writeCString(".\n", ostr);
}
if (totals || extremes)
{
writeCString("\n", ostr);
writeTotals();
writeExtremes();
}
}
void VerticalRowOutputStream::writeSpecialRow(const Block & block, size_t row_num, const char * title)
{
writeCString("\n", ostr);
row_number = 0;
field_number = 0;
size_t columns = block.columns();
writeCString(title, ostr);
writeCString(":\n", ostr);
size_t width = strlen(title) + 1;
for (size_t i = 0; i < width; ++i)
writeCString("", ostr);
writeChar('\n', ostr);
for (size_t i = 0; i < columns; ++i)
{
if (i != 0)
writeFieldDelimiter();
auto & col = block.getByPosition(i);
writeField(*col.column.get(), *col.type.get(), row_num);
}
}
void VerticalRowOutputStream::writeTotals()
{
if (totals)
{
writeSpecialRow(totals, 0, "Totals");
}
}
void VerticalRowOutputStream::writeExtremes()
{
if (extremes)
{
writeSpecialRow(extremes, 0, "Min");
writeSpecialRow(extremes, 1, "Max");
}
}
}

View File

@ -18,24 +18,37 @@ class Context;
class VerticalRowOutputStream : public IRowOutputStream
{
public:
VerticalRowOutputStream(WriteBuffer & ostr_, const Block & sample_, const Context & context);
VerticalRowOutputStream(WriteBuffer & ostr_, const Block & sample_, size_t max_rows_, const Context & context);
void writeField(const IColumn & column, const IDataType & type, size_t row_num) override;
void writeRowStartDelimiter() override;
void writeRowBetweenDelimiter() override;
void writeSuffix() override;
void flush() override;
void setTotals(const Block & totals_) override { totals = totals_; }
void setExtremes(const Block & extremes_) override { extremes = extremes_; }
protected:
virtual void writeValue(const IColumn & column, const IDataType & type, size_t row_num) const;
void writeTotals();
void writeExtremes();
/// For totals and extremes.
void writeSpecialRow(const Block & block, size_t row_num, const char * title);
WriteBuffer & ostr;
const Block sample;
size_t max_rows;
size_t field_number = 0;
size_t row_number = 0;
using NamesAndPaddings = std::vector<String>;
NamesAndPaddings names_and_paddings;
Block totals;
Block extremes;
};
@ -44,8 +57,7 @@ protected:
class VerticalRawRowOutputStream final : public VerticalRowOutputStream
{
public:
VerticalRawRowOutputStream(WriteBuffer & ostr_, const Block & sample_, const Context & context)
: VerticalRowOutputStream(ostr_, sample_, context) {}
using VerticalRowOutputStream::VerticalRowOutputStream;
protected:
void writeValue(const IColumn & column, const IDataType & type, size_t row_num) const override;

View File

@ -31,24 +31,13 @@ public:
bool operator<(const Part & rhs) const
{
if (month != rhs.month)
return month < rhs.month;
if (left != rhs.left)
return left < rhs.left;
if (right != rhs.right)
return right < rhs.right;
if (level != rhs.level)
return level < rhs.level;
return false;
return std::tie(month, left, right, level) < std::tie(rhs.month, rhs.left, rhs.right, rhs.level);
}
/// Contains another part (obtained after combining another part with some other)
/// Contains another part (obtained after merging another part with some other)
bool contains(const Part & rhs) const
{
return month == rhs.month /// Parts for different months are not combined
return month == rhs.month /// Parts for different months are not merged
&& left_date <= rhs.left_date
&& right_date >= rhs.right_date
&& left <= rhs.left
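The rewritten comparator above relies on std::tie building a tuple of references that compares lexicographically. A standalone illustration with a simplified Part-like struct (PartKey is a made-up name):

```cpp
#include <tuple>
#include <cassert>

struct PartKey
{
    int month;
    int left;
    int right;
    int level;

    bool operator<(const PartKey & rhs) const
    {
        /// Compares month first, then left, then right, then level,
        /// exactly like the chain of ifs it replaces.
        return std::tie(month, left, right, level)
            < std::tie(rhs.month, rhs.left, rhs.right, rhs.level);
    }
};

int main()
{
    assert((PartKey{1, 0, 0, 0} < PartKey{2, 0, 0, 0}));   /// earlier month wins
    assert(!(PartKey{1, 5, 0, 0} < PartKey{1, 4, 9, 9}));  /// same month, larger left
    return 0;
}
```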

View File

@ -39,7 +39,7 @@ namespace DB
namespace ErrorCodes
{
extern const int INFINITE_LOOP;
extern const int BLOCKS_HAS_DIFFERENT_STRUCTURE;
extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE;
}
@ -198,7 +198,7 @@ static void appendBlock(const Block & from, Block & to)
if (col_from.getName() != col_to.getName())
throw Exception("Cannot append block to another: different type of columns at index " + toString(column_no)
+ ". Block 1: " + from.dumpStructure() + ". Block 2: " + to.dumpStructure(), ErrorCodes::BLOCKS_HAS_DIFFERENT_STRUCTURE);
+ ". Block 1: " + from.dumpStructure() + ". Block 2: " + to.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
col_to.insertRangeFrom(col_from, 0, rows);
}

View File

@ -19,6 +19,7 @@
#include <Storages/StorageStripeLog.h>
#include <Storages/StorageMemory.h>
#include <Storages/StorageBuffer.h>
#include <Storages/StorageTrivialBuffer.h>
#include <Storages/StorageNull.h>
#include <Storages/StorageMerge.h>
#include <Storages/StorageMergeTree.h>
@ -556,6 +557,54 @@ StoragePtr StorageFactory::get(
num_buckets, {min_time, min_rows, min_bytes}, {max_time, max_rows, max_bytes},
destination_database, destination_table);
}
else if (name == "TrivialBuffer")
{
/** TrivialBuffer(db, table, num_blocks_to_deduplicate, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes, path_in_zookeeper)
*
* db, table - the table into which data from the buffer is written.
* min_time, max_time, min_rows, max_rows, min_bytes, max_bytes - conditions for pushing data out of the buffer.
* num_blocks_to_deduplicate - how many of the most recently inserted blocks are remembered for deduplication.
*/
const std::string error_message_argument_number_mismatch = "Storage TrivialBuffer requires 10 parameters: "
" destination database, destination table, num_blocks_to_deduplicate, min_time, max_time, min_rows,"
" max_rows, min_bytes, max_bytes, path_in_zookeeper.";
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
if (args_func.size() != 1)
throw Exception(error_message_argument_number_mismatch,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
if (args.size() != 10)
throw Exception(error_message_argument_number_mismatch,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
String destination_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
String destination_table = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
size_t num_blocks_to_deduplicate = applyVisitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[2]).value);
time_t min_time = applyVisitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[3]).value);
time_t max_time = applyVisitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[4]).value);
size_t min_rows = applyVisitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[5]).value);
size_t max_rows = applyVisitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[6]).value);
size_t min_bytes = applyVisitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[7]).value);
size_t max_bytes = applyVisitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[8]).value);
String path_in_zk_for_deduplication = static_cast<const ASTLiteral &>(*args[9]).value.safeGet<String>();
return StorageTrivialBuffer::create(
table_name, columns,
materialized_columns, alias_columns, column_defaults,
context, num_blocks_to_deduplicate, path_in_zk_for_deduplication,
{min_time, min_rows, min_bytes}, {max_time, max_rows, max_bytes},
destination_database, destination_table);
}
else if (endsWith(name, "MergeTree"))
{
/** [Replicated][|Summing|Collapsing|Aggregating|Unsorted|Replacing|Graphite]MergeTree (2 * 7 combinations) engines

View File

@ -0,0 +1,561 @@
#include <Storages/StorageTrivialBuffer.h>
#include <Databases/IDatabase.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <Common/CurrentMetrics.h>
#include <common/logger_useful.h>
#include <Common/setThreadName.h>
#include <Interpreters/InterpreterAlterQuery.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTInsertQuery.h>
#include <Poco/Ext/ThreadNumber.h>
#include <ext/range.hpp>
namespace ProfileEvents
{
extern const Event StorageBufferFlush;
extern const Event StorageBufferErrorOnFlush;
extern const Event StorageBufferPassedAllMinThresholds;
extern const Event StorageBufferPassedTimeMaxThreshold;
extern const Event StorageBufferPassedRowsMaxThreshold;
extern const Event StorageBufferPassedBytesMaxThreshold;
}
namespace CurrentMetrics
{
extern const Metric StorageBufferRows;
extern const Metric StorageBufferBytes;
}
namespace DB
{
namespace ErrorCodes
{
extern const int INFINITE_LOOP;
extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE;
}
StoragePtr StorageTrivialBuffer::create(const std::string & name_, NamesAndTypesListPtr columns_,
const NamesAndTypesList & materialized_columns_,
const NamesAndTypesList & alias_columns_,
const ColumnDefaults & column_defaults_,
Context & context_, const size_t num_blocks_to_deduplicate_,
const String & path_in_zk_for_deduplication_,
const Thresholds & min_thresholds_, const Thresholds & max_thresholds_,
const String & destination_database_, const String & destination_table_)
{
return make_shared(
name_, columns_, materialized_columns_, alias_columns_, column_defaults_,
context_, num_blocks_to_deduplicate_, path_in_zk_for_deduplication_,
min_thresholds_, max_thresholds_,
destination_database_, destination_table_);
}
StorageTrivialBuffer::StorageTrivialBuffer(const std::string & name_, NamesAndTypesListPtr columns_,
const NamesAndTypesList & materialized_columns_,
const NamesAndTypesList & alias_columns_,
const ColumnDefaults & column_defaults_,
Context & context_, const size_t num_blocks_to_deduplicate_,
const String & path_in_zk_for_deduplication_,
const Thresholds & min_thresholds_, const Thresholds & max_thresholds_,
const String & destination_database_, const String & destination_table_)
: IStorage{materialized_columns_, alias_columns_, column_defaults_},
name(name_), columns(columns_), context(context_),
num_blocks_to_deduplicate(num_blocks_to_deduplicate_),
path_in_zk_for_deduplication(path_in_zk_for_deduplication_),
zookeeper(context.getZooKeeper()),
deduplication_controller(num_blocks_to_deduplicate, zookeeper, path_in_zk_for_deduplication),
min_thresholds(min_thresholds_), max_thresholds(max_thresholds_),
destination_database(destination_database_), destination_table(destination_table_),
no_destination(destination_database.empty() && destination_table.empty()),
log(&Logger::get("TrivialBuffer (" + name + ")")),
flush_thread(&StorageTrivialBuffer::flushThread, this)
{
zookeeper->createAncestors(path_in_zk_for_deduplication);
zookeeper->createOrUpdate(path_in_zk_for_deduplication, {}, zkutil::CreateMode::Persistent);
}
class TrivialBufferBlockInputStream : public IProfilingBlockInputStream
{
public:
TrivialBufferBlockInputStream(const Names & column_names_, BlocksList::iterator begin_,
BlocksList::iterator end_, StorageTrivialBuffer & buffer_)
: column_names(column_names_), buffer(buffer_),
begin(begin_), end(end_), it(begin_) {}
String getName() const { return "TrivialStorageBuffer"; }
String getID() const
{
std::stringstream res;
res << "TrivialStorageBuffer(" << &buffer;
for (const auto & name : column_names)
res << ", " << name;
res << ")";
return res.str();
}
protected:
Block readImpl()
{
Block res;
if (it == end)
return res;
for (const auto & column : column_names)
res.insert(it->getByName(column));
++it;
return res;
}
private:
Names column_names;
StorageTrivialBuffer & buffer;
BlocksList::iterator begin, end, it;
};
BlockInputStreams StorageTrivialBuffer::read(
const Names & column_names,
ASTPtr query,
const Context & context,
const Settings & settings,
QueryProcessingStage::Enum & processed_stage,
size_t max_block_size,
unsigned threads)
{
check(column_names);
processed_stage = QueryProcessingStage::FetchColumns;
BlockInputStreams streams;
if (!no_destination)
{
auto destination = context.getTable(destination_database, destination_table);
if (destination.get() == this)
throw Exception("Destination table is myself. Read will cause infinite loop.",
ErrorCodes::INFINITE_LOOP);
/** TrivialStorageBuffer does not support 'PREWHERE',
* so turn off corresponding optimization.
*/
Settings modified_settings = settings;
modified_settings.optimize_move_to_prewhere = false;
streams = destination->read(column_names, query, context, modified_settings,
processed_stage, max_block_size, threads);
}
BlockInputStreams streams_from_buffers;
std::lock_guard<std::mutex> lock(mutex);
size_t size = data.size();
if (threads > size)
threads = size;
for (size_t thread = 0; thread < threads; ++thread)
{
BlocksList::iterator begin = data.begin();
BlocksList::iterator end = data.begin();
std::advance(begin, thread * size / threads);
std::advance(end, (thread + 1) * size / threads);
streams_from_buffers.push_back(std::make_shared<TrivialBufferBlockInputStream>(column_names, begin, end, *this));
}
/** If the sources from the destination table are already processed beyond the initial stage,
* then we should wrap the sources from the buffer into the same stage of the processing pipeline.
*/
if (processed_stage > QueryProcessingStage::FetchColumns)
for (auto & stream : streams_from_buffers)
stream = InterpreterSelectQuery(query, context, processed_stage, 0, stream).execute().in;
streams.insert(streams.end(), streams_from_buffers.begin(), streams_from_buffers.end());
return streams;
}
template <typename DeduplicatioController>
void StorageTrivialBuffer::addBlock(const Block & block, DeduplicatioController & deduplication_controller)
{
SipHash hash;
block.updateHash(hash);
typename DeduplicatioController::HashType block_hash = DeduplicatioController::getHashFrom(hash);
std::lock_guard<std::mutex> lock(mutex);
if (!deduplication_controller.contains(block_hash))
{
deduplication_controller.insert(block_hash);
current_rows += block.rows();
current_bytes += block.bytes();
data.push_back(block);
CurrentMetrics::add(CurrentMetrics::StorageBufferRows, current_rows);
CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, current_bytes);
}
else
{
deduplication_controller.updateOnDeduplication(block_hash);
}
}
void StorageTrivialBuffer::flush(bool check_thresholds, bool is_called_from_background)
{
Block block_to_write;
time_t current_time = time(0);
time_t time_passed = 0;
if (data.empty())
return;
BlocksList::iterator flush_begin, flush_end;
{
std::unique_lock<std::mutex> lock(mutex, std::try_to_lock_t());
if (!lock.owns_lock())
{
// NOTE: is this the behavior we expect from 'flush' concurrency?
if (!is_called_from_background)
LOG_ERROR(log, "Method \'StorageTrivialBuffer::flush\' was called simultaneously from different threads");
return;
}
if (first_write_time)
time_passed = current_time - first_write_time;
if (check_thresholds)
{
if (!checkThresholdsImpl(current_rows, current_bytes, time_passed))
return;
}
else
{
if (current_rows == 0)
return;
}
flush_begin = data.begin();
flush_end = std::prev(data.end());
block_to_write = flush_begin->cloneEmpty();
}
/// Collecting BlockList into single block.
block_to_write.checkNumberOfRows();
flush_end = std::next(flush_end);
for (auto block = flush_begin; block != flush_end; ++block)
{
block->checkNumberOfRows();
for (size_t column_no = 0, columns = block->columns(); column_no < columns; ++column_no)
{
IColumn & col_to = *block_to_write.safeGetByPosition(column_no).column.get();
const IColumn & col_from = *block->getByName(col_to.getName()).column.get();
col_to.insertRangeFrom(col_from, 0, block->rows());
}
}
first_write_time = 0;
ProfileEvents::increment(ProfileEvents::StorageBufferFlush);
LOG_TRACE(log, "Flushing buffer with " << block_to_write.rows() << " rows, " << block_to_write.bytes() << " bytes, age " << time_passed << " seconds.");
if (no_destination)
return;
try
{
writeBlockToDestination(block_to_write, context.tryGetTable(destination_database, destination_table));
data.erase(flush_begin, flush_end);
CurrentMetrics::sub(CurrentMetrics::StorageBufferRows, block_to_write.rows());
CurrentMetrics::sub(CurrentMetrics::StorageBufferBytes, block_to_write.bytes());
}
catch (...)
{
ProfileEvents::increment(ProfileEvents::StorageBufferErrorOnFlush);
if (!first_write_time)
first_write_time = current_time;
/// We'll retry to write in a moment.
throw;
}
}
class TrivialBufferBlockOutputStream : public IBlockOutputStream
{
public:
TrivialBufferBlockOutputStream(StorageTrivialBuffer & buffer_) : buffer(buffer_) {}
void write(const Block & block) override
{
if (!block)
return;
size_t rows = block.rows();
size_t bytes = block.bytes();
if (!rows)
return;
StoragePtr destination;
if (!buffer.no_destination)
{
destination = buffer.context.tryGetTable(buffer.destination_database,
buffer.destination_table);
if (destination)
{
if (destination.get() == &buffer)
throw Exception("Destination table is myself. Write will "
"cause infinite loop.", ErrorCodes::INFINITE_LOOP);
try
{
destination->check(block, true);
}
catch (Exception & e)
{
e.addMessage("(when looking at destination table "
+ buffer.destination_database + "."
+ buffer.destination_table + ")");
throw;
}
}
}
time_t current_time = time(0);
if (buffer.checkThresholds(current_time, rows, bytes))
{
/** We'll try to flush the buffer if the thresholds are exceeded.
* This avoids unlimited memory consumption, because if we fail to write
* the data to the destination table, we'll throw an exception and
* the new block will not be appended to the buffer.
*/
buffer.flush(true);
}
if (!buffer.first_write_time)
buffer.first_write_time = current_time;
buffer.addBlock/*<StorageTrivialBuffer::ZookeeperDeduplicationController>*/(block, buffer.deduplication_controller);
}
private:
StorageTrivialBuffer & buffer;
};
BlockOutputStreamPtr StorageTrivialBuffer::write(const ASTPtr & query, const Settings & settings)
{
return std::make_shared<TrivialBufferBlockOutputStream>(*this);
}
void StorageTrivialBuffer::shutdown()
{
shutdown_event.set();
if (flush_thread.joinable())
flush_thread.join();
try
{
flush(false);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
/** NOTE If you do OPTIMIZE after insertion,
* it does not guarantee that all the data will be in the destination table at the time of
* the next SELECT right after OPTIMIZE.
*
* Because if a flush was already running,
* the call to flush inside OPTIMIZE will see an empty buffer and return quickly,
* while the already running flush is possibly not finished yet,
* so the next SELECT will observe missing data.
*
* This kind of race condition makes it very hard to implement proper tests.
*/
bool StorageTrivialBuffer::optimize(const String & partition, bool final, bool deduplicate, const Settings & settings)
{
if (!partition.empty())
throw Exception("Partition cannot be specified when optimizing table of type TrivialBuffer",
ErrorCodes::NOT_IMPLEMENTED);
if (final)
throw Exception("FINAL cannot be specified when optimizing table of type TrivialBuffer",
ErrorCodes::NOT_IMPLEMENTED);
if (deduplicate)
throw Exception("DEDUPLICATE cannot be specified when optimizing table of type TrivialBuffer",
ErrorCodes::NOT_IMPLEMENTED);
flush(false);
return true;
}
bool StorageTrivialBuffer::checkThresholds(
const time_t current_time, const size_t additional_rows, const size_t additional_bytes) const
{
time_t time_passed = 0;
if (first_write_time)
time_passed = current_time - first_write_time;
size_t rows = current_rows + additional_rows;
size_t bytes = current_bytes + additional_bytes;
return checkThresholdsImpl(rows, bytes, time_passed);
}
bool StorageTrivialBuffer::checkThresholdsImpl(const size_t rows, const size_t bytes,
const time_t time_passed) const
{
if (time_passed > min_thresholds.time && rows > min_thresholds.rows && bytes > min_thresholds.bytes)
{
ProfileEvents::increment(ProfileEvents::StorageBufferPassedAllMinThresholds);
return true;
}
if (time_passed > max_thresholds.time)
{
ProfileEvents::increment(ProfileEvents::StorageBufferPassedTimeMaxThreshold);
return true;
}
if (rows > max_thresholds.rows)
{
ProfileEvents::increment(ProfileEvents::StorageBufferPassedRowsMaxThreshold);
return true;
}
if (bytes > max_thresholds.bytes)
{
ProfileEvents::increment(ProfileEvents::StorageBufferPassedBytesMaxThreshold);
return true;
}
return false;
}
void StorageTrivialBuffer::flushThread()
{
setThreadName("BufferFlush");
do
{
try
{
flush(true);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
while (!shutdown_event.tryWait(1000));
}
void StorageTrivialBuffer::writeBlockToDestination(const Block & block, StoragePtr table)
{
if (no_destination || !block)
return;
if (!table)
{
LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table << " doesn't exist. Block of data is discarded.");
return;
}
auto insert = std::make_shared<ASTInsertQuery>();
insert->database = destination_database;
insert->table = destination_table;
/** Inserting only the columns that are the intersection of the buffer columns and the destination table columns.
* This helps to support some cases where the tables' structures differ.
*/
Block structure_of_destination_table = table->getSampleBlock();
Names columns_intersection;
columns_intersection.reserve(block.columns());
for (size_t i : ext::range(0, structure_of_destination_table.columns()))
{
auto dst_col = structure_of_destination_table.getByPosition(i);
if (block.has(dst_col.name))
{
if (block.getByName(dst_col.name).type->getName() != dst_col.type->getName())
{
LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table
<< " have different type of column " << dst_col.name << ". Block of data is discarded.");
return;
}
columns_intersection.push_back(dst_col.name);
}
}
if (columns_intersection.empty())
{
LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table << " have no common columns with block in buffer. Block of data is discarded.");
return;
}
if (columns_intersection.size() != block.columns())
LOG_WARNING(log, "Not all columns from block in buffer exist in destination table "
<< destination_database << "." << destination_table << ". Some columns are discarded.");
auto list_of_columns = std::make_shared<ASTExpressionList>();
insert->columns = list_of_columns;
list_of_columns->children.reserve(columns_intersection.size());
for (const String & column : columns_intersection)
list_of_columns->children.push_back(std::make_shared<ASTIdentifier>(StringRange(), column, ASTIdentifier::Column));
InterpreterInsertQuery interpreter{insert, context};
auto block_io = interpreter.execute();
block_io.out->writePrefix();
block_io.out->write(block);
block_io.out->writeSuffix();
}
void StorageTrivialBuffer::alter(
const AlterCommands & params, const String & database_name,
const String & table_name, const Context & context)
{
for (const auto & param : params)
if (param.type == AlterCommand::MODIFY_PRIMARY_KEY)
throw Exception("Storage engine " + getName() + " doesn't support primary key.",
ErrorCodes::NOT_IMPLEMENTED);
auto lock = lockStructureForAlter();
/// To avoid presence of blocks of different structure in the buffer.
flush(false);
params.apply(*columns, materialized_columns, alias_columns, column_defaults);
context.getDatabase(database_name)->alterTable(
context, table_name,
*columns, materialized_columns, alias_columns, column_defaults, {});
}
}
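Since addBlock is templated on its controller, any type that provides getHashFrom, contains, insert and updateOnDeduplication can be plugged in. A minimal in-memory variant, purely illustrative and not part of this commit, might look like:

```cpp
#include <Common/SipHash.h>
#include <cstdint>
#include <unordered_set>

/// Hypothetical controller with the same interface as
/// StorageTrivialBuffer::ZookeeperDeduplicationController, but kept in process memory.
class InMemoryDeduplicationController
{
public:
    using HashType = std::uint64_t;

    static HashType getHashFrom(SipHash & hash) { return hash.get64(); }

    bool contains(HashType block_hash) const { return seen.count(block_hash) != 0; }

    void insert(HashType block_hash) { seen.insert(block_hash); }

    /// Nothing needs refreshing for the in-memory case.
    void updateOnDeduplication(HashType /*block_hash*/) {}

private:
    std::unordered_set<HashType> seen;
};
```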

View File

@ -0,0 +1,234 @@
#pragma once
#include <mutex>
#include <thread>
#include <Common/SipHash.h>
#include <Core/NamesAndTypes.h>
#include <DataStreams/IBlockOutputStream.h>
#include <ext/shared_ptr_helper.hpp>
#include <Poco/Event.h>
#include <Storages/IStorage.h>
#include <zkutil/ZooKeeper.h>
namespace Poco { class Logger; }
namespace DB
{
class Context;
/** Stores incoming blocks until some thresholds are exceeded, then sends
* them to the table it looks into in the same order they came to the buffer.
*
* Thresholds are checked during insert and in a background thread (to control
* the time thresholds).
* If an inserted block exceeds the max limits, the buffer is flushed and then the incoming
* block is appended to the buffer.
*
* Destroying TrivialBuffer or shutting it down leads to flushing of the buffer.
* The data in the buffer is not replicated, logged or stored. After a hard reset of the
* server, the data is lost.
*/
class StorageTrivialBuffer : private ext::shared_ptr_helper<StorageTrivialBuffer>, public IStorage
{
friend class ext::shared_ptr_helper<StorageTrivialBuffer>;
friend class TrivialBufferBlockInputStream;
friend class TrivialBufferBlockOutputStream;
public:
struct Thresholds
{
time_t time; /// Seconds after insertion of first block.
size_t rows; /// Number of rows in buffer.
size_t bytes; /// Number of bytes (uncompressed) in buffer.
};
static StoragePtr create(const std::string & name_, NamesAndTypesListPtr columns_,
const NamesAndTypesList & materialized_columns_,
const NamesAndTypesList & alias_columns_,
const ColumnDefaults & column_defaults_,
Context & context_, size_t num_blocks_to_deduplicate_,
const String & path_in_zk_for_deduplication_,
const Thresholds & min_thresholds_, const Thresholds & max_thresholds_,
const String & destination_database_, const String & destination_table_);
std::string getName() const override { return "TrivialBuffer"; }
std::string getTableName() const override { return name; }
const NamesAndTypesList & getColumnsListImpl() const override { return *columns; }
BlockInputStreams read(
const Names & column_names,
ASTPtr query,
const Context & context,
const Settings & settings,
QueryProcessingStage::Enum & processed_stage,
size_t max_block_size = DEFAULT_BLOCK_SIZE,
unsigned threads = 1) override;
BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override;
bool checkThresholds(const time_t current_time, const size_t additional_rows = 0,
const size_t additional_bytes = 0) const;
bool checkThresholdsImpl(const size_t rows, const size_t bytes,
const time_t time_passed) const;
/// Writes all the blocks in buffer into the destination table.
void shutdown() override;
bool optimize(const String & partition, bool final, bool deduplicate, const Settings & settings) override;
void rename(const String & new_path_to_db, const String & new_database_name,
const String & new_table_name) override { name = new_table_name; }
bool supportsSampling() const override { return true; }
bool supportsPrewhere() const override { return true; }
bool supportsFinal() const override { return true; }
bool supportsIndexForIn() const override { return true; }
bool supportsParallelReplicas() const override { return true; }
/// Does not check or alter the structure of dependent table.
void alter(const AlterCommands & params, const String & database_name,
const String & table_name, const Context & context) override;
class ZookeeperDeduplicationController
{
public:
using HashType = String;
static HashType getHashFrom(SipHash & hash) { return std::to_string(hash.get64()); }
bool contains(HashType block_hash)
{
std::string res;
return zookeeper->tryGet(path_in_zk_for_deduplication + "/" + block_hash, res);
}
void insert(HashType block_hash)
{
std::vector<String> current_hashes;
if (zookeeper->tryGetChildren(path_in_zk_for_deduplication, current_hashes) == ZNONODE)
{
throw DB::Exception("No node \'" + path_in_zk_for_deduplication + "\' to control deduplication.");
}
// Cleanup zookeeper if needed.
if (current_hashes.size() >= 2*num_blocks_to_deduplicate)
{
using HashWithTimestamp = std::pair<String, time_t>;
std::vector<HashWithTimestamp> hashes_with_timestamps;
for (auto & hash : current_hashes)
{
zkutil::Stat stat;
String res;
String path_in_zk = path_in_zk_for_deduplication + "/" + hash;
if (!zookeeper->tryGet(path_in_zk, res, &stat))
{
throw DB::Exception("Seems like a race conditions between replics was found, path: " + path_in_zk);
}
hashes_with_timestamps.emplace_back(path_in_zk, stat.ctime);
}
// We do not need to sort all the hashes, only 'num_blocks_to_deduplicate' hashes
// with minimum creation time.
auto hashes_with_timestamps_end = hashes_with_timestamps.end();
if (hashes_with_timestamps.size() > num_blocks_to_deduplicate)
hashes_with_timestamps_end = hashes_with_timestamps.begin() + num_blocks_to_deduplicate;
std::partial_sort(hashes_with_timestamps.begin(), hashes_with_timestamps_end, hashes_with_timestamps.end(),
[] (const HashWithTimestamp & a, const HashWithTimestamp & b) -> bool
{
return a.second > b.second;
}
);
zkutil::Ops nodes_to_remove;
for (auto it = hashes_with_timestamps.begin(); it != hashes_with_timestamps_end; ++it)
{
nodes_to_remove.emplace_back(std::make_unique<zkutil::Op::Remove>(it->first, -1));
}
zookeeper->tryMulti(nodes_to_remove);
}
// Finally, inserting new node.
std::string path_for_insert = path_in_zk_for_deduplication + "/" + block_hash;
if (zookeeper->tryCreate(path_for_insert, {},
zkutil::CreateMode::Persistent) != ZOK)
{
throw DB::Exception("Cannot create node at path: " + path_for_insert);
}
}
void updateOnDeduplication(HashType block_hash)
{
zookeeper->createOrUpdate(path_in_zk_for_deduplication + "/" + block_hash,
{}, zkutil::CreateMode::Persistent);
}
ZookeeperDeduplicationController(size_t num_blocks_to_deduplicate_, zkutil::ZooKeeperPtr zookeeper_,
const std::string & path_in_zk_for_deduplication_)
: num_blocks_to_deduplicate(num_blocks_to_deduplicate_),
zookeeper(zookeeper_), path_in_zk_for_deduplication(path_in_zk_for_deduplication_)
{ }
private:
using DeduplicationBuffer = std::unordered_set<HashType>;
size_t num_blocks_to_deduplicate;
zkutil::ZooKeeperPtr zookeeper;
const std::string path_in_zk_for_deduplication;
};
private:
String name;
NamesAndTypesListPtr columns;
Context & context;
std::mutex mutex;
BlocksList data;
size_t current_rows = 0;
size_t current_bytes = 0;
time_t first_write_time = 0;
const size_t num_blocks_to_deduplicate;
const String path_in_zk_for_deduplication;
zkutil::ZooKeeperPtr zookeeper;
ZookeeperDeduplicationController deduplication_controller;
const Thresholds min_thresholds;
const Thresholds max_thresholds;
const String destination_database;
const String destination_table;
/// If set, forces cleaning out the buffer instead of writing to a destination table.
bool no_destination;
Poco::Logger * log;
Poco::Event shutdown_event;
/// Executes flushing by the time thresholds.
std::thread flush_thread;
StorageTrivialBuffer(const std::string & name_, NamesAndTypesListPtr columns_,
const NamesAndTypesList & materialized_columns_,
const NamesAndTypesList & alias_columns_,
const ColumnDefaults & column_defaults_,
Context & context_, size_t num_blocks_to_deduplicate_,
const String & path_in_zk_for_deduplication_,
const Thresholds & min_thresholds_, const Thresholds & max_thresholds_,
const String & destination_database_, const String & destination_table_);
template <typename DeduplicatioController>
void addBlock(const Block & block, DeduplicatioController & deduplication_controller);
/// Parameter 'table' is passed because it's sometimes pre-computed. It should
/// conform to 'destination_table'.
void writeBlockToDestination(const Block & block, StoragePtr table);
void flush(bool check_thresholds = true, bool is_called_from_background = false);
void flushThread();
};
}
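For reference, the flush rule encoded by checkThresholdsImpl combines the two Thresholds asymmetrically: all three minimums must be exceeded together, while any single maximum suffices on its own. A standalone restatement of that rule follows; shouldFlush is a made-up name, not part of the class.

```cpp
#include <cstddef>
#include <ctime>

struct Thresholds
{
    time_t time;   /// Seconds since the first insert into the buffer.
    size_t rows;   /// Number of rows in the buffer.
    size_t bytes;  /// Number of uncompressed bytes in the buffer.
};

/// Sketch of the rule used by StorageTrivialBuffer::checkThresholdsImpl:
/// flush when every min threshold is exceeded, or when any max threshold is exceeded.
inline bool shouldFlush(const Thresholds & min, const Thresholds & max,
                        size_t rows, size_t bytes, time_t time_passed)
{
    if (time_passed > min.time && rows > min.rows && bytes > min.bytes)
        return true;

    return time_passed > max.time || rows > max.rows || bytes > max.bytes;
}
```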

View File

@ -0,0 +1,35 @@
## ClickHouse integration tests
This directory contains tests that involve several ClickHouse instances, custom configs, ZooKeeper, etc.
### Running
Prerequisites:
* [docker](https://www.docker.com/community-edition#/download). Minimum required API version: 1.25, check with `docker version`.
* [docker-compose](https://docs.docker.com/compose/). To install: `sudo pip install docker-compose`
* [py.test](https://docs.pytest.org/) testing framework. To install: `sudo pip install pytest`
If you want to run the tests under a non-privileged user, you must add this user to the `docker` group: `sudo usermod -aG docker $USER`.
Run the tests with the `pytest` command. To select which tests to run, use: `pytest -k <test_name_pattern>`
By default, the tests are run with the system-wide client binary, server binary and base configs. To change that,
set the following environment variables:
* `CLICKHOUSE_TESTS_SERVER_BIN_PATH` to choose the server binary.
* `CLICKHOUSE_TESTS_CLIENT_BIN_PATH` to choose the client binary.
* `CLICKHOUSE_TESTS_BASE_CONFIG_DIR` to choose the directory from which base configs (`config.xml` and
`users.xml`) are taken.
### Adding new tests
To add a new test named `foo`, create a directory `test_foo` with an empty `__init__.py` and a file
named `test.py` that contains the tests. All functions with names starting with `test` will become test cases.
`helpers` directory contains utilities for:
* Launching a ClickHouse cluster with or without ZooKeeper in docker containers.
* Sending queries to launched instances.
* Introducing network failures such as severing network link between two instances.
To assert that two TSV files are equal, wrap them in the `TSV` class and use the regular `assert`
statement. Example: `assert TSV(result) == TSV(reference)`. If the assertion fails, `pytest`
will automagically detect the types of the variables and print only a small diff of the two files.

View File

@ -0,0 +1,5 @@
from helpers.test_tools import TSV
def pytest_assertrepr_compare(op, left, right):
if isinstance(left, TSV) and isinstance(right, TSV) and op == '==':
return ['TabSeparated values differ: '] + left.diff(right)

View File

@ -0,0 +1,44 @@
import errno
import subprocess as sp
from threading import Timer
class Client:
def __init__(self, host, port=9000, command='/usr/bin/clickhouse-client'):
self.host = host
self.port = port
self.command = [command, '--host', self.host, '--port', str(self.port)]
def query(self, sql, stdin=None, timeout=10.0):
if stdin is None:
command = self.command + ['--multiquery']
stdin = sql
else:
command = self.command + ['--query', sql]
process = sp.Popen(command, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE)
timer = None
if timeout is not None:
def kill_process():
try:
process.kill()
except OSError as e:
if e.errno != errno.ESRCH:
raise
timer = Timer(timeout, kill_process)
timer.start()
stdout, stderr = process.communicate(stdin)
if timer is not None:
if timer.finished.is_set():
raise Exception('Client timed out!')
else:
timer.cancel()
if process.returncode != 0:
raise Exception('Client failed! return code: {}, stderr: {}'.format(process.returncode, stderr))
return stdout

View File

@ -0,0 +1,229 @@
import os
import os.path as p
import re
import subprocess
import shutil
import socket
import time
import errno
import docker
from .client import Client
HELPERS_DIR = p.dirname(__file__)
class ClickHouseCluster:
"""ClickHouse cluster with several instances and (possibly) ZooKeeper.
Add instances with several calls to add_instance(), then start them with the start() call.
Directories for instances are created in the directory of base_path. After cluster is started,
these directories will contain logs, database files, docker-compose config, ClickHouse configs etc.
"""
def __init__(self, base_path, base_configs_dir=None, server_bin_path=None, client_bin_path=None):
self.base_dir = p.dirname(base_path)
self.base_configs_dir = base_configs_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR', '/etc/clickhouse-server/')
self.server_bin_path = server_bin_path or os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH', '/usr/bin/clickhouse')
self.client_bin_path = client_bin_path or os.environ.get('CLICKHOUSE_TESTS_CLIENT_BIN_PATH', '/usr/bin/clickhouse-client')
self.project_name = os.getlogin() + p.basename(self.base_dir)
# docker-compose removes everything non-alphanumeric from project names so we do it too.
self.project_name = re.sub(r'[^a-z0-9]', '', self.project_name.lower())
self.base_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name]
self.instances = {}
self.with_zookeeper = False
self.is_up = False
def add_instance(self, name, custom_configs, with_zookeeper=False):
"""Add an instance to the cluster.
name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.
custom_configs - a list of config files that will be added to config.d/ directory
with_zookeeper - if True, add ZooKeeper configuration to configs and ZooKeeper instances to the cluster.
"""
if self.is_up:
raise Exception('Can\'t add instance %s: cluster is already up!' % name)
if name in self.instances:
raise Exception('Can\'t add instance %s: there is already an instance with the same name!' % name)
instance = ClickHouseInstance(self.base_dir, name, custom_configs, with_zookeeper, self.base_configs_dir, self.server_bin_path)
self.instances[name] = instance
self.base_cmd.extend(['--file', instance.docker_compose_path])
if with_zookeeper and not self.with_zookeeper:
self.with_zookeeper = True
self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_zookeeper.yml')])
return instance
def start(self, destroy_dirs=True):
if self.is_up:
return
for instance in self.instances.values():
instance.create_dir(destroy_dir=destroy_dirs)
subprocess.check_call(self.base_cmd + ['up', '-d'])
docker_client = docker.from_env()
for instance in self.instances.values():
# According to how docker-compose names containers.
instance.docker_id = self.project_name + '_' + instance.name + '_1'
container = docker_client.containers.get(instance.docker_id)
instance.ip_address = container.attrs['NetworkSettings']['Networks'].values()[0]['IPAddress']
instance.wait_for_start()
instance.client = Client(instance.ip_address, command=self.client_bin_path)
self.is_up = True
def shutdown(self, kill=True):
if kill:
subprocess.check_call(self.base_cmd + ['kill'])
subprocess.check_call(self.base_cmd + ['down', '--volumes'])
self.is_up = False
for instance in self.instances.values():
instance.docker_id = None
instance.ip_address = None
instance.client = None
DOCKER_COMPOSE_TEMPLATE = '''
version: '2'
services:
{name}:
image: ubuntu:14.04
user: '{uid}'
volumes:
- {binary_path}:/usr/bin/clickhouse:ro
- {configs_dir}:/etc/clickhouse-server/
- {db_dir}:/var/lib/clickhouse/
- {logs_dir}:/var/log/clickhouse-server/
entrypoint:
- /usr/bin/clickhouse
- --config-file=/etc/clickhouse-server/config.xml
- --log-file=/var/log/clickhouse-server/clickhouse-server.log
depends_on: {depends_on}
'''
MACROS_CONFIG_TEMPLATE = '''
<yandex>
<macros>
<instance>{name}</instance>
</macros>
</yandex>
'''
class ClickHouseInstance:
def __init__(
self, base_path, name, custom_configs, with_zookeeper,
base_configs_dir, server_bin_path):
self.name = name
self.custom_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_configs]
self.with_zookeeper = with_zookeeper
self.base_configs_dir = base_configs_dir
self.server_bin_path = server_bin_path
self.path = p.abspath(p.join(base_path, name))
self.docker_compose_path = p.join(self.path, 'docker_compose.yml')
self.docker_id = None
self.ip_address = None
self.client = None
def query(self, sql, stdin=None):
return self.client.query(sql, stdin)
def wait_for_start(self, timeout=10.0):
deadline = time.time() + timeout
while True:
if time.time() >= deadline:
raise Exception("Timed out while waiting for instance {} with ip address {} to start".format(self.name, self.ip_address))
# Repeatedly poll the instance address until there is something that listens there.
# Usually it means that ClickHouse is ready to accept queries.
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect((self.ip_address, 9000))
return
except socket.error as e:
if e.errno == errno.ECONNREFUSED:
time.sleep(0.1)
else:
raise
finally:
sock.close()
def create_dir(self, destroy_dir=True):
"""Create the instance directory and all the needed files there."""
if destroy_dir:
self.destroy_dir()
elif p.exists(self.path):
return
os.mkdir(self.path)
configs_dir = p.join(self.path, 'configs')
os.mkdir(configs_dir)
shutil.copy(p.join(self.base_configs_dir, 'config.xml'), configs_dir)
shutil.copy(p.join(self.base_configs_dir, 'users.xml'), configs_dir)
config_d_dir = p.join(configs_dir, 'config.d')
os.mkdir(config_d_dir)
shutil.copy(p.join(HELPERS_DIR, 'common_instance_config.xml'), config_d_dir)
with open(p.join(config_d_dir, 'macros.xml'), 'w') as macros_config:
macros_config.write(MACROS_CONFIG_TEMPLATE.format(name=self.name))
if self.with_zookeeper:
shutil.copy(p.join(HELPERS_DIR, 'zookeeper_config.xml'), config_d_dir)
for path in self.custom_config_paths:
shutil.copy(path, config_d_dir)
db_dir = p.join(self.path, 'database')
os.mkdir(db_dir)
logs_dir = p.join(self.path, 'logs')
os.mkdir(logs_dir)
depends_on = '[]'
if self.with_zookeeper:
depends_on = '["zoo1", "zoo2", "zoo3"]'
with open(self.docker_compose_path, 'w') as docker_compose:
docker_compose.write(DOCKER_COMPOSE_TEMPLATE.format(
name=self.name,
uid=os.getuid(),
binary_path=self.server_bin_path,
configs_dir=configs_dir,
config_d_dir=config_d_dir,
db_dir=db_dir,
logs_dir=logs_dir,
depends_on=depends_on))
def destroy_dir(self):
if p.exists(self.path):
shutil.rmtree(self.path)

View File

@ -0,0 +1,4 @@
<yandex>
<timezone>Europe/Moscow</timezone>
<listen_host>::</listen_host>
</yandex>

View File

@ -0,0 +1,25 @@
version: '2'
services:
zoo1:
image: zookeeper
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_MY_ID: 1
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
zoo2:
image: zookeeper
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_MY_ID: 2
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
zoo3:
image: zookeeper
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_MY_ID: 3
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888

View File

@ -0,0 +1,4 @@
# Helper docker container to run iptables without sudo
FROM alpine
RUN apk add -U iproute2

View File

@ -0,0 +1,159 @@
import os.path as p
import subprocess
import time
import docker
from .cluster import HELPERS_DIR
class PartitionManager:
"""Allows introducing failures in the network between docker containers.
Can act as a context manager:
with PartitionManager() as pm:
pm.partition_instances(instance1, instance2)
...
# At exit all partitions are removed automatically.
"""
def __init__(self):
self._iptables_rules = []
def isolate_instance_from_zk(self, instance, action='DROP'):
self._check_instance(instance)
self._add_rule({'source': instance.ip_address, 'destination_port': 2181, 'action': action})
self._add_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action})
def partition_instances(self, left, right, action='DROP'):
self._check_instance(left)
self._check_instance(right)
self._add_rule({'source': left.ip_address, 'destination': right.ip_address, 'action': action})
self._add_rule({'source': right.ip_address, 'destination': left.ip_address, 'action': action})
def heal_all(self):
while self._iptables_rules:
rule = self._iptables_rules.pop()
_NetworkManager.get().delete_iptables_rule(**rule)
@staticmethod
def _check_instance(instance):
if instance.ip_address is None:
raise Exception('Instance ' + instance.name + ' is not launched!')
def _add_rule(self, rule):
_NetworkManager.get().add_iptables_rule(**rule)
self._iptables_rules.append(rule)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.heal_all()
class _NetworkManager:
"""Execute commands inside a container with access to network settings.
We need to call iptables to create partitions, but we want to avoid sudo.
The way to circumvent this restriction is to run iptables in a container with network=host.
The container is long-running and periodically renewed - this is an optimization to avoid the overhead
of container creation on each call.
Source of the idea: https://github.com/worstcase/blockade/blob/master/blockade/host.py
"""
# Singleton instance.
_instance = None
@classmethod
def get(cls, **kwargs):
if cls._instance is None:
cls._instance = cls(**kwargs)
return cls._instance
def add_iptables_rule(self, **kwargs):
cmd = ['iptables', '-A', 'DOCKER']
cmd.extend(self._iptables_cmd_suffix(**kwargs))
self._exec_run(cmd, privileged=True)
def delete_iptables_rule(self, **kwargs):
cmd = ['iptables', '-D', 'DOCKER']
cmd.extend(self._iptables_cmd_suffix(**kwargs))
self._exec_run(cmd, privileged=True)
@staticmethod
def _iptables_cmd_suffix(
source=None, destination=None,
source_port=None, destination_port=None,
action=None):
ret = []
if source is not None:
ret.extend(['-s', source])
if destination is not None:
ret.extend(['-d', destination])
if source_port is not None:
ret.extend(['-p', 'tcp', '--sport', str(source_port)])
if destination_port is not None:
ret.extend(['-p', 'tcp', '--dport', str(destination_port)])
if action is not None:
ret.extend(['-j', action])
return ret
def __init__(
self,
image_name='clickhouse_tests_helper',
image_path=p.join(HELPERS_DIR, 'helper_container'),
container_expire_timeout=50, container_exit_timeout=60):
self.container_expire_timeout = container_expire_timeout
self.container_exit_timeout = container_exit_timeout
self._docker_client = docker.from_env()
try:
self._image = self._docker_client.images.get(image_name)
except docker.errors.ImageNotFound:
self._image = self._docker_client.images.build(tag=image_name, path=image_path, rm=True)
self._container = None
self._ensure_container()
def _ensure_container(self):
if self._container is None or self._container_expire_time <= time.time():
if self._container is not None:
try:
self._container.remove(force=True)
except docker.errors.NotFound:
pass
# Work around https://github.com/docker/docker-py/issues/1477
host_config = self._docker_client.api.create_host_config(network_mode='host', auto_remove=True)
container_id = self._docker_client.api.create_container(
self._image.id, command=('sleep %s' % self.container_exit_timeout),
detach=True, host_config=host_config)['Id']
self._container_expire_time = time.time() + self.container_expire_timeout
self._docker_client.api.start(container_id)
self._container = self._docker_client.containers.get(container_id)
return self._container
def _exec_run(self, cmd, **kwargs):
container = self._ensure_container()
handle = self._docker_client.api.exec_create(container.id, cmd, **kwargs)
output = self._docker_client.api.exec_start(handle).decode('utf8')
exit_code = self._docker_client.api.exec_inspect(handle)['ExitCode']
if exit_code != 0:
print output
raise subprocess.CalledProcessError(exit_code, cmd)
return output

View File

@ -0,0 +1,13 @@
import difflib
class TSV:
"""Helper to get pretty diffs between expected and actual tab-separated value files"""
def __init__(self, contents):
self.lines = contents.readlines() if isinstance(contents, file) else contents.splitlines(True)
def __eq__(self, other):
return self.lines == other.lines
def diff(self, other):
return list(line.rstrip() for line in difflib.context_diff(self.lines, other.lines))[2:]

View File

@ -0,0 +1,17 @@
<yandex>
<zookeeper>
<node index="1">
<host>zoo1</host>
<port>2181</port>
</node>
<node index="2">
<host>zoo2</host>
<port>2181</port>
</node>
<node index="3">
<host>zoo3</host>
<port>2181</port>
</node>
<session_timeout_ms>1000</session_timeout_ms>
</zookeeper>
</yandex>

View File

@ -0,0 +1,2 @@
[pytest]
python_files = test.py

View File

@ -0,0 +1,17 @@
<yandex>
<remote_servers>
<test_cluster>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>replica1</host>
<port>9000</port>
</replica>
<replica>
<host>replica2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster>
</remote_servers>
</yandex>

View File

@ -0,0 +1,74 @@
import pytest
import time
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
cluster = ClickHouseCluster(__file__)
instance_with_dist_table = cluster.add_instance('instance_with_dist_table', ['configs/remote_servers.xml'])
replica1 = cluster.add_instance('replica1', [], with_zookeeper=True)
replica2 = cluster.add_instance('replica2', [], with_zookeeper=True)
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
for replica in (replica1, replica2):
replica.query(
"CREATE TABLE replicated (d Date, x UInt32) ENGINE = "
"ReplicatedMergeTree('/clickhouse/tables/replicated', '{instance}', d, d, 8192)")
instance_with_dist_table.query(
"CREATE TABLE distributed (d Date, x UInt32) ENGINE = "
"Distributed('test_cluster', 'default', 'replicated')")
yield cluster
finally:
cluster.shutdown()
def test(started_cluster):
with PartitionManager() as pm:
pm.partition_instances(replica1, replica2)
replica2.query("INSERT INTO replicated VALUES ('2017-05-08', 1)")
time.sleep(1) # accrue replica delay
assert replica1.query("SELECT count() FROM replicated").strip() == ''
assert replica2.query("SELECT count() FROM replicated").strip() == '1'
# With in_order balancing replica1 is chosen.
assert instance_with_dist_table.query(
"SELECT count() FROM distributed SETTINGS load_balancing='in_order'").strip() == ''
# When we set max_replica_delay, replica1 must be excluded.
assert instance_with_dist_table.query('''
SELECT count() FROM distributed SETTINGS
load_balancing='in_order',
max_replica_delay_for_distributed_queries=1
''').strip() == '1'
pm.isolate_instance_from_zk(replica2)
time.sleep(2) # allow pings to ZooKeeper to time out
# At this point all replicas are stale, but the query must still go to replica2, which is the least stale one.
assert instance_with_dist_table.query('''
SELECT count() FROM distributed SETTINGS
load_balancing='in_order',
max_replica_delay_for_distributed_queries=1
''').strip() == '1'
# If we forbid stale replicas, the query must fail.
with pytest.raises(Exception):
instance_with_dist_table.query('''
SELECT count() FROM distributed SETTINGS
load_balancing='in_order',
max_replica_delay_for_distributed_queries=1,
fallback_to_stale_replicas_for_distributed_queries=0
''')

View File

@ -0,0 +1,25 @@
<yandex>
<!-- retention scheme for GraphiteMergeTree engine-->
<graphite_rollup>
<path_column_name>metric</path_column_name>
<time_column_name>timestamp</time_column_name>
<value_column_name>value</value_column_name>
<version_column_name>updated</version_column_name>
<pattern>
<regexp>^one_min</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>7776000</age>
<precision>300</precision>
</retention>
<retention>
<age>31536000</age>
<precision>600</precision>
</retention>
</pattern>
</graphite_rollup>
</yandex>

View File

@ -0,0 +1,216 @@
import os.path as p
import time
import datetime
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance', ['configs/graphite_rollup.xml'])
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
instance.query('CREATE DATABASE test')
yield cluster
finally:
cluster.shutdown()
@pytest.fixture
def graphite_table(started_cluster):
instance.query('''
DROP TABLE IF EXISTS test.graphite;
CREATE TABLE test.graphite
(metric String, value Float64, timestamp UInt32, date Date, updated UInt32)
ENGINE = GraphiteMergeTree(date, (metric, timestamp), 8192, 'graphite_rollup');
''')
yield
instance.query('DROP TABLE test.graphite')
def test_rollup_versions(graphite_table):
timestamp = int(time.time())
rounded_timestamp = timestamp - timestamp % 60
date = datetime.date.today().isoformat()
q = instance.query
# Insert rows with timestamps relative to the current time so that the first retention clause is active.
# Two parts are created.
q('''
INSERT INTO test.graphite (metric, value, timestamp, date, updated) VALUES ('one_min.x1', 100, {timestamp}, '{date}', 1);
INSERT INTO test.graphite (metric, value, timestamp, date, updated) VALUES ('one_min.x1', 200, {timestamp}, '{date}', 2);
'''.format(timestamp=timestamp, date=date))
expected1 = '''\
one_min.x1 100 {timestamp} {date} 1
one_min.x1 200 {timestamp} {date} 2
'''.format(timestamp=timestamp, date=date)
assert TSV(q('SELECT * FROM test.graphite ORDER BY updated')) == TSV(expected1)
q('OPTIMIZE TABLE test.graphite')
# After rollup only the row with max version is retained.
expected2 = '''\
one_min.x1 200 {timestamp} {date} 2
'''.format(timestamp=rounded_timestamp, date=date)
assert TSV(q('SELECT * FROM test.graphite')) == TSV(expected2)
def test_rollup_aggregation(graphite_table):
q = instance.query
# This query essentially emulates what rollup does.
result1 = q('''
SELECT avg(v), max(upd)
FROM (SELECT timestamp,
argMax(value, (updated, number)) AS v,
max(updated) AS upd
FROM (SELECT 'one_min.x5' AS metric,
toFloat64(number) AS value,
toUInt32(1111111111 + intDiv(number, 3)) AS timestamp,
toDate('2017-02-02') AS date,
toUInt32(intDiv(number, 2)) AS updated,
number
FROM system.numbers LIMIT 1000000)
WHERE intDiv(timestamp, 600) * 600 = 1111444200
GROUP BY timestamp)
''')
expected1 = '''\
999634.9918367347 499999
'''
assert TSV(result1) == TSV(expected1)
# Timestamp 1111111111 is sufficiently far in the past that the last retention clause is active.
result2 = q('''
INSERT INTO test.graphite
SELECT 'one_min.x' AS metric,
toFloat64(number) AS value,
toUInt32(1111111111 + intDiv(number, 3)) AS timestamp,
toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated
FROM (SELECT * FROM system.numbers LIMIT 1000000)
WHERE intDiv(timestamp, 600) * 600 = 1111444200;
OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;
SELECT * FROM test.graphite;
''')
expected2 = '''\
one_min.x 999634.9918367347 1111444200 2017-02-02 499999
'''
assert TSV(result2) == TSV(expected2)
def test_rollup_aggregation_2(graphite_table):
result = instance.query('''
INSERT INTO test.graphite
SELECT 'one_min.x' AS metric,
toFloat64(number) AS value,
toUInt32(1111111111 - intDiv(number, 3)) AS timestamp,
toDate('2017-02-02') AS date,
toUInt32(100 - number) AS updated
FROM (SELECT * FROM system.numbers LIMIT 50);
OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;
SELECT * FROM test.graphite;
''')
expected = '''\
one_min.x 24 1111110600 2017-02-02 100
'''
assert TSV(result) == TSV(expected)
def test_multiple_paths_and_versions(graphite_table):
result = instance.query('''
INSERT INTO test.graphite
SELECT 'one_min.x' AS metric,
toFloat64(number) AS value,
toUInt32(1111111111 + intDiv(number, 3) * 600) AS timestamp,
toDate('2017-02-02') AS date,
toUInt32(100 - number) AS updated
FROM (SELECT * FROM system.numbers LIMIT 50);
OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;
SELECT * FROM test.graphite;
INSERT INTO test.graphite
SELECT 'one_min.y' AS metric,
toFloat64(number) AS value,
toUInt32(1111111111 + number * 600) AS timestamp,
toDate('2017-02-02') AS date,
toUInt32(100 - number) AS updated
FROM (SELECT * FROM system.numbers LIMIT 50);
OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;
SELECT * FROM test.graphite;
''')
with open(p.join(p.dirname(__file__), 'test_multiple_paths_and_versions.reference')) as reference:
assert TSV(result) == TSV(reference)
def test_multiple_output_blocks(graphite_table):
MERGED_BLOCK_SIZE = 8192
to_insert = ''
expected = ''
for i in range(2 * MERGED_BLOCK_SIZE + 1):
rolled_up_time = 1000000200 + 600 * i
for j in range(3):
cur_time = rolled_up_time + 100 * j
to_insert += 'one_min.x1 {} {} 2001-09-09 1\n'.format(10 * j, cur_time)
to_insert += 'one_min.x1 {} {} 2001-09-09 2\n'.format(10 * (j + 1), cur_time)
expected += 'one_min.x1 20 {} 2001-09-09 2\n'.format(rolled_up_time)
instance.query('INSERT INTO test.graphite FORMAT TSV', to_insert)
result = instance.query('''
OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL;
SELECT * FROM test.graphite;
''')
assert TSV(result) == TSV(expected)
def test_paths_not_matching_any_pattern(graphite_table):
to_insert = '''\
one_min.x1 100 1000000000 2001-09-09 1
zzzzzzzz 100 1000000001 2001-09-09 1
zzzzzzzz 200 1000000001 2001-09-09 2
'''
instance.query('INSERT INTO test.graphite FORMAT TSV', to_insert)
expected = '''\
one_min.x1 100 999999600 2001-09-09 1
zzzzzzzz 200 1000000001 2001-09-09 2
'''
result = instance.query('''
OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL;
SELECT * FROM test.graphite;
''')
assert TSV(result) == TSV(expected)

View File

@ -1,128 +0,0 @@
<yandex>
<!-- retention scheme for GraphiteMergeTree engine-->
<graphite_rollup>
<path_column_name>metric</path_column_name>
<time_column_name>timestamp</time_column_name>
<value_column_name>value</value_column_name>
<version_column_name>updated</version_column_name>
<pattern>
<regexp>^one_sec</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>1</precision>
</retention>
<retention>
<age>86400</age>
<precision>5</precision>
</retention>
<retention>
<age>604800</age>
<precision>60</precision>
</retention>
<retention>
<age>7776000</age>
<precision>300</precision>
</retention>
<retention>
<age>31536000</age>
<precision>600</precision>
</retention>
</pattern>
<pattern>
<regexp>^five_sec</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>604800</age>
<precision>60</precision>
</retention>
<retention>
<age>7776000</age>
<precision>300</precision>
</retention>
<retention>
<age>31536000</age>
<precision>600</precision>
</retention>
</pattern>
<pattern>
<regexp>^one_min</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>7776000</age>
<precision>300</precision>
</retention>
<retention>
<age>31536000</age>
<precision>600</precision>
</retention>
</pattern>
<pattern>
<regexp>^five_min</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>300</precision>
</retention>
<retention>
<age>31536000</age>
<precision>600</precision>
</retention>
</pattern>
<pattern>
<regexp>^ten_min</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>600</precision>
</retention>
</pattern>
<pattern>
<regexp>^half_hour</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>1800</precision>
</retention>
</pattern>
<pattern>
<regexp>^one_hour</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>3600</precision>
</retention>
</pattern>
<pattern>
<regexp>^one_day</regexp>
<function>avg</function>
<retention>
<age>0</age>
<precision>86400</precision>
</retention>
</pattern>
<default>
<function>avg</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>2592000</age>
<precision>300</precision>
</retention>
<retention>
<age>31536000</age>
<precision>600</precision>
</retention>
</default>
</graphite_rollup>
</yandex>

View File

@ -1,3 +0,0 @@
one_min.x1 100 1486048740 2017-02-02 1
one_min.x1 200 1486048740 2017-02-02 2
one_min.x1 200 1486048740 2017-02-02 2

View File

@ -1,13 +0,0 @@
DROP TABLE IF EXISTS test.graphite;
CREATE TABLE test.graphite (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) ENGINE = GraphiteMergeTree(date, (metric, timestamp), 8192, 'graphite_rollup');
INSERT into test.graphite (metric, value, timestamp, date, updated) VALUES ('one_min.x1', 100, toUInt32(toDateTime('2017-02-02 18:19:00')), toDate('2017-02-02'), 1);
INSERT into test.graphite (metric, value, timestamp, date, updated) VALUES ('one_min.x1', 200, toUInt32(toDateTime('2017-02-02 18:19:00')), toDate('2017-02-02'), 2);
SELECT * FROM test.graphite ORDER BY updated;
OPTIMIZE TABLE test.graphite;
SELECT * FROM test.graphite ORDER BY updated;
DROP TABLE test.graphite;

View File

@ -1,2 +0,0 @@
one_min.x 999636.4856809663 1111444200 2017-02-02 499999
999634.9918367347 499999

View File

@ -1,10 +0,0 @@
DROP TABLE IF EXISTS test.graphite;
CREATE TABLE test.graphite (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) ENGINE = GraphiteMergeTree(date, (metric, timestamp), 8192, 'graphite_rollup');
INSERT INTO test.graphite SELECT 'one_min.x' AS metric, toFloat64(number) AS value, toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated FROM (SELECT * FROM system.numbers LIMIT 1000000) WHERE intDiv(timestamp, 600) * 600 = 1111444200;
OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;
SELECT * FROM test.graphite;
SELECT avg(v), max(upd) FROM (SELECT timestamp, argMax(value, (updated, number)) AS v, max(updated) AS upd FROM (SELECT 'one_min.x5' AS metric, toFloat64(number) AS value, toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated, number FROM system.numbers LIMIT 1000000) WHERE intDiv(timestamp, 600) * 600 = 1111444200 GROUP BY timestamp);
DROP TABLE test.graphite;

View File

@ -1 +0,0 @@
one_min.x 24 1111110600 2017-02-02 100

View File

@ -1,8 +0,0 @@
DROP TABLE IF EXISTS test.graphite;
CREATE TABLE test.graphite (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) ENGINE = GraphiteMergeTree(date, (metric, timestamp), 8192, 'graphite_rollup');
INSERT INTO test.graphite SELECT 'one_min.x' AS metric, toFloat64(number) AS value, toUInt32(1111111111 - intDiv(number, 3)) AS timestamp, toDate('2017-02-02') AS date, toUInt32(100 - number) AS updated FROM (SELECT * FROM system.numbers LIMIT 50);
OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;
SELECT * FROM test.graphite;
DROP TABLE test.graphite;

View File

@ -1,12 +0,0 @@
DROP TABLE IF EXISTS test.graphite;
CREATE TABLE test.graphite (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) ENGINE = GraphiteMergeTree(date, (metric, timestamp), 8192, 'graphite_rollup');
INSERT INTO test.graphite SELECT 'one_min.x' AS metric, toFloat64(number) AS value, toUInt32(1111111111 + intDiv(number, 3) * 600) AS timestamp, toDate('2017-02-02') AS date, toUInt32(100 - number) AS updated FROM (SELECT * FROM system.numbers LIMIT 50);
OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;
SELECT * FROM test.graphite;
INSERT INTO test.graphite SELECT 'one_min.y' AS metric, toFloat64(number) AS value, toUInt32(1111111111 + number * 600) AS timestamp, toDate('2017-02-02') AS date, toUInt32(100 - number) AS updated FROM (SELECT * FROM system.numbers LIMIT 50);
OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;
SELECT * FROM test.graphite;
DROP TABLE test.graphite;

View File

@ -12,3 +12,5 @@
3 Goodbye
1 Hello
3 Goodbye
1 TSKV
4 TSKV Ok

View File

@ -17,6 +17,8 @@ echo -ne '1\tHello\n2\n3\tGoodbye\n\n' | clickhouse-client --input_format_allow_
echo -ne '1\tHello\n2\n3\tGoodbye\n\n' | clickhouse-client --input_format_allow_errors_num=1 --input_format_allow_errors_ratio=0.6 --query="INSERT INTO test.formats_test FORMAT TSV"
echo -ne 'x=1\ts=TSKV\nx=minus2\ts=trash1\ns=trash2\tx=-3\ns=TSKV Ok\tx=4\ns=trash3\tx=-5\n' | clickhouse-client --input_format_allow_errors_num=3 -q "INSERT INTO test.formats_test FORMAT TSKV"
clickhouse-client --query="SELECT * FROM test.formats_test"
clickhouse-client --query="DROP TABLE test.formats_test"

View File

@ -0,0 +1,298 @@
Row 1:
──────
k: 0
count(): 20
Row 2:
──────
k: 1
count(): 20
Row 3:
──────
k: 2
count(): 20
Row 4:
──────
k: 3
count(): 20
Row 5:
──────
k: 4
count(): 20
Totals:
───────
k: 0
count(): 100
Row 1:
──────
k: 0
count(): 20
Row 2:
──────
k: 1
count(): 20
Row 3:
──────
k: 2
count(): 20
Row 4:
──────
k: 3
count(): 20
Row 5:
──────
k: 4
count(): 20
Totals:
───────
k: 0
count(): 100
Min:
────
k: 0
count(): 20
Max:
────
k: 4
count(): 20
Row 1:
──────
k: 0
count(): 20
Row 2:
──────
k: 1
count(): 20
Row 3:
──────
k: 2
count(): 20
Row 4:
──────
k: 3
count(): 20
Row 5:
──────
k: 4
count(): 20
Totals:
───────
k: 0
count(): 100
Min:
────
k: 0
count(): 20
Max:
────
k: 4
count(): 20
Row 1:
──────
k: 0
count(): 20
Row 2:
──────
k: 1
count(): 20
Row 3:
──────
k: 2
count(): 20
Row 4:
──────
k: 3
count(): 20
Showed first 4.
Totals:
───────
k: 0
count(): 100
Min:
────
k: 0
count(): 20
Max:
────
k: 4
count(): 20
Row 1:
──────
k: 0
count(): 20
Row 2:
──────
k: 1
count(): 20
Row 3:
──────
k: 2
count(): 20
Row 4:
──────
k: 3
count(): 20
Showed first 4.
Totals:
───────
k: 0
count(): 100
Min:
────
k: 0
count(): 20
Max:
────
k: 4
count(): 20
Row 1:
──────
k: 0
count(): 20
Row 2:
──────
k: 1
count(): 20
Row 3:
──────
k: 2
count(): 20
Row 4:
──────
k: 3
count(): 20
Showed first 4.
Totals:
───────
k: 0
count(): 100
Min:
────
k: 0
count(): 20
Max:
────
k: 4
count(): 20
Row 1:
──────
k: 0
count(): 20
Row 2:
──────
k: 1
count(): 20
Row 3:
──────
k: 2
count(): 20
Row 4:
──────
k: 3
count(): 20
Row 5:
──────
k: 4
count(): 20
Totals:
───────
k: 0
count(): 100
Min:
────
k: 0
count(): 20
Max:
────
k: 4
count(): 20
Row 1:
──────
k: 0
count(): 20
Row 2:
──────
k: 1
count(): 20
Row 3:
──────
k: 2
count(): 20
Row 4:
──────
k: 3
count(): 20
Showed first 4.
Totals:
───────
k: 0
count(): 100
Min:
────
k: 0
count(): 20
Max:
────
k: 4
count(): 20

View File

@ -0,0 +1,22 @@
SELECT k, count() FROM (SELECT number % 5 AS k FROM system.numbers LIMIT 100) GROUP BY k WITH TOTALS ORDER BY k FORMAT Vertical;
SET extremes = 1;
SELECT k, count() FROM (SELECT number % 5 AS k FROM system.numbers LIMIT 100) GROUP BY k WITH TOTALS ORDER BY k FORMAT Vertical;
SET output_format_pretty_max_rows = 5;
SELECT k, count() FROM (SELECT number % 5 AS k FROM system.numbers LIMIT 100) GROUP BY k WITH TOTALS ORDER BY k FORMAT Vertical;
SET output_format_pretty_max_rows = 4;
SELECT k, count() FROM (SELECT number % 5 AS k FROM system.numbers LIMIT 100) GROUP BY k WITH TOTALS ORDER BY k FORMAT Vertical;
SELECT k, count() FROM (SELECT number % 5 AS k FROM system.numbers LIMIT 100) GROUP BY k WITH TOTALS ORDER BY k FORMAT VerticalRaw;
SET extremes = 1;
SELECT k, count() FROM (SELECT number % 5 AS k FROM system.numbers LIMIT 100) GROUP BY k WITH TOTALS ORDER BY k FORMAT VerticalRaw;
SET output_format_pretty_max_rows = 5;
SELECT k, count() FROM (SELECT number % 5 AS k FROM system.numbers LIMIT 100) GROUP BY k WITH TOTALS ORDER BY k FORMAT VerticalRaw;
SET output_format_pretty_max_rows = 4;
SELECT k, count() FROM (SELECT number % 5 AS k FROM system.numbers LIMIT 100) GROUP BY k WITH TOTALS ORDER BY k FORMAT VerticalRaw;

View File

@ -0,0 +1 @@
[] [] (0,'','0000-00-00 00:00:00','0000-00-00')

View File

@ -0,0 +1 @@
SELECT defaultValueOfArgumentType([1, 2, 3]), defaultValueOfArgumentType([[[1]]]), defaultValueOfArgumentType((1, 'Hello', now(), today()));

View File

@ -0,0 +1,4 @@
0 0
1 1
0 false
1 true

View File

@ -0,0 +1,6 @@
DROP TABLE IF EXISTS test.json;
CREATE TABLE test.json (x UInt8, title String) ENGINE = Memory;
INSERT INTO test.json FORMAT JSONEachRow {"x": true, "title": "true"}, {"x": false, "title": "false"}, {"x": 0, "title": "0"}, {"x": 1, "title": "1"}
SELECT * FROM test.json ORDER BY title;
DROP TABLE IF EXISTS test.json;

View File

@ -2,6 +2,7 @@
==========
The settings described in this section can be specified in the following ways:
* Globally.
In the server configuration files.

View File

@ -42,6 +42,35 @@ fallback_to_stale_replicas_for_distributed_queries
Default: 1 (enabled).
input_format_allow_errors_num
-----------------------------
Sets the maximum number of errors that are tolerated when reading from text formats (CSV, TSV, etc.).
Default: 0.
Always use it together with ``input_format_allow_errors_ratio``. To skip errors, both settings must be greater than 0.
If an error occurs while reading a row but the error counter is still less than ``input_format_allow_errors_num``, ClickHouse ignores the row and moves on to the next one.
If ``input_format_allow_errors_num`` is exceeded, ClickHouse throws an exception.
input_format_allow_errors_ratio
-------------------------------
Sets the maximum fraction of errors that are tolerated when reading from text formats (CSV, TSV, etc.).
The fraction of errors is specified as a floating-point number between 0 and 1.
Default: 0.
Always use it together with ``input_format_allow_errors_num``. To skip errors, both settings must be greater than 0.
If an error occurs while reading a row but the current fraction of errors is still less than ``input_format_allow_errors_ratio``, ClickHouse ignores the row and moves on to the next one.
If ``input_format_allow_errors_ratio`` is exceeded, ClickHouse throws an exception.
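A hedged illustration of the two settings working together, written in the style of the shell test earlier in this commit (the table test.formats_test and the malformed second row are borrowed from it; the exact limits are arbitrary, and the snippet assumes Python 2 like the integration-test helpers above):
# Sketch: pipe slightly broken TSV into clickhouse-client, tolerating bad rows.
import subprocess
data = '1\tHello\n2\n3\tGoodbye\n'  # the second row is missing its String column
proc = subprocess.Popen(
    ['clickhouse-client',
     '--input_format_allow_errors_num=2',
     '--input_format_allow_errors_ratio=0.5',
     '--query=INSERT INTO test.formats_test FORMAT TSV'],
    stdin=subprocess.PIPE)
proc.communicate(data)
# The malformed row is skipped instead of aborting the INSERT, because both limits
# are greater than 0 and neither is exceeded; with the defaults (0 and 0) the same
# input raises an exception and the client exits with an error.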
max_block_size
--------------
In ClickHouse, data is processed in blocks (sets of column parts). The internal processing loops for a single block are quite efficient, but there is noticeable overhead per block. ``max_block_size`` is a recommendation for the size of the blocks (in number of rows) to load from a table. The block size should not be too small, so that the per-block overhead stays unnoticeable, and not too large, so that a query with a LIMIT that completes after the first block still finishes quickly, so that too much memory is not consumed when extracting a large number of columns in multiple threads, and so that at least some cache locality is preserved.
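An illustrative query (not from the docs) that makes the block granularity visible. blockSize() returns the number of rows in the block currently being processed, and the SETTINGS clause is the same per-query mechanism used in the failover test above; `instance` stands for any test-cluster instance and is an assumption here.
r = instance.query('SELECT max(blockSize()) FROM system.numbers LIMIT 100000 SETTINGS max_block_size = 4096')
# With this setting the source produces blocks of at most 4096 rows, so the reported maximum does not exceed 4096.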