From f8f67a788e4c8dc41b59d6f22631172fb4a431df Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 25 Jun 2020 19:55:45 +0300 Subject: [PATCH 001/298] allow to turn on fsync on inserts and merges --- src/Disks/DiskLocal.cpp | 17 +++++++- src/Disks/DiskLocal.h | 2 + src/Disks/DiskMemory.cpp | 5 +++ src/Disks/DiskMemory.h | 2 + src/Disks/IDisk.h | 3 ++ src/Disks/S3/DiskS3.cpp | 5 +++ src/Disks/S3/DiskS3.h | 2 + .../MergeTree/IMergeTreeDataPartWriter.cpp | 11 +++-- .../MergeTree/IMergeTreeDataPartWriter.h | 6 +-- .../MergeTree/MergeTreeDataMergerMutator.cpp | 41 +++++++++++++------ .../MergeTree/MergeTreeDataMergerMutator.h | 6 ++- .../MergeTreeDataPartWriterCompact.cpp | 4 +- .../MergeTreeDataPartWriterCompact.h | 2 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 4 +- .../MergeTree/MergeTreeDataPartWriterWide.h | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 7 +++- src/Storages/MergeTree/MergeTreeSettings.h | 3 ++ .../MergeTree/MergedBlockOutputStream.cpp | 7 ++-- .../MergeTree/MergedBlockOutputStream.h | 1 + .../MergedColumnOnlyOutputStream.cpp | 9 ++-- .../MergeTree/MergedColumnOnlyOutputStream.h | 2 +- 21 files changed, 108 insertions(+), 33 deletions(-) diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 68f5ee99a7a..c67bac7ffe2 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -8,7 +8,7 @@ #include #include - +#include namespace DB { @@ -19,6 +19,9 @@ namespace ErrorCodes extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int PATH_ACCESS_DENIED; extern const int INCORRECT_DISK_INDEX; + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_FSYNC; } std::mutex DiskLocal::reservation_mutex; @@ -188,6 +191,18 @@ void DiskLocal::moveDirectory(const String & from_path, const String & to_path) Poco::File(disk_path + from_path).renameTo(disk_path + to_path); } +void DiskLocal::sync(const String & path) const +{ + String full_path = disk_path + path; + int fd = ::open(full_path.c_str(), O_RDONLY); + if (-1 == fd) + throwFromErrnoWithPath("Cannot open file " + full_path, full_path, + errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + + if (-1 == fsync(fd)) + throwFromErrnoWithPath("Cannot fsync " + full_path, full_path, ErrorCodes::CANNOT_FSYNC); +} + DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) { return std::make_unique(disk_path, path); diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 61a3994b655..743ba2ceb10 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -59,6 +59,8 @@ public: void moveDirectory(const String & from_path, const String & to_path) override; + void sync(const String & path) const override; + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; void createFile(const String & path) override; diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index 3e43d159ba5..5b3350e40f7 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -261,6 +261,11 @@ void DiskMemory::moveDirectory(const String & /*from_path*/, const String & /*to throw Exception("Method moveDirectory is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); } +void DiskMemory::sync(const String & /*path*/) const +{ + throw Exception("Method sync is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); +} + DiskDirectoryIteratorPtr DiskMemory::iterateDirectory(const String & path) { std::lock_guard lock(mutex); diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index b0c1d30c61d..8a3ddf05aa7 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -52,6 +52,8 @@ public: void moveDirectory(const String & from_path, const String & to_path) override; + void sync(const String & path) const override; + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; void createFile(const String & path) override; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 011c75402f4..8de77a560d1 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -105,6 +105,9 @@ public: /// Move directory from `from_path` to `to_path`. virtual void moveDirectory(const String & from_path, const String & to_path) = 0; + /// Do fsync on directory. + virtual void sync(const String & path) const = 0; + /// Return iterator to the contents of the specified directory. 
virtual DiskDirectoryIteratorPtr iterateDirectory(const String & path) = 0; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 71b5991f770..292f6567df4 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -466,6 +466,11 @@ void DiskS3::clearDirectory(const String & path) remove(it->path()); } +void DiskS3::sync(const String & /*path*/) const +{ + throw Exception("Method sync is not implemented for S3 disks", ErrorCodes::NOT_IMPLEMENTED); +} + void DiskS3::moveFile(const String & from_path, const String & to_path) { if (exists(to_path)) diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 5fa8e8358a6..09132367ae8 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -58,6 +58,8 @@ public: void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } + void sync(const String & path) const override; + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; void moveFile(const String & from_path, const String & to_path) override; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 73ac7fc0064..03ae2166504 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -308,7 +308,8 @@ void IMergeTreeDataPartWriter::calculateAndSerializeSkipIndices( skip_index_data_mark = skip_index_current_data_mark; } -void IMergeTreeDataPartWriter::finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) +void IMergeTreeDataPartWriter::finishPrimaryIndexSerialization( + MergeTreeData::DataPart::Checksums & checksums, bool sync) { bool write_final_mark = (with_final_mark && data_written); if (write_final_mark && compute_granularity) @@ -330,12 +331,14 @@ void IMergeTreeDataPartWriter::finishPrimaryIndexSerialization(MergeTreeData::Da index_stream->next(); checksums.files["primary.idx"].file_size = index_stream->count(); checksums.files["primary.idx"].file_hash = index_stream->getHash(); - index_stream = nullptr; + if (sync) + index_stream->sync(); + index_stream.reset(); } } void IMergeTreeDataPartWriter::finishSkipIndicesSerialization( - MergeTreeData::DataPart::Checksums & checksums) + MergeTreeData::DataPart::Checksums & checksums, bool sync) { for (size_t i = 0; i < skip_indices.size(); ++i) { @@ -348,6 +351,8 @@ void IMergeTreeDataPartWriter::finishSkipIndicesSerialization( { stream->finalize(); stream->addToChecksums(checksums); + if (sync) + stream->sync(); } skip_indices_streams.clear(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 2f849e7c895..eebdb880a66 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -102,9 +102,9 @@ public: void initSkipIndices(); void initPrimaryIndex(); - virtual void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) = 0; - void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums); - void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksums); + virtual void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) = 0; + void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums, bool sync); + void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksum, bool sync); protected: /// Count index_granularity for block and 
store in `index_granularity` diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 00830dd78c2..ccd7f234925 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -576,6 +576,13 @@ public: } }; +static bool needSyncPart(const size_t input_rows, size_t input_bytes, const MergeTreeSettings & settings) +{ + return ((settings.min_rows_to_sync_after_merge && input_rows >= settings.min_rows_to_sync_after_merge) + || (settings.min_compressed_bytes_to_sync_after_merge && input_bytes >= settings.min_compressed_bytes_to_sync_after_merge)); +} + + /// parts should be sorted. MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( const FutureMergedMutatedPart & future_part, @@ -648,6 +655,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor } size_t sum_input_rows_upper_bound = merge_entry->total_rows_count; + size_t sum_compressed_bytes_upper_bound = merge_entry->total_size_bytes_compressed; MergeAlgorithm merge_alg = chooseMergeAlgorithm(parts, sum_input_rows_upper_bound, gathering_columns, deduplicate, need_remove_expired_values); LOG_DEBUG(log, "Selected MergeAlgorithm: {}", ((merge_alg == MergeAlgorithm::Vertical) ? "Vertical" : "Horizontal")); @@ -803,7 +811,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor if (need_remove_expired_values) merged_stream = std::make_shared(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, force_ttl); - if (metadata_snapshot->hasSecondaryIndices()) { const auto & indices = metadata_snapshot->getSecondaryIndices(); @@ -863,6 +870,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor if (need_remove_expired_values && ttl_merges_blocker.isCancelled()) throw Exception("Cancelled merging parts with expired TTL", ErrorCodes::ABORTED); + bool need_sync = needSyncPart(sum_input_rows_upper_bound, sum_compressed_bytes_upper_bound, *data_settings); MergeTreeData::DataPart::Checksums checksums_gathered_columns; /// Gather ordinary columns @@ -942,7 +950,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor throw Exception("Cancelled merging parts", ErrorCodes::ABORTED); column_gathered_stream.readSuffix(); - auto changed_checksums = column_to.writeSuffixAndGetChecksums(new_data_part, checksums_gathered_columns); + auto changed_checksums = column_to.writeSuffixAndGetChecksums(new_data_part, checksums_gathered_columns, need_sync); checksums_gathered_columns.add(std::move(changed_checksums)); if (rows_written != column_elems_written) @@ -979,9 +987,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor } if (merge_alg != MergeAlgorithm::Vertical) - to.writeSuffixAndFinalizePart(new_data_part); + to.writeSuffixAndFinalizePart(new_data_part, need_sync); else - to.writeSuffixAndFinalizePart(new_data_part, &storage_columns, &checksums_gathered_columns); + to.writeSuffixAndFinalizePart(new_data_part, need_sync, &storage_columns, &checksums_gathered_columns); + + if (need_sync) + new_data_part->volume->getDisk()->sync(new_part_tmp_path); return new_data_part; } @@ -1081,7 +1092,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor /// Don't change granularity type while mutating subset of columns auto mrk_extension = source_part->index_granularity_info.is_adaptive ? 
getAdaptiveMrkExtension(new_data_part->getType()) : getNonAdaptiveMrkExtension();
-
+    bool need_sync = needSyncPart(source_part->rows_count, source_part->getBytesOnDisk(), *data_settings);
     bool need_remove_expired_values = false;
 
     if (in && shouldExecuteTTL(metadata_snapshot, in->getHeader().getNamesAndTypesList().getNames(), commands_for_part))
@@ -1099,7 +1110,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
             time_of_mutation,
             compression_codec,
             merge_entry,
-            need_remove_expired_values);
+            need_remove_expired_values,
+            need_sync);
 
         /// No finalization required, because mutateAllPartColumns uses
         /// MergedBlockOutputStream, which finalizes all part fields itself.
@@ -1154,7 +1166,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
             time_of_mutation,
             compression_codec,
             merge_entry,
-            need_remove_expired_values);
+            need_remove_expired_values,
+            need_sync);
     }
 
     for (const auto & [rename_from, rename_to] : files_to_rename)
@@ -1174,6 +1187,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
         finalizeMutatedPart(source_part, new_data_part, need_remove_expired_values);
     }
 
+    if (need_sync)
+        new_data_part->volume->getDisk()->sync(new_part_tmp_path);
+
     return new_data_part;
 }
 
@@ -1599,7 +1615,8 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns(
     time_t time_of_mutation,
     const CompressionCodecPtr & compression_codec,
     MergeListEntry & merge_entry,
-    bool need_remove_expired_values) const
+    bool need_remove_expired_values,
+    bool need_sync) const
 {
     if (mutating_stream == nullptr)
         throw Exception("Cannot mutate part columns with uninitialized mutations stream. It's a bug", ErrorCodes::LOGICAL_ERROR);
@@ -1637,7 +1654,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns(
     new_data_part->minmax_idx = std::move(minmax_idx);
 
     mutating_stream->readSuffix();
-    out.writeSuffixAndFinalizePart(new_data_part);
+    out.writeSuffixAndFinalizePart(new_data_part, need_sync);
 }
 
 void MergeTreeDataMergerMutator::mutateSomePartColumns(
@@ -1650,7 +1667,8 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns(
     time_t time_of_mutation,
     const CompressionCodecPtr & compression_codec,
     MergeListEntry & merge_entry,
-    bool need_remove_expired_values) const
+    bool need_remove_expired_values,
+    bool need_sync) const
 {
     if (mutating_stream == nullptr)
         throw Exception("Cannot mutate part columns with uninitialized mutations stream. 
It's a bug", ErrorCodes::LOGICAL_ERROR); @@ -1684,10 +1702,9 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( mutating_stream->readSuffix(); - auto changed_checksums = out.writeSuffixAndGetChecksums(new_data_part, new_data_part->checksums); + auto changed_checksums = out.writeSuffixAndGetChecksums(new_data_part, new_data_part->checksums, need_sync); new_data_part->checksums.add(std::move(changed_checksums)); - } void MergeTreeDataMergerMutator::finalizeMutatedPart( diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 121cc770d51..23b8d7f681b 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -189,7 +189,8 @@ private: time_t time_of_mutation, const CompressionCodecPtr & codec, MergeListEntry & merge_entry, - bool need_remove_expired_values) const; + bool need_remove_expired_values, + bool need_sync) const; /// Mutate some columns of source part with mutation_stream void mutateSomePartColumns( @@ -202,7 +203,8 @@ private: time_t time_of_mutation, const CompressionCodecPtr & codec, MergeListEntry & merge_entry, - bool need_remove_expired_values) const; + bool need_remove_expired_values, + bool need_sync) const; /// Initialize and write to disk new part fields like checksums, columns, /// etc. diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index f7a3ad75cf5..79800204a3b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -141,7 +141,7 @@ void MergeTreeDataPartWriterCompact::writeColumnSingleGranule(const ColumnWithTy column.type->serializeBinaryBulkStateSuffix(serialize_settings, state); } -void MergeTreeDataPartWriterCompact::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterCompact::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) { if (columns_buffer.size() != 0) writeBlock(header.cloneWithColumns(columns_buffer.releaseColumns())); @@ -158,6 +158,8 @@ void MergeTreeDataPartWriterCompact::finishDataSerialization(IMergeTreeDataPart: stream->finalize(); stream->addToChecksums(checksums); + if (sync) + stream->sync(); stream.reset(); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 8183c038c4c..dde7deafc58 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -20,7 +20,7 @@ public: void write(const Block & block, const IColumn::Permutation * permutation, const Block & primary_key_block, const Block & skip_indexes_block) override; - void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) override; + void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) override; protected: void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index e71ea4d4b94..fcd0249b10c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -264,7 +264,7 @@ void MergeTreeDataPartWriterWide::writeColumn( next_index_offset = current_row - total_rows; } -void 
MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) { const auto & global_settings = storage.global_context.getSettingsRef(); IDataType::SerializeBinaryBulkSettings serialize_settings; @@ -295,6 +295,8 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Ch { stream.second->finalize(); stream.second->addToChecksums(checksums); + if (sync) + stream.second->sync(); } column_streams.clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index f5a9d17f63c..4286065a3ca 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -23,7 +23,7 @@ public: void write(const Block & block, const IColumn::Permutation * permutation, const Block & primary_key_block, const Block & skip_indexes_block) override; - void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) override; + void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) override; IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenOffsetColumns & offset_columns); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 099480aca2f..cf8860b7f04 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -303,10 +303,15 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa const auto & index_factory = MergeTreeIndexFactory::instance(); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); + bool sync_on_insert = data.getSettings()->sync_after_insert; out.writePrefix(); out.writeWithPermutation(block, perm_ptr); - out.writeSuffixAndFinalizePart(new_data_part); + out.writeSuffixAndFinalizePart(new_data_part, sync_on_insert); + + /// Sync part directory. + if (sync_on_insert) + new_data_part->volume->getDisk()->sync(full_path); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes()); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index f2d2a7cc3d4..da2c9ee49ee 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,9 @@ struct MergeTreeSettings : public SettingsCollection M(SettingSeconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ M(SettingSeconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \ M(SettingSeconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. How many seconds before failing to acquire table locks.", 0) \ + M(SettingUInt64, min_rows_to_sync_after_merge, 0, "Minimal number of rows to do fsync for part after merge (0 - disabled)", 0) \ + M(SettingUInt64, min_compressed_bytes_to_sync_after_merge, 0, "Minimal number of compressed bytes to do fsync for part after merge (0 - disabled)", 0) \ + M(SettingBool, sync_after_insert, false, "Do fsync for every inserted part. 
Significantly decreases performance of inserts, not recommended to use with wide parts.", 0) \
     \
     /** Inserts settings. */ \
     M(SettingUInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \
diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
index e776a35f21f..5e15084aa7d 100644
--- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
@@ -85,6 +85,7 @@ void MergedBlockOutputStream::writeSuffix()
 
 void MergedBlockOutputStream::writeSuffixAndFinalizePart(
         MergeTreeData::MutableDataPartPtr & new_part,
+        bool sync,
         const NamesAndTypesList * total_columns_list,
         MergeTreeData::DataPart::Checksums * additional_column_checksums)
 {
@@ -95,9 +96,9 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
         checksums = std::move(*additional_column_checksums);
 
     /// Finish columns serialization.
-    writer->finishDataSerialization(checksums);
-    writer->finishPrimaryIndexSerialization(checksums);
-    writer->finishSkipIndicesSerialization(checksums);
+    writer->finishDataSerialization(checksums, sync);
+    writer->finishPrimaryIndexSerialization(checksums, sync);
+    writer->finishSkipIndicesSerialization(checksums, sync);
 
     NamesAndTypesList part_columns;
     if (!total_columns_list)
diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h
index 1a8bf9da822..002ef78a9af 100644
--- a/src/Storages/MergeTree/MergedBlockOutputStream.h
+++ b/src/Storages/MergeTree/MergedBlockOutputStream.h
@@ -46,6 +46,7 @@ public:
     /// Finalize writing part and fill inner structures
     void writeSuffixAndFinalizePart(
         MergeTreeData::MutableDataPartPtr & new_part,
+        bool sync = false,
         const NamesAndTypesList * total_columns_list = nullptr,
         MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);
 
diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp
index 1faadd0d720..e767fb3f155 100644
--- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp
+++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp
@@ -63,12 +63,15 @@ void MergedColumnOnlyOutputStream::writeSuffix()
 }
 
 MergeTreeData::DataPart::Checksums
-MergedColumnOnlyOutputStream::writeSuffixAndGetChecksums(MergeTreeData::MutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & all_checksums)
+MergedColumnOnlyOutputStream::writeSuffixAndGetChecksums(
+    MergeTreeData::MutableDataPartPtr & new_part,
+    MergeTreeData::DataPart::Checksums & all_checksums,
+    bool sync)
 {
     /// Finish columns serialization.
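    /// When `sync` is set, each stream is fsynced after its checksums are recorded
    /// (see the `if (sync) stream->sync()` calls in the writers earlier in this patch).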
    MergeTreeData::DataPart::Checksums checksums;
-    writer->finishDataSerialization(checksums);
-    writer->finishSkipIndicesSerialization(checksums);
+    writer->finishDataSerialization(checksums, sync);
+    writer->finishSkipIndicesSerialization(checksums, sync);
 
     auto columns = new_part->getColumns();
diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h
index 902138ced9d..507a964ede0 100644
--- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h
+++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h
@@ -27,7 +27,7 @@ public:
     void write(const Block & block) override;
     void writeSuffix() override;
     MergeTreeData::DataPart::Checksums
-    writeSuffixAndGetChecksums(MergeTreeData::MutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & all_checksums);
+    writeSuffixAndGetChecksums(MergeTreeData::MutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & all_checksums, bool sync = false);
 
 private:
     Block header;

From b2aa565a37076230af2ceaa32ee21fa351d37931 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Sat, 27 Jun 2020 00:55:48 +0300
Subject: [PATCH 002/298] allow to turn on fsync on inserts, merges and fetches

---
 src/Common/FileSyncGuard.h                    | 41 +++++++++++++++++++
 src/Disks/DiskLocal.cpp                       | 35 ++++++++++------
 src/Disks/DiskLocal.h                         |  6 ++-
 src/Disks/DiskMemory.cpp                      | 20 ++++++---
 src/Disks/DiskMemory.h                        |  6 ++-
 src/Disks/IDisk.h                             | 12 ++++--
 src/Disks/S3/DiskS3.cpp                       | 21 +++++++---
 src/Disks/S3/DiskS3.h                         |  6 ++-
 src/Storages/MergeTree/DataPartsExchange.cpp  | 16 +++++++-
 src/Storages/MergeTree/DataPartsExchange.h    |  1 +
 src/Storages/MergeTree/IMergeTreeDataPart.cpp |  5 +++
 .../MergeTree/MergeTreeDataMergerMutator.cpp  | 15 ++++---
 .../MergeTree/MergeTreeDataWriter.cpp         | 12 +++---
 src/Storages/MergeTree/MergeTreeSettings.h    |  2 +
 14 files changed, 154 insertions(+), 44 deletions(-)
 create mode 100644 src/Common/FileSyncGuard.h

diff --git a/src/Common/FileSyncGuard.h b/src/Common/FileSyncGuard.h
new file mode 100644
index 00000000000..5ec9b1d0c98
--- /dev/null
+++ b/src/Common/FileSyncGuard.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <Disks/IDisk.h>
+
+namespace DB
+{
+
+/// Helper class that receives a file descriptor and does fsync for it in the destructor.
+/// It's used to keep the descriptor open while doing some operations with it, and to do fsync at the end.
+/// Guarantees of the sequence 'close-reopen-fsync' may depend on the kernel version.
+/// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496
+class FileSyncGuard
+{
+public:
+    /// NOTE: If you have already opened a descriptor, it's preferred to use
+    /// this constructor instead of the constructor with a path.
+    FileSyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {}
+
+    FileSyncGuard(const DiskPtr & disk_, const String & path)
+        : disk(disk_), fd(disk_->open(path, O_RDONLY)) {}
+
+    ~FileSyncGuard()
+    {
+        try
+        {
+            disk->sync(fd);
+            disk->close(fd);
+        }
+        catch (...)
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + +private: + DiskPtr disk; + int fd = -1; +}; + +} + diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index c67bac7ffe2..f85b69baf5e 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int CANNOT_OPEN_FILE; extern const int CANNOT_FSYNC; + extern const int CANNOT_CLOSE_FILE; } std::mutex DiskLocal::reservation_mutex; @@ -191,18 +192,6 @@ void DiskLocal::moveDirectory(const String & from_path, const String & to_path) Poco::File(disk_path + from_path).renameTo(disk_path + to_path); } -void DiskLocal::sync(const String & path) const -{ - String full_path = disk_path + path; - int fd = ::open(full_path.c_str(), O_RDONLY); - if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + full_path, full_path, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); - - if (-1 == fsync(fd)) - throwFromErrnoWithPath("Cannot fsync " + full_path, full_path, ErrorCodes::CANNOT_FSYNC); -} - DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) { return std::make_unique(disk_path, path); @@ -299,6 +288,28 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr & to IDisk::copy(from_path, to_disk, to_path); /// Copy files through buffers. } +int DiskLocal::open(const String & path, mode_t mode) const +{ + String full_path = disk_path + path; + int fd = ::open(full_path.c_str(), mode); + if (-1 == fd) + throwFromErrnoWithPath("Cannot open file " + full_path, full_path, + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + return fd; +} + +void DiskLocal::close(int fd) const +{ + if (-1 == ::close(fd)) + throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); +} + +void DiskLocal::sync(int fd) const +{ + if (-1 == ::fsync(fd)) + throw Exception("Cannot fsync", ErrorCodes::CANNOT_FSYNC); +} + DiskPtr DiskLocalReservation::getDisk(size_t i) const { if (i != 0) diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 743ba2ceb10..d70ac06c18b 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -59,8 +59,6 @@ public: void moveDirectory(const String & from_path, const String & to_path) override; - void sync(const String & path) const override; - DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; void createFile(const String & path) override; @@ -101,6 +99,10 @@ public: void createHardLink(const String & src_path, const String & dst_path) override; + int open(const String & path, mode_t mode) const override; + void close(int fd) const override; + void sync(int fd) const override; + private: bool tryReserve(UInt64 bytes); diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index 5b3350e40f7..a7f1df04e1f 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -261,11 +261,6 @@ void DiskMemory::moveDirectory(const String & /*from_path*/, const String & /*to throw Exception("Method moveDirectory is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); } -void DiskMemory::sync(const String & /*path*/) const -{ - throw Exception("Method sync is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); -} - DiskDirectoryIteratorPtr DiskMemory::iterateDirectory(const String & path) { std::lock_guard lock(mutex); @@ -413,6 +408,21 @@ void DiskMemory::setReadOnly(const String &) throw Exception("Method setReadOnly is not implemented for memory disks", 
ErrorCodes::NOT_IMPLEMENTED); } +int DiskMemory::open(const String & /*path*/, mode_t /*mode*/) const +{ + throw Exception("Method open is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); +} + +void DiskMemory::close(int /*fd*/) const +{ + throw Exception("Method close is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); +} + +void DiskMemory::sync(int /*fd*/) const +{ + throw Exception("Method sync is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); +} + using DiskMemoryPtr = std::shared_ptr; diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index 8a3ddf05aa7..7f111fe5e7d 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -52,8 +52,6 @@ public: void moveDirectory(const String & from_path, const String & to_path) override; - void sync(const String & path) const override; - DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; void createFile(const String & path) override; @@ -92,6 +90,10 @@ public: void createHardLink(const String & src_path, const String & dst_path) override; + int open(const String & path, mode_t mode) const override; + void close(int fd) const override; + void sync(int fd) const override; + private: void createDirectoriesImpl(const String & path); void replaceFileImpl(const String & from_path, const String & to_path); diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 8de77a560d1..bc5c9381643 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -105,9 +105,6 @@ public: /// Move directory from `from_path` to `to_path`. virtual void moveDirectory(const String & from_path, const String & to_path) = 0; - /// Do fsync on directory. - virtual void sync(const String & path) const = 0; - /// Return iterator to the contents of the specified directory. virtual DiskDirectoryIteratorPtr iterateDirectory(const String & path) = 0; @@ -174,6 +171,15 @@ public: /// Create hardlink from `src_path` to `dst_path`. 
virtual void createHardLink(const String & src_path, const String & dst_path) = 0; + + /// Wrapper for POSIX open + virtual int open(const String & path, mode_t mode) const = 0; + + /// Wrapper for POSIX close + virtual void close(int fd) const = 0; + + /// Wrapper for POSIX fsync + virtual void sync(int fd) const = 0; }; using DiskPtr = std::shared_ptr; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 292f6567df4..3e0fb05ed6f 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -29,6 +29,7 @@ namespace ErrorCodes extern const int CANNOT_SEEK_THROUGH_FILE; extern const int UNKNOWN_FORMAT; extern const int INCORRECT_DISK_INDEX; + extern const int NOT_IMPLEMENTED; } namespace @@ -466,11 +467,6 @@ void DiskS3::clearDirectory(const String & path) remove(it->path()); } -void DiskS3::sync(const String & /*path*/) const -{ - throw Exception("Method sync is not implemented for S3 disks", ErrorCodes::NOT_IMPLEMENTED); -} - void DiskS3::moveFile(const String & from_path, const String & to_path) { if (exists(to_path)) @@ -669,6 +665,21 @@ void DiskS3::setReadOnly(const String & path) Poco::File(metadata_path + path).setReadOnly(true); } +int DiskS3::open(const String & /*path*/, mode_t /*mode*/) const +{ + throw Exception("Method open is not implemented for S3 disks", ErrorCodes::NOT_IMPLEMENTED); +} + +void DiskS3::close(int /*fd*/) const +{ + throw Exception("Method close is not implemented for S3 disks", ErrorCodes::NOT_IMPLEMENTED); +} + +void DiskS3::sync(int /*fd*/) const +{ + throw Exception("Method sync is not implemented for S3 disks", ErrorCodes::NOT_IMPLEMENTED); +} + DiskS3Reservation::~DiskS3Reservation() { try diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 09132367ae8..cbf161da561 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -58,8 +58,6 @@ public: void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } - void sync(const String & path) const override; - DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; void moveFile(const String & from_path, const String & to_path) override; @@ -98,6 +96,10 @@ public: void setReadOnly(const String & path) override; + int open(const String & path, mode_t mode) const override; + void close(int fd) const override; + void sync(int fd) const override; + private: bool tryReserve(UInt64 bytes); diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6796e630ff2..e7bb8206cd9 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -224,9 +225,9 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); ReservationPtr reservation; + size_t sum_files_size = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { - size_t sum_files_size; readBinary(sum_files_size, in); if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { @@ -247,7 +248,10 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( reservation = data.makeEmptyReservationOnLargestDisk(); } - return downloadPart(part_name, replica_path, to_detached, tmp_prefix_, std::move(reservation), in); + bool sync = (data_settings->min_compressed_bytes_to_sync_after_fetch + && sum_files_size >= 
data_settings->min_compressed_bytes_to_sync_after_fetch); + + return downloadPart(part_name, replica_path, to_detached, tmp_prefix_, sync, std::move(reservation), in); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( @@ -255,6 +259,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( const String & replica_path, bool to_detached, const String & tmp_prefix_, + bool sync, const ReservationPtr reservation, PooledReadWriteBufferFromHTTP & in) { @@ -276,6 +281,10 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( disk->createDirectories(part_download_path); + std::optional sync_guard; + if (data.getSettings()->sync_part_directory) + sync_guard.emplace(disk, part_download_path); + MergeTreeData::DataPart::Checksums checksums; for (size_t i = 0; i < files; ++i) { @@ -316,6 +325,9 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( if (file_name != "checksums.txt" && file_name != "columns.txt") checksums.addFile(file_name, file_size, expected_hash); + + if (sync) + hashing_out.sync(); } assertEOF(in); diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index c1aff6bdba5..e983d6deecf 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -71,6 +71,7 @@ private: const String & replica_path, bool to_detached, const String & tmp_prefix_, + bool sync, const ReservationPtr reservation, PooledReadWriteBufferFromHTTP & in); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 61dfeed6b7c..ab9bb7879aa 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -664,6 +665,10 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_ String from = getFullRelativePath(); String to = storage.relative_data_path + new_relative_path + "/"; + std::optional sync_guard; + if (storage.getSettings()->sync_part_directory) + sync_guard.emplace(volume->getDisk(), to); + if (!volume->getDisk()->exists(from)) throw Exception("Part directory " + fullPath(volume->getDisk(), from) + " doesn't exist. Most likely it is logical error.", ErrorCodes::FILE_DOESNT_EXIST); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index ccd7f234925..9c8c4e3c1d5 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -695,6 +696,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor gathering_column_names.clear(); } + std::optional sync_guard; + if (data.getSettings()->sync_part_directory) + sync_guard.emplace(disk, new_part_tmp_path); + /** Read from all parts, merge and write into a new one. * In passing, we calculate expression for sorting. 
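      * Note: when the sync_part_directory setting is enabled, the FileSyncGuard created above keeps
      * a descriptor of the new part directory open for the whole merge and fsyncs it in its
      * destructor (see src/Common/FileSyncGuard.h).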
*/ @@ -991,9 +996,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor else to.writeSuffixAndFinalizePart(new_data_part, need_sync, &storage_columns, &checksums_gathered_columns); - if (need_sync) - new_data_part->volume->getDisk()->sync(new_part_tmp_path); - return new_data_part; } @@ -1089,6 +1091,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor disk->createDirectories(new_part_tmp_path); + std::optional sync_guard; + if (data.getSettings()->sync_part_directory) + sync_guard.emplace(disk, new_part_tmp_path); + /// Don't change granularity type while mutating subset of columns auto mrk_extension = source_part->index_granularity_info.is_adaptive ? getAdaptiveMrkExtension(new_data_part->getType()) : getNonAdaptiveMrkExtension(); @@ -1187,9 +1193,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor finalizeMutatedPart(source_part, new_data_part, need_remove_expired_values); } - if (need_sync) - new_data_part->volume->getDisk()->sync(new_part_tmp_path); - return new_data_part; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index cf8860b7f04..01f0b086cea 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace ProfileEvents @@ -259,7 +260,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa new_data_part->volume->getDisk()->removeRecursive(full_path); } - new_data_part->volume->getDisk()->createDirectories(full_path); + const auto disk = new_data_part->volume->getDisk(); + disk->createDirectories(full_path); + + std::optional sync_guard; + if (data.getSettings()->sync_part_directory) + sync_guard.emplace(disk, full_path); /// If we need to calculate some columns to sort. if (metadata_snapshot->hasSortingKey() || metadata_snapshot->hasSecondaryIndices()) @@ -309,10 +315,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa out.writeWithPermutation(block, perm_ptr); out.writeSuffixAndFinalizePart(new_data_part, sync_on_insert); - /// Sync part directory. - if (sync_on_insert) - new_data_part->volume->getDisk()->sync(full_path); - ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes()); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterCompressedBytes, new_data_part->getBytesOnDisk()); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index da2c9ee49ee..c559ce2804e 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -45,7 +45,9 @@ struct MergeTreeSettings : public SettingsCollection M(SettingSeconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. 
How many seconds before failing to acquire table locks.", 0) \ M(SettingUInt64, min_rows_to_sync_after_merge, 0, "Minimal number of rows to do fsync for part after merge (0 - disabled)", 0) \ M(SettingUInt64, min_compressed_bytes_to_sync_after_merge, 0, "Minimal number of compressed bytes to do fsync for part after merge (0 - disabled)", 0) \ + M(SettingUInt64, min_compressed_bytes_to_sync_after_fetch, 0, "Minimal number of compressed bytes to do fsync for part after fetch (0 - disabled)", 0) \ M(SettingBool, sync_after_insert, false, "Do fsync for every inserted part. Significantly decreases performance of inserts, not recommended to use with wide parts.", 0) \ + M(SettingBool, sync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ \ /** Inserts settings. */ \ M(SettingUInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ From ca346ea13cd0ad0f02a29d59302584c826b52298 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 3 Jul 2020 02:41:37 +0300 Subject: [PATCH 003/298] rename fsync-related settings --- src/Storages/MergeTree/DataPartsExchange.cpp | 6 +++--- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 8 ++++---- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeSettings.h | 10 +++++----- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index e7bb8206cd9..72b478cf587 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -248,8 +248,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( reservation = data.makeEmptyReservationOnLargestDisk(); } - bool sync = (data_settings->min_compressed_bytes_to_sync_after_fetch - && sum_files_size >= data_settings->min_compressed_bytes_to_sync_after_fetch); + bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch + && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch); return downloadPart(part_name, replica_path, to_detached, tmp_prefix_, sync, std::move(reservation), in); } @@ -282,7 +282,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( disk->createDirectories(part_download_path); std::optional sync_guard; - if (data.getSettings()->sync_part_directory) + if (data.getSettings()->fsync_part_directory) sync_guard.emplace(disk, part_download_path); MergeTreeData::DataPart::Checksums checksums; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index ab9bb7879aa..3d8cb6b7fc5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -666,7 +666,7 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_ String to = storage.relative_data_path + new_relative_path + "/"; std::optional sync_guard; - if (storage.getSettings()->sync_part_directory) + if (storage.getSettings()->fsync_part_directory) sync_guard.emplace(volume->getDisk(), to); if (!volume->getDisk()->exists(from)) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 9c8c4e3c1d5..c39d1981031 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ 
b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -579,8 +579,8 @@ public: static bool needSyncPart(const size_t input_rows, size_t input_bytes, const MergeTreeSettings & settings) { - return ((settings.min_rows_to_sync_after_merge && input_rows >= settings.min_rows_to_sync_after_merge) - || (settings.min_compressed_bytes_to_sync_after_merge && input_bytes >= settings.min_compressed_bytes_to_sync_after_merge)); + return ((settings.min_rows_to_fsync_after_merge && input_rows >= settings.min_rows_to_fsync_after_merge) + || (settings.min_compressed_bytes_to_fsync_after_merge && input_bytes >= settings.min_compressed_bytes_to_fsync_after_merge)); } @@ -697,7 +697,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor } std::optional sync_guard; - if (data.getSettings()->sync_part_directory) + if (data.getSettings()->fsync_part_directory) sync_guard.emplace(disk, new_part_tmp_path); /** Read from all parts, merge and write into a new one. @@ -1092,7 +1092,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor disk->createDirectories(new_part_tmp_path); std::optional sync_guard; - if (data.getSettings()->sync_part_directory) + if (data.getSettings()->fsync_part_directory) sync_guard.emplace(disk, new_part_tmp_path); /// Don't change granularity type while mutating subset of columns diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 01f0b086cea..23210fc604e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -264,7 +264,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa disk->createDirectories(full_path); std::optional sync_guard; - if (data.getSettings()->sync_part_directory) + if (data.getSettings()->fsync_part_directory) sync_guard.emplace(disk, full_path); /// If we need to calculate some columns to sort. @@ -309,7 +309,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa const auto & index_factory = MergeTreeIndexFactory::instance(); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); - bool sync_on_insert = data.getSettings()->sync_after_insert; + bool sync_on_insert = data.getSettings()->fsync_after_insert; out.writePrefix(); out.writeWithPermutation(block, perm_ptr); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c559ce2804e..eeee0c4b1e1 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,11 +43,11 @@ struct MergeTreeSettings : public SettingsCollection M(SettingSeconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ M(SettingSeconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \ M(SettingSeconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. 
How many seconds before failing to acquire table locks.", 0) \ - M(SettingUInt64, min_rows_to_sync_after_merge, 0, "Minimal number of rows to do fsync for part after merge (0 - disabled)", 0) \ - M(SettingUInt64, min_compressed_bytes_to_sync_after_merge, 0, "Minimal number of compressed bytes to do fsync for part after merge (0 - disabled)", 0) \ - M(SettingUInt64, min_compressed_bytes_to_sync_after_fetch, 0, "Minimal number of compressed bytes to do fsync for part after fetch (0 - disabled)", 0) \ - M(SettingBool, sync_after_insert, false, "Do fsync for every inserted part. Significantly decreases performance of inserts, not recommended to use with wide parts.", 0) \ - M(SettingBool, sync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ + M(SettingUInt64, min_rows_to_fsync_after_merge, 0, "Minimal number of rows to do fsync for part after merge (0 - disabled)", 0) \ + M(SettingUInt64, min_compressed_bytes_to_fsync_after_merge, 0, "Minimal number of compressed bytes to do fsync for part after merge (0 - disabled)", 0) \ + M(SettingUInt64, min_compressed_bytes_to_fsync_after_fetch, 0, "Minimal number of compressed bytes to do fsync for part after fetch (0 - disabled)", 0) \ + M(SettingBool, fsync_after_insert, false, "Do fsync for every inserted part. Significantly decreases performance of inserts, not recommended to use with wide parts.", 0) \ + M(SettingBool, fsync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ \ /** Inserts settings. */ \ M(SettingUInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ From d6434f61dc7b08072862d4d10ea6fa9da781b6c1 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 7 Jul 2020 03:15:02 +0300 Subject: [PATCH 004/298] support codecs in compact parts --- .../MergeTreeDataPartWriterCompact.cpp | 80 +++++++++++++------ .../MergeTreeDataPartWriterCompact.h | 21 ++++- .../01375_compact_parts_codecs.reference | 3 + .../01375_compact_parts_codecs.sql | 31 +++++++ 4 files changed, 109 insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/01375_compact_parts_codecs.reference create mode 100644 tests/queries/0_stateless/01375_compact_parts_codecs.sql diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index f7a3ad75cf5..696197aa4ca 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -14,19 +14,23 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const MergeTreeIndexGranularity & index_granularity_) : IMergeTreeDataPartWriter( data_part_, columns_list_, metadata_snapshot_, indices_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) + , plain_file(data_part->volume->getDisk()->writeFile( + part_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, + settings.max_compress_block_size, + WriteMode::Rewrite, + settings.estimated_size, + settings.aio_threshold)) + , plain_hashing(*plain_file) + , marks_file(data_part->volume->getDisk()->writeFile( + part_path + MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, + 4096, + WriteMode::Rewrite)) + , marks(*marks_file) { - using DataPart = MergeTreeDataPartCompact; - String data_file_name = DataPart::DATA_FILE_NAME; - - stream = std::make_unique( - 
data_file_name, - data_part->volume->getDisk(), - part_path + data_file_name, DataPart::DATA_FILE_EXTENSION, - part_path + data_file_name, marks_file_extension, - default_codec, - settings.max_compress_block_size, - settings.estimated_size, - settings.aio_threshold); + const auto & storage_columns = metadata_snapshot->getColumns(); + for (const auto & column : columns_list) + compressed_streams[column.name] = std::make_unique( + plain_hashing, storage_columns.getCodecOrDefault(column.name, default_codec)); } void MergeTreeDataPartWriterCompact::write( @@ -98,14 +102,13 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block) for (const auto & column : columns_list) { - /// There could already be enough data to compress into the new block. - if (stream->compressed.offset() >= settings.min_compress_block_size) - stream->compressed.next(); + auto & stream = compressed_streams[column.name]; - writeIntBinary(stream->plain_hashing.count(), stream->marks); - writeIntBinary(stream->compressed.offset(), stream->marks); + writeIntBinary(plain_hashing.count(), marks); + writeIntBinary(UInt64(0), marks); writeColumnSingleGranule(block.getByName(column.name), current_row, rows_to_write); + stream->hashing_buf.next(); } ++from_mark; @@ -120,7 +123,7 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block) index_granularity.appendMark(rows_written); } - writeIntBinary(rows_to_write, stream->marks); + writeIntBinary(rows_to_write, marks); } next_index_offset = 0; @@ -132,7 +135,7 @@ void MergeTreeDataPartWriterCompact::writeColumnSingleGranule(const ColumnWithTy IDataType::SerializeBinaryBulkStatePtr state; IDataType::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.getter = [this](IDataType::SubstreamPath) -> WriteBuffer * { return &stream->compressed; }; + serialize_settings.getter = [this, &column](IDataType::SubstreamPath) -> WriteBuffer * { return &compressed_streams.at(column.name)->hashing_buf; }; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; @@ -150,15 +153,15 @@ void MergeTreeDataPartWriterCompact::finishDataSerialization(IMergeTreeDataPart: { for (size_t i = 0; i < columns_list.size(); ++i) { - writeIntBinary(stream->plain_hashing.count(), stream->marks); - writeIntBinary(stream->compressed.offset(), stream->marks); + writeIntBinary(plain_hashing.count(), marks); + writeIntBinary(UInt64(0), marks); } - writeIntBinary(0ULL, stream->marks); + writeIntBinary(UInt64(0), marks); } - stream->finalize(); - stream->addToChecksums(checksums); - stream.reset(); + plain_file->next(); + marks.next(); + addToChecksums(checksums); } static void fillIndexGranularityImpl( @@ -199,6 +202,33 @@ void MergeTreeDataPartWriterCompact::fillIndexGranularity(size_t index_granulari rows_in_block); } +void MergeTreeDataPartWriterCompact::addToChecksums(MergeTreeDataPartChecksums & checksums) +{ + using uint128 = CityHash_v1_0_2::uint128; + + String data_file_name = MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; + String marks_file_name = MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension; + + checksums.files[data_file_name].is_compressed = true; + size_t uncompressed_size = 0; + uint128 uncompressed_hash{0, 0}; + + for (const auto & [_, stream] : compressed_streams) + { + uncompressed_size += stream->hashing_buf.count(); + uncompressed_hash = CityHash_v1_0_2::CityHash128WithSeed( + reinterpret_cast(&uncompressed_hash), sizeof(uncompressed_hash), uncompressed_hash); + } + + 
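+    /// NOTE: the loop above hashes the bytes of `uncompressed_hash` itself and never feeds in the
+    /// per-stream hash; [PATCH 005/298] below fixes this by hashing stream->hashing_buf.getHash().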
checksums.files[data_file_name].uncompressed_size = uncompressed_size; + checksums.files[data_file_name].uncompressed_hash = uncompressed_hash; + checksums.files[data_file_name].file_size = plain_hashing.count(); + checksums.files[data_file_name].file_hash = plain_hashing.getHash(); + + checksums.files[marks_file_name].file_size = marks.count(); + checksums.files[marks_file_name].file_hash = marks.getHash(); +} + void MergeTreeDataPartWriterCompact::ColumnsBuffer::add(MutableColumns && columns) { if (accumulated_columns.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 8183c038c4c..a5bfd8a16cc 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -34,7 +34,7 @@ private: void writeBlock(const Block & block); - StreamPtr stream; + void addToChecksums(MergeTreeDataPartChecksums & checksumns); Block header; @@ -53,6 +53,25 @@ private: }; ColumnsBuffer columns_buffer; + + /// compressed -> compressed_buf -> plain_hashing -> plain_file + std::unique_ptr plain_file; + HashingWriteBuffer plain_hashing; + + struct CompressedStream + { + CompressedWriteBuffer compressed_buf; + HashingWriteBuffer hashing_buf; + + CompressedStream(WriteBuffer & buf, const CompressionCodecPtr & codec) + : compressed_buf(buf, codec), hashing_buf(compressed_buf) {} + }; + + std::unordered_map> compressed_streams; + + /// marks -> marks_file + std::unique_ptr marks_file; + HashingWriteBuffer marks; }; } diff --git a/tests/queries/0_stateless/01375_compact_parts_codecs.reference b/tests/queries/0_stateless/01375_compact_parts_codecs.reference new file mode 100644 index 00000000000..982c45a26e3 --- /dev/null +++ b/tests/queries/0_stateless/01375_compact_parts_codecs.reference @@ -0,0 +1,3 @@ +12000 11890 +11965 11890 +5858 11890 diff --git a/tests/queries/0_stateless/01375_compact_parts_codecs.sql b/tests/queries/0_stateless/01375_compact_parts_codecs.sql new file mode 100644 index 00000000000..467745c6fa2 --- /dev/null +++ b/tests/queries/0_stateless/01375_compact_parts_codecs.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS codecs; + +CREATE TABLE codecs (id UInt32, val UInt32, s String) + ENGINE = MergeTree ORDER BY id + SETTINGS min_rows_for_wide_part = 10000; +INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); +SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) + FROM system.parts + WHERE table = 'codecs' AND database = currentDatabase(); + +DROP TABLE codecs; + +CREATE TABLE codecs (id UInt32 CODEC(NONE), val UInt32 CODEC(NONE), s String CODEC(NONE)) + ENGINE = MergeTree ORDER BY id + SETTINGS min_rows_for_wide_part = 10000; +INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); +SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) + FROM system.parts + WHERE table = 'codecs' AND database = currentDatabase(); + +DROP TABLE codecs; + +CREATE TABLE codecs (id UInt32, val UInt32 CODEC(Delta, ZSTD), s String CODEC(ZSTD)) + ENGINE = MergeTree ORDER BY id + SETTINGS min_rows_for_wide_part = 10000; +INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); +SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) + FROM system.parts + WHERE table = 'codecs' AND database = currentDatabase(); + +DROP TABLE codecs; From 80a62977f1aa430144a6bdfae0b7e37605eb5b20 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 9 Jul 2020 21:26:54 +0300 Subject: [PATCH 
005/298] fix hashing in DataPartWriterCompact --- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 69e581a6299..ac697e1b212 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -217,8 +217,9 @@ void MergeTreeDataPartWriterCompact::addToChecksums(MergeTreeDataPartChecksums & for (const auto & [_, stream] : compressed_streams) { uncompressed_size += stream->hashing_buf.count(); + auto stream_hash = stream->hashing_buf.getHash(); uncompressed_hash = CityHash_v1_0_2::CityHash128WithSeed( - reinterpret_cast(&uncompressed_hash), sizeof(uncompressed_hash), uncompressed_hash); + reinterpret_cast(&stream_hash), sizeof(stream_hash), uncompressed_hash); } checksums.files[data_file_name].uncompressed_size = uncompressed_size; From 24f627e52c5f6f461cd1bc42b2306725ad0491b8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 10 Jul 2020 18:57:10 +0300 Subject: [PATCH 006/298] fix reading from compact parts with different codecs --- .../CachedCompressedReadBuffer.cpp | 14 +++- src/Compression/CachedCompressedReadBuffer.h | 4 +- .../CompressedReadBufferFromFile.cpp | 6 ++ .../CompressedReadBufferFromFile.h | 1 + .../MergeTree/MergeTreeReaderCompact.cpp | 80 ++++++++++--------- .../MergeTree/MergeTreeReaderCompact.h | 21 ++++- 6 files changed, 81 insertions(+), 45 deletions(-) diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index 1b083c004c0..beb13d15f01 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -12,6 +12,7 @@ namespace DB namespace ErrorCodes { extern const int SEEK_POSITION_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; } @@ -19,8 +20,9 @@ void CachedCompressedReadBuffer::initInput() { if (!file_in) { - file_in = file_in_creator(); - compressed_in = file_in.get(); + file_in_holder = file_in_creator(); + file_in = file_in_holder.get(); + compressed_in = file_in; if (profile_callback) file_in->setProfileCallback(profile_callback, clock_type); @@ -71,6 +73,14 @@ bool CachedCompressedReadBuffer::nextImpl() return true; } +CachedCompressedReadBuffer::CachedCompressedReadBuffer( + const std::string & path_, ReadBufferFromFileBase * file_in_, UncompressedCache * cache_) + : ReadBuffer(nullptr, 0), file_in(file_in_), cache(cache_), path(path_), file_pos(0) +{ + if (file_in == nullptr) + throw Exception("Neither file_in nor file_in_creator is initialized in CachedCompressedReadBuffer", ErrorCodes::LOGICAL_ERROR); +} + CachedCompressedReadBuffer::CachedCompressedReadBuffer( const std::string & path_, std::function()> file_in_creator_, UncompressedCache * cache_) : ReadBuffer(nullptr, 0), file_in_creator(std::move(file_in_creator_)), cache(cache_), path(path_), file_pos(0) diff --git a/src/Compression/CachedCompressedReadBuffer.h b/src/Compression/CachedCompressedReadBuffer.h index 88bcec8197d..2c5aa4920bd 100644 --- a/src/Compression/CachedCompressedReadBuffer.h +++ b/src/Compression/CachedCompressedReadBuffer.h @@ -22,7 +22,8 @@ class CachedCompressedReadBuffer : public CompressedReadBufferBase, public ReadB private: std::function()> file_in_creator; UncompressedCache * cache; - std::unique_ptr file_in; + std::unique_ptr file_in_holder; + ReadBufferFromFileBase * file_in; const std::string path; 
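    /// Position of the compressed block currently being read; together with
    /// `path` it forms the lookup key for the uncompressed cache.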
size_t file_pos; @@ -38,6 +39,7 @@ private: clockid_t clock_type {}; public: + CachedCompressedReadBuffer(const std::string & path_, ReadBufferFromFileBase * file_in_, UncompressedCache * cache_); CachedCompressedReadBuffer(const std::string & path, std::function()> file_in_creator, UncompressedCache * cache_); void seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block); diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index ddd8bba686f..2927ee1b399 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -37,6 +37,12 @@ bool CompressedReadBufferFromFile::nextImpl() return true; } +CompressedReadBufferFromFile::CompressedReadBufferFromFile(ReadBufferFromFileBase & file_in_) + : BufferWithOwnMemory(0), file_in(file_in_) +{ + compressed_in = &file_in; +} + CompressedReadBufferFromFile::CompressedReadBufferFromFile(std::unique_ptr buf) : BufferWithOwnMemory(0), p_file_in(std::move(buf)), file_in(*p_file_in) { diff --git a/src/Compression/CompressedReadBufferFromFile.h b/src/Compression/CompressedReadBufferFromFile.h index 1729490f606..1de28062e41 100644 --- a/src/Compression/CompressedReadBufferFromFile.h +++ b/src/Compression/CompressedReadBufferFromFile.h @@ -28,6 +28,7 @@ private: bool nextImpl() override; public: + CompressedReadBufferFromFile(ReadBufferFromFileBase & buf); CompressedReadBufferFromFile(std::unique_ptr buf); CompressedReadBufferFromFile( diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 4357ee66a6e..920f171d7f9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -45,40 +45,31 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( { size_t buffer_size = settings.max_read_buffer_size; const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; + file_in = data_part->volume->getDisk()->readFile( + full_data_path, buffer_size, 0, + settings.min_bytes_to_use_direct_io, + settings.min_bytes_to_use_mmap_io); - if (uncompressed_cache) + auto full_path = fullPath(data_part->volume->getDisk(), full_data_path); + for (const auto & column : columns) { - auto buffer = std::make_unique( - fullPath(data_part->volume->getDisk(), full_data_path), - [this, full_data_path, buffer_size]() - { - return data_part->volume->getDisk()->readFile( - full_data_path, - buffer_size, - 0, - settings.min_bytes_to_use_direct_io, - settings.min_bytes_to_use_mmap_io); - }, - uncompressed_cache); + + std::unique_ptr cached_buffer; + std::unique_ptr non_cached_buffer; + if (uncompressed_cache) + { + cached_buffer = std::make_unique(full_path, file_in.get(), uncompressed_cache); + if (profile_callback_) + cached_buffer->setProfileCallback(profile_callback_, clock_type_); + } + else + { + non_cached_buffer = std::make_unique(*file_in); + if (profile_callback_) + non_cached_buffer->setProfileCallback(profile_callback_, clock_type_); + } - if (profile_callback_) - buffer->setProfileCallback(profile_callback_, clock_type_); - - cached_buffer = std::move(buffer); - data_buffer = cached_buffer.get(); - } - else - { - auto buffer = - std::make_unique( - data_part->volume->getDisk()->readFile( - full_data_path, buffer_size, 0, settings.min_bytes_to_use_direct_io, settings.min_bytes_to_use_mmap_io)); - - if (profile_callback_) - buffer->setProfileCallback(profile_callback_, 
clock_type_); - - non_cached_buffer = std::move(buffer); - data_buffer = non_cached_buffer.get(); + column_streams[column.name] = ColumnStream{std::move(cached_buffer), std::move(non_cached_buffer)}; } size_t columns_num = columns.size(); @@ -181,15 +172,16 @@ void MergeTreeReaderCompact::readData( const String & name, IColumn & column, const IDataType & type, size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets) { + auto & stream = column_streams[name]; if (!isContinuousReading(from_mark, column_position)) - seekToMark(from_mark, column_position); + seekToMark(stream, from_mark, column_position); auto buffer_getter = [&](const IDataType::SubstreamPath & substream_path) -> ReadBuffer * { if (only_offsets && (substream_path.size() != 1 || substream_path[0].type != IDataType::Substream::ArraySizes)) return nullptr; - return data_buffer; + return stream.data_buffer; }; IDataType::DeserializeBinaryBulkSettings deserialize_settings; @@ -209,15 +201,15 @@ void MergeTreeReaderCompact::readData( } -void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index) +void MergeTreeReaderCompact::seekToMark(ColumnStream & stream, size_t row_index, size_t column_index) { MarkInCompressedFile mark = marks_loader.getMark(row_index, column_index); try { - if (cached_buffer) - cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); - if (non_cached_buffer) - non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); + if (stream.cached_buffer) + stream.cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); + if (stream.non_cached_buffer) + stream.non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); } catch (Exception & e) { @@ -239,4 +231,16 @@ bool MergeTreeReaderCompact::isContinuousReading(size_t mark, size_t column_posi || (mark == last_mark + 1 && column_position == 0 && last_column == data_part->getColumns().size() - 1); } +MergeTreeReaderCompact::ColumnStream::ColumnStream( + std::unique_ptr cached_buffer_, + std::unique_ptr non_cached_buffer_) + : cached_buffer(std::move(cached_buffer_)) + , non_cached_buffer(std::move(non_cached_buffer_)) +{ + if (cached_buffer) + data_buffer = cached_buffer.get(); + else + data_buffer = non_cached_buffer.get(); +} + } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index 0457b4b6a50..41682f8b0bd 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -35,9 +36,21 @@ public: private: bool isContinuousReading(size_t mark, size_t column_position); - ReadBuffer * data_buffer; - std::unique_ptr cached_buffer; - std::unique_ptr non_cached_buffer; + std::unique_ptr file_in; + + struct ColumnStream + { + std::unique_ptr cached_buffer; + std::unique_ptr non_cached_buffer; + ReadBuffer * data_buffer; + + ColumnStream() = default; + ColumnStream( + std::unique_ptr cached_buffer_, + std::unique_ptr non_cached_buffer_); + }; + + std::unordered_map column_streams; MergeTreeMarksLoader marks_loader; @@ -49,7 +62,7 @@ private: size_t next_mark = 0; std::optional> last_read_granule; - void seekToMark(size_t row_index, size_t column_index); + void seekToMark(ColumnStream & stream, size_t row_index, size_t column_index); void readData(const String & name, IColumn & column, const IDataType & type, size_t from_mark, size_t 
column_position, size_t rows_to_read, bool only_offsets = false); From fbec940e0fa7246ca7b42e056de7d0cea50640d0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 11 Jul 2020 02:33:36 +0300 Subject: [PATCH 007/298] fix reading and check query for compact parts with different codecs --- .../CachedCompressedReadBuffer.cpp | 2 ++ src/Compression/CachedCompressedReadBuffer.h | 2 +- .../MergeTreeDataPartWriterCompact.cpp | 4 ++-- .../MergeTree/MergeTreeReaderCompact.cpp | 5 +++-- src/Storages/MergeTree/checkDataPart.cpp | 22 ++++++++++++++----- .../01390_check_table_codec.reference | 2 ++ .../0_stateless/01390_check_table_codec.sql | 15 +++++++++++++ 7 files changed, 41 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/01390_check_table_codec.reference create mode 100644 tests/queries/0_stateless/01390_check_table_codec.sql diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index beb13d15f01..218925f8eae 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -79,6 +79,8 @@ CachedCompressedReadBuffer::CachedCompressedReadBuffer( { if (file_in == nullptr) throw Exception("Neither file_in nor file_in_creator is initialized in CachedCompressedReadBuffer", ErrorCodes::LOGICAL_ERROR); + + compressed_in = file_in; } CachedCompressedReadBuffer::CachedCompressedReadBuffer( diff --git a/src/Compression/CachedCompressedReadBuffer.h b/src/Compression/CachedCompressedReadBuffer.h index 2c5aa4920bd..89bf66a3e2c 100644 --- a/src/Compression/CachedCompressedReadBuffer.h +++ b/src/Compression/CachedCompressedReadBuffer.h @@ -23,7 +23,7 @@ private: std::function()> file_in_creator; UncompressedCache * cache; std::unique_ptr file_in_holder; - ReadBufferFromFileBase * file_in; + ReadBufferFromFileBase * file_in = nullptr; const std::string path; size_t file_pos; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index ac697e1b212..d15bba232d6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -17,7 +17,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( default_codec_, settings_, index_granularity_) , plain_file(data_part->volume->getDisk()->writeFile( part_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, - settings.max_compress_block_size, + settings.max_compress_block_size, WriteMode::Rewrite, settings.estimated_size, settings.aio_threshold)) @@ -31,7 +31,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const auto & storage_columns = metadata_snapshot->getColumns(); for (const auto & column : columns_list) compressed_streams[column.name] = std::make_unique( - plain_hashing, storage_columns.getCodecOrDefault(column.name, default_codec)); + plain_hashing, storage_columns.getCodecOrDefault(column.name, default_codec)); } void MergeTreeDataPartWriterCompact::write( diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 920f171d7f9..89ca8b96dba 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -53,7 +53,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( auto full_path = fullPath(data_part->volume->getDisk(), full_data_path); for (const auto & column : columns) { - + std::unique_ptr cached_buffer; std::unique_ptr 
non_cached_buffer; if (uncompressed_cache) @@ -69,7 +69,8 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( non_cached_buffer->setProfileCallback(profile_callback_, clock_type_); } - column_streams[column.name] = ColumnStream{std::move(cached_buffer), std::move(non_cached_buffer)}; + auto column_from_part = getColumnFromPart(column); + column_streams[column_from_part.name] = ColumnStream{std::move(cached_buffer), std::move(non_cached_buffer)}; } size_t columns_num = columns.size(); diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 793bddc88c0..790a250d831 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -96,11 +96,24 @@ IMergeTreeDataPart::Checksums checkDataPart( }; }; + /// This function calculates only checksum of file content (compressed or uncompressed). + auto checksum_file = [](const DiskPtr & disk_, const String & file_path) + { + auto file_buf = disk_->readFile(file_path); + HashingReadBuffer hashing_buf(*file_buf); + hashing_buf.tryIgnore(std::numeric_limits::max()); + return IMergeTreeDataPart::Checksums::Checksum{hashing_buf.count(), hashing_buf.getHash()}; + }; + + bool check_uncompressed = true; /// First calculate checksums for columns data if (part_type == MergeTreeDataPartType::COMPACT) { const auto & file_name = MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; - checksums_data.files[file_name] = checksum_compressed_file(disk, path + file_name); + checksums_data.files[file_name] = checksum_file(disk, path + file_name); + /// Uncompressed checksums in compact parts are computed in a complex way. + /// We check only checksum of compressed file. + check_uncompressed = false; } else if (part_type == MergeTreeDataPartType::WIDE) { @@ -141,10 +154,7 @@ IMergeTreeDataPart::Checksums checkDataPart( if (txt_checksum_it == checksum_files_txt.end() || txt_checksum_it->second.uncompressed_size == 0) { /// The file is not compressed. 
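                /// Its checksum is computed directly over the raw bytes,
                /// which is what checksum_file() above does.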
- auto file_buf = disk->readFile(it->path()); - HashingReadBuffer hashing_buf(*file_buf); - hashing_buf.tryIgnore(std::numeric_limits::max()); - checksums_data.files[file_name] = IMergeTreeDataPart::Checksums::Checksum(hashing_buf.count(), hashing_buf.getHash()); + checksums_data.files[file_name] = checksum_file(disk, it->path()); } else /// If we have both compressed and uncompressed in txt, than calculate them { @@ -157,7 +167,7 @@ IMergeTreeDataPart::Checksums checkDataPart( return {}; if (require_checksums || !checksums_txt.files.empty()) - checksums_txt.checkEqual(checksums_data, true); + checksums_txt.checkEqual(checksums_data, check_uncompressed); return checksums_data; } diff --git a/tests/queries/0_stateless/01390_check_table_codec.reference b/tests/queries/0_stateless/01390_check_table_codec.reference new file mode 100644 index 00000000000..3025e6463d8 --- /dev/null +++ b/tests/queries/0_stateless/01390_check_table_codec.reference @@ -0,0 +1,2 @@ +all_1_1_0 1 +all_1_1_0 1 diff --git a/tests/queries/0_stateless/01390_check_table_codec.sql b/tests/queries/0_stateless/01390_check_table_codec.sql new file mode 100644 index 00000000000..639d5bea6e4 --- /dev/null +++ b/tests/queries/0_stateless/01390_check_table_codec.sql @@ -0,0 +1,15 @@ +SET check_query_single_value_result = 0; + +DROP TABLE IF EXISTS check_codec; + +CREATE TABLE check_codec(a Int, b Int CODEC(Delta, ZSTD)) ENGINE = MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part = 0; +INSERT INTO check_codec SELECT number, number * 2 FROM numbers(1000); +CHECK TABLE check_codec; + +DROP TABLE check_codec; + +CREATE TABLE check_codec(a Int, b Int CODEC(Delta, ZSTD)) ENGINE = MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part = '10M'; +INSERT INTO check_codec SELECT number, number * 2 FROM numbers(1000); +CHECK TABLE check_codec; + +DROP TABLE check_codec; From 9384b6950b6c5311202788c8b38ed84dd53a13e8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 14 Jul 2020 15:10:20 +0300 Subject: [PATCH 008/298] Add some tests configs --- tests/ci/build_config.json | 153 +++++++++++++++++++++++ tests/ci/tests_config.json | 242 +++++++++++++++++++++++++++++++++++++ 2 files changed, 395 insertions(+) create mode 100644 tests/ci/build_config.json create mode 100644 tests/ci/tests_config.json diff --git a/tests/ci/build_config.json b/tests/ci/build_config.json new file mode 100644 index 00000000000..e4b9c1d6b75 --- /dev/null +++ b/tests/ci/build_config.json @@ -0,0 +1,153 @@ +[ + { + "compiler": "gcc-9", + "build-type": "", + "sanitizer": "", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "alien_pkgs": true, + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "gcc-9", + "build-type": "", + "sanitizer": "", + "package-type": "performance", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "gcc-9", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "address", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "undefined", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + 
"sanitizer": "thread", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "memory", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "debug", + "sanitizer": "", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "gcc-9", + "build-type": "", + "sanitizer": "", + "package-type": "deb", + "bundled": "unbundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "splitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10-darwin", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10-aarch64", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10-freebsd", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + } +] diff --git a/tests/ci/tests_config.json b/tests/ci/tests_config.json new file mode 100644 index 00000000000..481de51d08b --- /dev/null +++ b/tests/ci/tests_config.json @@ -0,0 +1,242 @@ +{ + "Functional stateful tests (address)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (thread)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (memory)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (ubsan)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (debug)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": 
"bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (release)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (release, DatabaseAtomic)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (address)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (thread)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (memory)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (ubsan)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (debug)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (release)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (unbundled)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "unbundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (release, polymorphic parts enabled)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (release, DatabaseAtomic)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Stress test (address)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + 
"build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Stress test (thread)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Stress test (undefined)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Stress test (memory)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + } +} From 230938d3a3082fbf241c9d873571231a69a5f450 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 11 Jul 2020 15:12:42 +0800 Subject: [PATCH 009/298] Refactor joinGet and implement multi-key lookup. --- src/Functions/FunctionJoinGet.cpp | 83 +++++++++---------- src/Functions/FunctionJoinGet.h | 11 +-- src/Interpreters/HashJoin.cpp | 69 ++++++++------- src/Interpreters/HashJoin.h | 10 +-- src/Interpreters/misc.h | 2 +- .../0_stateless/01080_join_get_null.reference | 2 +- .../0_stateless/01080_join_get_null.sql | 12 +-- .../01400_join_get_with_multi_keys.reference | 1 + .../01400_join_get_with_multi_keys.sql | 9 ++ 9 files changed, 104 insertions(+), 95 deletions(-) create mode 100644 tests/queries/0_stateless/01400_join_get_with_multi_keys.reference create mode 100644 tests/queries/0_stateless/01400_join_get_with_multi_keys.sql diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index a33b70684a5..1badc689c6a 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -1,10 +1,10 @@ #include +#include #include #include #include #include -#include #include @@ -16,19 +16,35 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +template +void ExecutableFunctionJoinGet::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t) +{ + Block keys; + for (size_t i = 2; i < arguments.size(); ++i) + { + auto key = block.getByPosition(arguments[i]); + keys.insert(std::move(key)); + } + block.getByPosition(result) = join->joinGet(keys, result_block); +} + +template +ExecutableFunctionImplPtr FunctionJoinGet::prepare(const Block &, const ColumnNumbers &, size_t) const +{ + return std::make_unique>(join, Block{{return_type->createColumn(), return_type, attr_name}}); +} + static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & context) { - if (arguments.size() != 3) - throw Exception{"Function joinGet takes 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - String join_name; if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) { join_name = name_col->getValue(); } else - throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception( + "Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); size_t 
dot = join_name.find('.'); String database_name; @@ -43,10 +59,12 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co ++dot; } String table_name = join_name.substr(dot); + if (table_name.empty()) + throw Exception("joinGet does not allow empty table name", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto table = DatabaseCatalog::instance().getTable({database_name, table_name}, context); auto storage_join = std::dynamic_pointer_cast(table); if (!storage_join) - throw Exception{"Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception("Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); String attr_name; if (const auto * name_col = checkAndGetColumnConst(arguments[1].column.get())) @@ -54,57 +72,30 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co attr_name = name_col->getValue(); } else - throw Exception{"Illegal type " + arguments[1].type->getName() - + " of second argument of function joinGet, expected a const string.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception( + "Illegal type " + arguments[1].type->getName() + " of second argument of function joinGet, expected a const string.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_pair(storage_join, attr_name); } template FunctionBaseImplPtr JoinGetOverloadResolver::build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const { + if (arguments.size() < 3) + throw Exception( + "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + + ", should be greater or equal to 3", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); auto [storage_join, attr_name] = getJoin(arguments, context); auto join = storage_join->getJoin(); - DataTypes data_types(arguments.size()); - + DataTypes data_types(arguments.size() - 2); + for (size_t i = 2; i < arguments.size(); ++i) + data_types[i - 2] = arguments[i].type; + auto return_type = join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null); auto table_lock = storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); - for (size_t i = 0; i < arguments.size(); ++i) - data_types[i] = arguments[i].type; - - auto return_type = join->joinGetReturnType(attr_name, or_null); return std::make_unique>(table_lock, storage_join, join, attr_name, data_types, return_type); } -template -DataTypePtr JoinGetOverloadResolver::getReturnType(const ColumnsWithTypeAndName & arguments) const -{ - auto [storage_join, attr_name] = getJoin(arguments, context); - auto join = storage_join->getJoin(); - return join->joinGetReturnType(attr_name, or_null); -} - - -template -void ExecutableFunctionJoinGet::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) -{ - auto ctn = block.getByPosition(arguments[2]); - if (isColumnConst(*ctn.column)) - ctn.column = ctn.column->cloneResized(1); - ctn.name = ""; // make sure the key name never collide with the join columns - Block key_block = {ctn}; - join->joinGet(key_block, attr_name, or_null); - auto & result_ctn = key_block.getByPosition(1); - if (isColumnConst(*ctn.column)) - result_ctn.column = ColumnConst::create(result_ctn.column, input_rows_count); - block.getByPosition(result) = result_ctn; -} - -template -ExecutableFunctionImplPtr FunctionJoinGet::prepare(const Block &, const ColumnNumbers &, size_t) const -{ - return std::make_unique>(join, attr_name); 
-} - void registerFunctionJoinGet(FunctionFactory & factory) { // joinGet diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index a82da589960..6b3b1202f60 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -13,14 +13,14 @@ template class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl { public: - ExecutableFunctionJoinGet(HashJoinPtr join_, String attr_name_) - : join(std::move(join_)), attr_name(std::move(attr_name_)) {} + ExecutableFunctionJoinGet(HashJoinPtr join_, const Block & result_block_) + : join(std::move(join_)), result_block(result_block_) {} static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; @@ -28,7 +28,7 @@ public: private: HashJoinPtr join; - const String attr_name; + Block result_block; }; template @@ -77,13 +77,14 @@ public: String getName() const override { return name; } FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override; - DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override; + DataTypePtr getReturnType(const ColumnsWithTypeAndName &) const override { return {}; } // Not used bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; } private: const Context & context; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 27294a57675..ffc806b9e88 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -42,6 +42,7 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int SET_SIZE_LIMIT_EXCEEDED; extern const int TYPE_MISMATCH; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } namespace @@ -1109,27 +1110,34 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) block = block.cloneWithColumns(std::move(dst_columns)); } -static void checkTypeOfKey(const Block & block_left, const Block & block_right) -{ - const auto & [c1, left_type_origin, left_name] = block_left.safeGetByPosition(0); - const auto & [c2, right_type_origin, right_name] = block_right.safeGetByPosition(0); - auto left_type = removeNullable(left_type_origin); - auto right_type = removeNullable(right_type_origin); - if (!left_type->equals(*right_type)) - throw Exception("Type mismatch of columns to joinGet by: " - + left_name + " " + left_type->getName() + " at left, " - + right_name + " " + right_type->getName() + " at right", - ErrorCodes::TYPE_MISMATCH); -} - - -DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null) const +DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const { std::shared_lock lock(data->rwlock); + size_t num_keys = data_types.size(); + if (right_table_keys.columns() != num_keys) + throw Exception( + 
"Number of arguments for function joinGet" + toString(or_null ? "OrNull" : "") + + " doesn't match: passed, should be equal to " + toString(num_keys), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (size_t i = 0; i < num_keys; ++i) + { + const auto & left_type_origin = data_types[i]; + const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i); + auto left_type = removeNullable(left_type_origin); + auto right_type = removeNullable(right_type_origin); + if (!left_type->equals(*right_type)) + throw Exception( + "Type mismatch in joinGet key " + toString(i) + ": found type " + left_type->getName() + ", while the needed type is " + + right_type->getName(), + ErrorCodes::TYPE_MISMATCH); + } + if (!sample_block_with_columns_to_add.has(column_name)) throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + auto elem = sample_block_with_columns_to_add.getByName(column_name); if (or_null) elem.type = makeNullable(elem.type); @@ -1138,34 +1146,33 @@ DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null template -void HashJoin::joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const +ColumnWithTypeAndName HashJoin::joinGetImpl(const Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const { - joinBlockImpl( - block, {block.getByPosition(0).name}, block_with_columns_to_add, maps_); + // Assemble the key block with correct names. + Block keys; + for (size_t i = 0; i < block.columns(); ++i) + { + auto key = block.getByPosition(i); + key.name = key_names_right[i]; + keys.insert(std::move(key)); + } + + joinBlockImpl( + keys, key_names_right, block_with_columns_to_add, maps_); + return keys.getByPosition(keys.columns() - 1); } -// TODO: support composite key // TODO: return multiple columns as named tuple // TODO: return array of values when strictness == ASTTableJoin::Strictness::All -void HashJoin::joinGet(Block & block, const String & column_name, bool or_null) const +ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const { std::shared_lock lock(data->rwlock); - if (key_names_right.size() != 1) - throw Exception("joinGet only supports StorageJoin containing exactly one key", ErrorCodes::UNSUPPORTED_JOIN_KEYS); - - checkTypeOfKey(block, right_table_keys); - - auto elem = sample_block_with_columns_to_add.getByName(column_name); - if (or_null) - elem.type = makeNullable(elem.type); - elem.column = elem.type->createColumn(); - if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) && kind == ASTTableJoin::Kind::Left) { - joinGetImpl(block, {elem}, std::get(data->maps)); + return joinGetImpl(block, block_with_columns_to_add, std::get(data->maps)); } else throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 67d83d27a6d..025f41ac28f 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -162,11 +162,11 @@ public: */ void joinBlock(Block & block, ExtraBlockPtr & not_processed) override; - /// Infer the return type for joinGet function - DataTypePtr joinGetReturnType(const String & column_name, bool or_null) const; + /// Check joinGet arguments and infer the return type. 
+ DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const; - /// Used by joinGet function that turns StorageJoin into a dictionary - void joinGet(Block & block, const String & column_name, bool or_null) const; + /// Used by joinGet function that turns StorageJoin into a dictionary. + ColumnWithTypeAndName joinGet(const Block & block, const Block & block_with_columns_to_add) const; /** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later. */ @@ -383,7 +383,7 @@ private: void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const; template - void joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const; + ColumnWithTypeAndName joinGetImpl(const Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); }; diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index 094dfbbbb81..cae2691ca1f 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -28,7 +28,7 @@ inline bool functionIsLikeOperator(const std::string & name) inline bool functionIsJoinGet(const std::string & name) { - return name == "joinGet" || startsWith(name, "dictGet"); + return startsWith(name, "joinGet"); } inline bool functionIsDictGet(const std::string & name) diff --git a/tests/queries/0_stateless/01080_join_get_null.reference b/tests/queries/0_stateless/01080_join_get_null.reference index bfde072a796..0cfbf08886f 100644 --- a/tests/queries/0_stateless/01080_join_get_null.reference +++ b/tests/queries/0_stateless/01080_join_get_null.reference @@ -1 +1 @@ -2 2 +2 diff --git a/tests/queries/0_stateless/01080_join_get_null.sql b/tests/queries/0_stateless/01080_join_get_null.sql index 71e7ddf8e75..9f782452d34 100644 --- a/tests/queries/0_stateless/01080_join_get_null.sql +++ b/tests/queries/0_stateless/01080_join_get_null.sql @@ -1,12 +1,12 @@ DROP TABLE IF EXISTS test_joinGet; -DROP TABLE IF EXISTS test_join_joinGet; -CREATE TABLE test_joinGet(id Int32, user_id Nullable(Int32)) Engine = Memory(); -CREATE TABLE test_join_joinGet(user_id Int32, name String) Engine = Join(ANY, LEFT, user_id); +CREATE TABLE test_joinGet(user_id Nullable(Int32), name String) Engine = Join(ANY, LEFT, user_id); -INSERT INTO test_join_joinGet VALUES (2, 'a'), (6, 'b'), (10, 'c'); +INSERT INTO test_joinGet VALUES (2, 'a'), (6, 'b'), (10, 'c'), (null, 'd'); -SELECT 2 id, toNullable(toInt32(2)) user_id WHERE joinGet(test_join_joinGet, 'name', user_id) != ''; +SELECT toNullable(toInt32(2)) user_id WHERE joinGet(test_joinGet, 'name', user_id) != ''; + +-- If the JOIN keys are Nullable fields, the rows where at least one of the keys has the value NULL are not joined. 
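+-- The lookup below therefore finds nothing and joinGet falls back to the
+-- default value '' for String, so this SELECT returns no rows.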
+SELECT cast(null AS Nullable(Int32)) user_id WHERE joinGet(test_joinGet, 'name', user_id) != ''; DROP TABLE test_joinGet; -DROP TABLE test_join_joinGet; diff --git a/tests/queries/0_stateless/01400_join_get_with_multi_keys.reference b/tests/queries/0_stateless/01400_join_get_with_multi_keys.reference new file mode 100644 index 00000000000..49d59571fbf --- /dev/null +++ b/tests/queries/0_stateless/01400_join_get_with_multi_keys.reference @@ -0,0 +1 @@ +0.1 diff --git a/tests/queries/0_stateless/01400_join_get_with_multi_keys.sql b/tests/queries/0_stateless/01400_join_get_with_multi_keys.sql new file mode 100644 index 00000000000..73068270762 --- /dev/null +++ b/tests/queries/0_stateless/01400_join_get_with_multi_keys.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test_joinGet; + +CREATE TABLE test_joinGet(a String, b String, c Float64) ENGINE = Join(any, left, a, b); + +INSERT INTO test_joinGet VALUES ('ab', '1', 0.1), ('ab', '2', 0.2), ('cd', '3', 0.3); + +SELECT joinGet(test_joinGet, 'c', 'ab', '1'); + +DROP TABLE test_joinGet; From 40504f6a6e9b54bdcdb0c63a5724648bf5bc04f5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 31 Jul 2020 17:57:00 +0300 Subject: [PATCH 010/298] Simpler version of #12999 w/o `pos` changes --- programs/client/Client.cpp | 126 +++++++++++++----- ...06_insert_values_and_expressions.reference | 2 + .../00306_insert_values_and_expressions.sql | 9 ++ 3 files changed, 103 insertions(+), 34 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 797342a1b44..78a6d7fe2d9 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -908,74 +908,127 @@ private: return processMultiQuery(text); } - bool processMultiQuery(const String & text) + bool processMultiQuery(const String & all_queries_text) { const bool test_mode = config().has("testmode"); { /// disable logs if expects errors - TestHint test_hint(test_mode, text); + TestHint test_hint(test_mode, all_queries_text); if (test_hint.clientError() || test_hint.serverError()) processTextAsSingleQuery("SET send_logs_level = 'none'"); } /// Several queries separated by ';'. /// INSERT data is ended by the end of line, not ';'. + /// An exception is VALUES format where we also support semicolon in + /// addition to end of line. - const char * begin = text.data(); - const char * end = begin + text.size(); + const char * this_query_begin = all_queries_text.data(); + const char * all_queries_end = all_queries_text.data() + all_queries_text.size(); - while (begin < end) + while (this_query_begin < all_queries_end) { - const char * pos = begin; - ASTPtr orig_ast = parseQuery(pos, end, true); + // Use the token iterator to skip any whitespace, semicolons and + // comments at the beginning of the query. An example from regression + // tests: + // insert into table t values ('invalid'); -- { serverError 469 } + // select 1 + // Here the test hint comment gets parsed as a part of second query. + // We parse the `INSERT VALUES` up to the semicolon, and the rest + // looks like a two-line query: + // -- { serverError 469 } + // select 1 + // and we expect it to fail with error 469, but this hint is actually + // for the previous query. Test hints should go after the query, so + // we can fix this by skipping leading comments. Token iterator skips + // comments and whitespace by itself, so we only have to check for + // semicolons. + // The code block is to limit visibility of `tokens` because we have + // another such variable further down the code, and get warnings for + // that. 
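+            // E.g. given ";; -- hint\nselect 1", both semicolons are skipped
+            // (the iterator already ignores whitespace and comments), so
+            // this_query_begin lands on "select".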
+ { + Tokens tokens(this_query_begin, all_queries_end); + IParser::Pos token_iterator(tokens, + context.getSettingsRef().max_parser_depth); + while (token_iterator->type == TokenType::Semicolon + && token_iterator.isValid()) + { + ++token_iterator; + } + this_query_begin = token_iterator->begin; + if (this_query_begin >= all_queries_end) + { + break; + } + } - if (!orig_ast) + // Try to parse the query. + const char * this_query_end = this_query_begin; + parsed_query = parseQuery(this_query_end, all_queries_end, true); + + if (!parsed_query) { if (ignore_error) { - Tokens tokens(begin, end); + Tokens tokens(this_query_begin, all_queries_end); IParser::Pos token_iterator(tokens, context.getSettingsRef().max_parser_depth); while (token_iterator->type != TokenType::Semicolon && token_iterator.isValid()) ++token_iterator; - begin = token_iterator->end; + this_query_begin = token_iterator->end; continue; } return true; } - auto * insert = orig_ast->as(); - - if (insert && insert->data) + // INSERT queries may have the inserted data in the query text + // that follow the query itself, e.g. "insert into t format CSV 1;2". + // They need special handling. First of all, here we find where the + // inserted data ends. In multy-query mode, it is delimited by a + // newline. + // The VALUES format needs even more handling -- we also allow the + // data to be delimited by semicolon. This case is handled later by + // the format parser itself. + auto * insert_ast = parsed_query->as(); + if (insert_ast && insert_ast->data) { - pos = find_first_symbols<'\n'>(insert->data, end); - insert->end = pos; + this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end); + insert_ast->end = this_query_end; + query_to_send = all_queries_text.substr( + this_query_begin - all_queries_text.data(), + insert_ast->data - this_query_begin); + } + else + { + query_to_send = all_queries_text.substr( + this_query_begin - all_queries_text.data(), + this_query_end - this_query_begin); } - String str = text.substr(begin - text.data(), pos - begin); + // full_query is the query + inline INSERT data. + full_query = all_queries_text.substr( + this_query_begin - all_queries_text.data(), + this_query_end - this_query_begin); - begin = pos; - while (isWhitespaceASCII(*begin) || *begin == ';') - ++begin; - - TestHint test_hint(test_mode, str); + // Look for the hint in the text of query + insert data, if any. + // e.g. insert into t format CSV 'a' -- { serverError 123 }. + TestHint test_hint(test_mode, full_query); expected_client_error = test_hint.clientError(); expected_server_error = test_hint.serverError(); try { - auto ast_to_process = orig_ast; - if (insert && insert->data) + processParsedSingleQuery(); + + if (insert_ast && insert_ast->data) { - ast_to_process = nullptr; - processTextAsSingleQuery(str); - } - else - { - parsed_query = ast_to_process; - full_query = str; - query_to_send = str; - processParsedSingleQuery(); + // For VALUES format: use the end of inline data as reported + // by the format parser (it is saved in sendData()). This + // allows us to handle queries like: + // insert into t values (1); select 1 + //, where the inline data is delimited by semicolon and not + // by a newline. + this_query_end = parsed_query->as()->end; } } catch (...) 
@@ -983,7 +1036,7 @@ private: last_exception_received_from_server = std::make_unique(getCurrentExceptionMessage(true), getCurrentExceptionCode()); actual_client_error = last_exception_received_from_server->code(); if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error)) - std::cerr << "Error on processing query: " << str << std::endl << last_exception_received_from_server->message(); + std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message(); received_exception_from_server = true; } @@ -997,6 +1050,8 @@ private: else return false; } + + this_query_begin = this_query_end; } return true; @@ -1407,7 +1462,7 @@ private: void sendData(Block & sample, const ColumnsDescription & columns_description) { /// If INSERT data must be sent. - const auto * parsed_insert_query = parsed_query->as(); + auto * parsed_insert_query = parsed_query->as(); if (!parsed_insert_query) return; @@ -1416,6 +1471,9 @@ private: /// Send data contained in the query. ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); sendDataFrom(data_in, sample, columns_description); + // Remember where the data ended. We use this info later to determine + // where the next query begins. + parsed_insert_query->end = data_in.buffer().begin() + data_in.count(); } else if (!is_interactive) { diff --git a/tests/queries/0_stateless/00306_insert_values_and_expressions.reference b/tests/queries/0_stateless/00306_insert_values_and_expressions.reference index 960773dc489..e80a28accf4 100644 --- a/tests/queries/0_stateless/00306_insert_values_and_expressions.reference +++ b/tests/queries/0_stateless/00306_insert_values_and_expressions.reference @@ -2,3 +2,5 @@ 2 Hello, world 00000000-0000-0000-0000-000000000000 2016-01-02 2016-01-02 03:04:00 [0,1] 3 hello, world! ab41bdd6-5cd4-11e7-907b-a6006ad3dba0 2016-01-03 2016-01-02 03:00:00 [] 4 World ab41bdd6-5cd4-11e7-907b-a6006ad3dba0 2016-01-04 2016-12-11 10:09:08 [3,2,1] +11111 +1 diff --git a/tests/queries/0_stateless/00306_insert_values_and_expressions.sql b/tests/queries/0_stateless/00306_insert_values_and_expressions.sql index a57e9e69fe6..10a1415f287 100644 --- a/tests/queries/0_stateless/00306_insert_values_and_expressions.sql +++ b/tests/queries/0_stateless/00306_insert_values_and_expressions.sql @@ -5,3 +5,12 @@ INSERT INTO insert VALUES (1, 'Hello', 'ab41bdd6-5cd4-11e7-907b-a6006ad3dba0', ' SELECT * FROM insert ORDER BY i; DROP TABLE insert; + +-- Test the case where the VALUES are delimited by semicolon and a query follows +-- w/o newline. With most formats the query in the same line would be ignored or +-- lead to an error, but VALUES are an exception and support semicolon delimiter, +-- in addition to the newline. +create table if not exists t_306 (a int) engine Memory; +insert into t_306 values (1); select 11111; +select * from t_306; +drop table if exists t_306; From 405a6fb08fa22a9e063dd5e48e7ee6060f718749 Mon Sep 17 00:00:00 2001 From: hexiaoting <“hewenting_ict@163.com”> Date: Mon, 17 Aug 2020 18:20:23 +0800 Subject: [PATCH 011/298] New feature: LineAsString format. 
#13630 --- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatFactory.h | 1 + .../Impl/LineAsStringRowInputFormat.cpp | 101 ++++++++++++++++++ .../Formats/Impl/LineAsStringRowInputFormat.h | 31 ++++++ src/Processors/ya.make | 1 + 5 files changed, 135 insertions(+) create mode 100644 src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp create mode 100644 src/Processors/Formats/Impl/LineAsStringRowInputFormat.h diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 5256ab2b321..f996e3d8cf2 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -365,6 +365,7 @@ FormatFactory::FormatFactory() registerInputFormatProcessorMsgPack(*this); registerOutputFormatProcessorMsgPack(*this); registerInputFormatProcessorJSONAsString(*this); + registerInputFormatProcessorLineAsString(*this); registerFileSegmentationEngineTabSeparated(*this); registerFileSegmentationEngineCSV(*this); diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index ea4004c191f..610cf8105b8 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -210,5 +210,6 @@ void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory); void registerInputFormatProcessorCapnProto(FormatFactory & factory); void registerInputFormatProcessorRegexp(FormatFactory & factory); void registerInputFormatProcessorJSONAsString(FormatFactory & factory); +void registerInputFormatProcessorLineAsString(FormatFactory & factory); } diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp new file mode 100644 index 00000000000..a28b3903724 --- /dev/null +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; +} + +LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) : + IRowInputFormat(header_, in_, std::move(params_)), buf(in) +{ + if (header_.columns() > 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String) + { + throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::LOGICAL_ERROR); + } +} + +void LineAsStringRowInputFormat::resetParser() +{ + IRowInputFormat::resetParser(); + buf.reset(); +} + +void LineAsStringRowInputFormat::readLineObject(IColumn & column) +{ + PeekableReadBufferCheckpoint checkpoint{buf}; + size_t balance = 0; + + if (*buf.position() != '"') + throw Exception("Line object must begin with '\"'.", ErrorCodes::INCORRECT_DATA); + + ++buf.position(); + ++balance; + + char * pos; + + while (balance) + { + if (buf.eof()) + throw Exception("Unexpected end of file while parsing Line object.", ErrorCodes::INCORRECT_DATA); + + pos = find_last_symbols_or_null<'"', '\\'>(buf.position(), buf.buffer().end()); + buf.position() = pos; + if (buf.position() == buf.buffer().end()) + continue; + else if (*buf.position() == '"') + { + --balance; + ++buf.position(); + } + else if (*buf.position() == '\\') + { + ++buf.position(); + if (!buf.eof()) + { + ++buf.position(); + } + } + + } + buf.makeContinuousMemoryFromCheckpointToPos(); + char * end = buf.position(); + buf.rollbackToCheckpoint(); + column.insertData(buf.position(), end - buf.position()); + buf.position() = end; +} + +bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) +{ + 
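+    /// Reads the next value into the single String column; values may be
+    /// separated by whitespace and/or a comma. Returns false when the input
+    /// is exhausted.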
skipWhitespaceIfAny(buf);
+
+    if (!buf.eof())
+        readLineObject(*columns[0]);
+
+    skipWhitespaceIfAny(buf);
+    if (!buf.eof() && *buf.position() == ',')
+        ++buf.position();
+    skipWhitespaceIfAny(buf);
+
+    return !buf.eof();
+}
+
+void registerInputFormatProcessorLineAsString(FormatFactory & factory)
+{
+    factory.registerInputFormatProcessor("LineAsString", [](
+        ReadBuffer & buf,
+        const Block & sample,
+        const RowInputFormatParams & params,
+        const FormatSettings &)
+    {
+        return std::make_shared<LineAsStringRowInputFormat>(sample, buf, params);
+    });
+}
+
+}
diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h
new file mode 100644
index 00000000000..a31dce1cc4a
--- /dev/null
+++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+class ReadBuffer;
+
+/// This format parses a sequence of Line objects separated by newlines, spaces and/or commas.
+/// Each Line object is parsed as a whole into a string.
+/// This format can only parse a table with a single field of type String.
+
+class LineAsStringRowInputFormat : public IRowInputFormat
+{
+public:
+    LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);
+
+    bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
+    String getName() const override { return "LineAsStringRowInputFormat"; }
+    void resetParser() override;
+
+private:
+    void readLineObject(IColumn & column);
+
+    PeekableReadBuffer buf;
+};
+
+}
diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index 4c25ad5bf3f..081b1d5ba1f 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -23,6 +23,7 @@ SRCS(
     Formats/Impl/ConstantExpressionTemplate.cpp
     Formats/Impl/CSVRowInputFormat.cpp
     Formats/Impl/CSVRowOutputFormat.cpp
+    Formats/Impl/LineAsStringRowInputFormat.cpp
     Formats/Impl/JSONAsStringRowInputFormat.cpp
     Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
     Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp
From 0451d5729323b7f46d79336fea4f0982bb1662ae Mon Sep 17 00:00:00 2001
From: hexiaoting <“hewenting_ict@163.com”>
Date: Tue, 18 Aug 2020 10:35:08 +0800
Subject: [PATCH 012/298] Add new feature: LineAsString Format

---
 src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp | 6 ++++++
 .../0_stateless/01460_line_as_string_format.reference      | 1 +
 tests/queries/0_stateless/01460_line_as_string_format.sql  | 5 +++++
 3 files changed, 12 insertions(+)
 create mode 100644 tests/queries/0_stateless/01460_line_as_string_format.reference
 create mode 100644 tests/queries/0_stateless/01460_line_as_string_format.sql

diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
index a28b3903724..36844fa700b 100644
--- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
@@ -32,6 +32,12 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column)
     PeekableReadBufferCheckpoint checkpoint{buf};
     size_t balance = 0;
 
+    if (*buf.position() == ';') {
+        ++buf.position();
+        if (buf.eof())
+            return;
+    }
+
     if (*buf.position() != '"')
         throw Exception("Line object must begin with '\"'.", ErrorCodes::INCORRECT_DATA);
 
diff --git a/tests/queries/0_stateless/01460_line_as_string_format.reference b/tests/queries/0_stateless/01460_line_as_string_format.reference
new file mode 100644
index 00000000000..989f8ac0292
--- /dev/null
+++ 
b/tests/queries/0_stateless/01460_line_as_string_format.reference @@ -0,0 +1 @@ +"I love apple","I love banana","I love pear" diff --git a/tests/queries/0_stateless/01460_line_as_string_format.sql b/tests/queries/0_stateless/01460_line_as_string_format.sql new file mode 100644 index 00000000000..e5518a828d0 --- /dev/null +++ b/tests/queries/0_stateless/01460_line_as_string_format.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS line_as_string; +CREATE TABLE line_as_string (field String) ENGINE = Memory; +INSERT INTO line_as_string FORMAT LineAsString "I love apple","I love banana","I love pear"; +SELECT * FROM line_as_string; +DROP TABLE line_as_string; From e9be2f14ea8ac45f11c7c65b6c36646b64a5b390 Mon Sep 17 00:00:00 2001 From: hexiaoting <“hewenting_ict@163.com”> Date: Wed, 19 Aug 2020 11:50:43 +0800 Subject: [PATCH 013/298] fix implementation for \n separated lines --- .../Impl/LineAsStringRowInputFormat.cpp | 45 ++++++------------- .../01460_line_as_string_format.reference | 7 ++- .../01460_line_as_string_format.sh | 19 ++++++++ .../01460_line_as_string_format.sql | 5 --- 4 files changed, 38 insertions(+), 38 deletions(-) create mode 100755 tests/queries/0_stateless/01460_line_as_string_format.sh delete mode 100644 tests/queries/0_stateless/01460_line_as_string_format.sql diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index 36844fa700b..27bc71d764d 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -30,35 +30,22 @@ void LineAsStringRowInputFormat::resetParser() void LineAsStringRowInputFormat::readLineObject(IColumn & column) { PeekableReadBufferCheckpoint checkpoint{buf}; - size_t balance = 0; - - if (*buf.position() == ';') { - ++buf.position(); - if(buf.eof()) - return; - } - - if (*buf.position() != '"') - throw Exception("Line object must begin with '\"'.", ErrorCodes::INCORRECT_DATA); - - ++buf.position(); - ++balance; + bool newline = true; + bool over = false; char * pos; - while (balance) + while (newline) { - if (buf.eof()) - throw Exception("Unexpected end of file while parsing Line object.", ErrorCodes::INCORRECT_DATA); - - pos = find_last_symbols_or_null<'"', '\\'>(buf.position(), buf.buffer().end()); + pos = find_first_symbols<'\n', '\\'>(buf.position(), buf.buffer().end()); buf.position() = pos; - if (buf.position() == buf.buffer().end()) - continue; - else if (*buf.position() == '"') + if (buf.position() == buf.buffer().end()) { + over = true; + break; + } + else if (*buf.position() == '\n') { - --balance; - ++buf.position(); + newline = false; } else if (*buf.position() == '\\') { @@ -70,25 +57,19 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column) } } + buf.makeContinuousMemoryFromCheckpointToPos(); - char * end = buf.position(); + char * end = over ? buf.position(): ++buf.position(); buf.rollbackToCheckpoint(); - column.insertData(buf.position(), end - buf.position()); + column.insertData(buf.position(), end - (over ? 
0 : 1) - buf.position()); buf.position() = end; } bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { - skipWhitespaceIfAny(buf); - if (!buf.eof()) readLineObject(*columns[0]); - skipWhitespaceIfAny(buf); - if (!buf.eof() && *buf.position() == ',') - ++buf.position(); - skipWhitespaceIfAny(buf); - return !buf.eof(); } diff --git a/tests/queries/0_stateless/01460_line_as_string_format.reference b/tests/queries/0_stateless/01460_line_as_string_format.reference index 989f8ac0292..dec67eb2e0a 100644 --- a/tests/queries/0_stateless/01460_line_as_string_format.reference +++ b/tests/queries/0_stateless/01460_line_as_string_format.reference @@ -1 +1,6 @@ -"I love apple","I love banana","I love pear" +"id" : 1, +"date" : "01.01.2020", +"string" : "123{{{\\"\\\\", +"array" : [1, 2, 3], + +Finally implement this new feature. diff --git a/tests/queries/0_stateless/01460_line_as_string_format.sh b/tests/queries/0_stateless/01460_line_as_string_format.sh new file mode 100755 index 00000000000..a985bc207a8 --- /dev/null +++ b/tests/queries/0_stateless/01460_line_as_string_format.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS line_as_string"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE line_as_string(field String) ENGINE = Memory"; + +echo '"id" : 1, +"date" : "01.01.2020", +"string" : "123{{{\"\\", +"array" : [1, 2, 3], + +Finally implement this new feature.' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string FORMAT LineAsString"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM line_as_string"; +$CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string" + diff --git a/tests/queries/0_stateless/01460_line_as_string_format.sql b/tests/queries/0_stateless/01460_line_as_string_format.sql deleted file mode 100644 index e5518a828d0..00000000000 --- a/tests/queries/0_stateless/01460_line_as_string_format.sql +++ /dev/null @@ -1,5 +0,0 @@ -DROP TABLE IF EXISTS line_as_string; -CREATE TABLE line_as_string (field String) ENGINE = Memory; -INSERT INTO line_as_string FORMAT LineAsString "I love apple","I love banana","I love pear"; -SELECT * FROM line_as_string; -DROP TABLE line_as_string; From 4331158d3051437f44c7fa1271e4673272cf8cac Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Aug 2020 16:09:23 +0300 Subject: [PATCH 014/298] merge with master --- src/Disks/DiskDecorator.cpp | 15 +++++++++++++++ src/Disks/DiskDecorator.h | 3 +++ .../MergeTree/MergeTreeDataPartWriterInMemory.cpp | 2 +- .../MergeTree/MergeTreeDataPartWriterInMemory.h | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 11 ++++++----- 5 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index e55534e347f..7f2ea58d7cf 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -165,4 +165,19 @@ void DiskDecorator::truncateFile(const String & path, size_t size) delegate->truncateFile(path, size); } +int DiskDecorator::open(const String & path, mode_t mode) const +{ + return delegate->open(path, mode); +} + +void DiskDecorator::close(int fd) const +{ + delegate->close(fd); +} + +void DiskDecorator::sync(int fd) const +{ + delegate->sync(fd); +} + } diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index 71bb100c576..f1ddfff4952 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -42,6 +42,9 @@ public: void setReadOnly(const String & path) override; 
void createHardLink(const String & src_path, const String & dst_path) override; void truncateFile(const String & path, size_t size) override; + int open(const String & path, mode_t mode) const override; + void close(int fd) const override; + void sync(int fd) const override; const String getType() const override { return delegate->getType(); } protected: diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index a7486158737..f0738a1130a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -70,7 +70,7 @@ void MergeTreeDataPartWriterInMemory::calculateAndSerializePrimaryIndex(const Bl } } -void MergeTreeDataPartWriterInMemory::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterInMemory::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool) { /// If part is empty we still need to initialize block by empty columns. if (!part_in_memory->block) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h index 92e4228a90d..6e59cdd08a9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h @@ -18,7 +18,7 @@ public: void write(const Block & block, const IColumn::Permutation * permutation, const Block & primary_key_block, const Block & skip_indexes_block) override; - void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) override; + void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) override; void calculateAndSerializePrimaryIndex(const Block & primary_index_block) override; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index f3a72657be5..b05b970da3b 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -251,6 +251,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->is_temp = true; + std::optional sync_guard; if (new_data_part->isStoredOnDisk()) { /// The name could be non-unique in case of stale files from previous runs. @@ -262,12 +263,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa new_data_part->volume->getDisk()->removeRecursive(full_path); } - const auto disk = new_data_part->volume->getDisk(); - disk->createDirectories(full_path); + const auto disk = new_data_part->volume->getDisk(); + disk->createDirectories(full_path); - std::optional sync_guard; - if (data.getSettings()->fsync_part_directory) - sync_guard.emplace(disk, full_path); + if (data.getSettings()->fsync_part_directory) + sync_guard.emplace(disk, full_path); + } /// If we need to calculate some columns to sort. 
if (metadata_snapshot->hasSortingKey() || metadata_snapshot->hasSecondaryIndices()) From 4834bed35b251fee8f53d72fa7c2650fd473a195 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 31 Aug 2020 14:35:53 +0300 Subject: [PATCH 015/298] Add recompression TTL parser --- src/Parsers/ASTTTLElement.cpp | 5 +++++ src/Parsers/ASTTTLElement.h | 2 ++ src/Parsers/ExpressionElementParsers.cpp | 19 +++++++++++++++++++ src/Storages/TTLDescription.cpp | 15 ++++++++++++++- src/Storages/TTLDescription.h | 5 +++++ src/Storages/TTLMode.h | 3 ++- 6 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTTTLElement.cpp b/src/Parsers/ASTTTLElement.cpp index 1635d376d30..f37631769b8 100644 --- a/src/Parsers/ASTTTLElement.cpp +++ b/src/Parsers/ASTTTLElement.cpp @@ -57,6 +57,11 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st } } } + else if (mode == TTLMode::RECOMPRESS) + { + settings.ostr << " RECOMPRESS "; + recompression_codec->formatImpl(settings, state, frame); + } else if (mode == TTLMode::DELETE) { /// It would be better to output "DELETE" here but that will break compatibility with earlier versions. diff --git a/src/Parsers/ASTTTLElement.h b/src/Parsers/ASTTTLElement.h index 7ee1f4795ff..aadd019b59c 100644 --- a/src/Parsers/ASTTTLElement.h +++ b/src/Parsers/ASTTTLElement.h @@ -20,6 +20,8 @@ public: ASTs group_by_key; std::vector> group_by_aggregations; + ASTPtr recompression_codec; + ASTTTLElement(TTLMode mode_, DataDestinationType destination_type_, const String & destination_name_) : mode(mode_) , destination_type(destination_type_) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e24bb9c4129..67c3737f6f0 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1506,6 +1506,8 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_where("WHERE"); ParserKeyword s_group_by("GROUP BY"); ParserKeyword s_set("SET"); + ParserKeyword s_recompress("RECOMPRESS"); + ParserKeyword s_codec("CODEC"); ParserToken s_comma(TokenType::Comma); ParserToken s_eq(TokenType::Equals); @@ -1513,6 +1515,7 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserStringLiteral parser_string_literal; ParserExpression parser_exp; ParserExpressionList parser_expression_list(false); + ParserCodec parser_codec; ASTPtr ttl_expr; if (!parser_exp.parse(pos, ttl_expr, expected)) @@ -1536,6 +1539,10 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { mode = TTLMode::GROUP_BY; } + else if (s_recompress.ignore(pos)) + { + mode = TTLMode::RECOMPRESS; + } else { s_delete.ignore(pos); @@ -1544,6 +1551,7 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr where_expr; ASTPtr ast_group_by_key; + ASTPtr recompression_codec; std::vector> group_by_aggregations; if (mode == TTLMode::MOVE) @@ -1587,6 +1595,14 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parser_exp.parse(pos, where_expr, expected)) return false; } + else if (mode == TTLMode::RECOMPRESS) + { + if (!s_codec.ignore(pos)) + return false; + + if (!parser_codec.parse(pos, recompression_codec, expected)) + return false; + } auto ttl_element = std::make_shared(mode, destination_type, destination_name); ttl_element->setTTL(std::move(ttl_expr)); @@ -1599,6 +1615,9 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) 
ttl_element->group_by_aggregations = std::move(group_by_aggregations); } + if (mode == TTLMode::RECOMPRESS) + ttl_element->recompression_codec = recompression_codec; + node = ttl_element; return true; } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 4c9da095278..656baf39971 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -263,6 +264,12 @@ TTLDescription TTLDescription::getTTLFromAST( result.aggregate_descriptions.push_back(descr); } } + else if (ttl_element->mode == TTLMode::RECOMPRESS) + { + result.recompression_codec = + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( + ttl_element->recompression_codec, {}, !context.getSettingsRef().allow_suspicious_codecs); + } } checkTTLExpression(result.expression, result.result_column); @@ -311,15 +318,21 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( for (const auto & ttl_element_ptr : definition_ast->children) { auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key); - if (ttl.destination_type == DataDestinationType::DELETE) + if (ttl.mode == TTLMode::DELETE) { if (seen_delete_ttl) throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); result.rows_ttl = ttl; seen_delete_ttl = true; } + else if (ttl.mode == TTLMode::RECOMPRESS) + { + result.recompression_ttl.emplace_back(std::move(ttl)); + } else + { result.move_ttl.emplace_back(std::move(ttl)); + } } return result; } diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index f7769fd42e9..4b0d4370a70 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -75,6 +75,9 @@ struct TTLDescription /// Name of destination disk or volume String destination_name; + /// Codec name which will be used to recompress data + ASTPtr recompression_codec; + /// Parse TTL structure from definition. Able to parse both column and table /// TTLs. 
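    /// For example (illustrative, following ParserTTLElement above), a table TTL element such as
    ///     d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(10))
    /// is parsed into a TTLDescription with mode == TTLMode::RECOMPRESS and recompression_codec
    /// holding the validated codec AST.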
static TTLDescription getTTLFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, const KeyDescription & primary_key); @@ -102,6 +105,8 @@ struct TTLTableDescription /// Moving data TTL (to other disks or volumes) TTLDescriptions move_ttl; + TTLDescriptions recompression_ttl; + TTLTableDescription() = default; TTLTableDescription(const TTLTableDescription & other); TTLTableDescription & operator=(const TTLTableDescription & other); diff --git a/src/Storages/TTLMode.h b/src/Storages/TTLMode.h index 0681f10fc17..7f5fe0315c6 100644 --- a/src/Storages/TTLMode.h +++ b/src/Storages/TTLMode.h @@ -8,7 +8,8 @@ enum class TTLMode { DELETE, MOVE, - GROUP_BY + GROUP_BY, + RECOMPRESS, }; } From 42c210fcba41d2e0ba657b38048278667ebf5963 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 31 Aug 2020 15:12:51 +0300 Subject: [PATCH 016/298] Recompress TTLs in memory metadata --- src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h | 5 ++++- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 5 +++++ src/Storages/StorageInMemoryMetadata.cpp | 10 ++++++++++ src/Storages/StorageInMemoryMetadata.h | 4 ++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h index 209d7181b66..d2e131d5650 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h @@ -38,13 +38,16 @@ struct MergeTreeDataPartTTLInfos MergeTreeDataPartTTLInfo table_ttl; /// `part_min_ttl` and `part_max_ttl` are TTLs which are used for selecting parts - /// to merge in order to remove expired rows. + /// to merge in order to remove expired rows. time_t part_min_ttl = 0; time_t part_max_ttl = 0; /// Order is important as it would be serialized and hashed for checksums std::map moves_ttl; + /// Order is important as it would be serialized and hashed for checksums + std::map recompression_ttl; + void read(ReadBuffer & in); void write(WriteBuffer & out) const; void update(const MergeTreeDataPartTTLInfos & other_infos); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 445a02b06f0..23569a13b85 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -234,6 +234,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & ttl_entry : move_ttl_entries) updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); + const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); + for (const auto & ttl_entry : recompression_ttl_entries) + updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); + + NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); ReservationPtr reservation = data.reserveSpacePreferringTTLRules(expected_size, move_ttl_infos, time(nullptr)); VolumePtr volume = data.getStoragePolicy()->getVolume(0); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index b7f4565a55a..f611c1ec95d 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -157,6 +157,16 @@ bool StorageInMemoryMetadata::hasAnyMoveTTL() const return !table_ttl.move_ttl.empty(); } +TTLDescriptions StorageInMemoryMetadata::getRecompressionTTLs() const +{ + 
return table_ttl.recompression_ttl; +} + +bool StorageInMemoryMetadata::hasAnyRecompressionTTL() const +{ + return !table_ttl.recompression_ttl.empty(); +} + ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet & updated_columns) const { if (updated_columns.empty()) diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 4c78d72a9d1..3656edf71f4 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -114,6 +114,10 @@ struct StorageInMemoryMetadata TTLDescriptions getMoveTTLs() const; bool hasAnyMoveTTL() const; + // Just wrapper for table TTLs, return info about recompression ttl + TTLDescriptions getRecompressionTTLs() const; + bool hasAnyRecompressionTTL() const; + /// Returns columns, which will be needed to calculate dependencies (skip /// indices, TTL expressions) if we update @updated_columns set of columns. ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const; From adc18f4d3f8915a1ad505ebc67cace8d98d81c04 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 31 Aug 2020 16:29:31 +0300 Subject: [PATCH 017/298] Write with recompression TTL --- .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 34 +++++++++++++++++++ .../MergeTree/MergeTreeDataWriter.cpp | 9 +++-- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 37d036fc6fc..94a2b4269ef 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -67,6 +67,18 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) moves_ttl.emplace(expression, ttl_info); } } + if (json.has("recompression")) + { + const JSON & moves = json["recompression"]; + for (auto move : moves) // NOLINT + { + MergeTreeDataPartTTLInfo ttl_info; + ttl_info.min = move["min"].getUInt(); + ttl_info.max = move["max"].getUInt(); + String expression = move["expression"].getString(); + recompression_ttl.emplace(expression, ttl_info); + } + } } @@ -122,6 +134,28 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const } writeString("]", out); } + if (!recompression_ttl.empty()) + { + if (!moves_ttl.empty() || !columns_ttl.empty() || table_ttl.min) + writeString(",", out); + + writeString(R"("recompression":[)", out); + for (auto it = recompression_ttl.begin(); it != recompression_ttl.end(); ++it) + { + if (it != recompression_ttl.begin()) + writeString(",", out); + + writeString(R"({"expression":)", out); + writeString(doubleQuoteString(it->first), out); + writeString(R"(,"min":)", out); + writeIntText(it->second.min, out); + writeString(R"(,"max":)", out); + writeIntText(it->second.max, out); + writeString("}", out); + } + writeString("]", out); + + } writeString("}", out); } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 23569a13b85..92bf5345d5a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -234,11 +234,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & ttl_entry : move_ttl_entries) updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); - const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); - for (const auto & ttl_entry : recompression_ttl_entries) - updateTTL(ttl_entry, 
move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); - - NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); ReservationPtr reservation = data.reserveSpacePreferringTTLRules(expected_size, move_ttl_infos, time(nullptr)); VolumePtr volume = data.getStoragePolicy()->getVolume(0); @@ -303,6 +298,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); + const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); + for (const auto & ttl_entry : recompression_ttl_entries) + updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false); + new_data_part->ttl_infos.update(move_ttl_infos); /// This effectively chooses minimal compression method: From b20a0bc254e769e66093e7c2a2a574b252b5a698 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 31 Aug 2020 16:42:42 +0300 Subject: [PATCH 018/298] Add recompression flag in ReplicatedEntry --- src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp | 10 ++++++++++ src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h | 1 + 2 files changed, 11 insertions(+) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index af6d980ad98..a4fc600d1b3 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -36,6 +36,9 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const out << s << '\n'; out << "into\n" << new_part_name; out << "\ndeduplicate: " << deduplicate; + /// For backward compatibility write only if enabled + if (recompress) + out << "\nrecompress: " << recompress; break; case DROP_RANGE: @@ -149,7 +152,14 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) } in >> new_part_name; if (format_version >= 4) + { in >> "\ndeduplicate: " >> deduplicate; + in >> "\n"; + if (in.eof()) + trailing_newline_found = true; + else if (checkString("recompress\n", in)) + in >> recompress; + } } else if (type_str == "drop" || type_str == "detach") { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index ae5fad0b83c..62599c2c3a7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -79,6 +79,7 @@ struct ReplicatedMergeTreeLogEntryData Strings source_parts; bool deduplicate = false; /// Do deduplicate on merge + bool recompress = false; /// Recompress parts on merge String column_name; String index_name; From 46f833b7df64f77d361f78d629d3075f83945ebb Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 31 Aug 2020 22:50:42 +0300 Subject: [PATCH 019/298] Some changes --- src/Storages/MergeTree/MergeTreeData.cpp | 30 +++++++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 3 ++ .../MergeTree/MergeTreeDataMergerMutator.cpp | 1 + .../MergeTree/MergeTreeDataMergerMutator.h | 1 + .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 7 +++++ 5 files changed, 42 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b6a495161f5..b721cf4afbf 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ 
b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3055,6 +3055,36 @@ MergeTreeData::selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & tt return max_max_ttl ? *best_entry_it : std::optional(); } + +CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const +{ + + time_t max_max_ttl = 0; + TTLDescriptions::const_iterator best_entry_it; + auto metadata_snapshot = getInMemoryMetadataPtr(); + + const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); + for (auto ttl_entry_it = recompression_ttl_entries.begin(); ttl_entry_it != recompression_ttl_entries.end(); ++ttl_entry_it) + { + auto ttl_info_it = ttl_infos.recompression_ttl.find(ttl_entry_it->result_column); + /// Prefer TTL rule which went into action last. + if (ttl_info_it != ttl_infos.recompression_ttl.end() + && ttl_info_it->second.max <= current_time + && max_max_ttl <= ttl_info_it->second.max) + { + best_entry_it = ttl_entry_it; + max_max_ttl = ttl_info_it->second.max; + } + } + + if (max_max_ttl) + return CompressionCodecFactory::instance().get(best_entry_it->recompression_codec, {}); + + return global_context.chooseCompressionCodec( + part_size_compressed, + static_cast(part_size_compressed) / getTotalActiveSizeInBytes()); +} + MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const { DataParts res; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e088a1c098b..ab115927e1e 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -669,6 +669,9 @@ public: std::optional selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; + + CompressionCodecPtr getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const; + /// Limiting parallel sends per one table, used in DataPartsExchange std::atomic_uint current_table_sends {0}; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 673ad02bfb6..8cece66dafb 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -284,6 +284,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( current_time, data_settings->merge_with_ttl_timeout, data_settings->ttl_only_drop_parts); + parts_to_merge = merge_selector.select(partitions, max_total_size_to_merge); } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index d5798fe3582..e13711f8064 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -249,6 +249,7 @@ private: /// Stores the next TTL merge due time for each partition (used only by TTLMergeSelector) TTLMergeSelector::PartitionIdToTTLs next_ttl_merge_times_by_partition; + /// Performing TTL merges independently for each partition guarantees that /// there is only a limited number of TTL merges and no partition stores data, that is too stale }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 94a2b4269ef..4b0a8bdfa9e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -16,6 +16,12 
@@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i updatePartMinMaxTTL(ttl_info.min, ttl_info.max); } + for (const auto & [name, ttl_info] : other_infos.recompression_ttl) + { + recompression_ttl[name].update(ttl_info); + updatePartMinMaxTTL(ttl_info.min, ttl_info.max); + } + for (const auto & [expression, ttl_info] : other_infos.moves_ttl) { moves_ttl[expression].update(ttl_info); @@ -77,6 +83,7 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) ttl_info.max = move["max"].getUInt(); String expression = move["expression"].getString(); recompression_ttl.emplace(expression, ttl_info); + updatePartMinMaxTTL(ttl_info.min, ttl_info.max); } } } From 25140b9bd5b6421b84ef8586827cc49b9d015e7b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 1 Sep 2020 04:39:36 +0300 Subject: [PATCH 020/298] fsync MergeTree format file --- src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index bbefba70c58..bc668659b6a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -220,6 +220,8 @@ MergeTreeData::MergeTreeData( format_version = min_format_version; auto buf = version_file.second->writeFile(version_file.first); writeIntText(format_version.toUnderType(), *buf); + if (global_context.getSettingsRef().fsync_metadata) + buf->sync(); } else { From 927eb32e882d070ff5ff5446d5b9e0071e2c6f9d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 1 Sep 2020 04:46:40 +0300 Subject: [PATCH 021/298] add test for durability (draft) --- utils/durability-test/create.sql | 1 + utils/durability-test/durability-test.sh | 154 +++++++++++++++++++++++ utils/durability-test/insert.sql | 1 + utils/durability-test/install.sh | 3 + utils/durability-test/sshd_config | 8 ++ utils/durability-test/startup.exp | 23 ++++ 6 files changed, 190 insertions(+) create mode 100644 utils/durability-test/create.sql create mode 100644 utils/durability-test/durability-test.sh create mode 100644 utils/durability-test/insert.sql create mode 100644 utils/durability-test/install.sh create mode 100644 utils/durability-test/sshd_config create mode 100755 utils/durability-test/startup.exp diff --git a/utils/durability-test/create.sql b/utils/durability-test/create.sql new file mode 100644 index 00000000000..1ec394100e2 --- /dev/null +++ b/utils/durability-test/create.sql @@ -0,0 +1 @@ +CREATE TABLE test (a Int, s String) ENGINE = MergeTree ORDER BY a; diff --git a/utils/durability-test/durability-test.sh b/utils/durability-test/durability-test.sh new file mode 100644 index 00000000000..1f47c900f49 --- /dev/null +++ b/utils/durability-test/durability-test.sh @@ -0,0 +1,154 @@ +#!/bin/bash + +URL=http://cloud-images.ubuntu.com/bionic/current +IMAGE=bionic-server-cloudimg-amd64.img +SSH_PORT=11022 +CLICKHOUSE_PORT=9090 +PASSWORD=root + +TABLE_NAME=$1 +CREATE_QUERY=$2 +INSERT_QUERY=$3 + +if [[ -z $TABLE_NAME || -z $CREATE_QUERY || -z $INSERT_QUERY ]]; then + echo "Required 3 arguments: table name, file with create query, file with insert query" + exit 1 +fi + +function run() +{ + sshpass -p $PASSWORD ssh -p $SSH_PORT root@localhost "$1" +} + +function copy() +{ + sshpass -p $PASSWORD scp -r -P $SSH_PORT $1 root@localhost:$2 +} + +function wait_vm_for_start() +{ + echo "Waiting until VM started..." + started=0 + for i in {0..100}; do + run "exit" + if [ $? 
-eq 0 ]; then + started=1 + break + fi + sleep 1s + done + + if ((started == 0)); then + echo "Can't start or connect to VM." + exit 1 + fi + + echo "Started VM" +} + +function wait_clickhouse_for_start() +{ + echo "Waiting until ClickHouse started..." + started=0 + for i in {0..15}; do + run "clickhouse client --query 'select 1'" + if [ $? -eq 0 ]; then + started=1 + break + fi + sleep 1s + done + + if ((started == 0)); then + echo "Can't start ClickHouse." + fi + + echo "Started ClickHouse" +} + +echo "Downloading image" +curl -O $URL/$IMAGE + +qemu-img resize $IMAGE +10G +virt-customize -a $IMAGE --root-password password:$PASSWORD +virt-copy-in -a $IMAGE sshd_config /etc/ssh + +echo "Starting VM" + +chmod +x ./startup.exp +./startup.exp > qemu.log 2>&1 & + +wait_vm_for_start + +echo "Preparing VM" + +# Resize partition +run "growpart /dev/sda 1 && resize2fs /dev/sda1" + +if [[ -z $CLICKHOUSE_BINARY ]]; then + CLICKHOUSE_BINARY=/usr/bin/clickhouse +fi + +if [[ -z $CLICKHOUSE_CONFIG_DIR ]]; then + CLICKHOUSE_CONFIG_DIR=/etc/clickhouse-server +fi + +echo "Using ClickHouse binary: " $CLICKHOUSE_BINARY +echo "Using ClickHouse config from: " $CLICKHOUSE_CONFIG_DIR + +copy $CLICKHOUSE_BINARY /usr/bin +copy $CLICKHOUSE_CONFIG_DIR /etc +run "mv /etc/$CLICKHOUSE_CONFIG_DIR /etc/clickhouse-server" + +echo "Prepared VM" +echo "Starting ClickHouse" + +run "clickhouse server --config-file=/etc/clickhouse-server/config.xml > clickhouse-server.log 2>&1" & + +wait_clickhouse_for_start + +echo "Started ClickHouse" + +query=`cat $CREATE_QUERY` +echo "Executing query:" $query +run "clickhouse client --query '$query'" + +query=`cat $INSERT_QUERY` +echo "Will run in a loop query: " $query +run "clickhouse benchmark <<< '$query'" & +echo "Running queries" + +pid=`pidof qemu-system-x86_64` +sec=$(( (RANDOM % 3) + 25 )) + +ms=$(( RANDOM % 1000 )) + +echo "Will kill VM in $sec.$ms sec" + +sleep $sec.$ms +kill -9 $pid + +echo "Restarting" + +./startup.exp > qemu.log 2>&1 & +wait_vm_for_start + +run "rm -r *data/system" +run "clickhouse server --config-file=/etc/clickhouse-server/config.xml > clickhouse-server.log 2>&1" & +wait_clickhouse_for_start + +result=`run "grep $TABLE_NAME clickhouse-server.log | grep 'Caught exception while loading metadata'"` +if [[ -n $result ]]; then + echo "FAIL. Can't attach table:" + echo $result + exit 1 +fi + +result=`run "grep $TABLE_NAME clickhouse-server.log | grep 'Considering to remove broken part'"` +if [[ -n $result ]]; then + echo "FAIL. 
Have broken parts:" + echo $result + exit 1 +fi + +echo OK diff --git a/utils/durability-test/insert.sql b/utils/durability-test/insert.sql new file mode 100644 index 00000000000..8982ad47228 --- /dev/null +++ b/utils/durability-test/insert.sql @@ -0,0 +1 @@ +INSERT INTO test SELECT number, toString(number) FROM numbers(10) diff --git a/utils/durability-test/install.sh b/utils/durability-test/install.sh new file mode 100644 index 00000000000..526cde6743f --- /dev/null +++ b/utils/durability-test/install.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +apt update && apt install qemu-kvm qemu virt-manager virt-viewer libguestfs-tools sshpass expect diff --git a/utils/durability-test/sshd_config b/utils/durability-test/sshd_config new file mode 100644 index 00000000000..6ed06d3d8ad --- /dev/null +++ b/utils/durability-test/sshd_config @@ -0,0 +1,8 @@ +PermitRootLogin yes +PasswordAuthentication yes +ChallengeResponseAuthentication no +UsePAM yes +X11Forwarding yes +PrintMotd no +AcceptEnv LANG LC_* +Subsystem sftp /usr/lib/openssh/sftp-server diff --git a/utils/durability-test/startup.exp b/utils/durability-test/startup.exp new file mode 100755 index 00000000000..540cfc0e4b8 --- /dev/null +++ b/utils/durability-test/startup.exp @@ -0,0 +1,23 @@ +#!/usr/bin/expect -f + +# Wait enough (forever) until a long-time boot +set timeout -1 + +spawn qemu-system-x86_64 \ + -hda bionic-server-cloudimg-amd64.img \ + -cpu qemu64,+ssse3,+sse4.1,+sse4.2,+popcnt -smp 8 \ + -net nic -net user,hostfwd=tcp::11022-:22 \ + -m 4096 -nographic + +expect "login: " +send "root\n" + +expect "Password: " +send "root\n" + +# Without it ssh is not working on guest machine for some reason +expect "# " +send "dhclient && ssh-keygen -A && systemctl restart sshd.service\n" + +# Wait forever +expect "########" From 602535396d0ac58c1885ef1d1d3e7c085335f059 Mon Sep 17 00:00:00 2001 From: hcz Date: Tue, 1 Sep 2020 16:36:27 +0800 Subject: [PATCH 022/298] Refactor, move function declarations --- src/Formats/FormatFactory.cpp | 100 +++++++++++++++++++++++++++++----- src/Formats/FormatFactory.h | 69 ----------------------- 2 files changed, 85 insertions(+), 84 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 935d31d6541..871098e00c0 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -323,13 +323,85 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm target = std::move(file_segmentation_engine); } +/// File Segmentation Engines for parallel reading + +void registerFileSegmentationEngineTabSeparated(FormatFactory & factory); +void registerFileSegmentationEngineCSV(FormatFactory & factory); +void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); +void registerFileSegmentationEngineRegexp(FormatFactory & factory); +void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); + +/// Formats for both input/output. 
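+/// Each function declared below is implemented next to its format and registers the
+/// format's creator in the factory (see registerInputFormatProcessorLineAsString above
+/// for the pattern); the FormatFactory constructor further down calls each of them once.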
+ +void registerInputFormatNative(FormatFactory & factory); +void registerOutputFormatNative(FormatFactory & factory); + +void registerInputFormatProcessorNative(FormatFactory & factory); +void registerOutputFormatProcessorNative(FormatFactory & factory); +void registerInputFormatProcessorRowBinary(FormatFactory & factory); +void registerOutputFormatProcessorRowBinary(FormatFactory & factory); +void registerInputFormatProcessorTabSeparated(FormatFactory & factory); +void registerOutputFormatProcessorTabSeparated(FormatFactory & factory); +void registerInputFormatProcessorValues(FormatFactory & factory); +void registerOutputFormatProcessorValues(FormatFactory & factory); +void registerInputFormatProcessorCSV(FormatFactory & factory); +void registerOutputFormatProcessorCSV(FormatFactory & factory); +void registerInputFormatProcessorTSKV(FormatFactory & factory); +void registerOutputFormatProcessorTSKV(FormatFactory & factory); +void registerInputFormatProcessorJSONEachRow(FormatFactory & factory); +void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory); +void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory); +void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory); +void registerInputFormatProcessorProtobuf(FormatFactory & factory); +void registerOutputFormatProcessorProtobuf(FormatFactory & factory); +void registerInputFormatProcessorTemplate(FormatFactory & factory); +void registerOutputFormatProcessorTemplate(FormatFactory & factory); +void registerInputFormatProcessorMsgPack(FormatFactory & factory); +void registerOutputFormatProcessorMsgPack(FormatFactory & factory); +void registerInputFormatProcessorORC(FormatFactory & factory); +void registerOutputFormatProcessorORC(FormatFactory & factory); +void registerInputFormatProcessorParquet(FormatFactory & factory); +void registerOutputFormatProcessorParquet(FormatFactory & factory); +void registerInputFormatProcessorArrow(FormatFactory & factory); +void registerOutputFormatProcessorArrow(FormatFactory & factory); +void registerInputFormatProcessorAvro(FormatFactory & factory); +void registerOutputFormatProcessorAvro(FormatFactory & factory); + +/// Output only (presentational) formats. + +void registerOutputFormatNull(FormatFactory & factory); + +void registerOutputFormatProcessorPretty(FormatFactory & factory); +void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory); +void registerOutputFormatProcessorPrettySpace(FormatFactory & factory); +void registerOutputFormatProcessorVertical(FormatFactory & factory); +void registerOutputFormatProcessorJSON(FormatFactory & factory); +void registerOutputFormatProcessorJSONCompact(FormatFactory & factory); +void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory); +void registerOutputFormatProcessorXML(FormatFactory & factory); +void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); +void registerOutputFormatProcessorNull(FormatFactory & factory); +void registerOutputFormatProcessorMySQLWire(FormatFactory & factory); +void registerOutputFormatProcessorMarkdown(FormatFactory & factory); +void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory); + +/// Input only formats. 
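+/// These formats can only be parsed, so they have no output counterpart registered here.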
+ +void registerInputFormatProcessorRegexp(FormatFactory & factory); +void registerInputFormatProcessorJSONAsString(FormatFactory & factory); +void registerInputFormatProcessorCapnProto(FormatFactory & factory); + FormatFactory::FormatFactory() { + registerFileSegmentationEngineTabSeparated(*this); + registerFileSegmentationEngineCSV(*this); + registerFileSegmentationEngineJSONEachRow(*this); + registerFileSegmentationEngineRegexp(*this); + registerFileSegmentationEngineJSONAsString(*this); + registerInputFormatNative(*this); registerOutputFormatNative(*this); - registerOutputFormatProcessorJSONEachRowWithProgress(*this); - registerInputFormatProcessorNative(*this); registerOutputFormatProcessorNative(*this); registerInputFormatProcessorRowBinary(*this); @@ -348,8 +420,11 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorJSONCompactEachRow(*this); registerInputFormatProcessorProtobuf(*this); registerOutputFormatProcessorProtobuf(*this); + registerInputFormatProcessorTemplate(*this); + registerOutputFormatProcessorTemplate(*this); + registerInputFormatProcessorMsgPack(*this); + registerOutputFormatProcessorMsgPack(*this); #if !defined(ARCADIA_BUILD) - registerInputFormatProcessorCapnProto(*this); registerInputFormatProcessorORC(*this); registerOutputFormatProcessorORC(*this); registerInputFormatProcessorParquet(*this); @@ -359,18 +434,6 @@ FormatFactory::FormatFactory() registerInputFormatProcessorAvro(*this); registerOutputFormatProcessorAvro(*this); #endif - registerInputFormatProcessorTemplate(*this); - registerOutputFormatProcessorTemplate(*this); - registerInputFormatProcessorRegexp(*this); - registerInputFormatProcessorMsgPack(*this); - registerOutputFormatProcessorMsgPack(*this); - registerInputFormatProcessorJSONAsString(*this); - - registerFileSegmentationEngineTabSeparated(*this); - registerFileSegmentationEngineCSV(*this); - registerFileSegmentationEngineJSONEachRow(*this); - registerFileSegmentationEngineRegexp(*this); - registerFileSegmentationEngineJSONAsString(*this); registerOutputFormatNull(*this); @@ -380,12 +443,19 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorVertical(*this); registerOutputFormatProcessorJSON(*this); registerOutputFormatProcessorJSONCompact(*this); + registerOutputFormatProcessorJSONEachRowWithProgress(*this); registerOutputFormatProcessorXML(*this); registerOutputFormatProcessorODBCDriver2(*this); registerOutputFormatProcessorNull(*this); registerOutputFormatProcessorMySQLWire(*this); registerOutputFormatProcessorMarkdown(*this); registerOutputFormatProcessorPostgreSQLWire(*this); + + registerInputFormatProcessorRegexp(*this); + registerInputFormatProcessorJSONAsString(*this); +#if !defined(ARCADIA_BUILD) + registerInputFormatProcessorCapnProto(*this); +#endif } FormatFactory & FormatFactory::instance() diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index f0d2b7826a0..54bff1eefc6 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -141,73 +141,4 @@ private: const Creators & getCreators(const String & name) const; }; -/// Formats for both input/output. 
- -void registerInputFormatNative(FormatFactory & factory); -void registerOutputFormatNative(FormatFactory & factory); - -void registerInputFormatProcessorNative(FormatFactory & factory); -void registerOutputFormatProcessorNative(FormatFactory & factory); -void registerInputFormatProcessorRowBinary(FormatFactory & factory); -void registerOutputFormatProcessorRowBinary(FormatFactory & factory); -void registerInputFormatProcessorTabSeparated(FormatFactory & factory); -void registerOutputFormatProcessorTabSeparated(FormatFactory & factory); -void registerInputFormatProcessorValues(FormatFactory & factory); -void registerOutputFormatProcessorValues(FormatFactory & factory); -void registerInputFormatProcessorCSV(FormatFactory & factory); -void registerOutputFormatProcessorCSV(FormatFactory & factory); -void registerInputFormatProcessorTSKV(FormatFactory & factory); -void registerOutputFormatProcessorTSKV(FormatFactory & factory); -void registerInputFormatProcessorJSONEachRow(FormatFactory & factory); -void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory); -void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory); -void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory); -void registerInputFormatProcessorParquet(FormatFactory & factory); -void registerOutputFormatProcessorParquet(FormatFactory & factory); -void registerInputFormatProcessorArrow(FormatFactory & factory); -void registerOutputFormatProcessorArrow(FormatFactory & factory); -void registerInputFormatProcessorProtobuf(FormatFactory & factory); -void registerOutputFormatProcessorProtobuf(FormatFactory & factory); -void registerInputFormatProcessorAvro(FormatFactory & factory); -void registerOutputFormatProcessorAvro(FormatFactory & factory); -void registerInputFormatProcessorTemplate(FormatFactory & factory); -void registerOutputFormatProcessorTemplate(FormatFactory & factory); -void registerInputFormatProcessorMsgPack(FormatFactory & factory); -void registerOutputFormatProcessorMsgPack(FormatFactory & factory); -void registerInputFormatProcessorORC(FormatFactory & factory); -void registerOutputFormatProcessorORC(FormatFactory & factory); - - -/// File Segmentation Engines for parallel reading - -void registerFileSegmentationEngineTabSeparated(FormatFactory & factory); -void registerFileSegmentationEngineCSV(FormatFactory & factory); -void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); -void registerFileSegmentationEngineRegexp(FormatFactory & factory); -void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); - -/// Output only (presentational) formats. 
- -void registerOutputFormatNull(FormatFactory & factory); - -void registerOutputFormatProcessorPretty(FormatFactory & factory); -void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory); -void registerOutputFormatProcessorPrettySpace(FormatFactory & factory); -void registerOutputFormatProcessorPrettyASCII(FormatFactory & factory); -void registerOutputFormatProcessorVertical(FormatFactory & factory); -void registerOutputFormatProcessorJSON(FormatFactory & factory); -void registerOutputFormatProcessorJSONCompact(FormatFactory & factory); -void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory); -void registerOutputFormatProcessorXML(FormatFactory & factory); -void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); -void registerOutputFormatProcessorNull(FormatFactory & factory); -void registerOutputFormatProcessorMySQLWire(FormatFactory & factory); -void registerOutputFormatProcessorMarkdown(FormatFactory & factory); -void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory); - -/// Input only formats. -void registerInputFormatProcessorCapnProto(FormatFactory & factory); -void registerInputFormatProcessorRegexp(FormatFactory & factory); -void registerInputFormatProcessorJSONAsString(FormatFactory & factory); - } From 86fa185bb6fbf8e1e6bc6044a7f4e523477e84db Mon Sep 17 00:00:00 2001 From: hcz Date: Tue, 1 Sep 2020 17:06:28 +0800 Subject: [PATCH 023/298] Add JSONStrings formats --- src/Formats/FormatFactory.cpp | 6 + .../Impl/JSONStringsEachRowRowInputFormat.cpp | 245 ++++++++++++++++++ .../Impl/JSONStringsEachRowRowInputFormat.h | 54 ++++ .../JSONStringsEachRowRowOutputFormat.cpp | 117 +++++++++ .../Impl/JSONStringsEachRowRowOutputFormat.h | 45 ++++ .../Impl/JSONStringsRowOutputFormat.cpp | 93 +++++++ .../Formats/Impl/JSONStringsRowOutputFormat.h | 43 +++ src/Processors/ya.make | 3 + 8 files changed, 606 insertions(+) create mode 100644 src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.h create mode 100644 src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.h create mode 100644 src/Processors/Formats/Impl/JSONStringsRowOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONStringsRowOutputFormat.h diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 871098e00c0..cb378fbea96 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -352,6 +352,8 @@ void registerInputFormatProcessorJSONEachRow(FormatFactory & factory); void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory); void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory); void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory); +void registerInputFormatProcessorJSONStringsEachRow(FormatFactory & factory); +void registerOutputFormatProcessorJSONStringsEachRow(FormatFactory & factory); void registerInputFormatProcessorProtobuf(FormatFactory & factory); void registerOutputFormatProcessorProtobuf(FormatFactory & factory); void registerInputFormatProcessorTemplate(FormatFactory & factory); @@ -378,6 +380,7 @@ void registerOutputFormatProcessorVertical(FormatFactory & factory); void registerOutputFormatProcessorJSON(FormatFactory & factory); void registerOutputFormatProcessorJSONCompact(FormatFactory & factory); void 
registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory); +void registerOutputFormatProcessorJSONStrings(FormatFactory & factory); void registerOutputFormatProcessorXML(FormatFactory & factory); void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); void registerOutputFormatProcessorNull(FormatFactory & factory); @@ -418,6 +421,8 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorJSONEachRow(*this); registerInputFormatProcessorJSONCompactEachRow(*this); registerOutputFormatProcessorJSONCompactEachRow(*this); + registerInputFormatProcessorJSONStringsEachRow(*this); + registerOutputFormatProcessorJSONStringsEachRow(*this); registerInputFormatProcessorProtobuf(*this); registerOutputFormatProcessorProtobuf(*this); registerInputFormatProcessorTemplate(*this); @@ -444,6 +449,7 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorJSON(*this); registerOutputFormatProcessorJSONCompact(*this); registerOutputFormatProcessorJSONEachRowWithProgress(*this); + registerOutputFormatProcessorJSONStrings(*this); registerOutputFormatProcessorXML(*this); registerOutputFormatProcessorODBCDriver2(*this); registerOutputFormatProcessorNull(*this); diff --git a/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.cpp new file mode 100644 index 00000000000..fff44a204fb --- /dev/null +++ b/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.cpp @@ -0,0 +1,245 @@ +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int CANNOT_READ_ALL_DATA; +} + + +JSONStringsEachRowRowInputFormat::JSONStringsEachRowRowInputFormat(ReadBuffer & in_, + const Block & header_, + Params params_, + const FormatSettings & format_settings_, + bool with_names_) + : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), with_names(with_names_) +{ + const auto & sample = getPort().getHeader(); + size_t num_columns = sample.columns(); + + data_types.resize(num_columns); + column_indexes_by_names.reserve(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + const auto & column_info = sample.getByPosition(i); + + data_types[i] = column_info.type; + column_indexes_by_names.emplace(column_info.name, i); + } +} + +void JSONStringsEachRowRowInputFormat::resetParser() +{ + IRowInputFormat::resetParser(); + column_indexes_for_input_fields.clear(); + not_seen_columns.clear(); +} + +void JSONStringsEachRowRowInputFormat::readPrefix() +{ + /// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it. 
+ skipBOMIfExists(in); + + if (with_names) + { + size_t num_columns = getPort().getHeader().columns(); + read_columns.assign(num_columns, false); + + assertChar('[', in); + do + { + skipWhitespaceIfAny(in); + String column_name; + readJSONString(column_name, in); + addInputColumn(column_name); + skipWhitespaceIfAny(in); + } + while (checkChar(',', in)); + assertChar(']', in); + skipEndOfLine(); + + /// Type checking + assertChar('[', in); + for (size_t i = 0; i < column_indexes_for_input_fields.size(); ++i) + { + skipWhitespaceIfAny(in); + String data_type; + readJSONString(data_type, in); + + if (column_indexes_for_input_fields[i] && + data_types[*column_indexes_for_input_fields[i]]->getName() != data_type) + { + throw Exception( + "Type of '" + getPort().getHeader().getByPosition(*column_indexes_for_input_fields[i]).name + + "' must be " + data_types[*column_indexes_for_input_fields[i]]->getName() + + ", not " + data_type, + ErrorCodes::INCORRECT_DATA + ); + } + + if (i != column_indexes_for_input_fields.size() - 1) + assertChar(',', in); + skipWhitespaceIfAny(in); + } + assertChar(']', in); + } + else + { + size_t num_columns = getPort().getHeader().columns(); + read_columns.assign(num_columns, true); + column_indexes_for_input_fields.resize(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + column_indexes_for_input_fields[i] = i; + } + } + + for (size_t i = 0; i < read_columns.size(); ++i) + { + if (!read_columns[i]) + { + not_seen_columns.emplace_back(i); + } + } +} + +void JSONStringsEachRowRowInputFormat::addInputColumn(const String & column_name) +{ + names_of_columns.emplace_back(column_name); + + const auto column_it = column_indexes_by_names.find(column_name); + if (column_it == column_indexes_by_names.end()) + { + if (format_settings.skip_unknown_fields) + { + column_indexes_for_input_fields.push_back(std::nullopt); + return; + } + + throw Exception( + "Unknown field found in JSONStringsEachRow header: '" + column_name + "' " + + "at position " + std::to_string(column_indexes_for_input_fields.size()) + + "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", + ErrorCodes::INCORRECT_DATA + ); + } + + const auto column_index = column_it->second; + + if (read_columns[column_index]) + throw Exception("Duplicate field found while parsing JSONStringsEachRow header: " + column_name, ErrorCodes::INCORRECT_DATA); + + read_columns[column_index] = true; + column_indexes_for_input_fields.emplace_back(column_index); +} + +bool JSONStringsEachRowRowInputFormat::readRow(DB::MutableColumns &columns, DB::RowReadExtension &ext) +{ + skipEndOfLine(); + + if (in.eof()) + return false; + + size_t num_columns = columns.size(); + + read_columns.assign(num_columns, false); + + assertChar('[', in); + for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) + { + const auto & table_column = column_indexes_for_input_fields[file_column]; + if (table_column) + { + readField(*table_column, columns); + } + else + { + skipJSONField(in, StringRef(names_of_columns[file_column])); + } + + skipWhitespaceIfAny(in); + if (in.eof()) + throw Exception("Unexpected end of stream while parsing JSONStringsEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); + if (file_column + 1 != column_indexes_for_input_fields.size()) + { + assertChar(',', in); + skipWhitespaceIfAny(in); + } + } + assertChar(']', in); + + for (const auto & name : not_seen_columns) + columns[name]->insertDefault(); + + ext.read_columns = read_columns; + return 
true;
+}
+
+void JSONStringsEachRowRowInputFormat::skipEndOfLine()
+{
+    skipWhitespaceIfAny(in);
+    if (!in.eof() && (*in.position() == ',' || *in.position() == ';'))
+        ++in.position();
+
+    skipWhitespaceIfAny(in);
+}
+
+void JSONStringsEachRowRowInputFormat::readField(size_t index, MutableColumns & columns)
+{
+    try
+    {
+        read_columns[index] = true;
+        const auto & type = data_types[index];
+
+        String str;
+        readJSONString(str, in);
+
+        ReadBufferFromString buf(str);
+
+        type->deserializeAsWholeText(*columns[index], buf, format_settings);
+    }
+    catch (Exception & e)
+    {
+        e.addMessage("(while reading the value of key " + getPort().getHeader().getByPosition(index).name + ")");
+        throw;
+    }
+}
+
+void JSONStringsEachRowRowInputFormat::syncAfterError()
+{
+    skipToUnescapedNextLineOrEOF(in);
+}
+
+void registerInputFormatProcessorJSONStringsEachRow(FormatFactory & factory)
+{
+    factory.registerInputFormatProcessor("JSONStringsEachRow", [](
+        ReadBuffer & buf,
+        const Block & sample,
+        IRowInputFormat::Params params,
+        const FormatSettings & settings)
+    {
+        return std::make_shared<JSONStringsEachRowRowInputFormat>(buf, sample, std::move(params), settings, false);
+    });
+
+    factory.registerInputFormatProcessor("JSONStringsEachRowWithNamesAndTypes", [](
+        ReadBuffer & buf,
+        const Block & sample,
+        IRowInputFormat::Params params,
+        const FormatSettings & settings)
+    {
+        return std::make_shared<JSONStringsEachRowRowInputFormat>(buf, sample, std::move(params), settings, true);
+    });
+}
+
+}
diff --git a/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.h
new file mode 100644
index 00000000000..ec0a0f7bad9
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+class ReadBuffer;
+
+/** A stream for reading data in JSONStringsEachRow and JSONStringsEachRowWithNamesAndTypes formats.
+  */
+class JSONStringsEachRowRowInputFormat : public IRowInputFormat
+{
+public:
+    JSONStringsEachRowRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_, bool with_names_);
+
+    String getName() const override { return "JSONStringsEachRowRowInputFormat"; }
+
+    void readPrefix() override;
+    bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
+    bool allowSyncAfterError() const override { return true; }
+    void syncAfterError() override;
+    void resetParser() override;
+
+private:
+    void addInputColumn(const String & column_name);
+    void skipEndOfLine();
+    void readField(size_t index, MutableColumns & columns);
+
+    const FormatSettings format_settings;
+
+    using IndexesMap = std::unordered_map<String, size_t>;
+    IndexesMap column_indexes_by_names;
+
+    using OptionalIndexes = std::vector<std::optional<size_t>>;
+    OptionalIndexes column_indexes_for_input_fields;
+
+    DataTypes data_types;
+    std::vector<UInt8> read_columns;
+    std::vector<size_t> not_seen_columns;
+
+    /// Column names from the parsed header; kept so that exceptions about skipped unknown fields can name them.
+    std::vector<String> names_of_columns;
+
+    bool with_names;
+};
+
+}
diff --git a/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp
new file mode 100644
index 00000000000..75007ea236e
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp
@@ -0,0 +1,117 @@
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+
+JSONStringsEachRowRowOutputFormat::JSONStringsEachRowRowOutputFormat(WriteBuffer & out_,
+        const Block & header_,
+        FormatFactory::WriteCallback callback,
+        const FormatSettings & settings_,
+        bool with_names_)
+        : IRowOutputFormat(header_, out_, callback), settings(settings_), with_names(with_names_)
+{
+    const auto & sample = getPort(PortKind::Main).getHeader();
+    NamesAndTypesList columns(sample.getNamesAndTypesList());
+    fields.assign(columns.begin(), columns.end());
+}
+
+
+void JSONStringsEachRowRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num)
+{
+    WriteBufferFromOwnString buf;
+
+    type.serializeAsText(column, row_num, buf, settings);
+    writeJSONString(buf.str(), out, settings);
+}
+
+
+void JSONStringsEachRowRowOutputFormat::writeFieldDelimiter()
+{
+    writeCString(", ", out);
+}
+
+
+void JSONStringsEachRowRowOutputFormat::writeRowStartDelimiter()
+{
+    writeChar('[', out);
+}
+
+
+void JSONStringsEachRowRowOutputFormat::writeRowEndDelimiter()
+{
+    writeCString("]\n", out);
+}
+
+void JSONStringsEachRowRowOutputFormat::writeTotals(const Columns & columns, size_t row_num)
+{
+    writeChar('\n', out);
+    size_t num_columns = columns.size();
+    writeChar('[', out);
+    for (size_t i = 0; i < num_columns; ++i)
+    {
+        if (i != 0)
+            JSONStringsEachRowRowOutputFormat::writeFieldDelimiter();
+
+        JSONStringsEachRowRowOutputFormat::writeField(*columns[i], *types[i], row_num);
+    }
+    writeCString("]\n", out);
+}
+
+void JSONStringsEachRowRowOutputFormat::writePrefix()
+{
+    if (with_names)
+    {
+        writeChar('[', out);
+        for (size_t i = 0; i < fields.size(); ++i)
+        {
+            writeChar('\"', out);
+            writeString(fields[i].name, out);
+            writeChar('\"', out);
+            if (i != fields.size() - 1)
+                writeCString(", ", out);
+        }
+        writeCString("]\n[", out);
+        for (size_t i = 0; i < fields.size(); ++i)
+        {
+            writeJSONString(fields[i].type->getName(), out, settings);
+            if (i != fields.size() - 1)
+                writeCString(", ", out);
+        }
+        writeCString("]\n", out);
+    }
+}
+
+void JSONStringsEachRowRowOutputFormat::consumeTotals(DB::Chunk chunk)
+{
+    if (with_names)
+        IRowOutputFormat::consumeTotals(std::move(chunk));
+}
+
+void registerOutputFormatProcessorJSONStringsEachRow(FormatFactory & factory)
+{
+    factory.registerOutputFormatProcessor("JSONStringsEachRow", [](
+        WriteBuffer & buf,
+        const Block & sample,
+        FormatFactory::WriteCallback callback,
+        const FormatSettings & format_settings)
+    {
+        return std::make_shared<JSONStringsEachRowRowOutputFormat>(buf, sample, callback, format_settings, false);
+    });
+
+    factory.registerOutputFormatProcessor("JSONStringsEachRowWithNamesAndTypes", [](
+        WriteBuffer & buf,
+        const Block & sample,
+        FormatFactory::WriteCallback callback,
+        const FormatSettings & format_settings)
+    {
+        return std::make_shared<JSONStringsEachRowRowOutputFormat>(buf, sample, callback, format_settings, true);
+    });
+}
+
+
+}
diff --git a/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.h
new file mode 100644
index 00000000000..1d43a333da1
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+/** A stream for outputting data in the JSONStringsEachRow format: one JSON array of string values per line.
+  * Does not validate UTF-8.
+  */
+class JSONStringsEachRowRowOutputFormat : public IRowOutputFormat
+{
+public:
+    JSONStringsEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_, bool with_names_);
+
+    String getName() const override { return "JSONStringsEachRowRowOutputFormat"; }
+
+    void writePrefix() override;
+
+    void writeBeforeTotals() override {}
+    void writeTotals(const Columns & columns, size_t row_num) override;
+    void writeAfterTotals() override {}
+
+    void writeField(const IColumn & column, const IDataType & type, size_t row_num) override;
+    void writeFieldDelimiter() override;
+    void writeRowStartDelimiter() override;
+    void writeRowEndDelimiter() override;
+
+protected:
+    void consumeTotals(Chunk) override;
+    /// No extremes.
+    void consumeExtremes(Chunk) override {}
+
+private:
+    FormatSettings settings;
+
+    NamesAndTypes fields;
+
+    bool with_names;
+};
+}
diff --git a/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.cpp
new file mode 100644
index 00000000000..6ccb315f73f
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.cpp
@@ -0,0 +1,93 @@
+#include
+#include
+
+#include
+
+
+namespace DB
+{
+
+JSONStringsRowOutputFormat::JSONStringsRowOutputFormat(
+    WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_)
+    : JSONRowOutputFormat(out_, header, callback, settings_)
+{
+}
+
+
+void JSONStringsRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num)
+{
+    WriteBufferFromOwnString buf;
+
+    type.serializeAsText(column, row_num, buf, settings);
+    writeJSONString(buf.str(), *ostr, settings);
+    ++field_number;
+}
+
+
+void JSONStringsRowOutputFormat::writeFieldDelimiter()
+{
+    writeCString(", ", *ostr);
+}
+
+void JSONStringsRowOutputFormat::writeTotalsFieldDelimiter()
+{
+    writeCString(",", *ostr);
+}
+
+
+void JSONStringsRowOutputFormat::writeRowStartDelimiter()
+{
+    writeCString("\t\t[", *ostr);
+}
+
+
+void JSONStringsRowOutputFormat::writeRowEndDelimiter()
+{
+    writeChar(']', *ostr);
+    field_number = 0;
+    ++row_count;
+}
+
+void JSONStringsRowOutputFormat::writeBeforeTotals()
+{
+    writeCString(",\n", *ostr);
+    writeChar('\n', *ostr);
+    writeCString("\t\"totals\": [", *ostr);
+}
+
+void JSONStringsRowOutputFormat::writeAfterTotals()
+{
+    writeChar(']', *ostr);
+}
+
+void JSONStringsRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num)
+{
+    writeCString("\t\t\"", *ostr);
+    writeCString(title, *ostr);
+    writeCString("\": [", *ostr);
+
+    size_t extremes_columns = columns.size();
+    for (size_t i = 0; i < extremes_columns; ++i)
+    {
+        if (i != 0)
+            writeTotalsFieldDelimiter();
+
+        writeField(*columns[i], *types[i], row_num);
+    }
+
+    writeChar(']', *ostr);
+}
+
+void registerOutputFormatProcessorJSONStrings(FormatFactory & factory)
+{
+    factory.registerOutputFormatProcessor("JSONStrings", [](
+        WriteBuffer & buf,
+        const Block & sample,
+        FormatFactory::WriteCallback callback,
+        const FormatSettings & format_settings)
+    {
+        return std::make_shared<JSONStringsRowOutputFormat>(buf, sample, callback, format_settings);
+    });
+}
+
+}
diff --git a/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.h b/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.h
new
file mode 100644 index 00000000000..b221bc9ee36 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +struct FormatSettings; + +/** The stream for outputting data in the JSONStrings format. + */ +class JSONStringsRowOutputFormat : public JSONRowOutputFormat +{ +public: + JSONStringsRowOutputFormat(WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_); + + String getName() const override { return "JSONStringsRowOutputFormat"; } + + void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; + void writeFieldDelimiter() override; + void writeRowStartDelimiter() override; + void writeRowEndDelimiter() override; + + void writeBeforeTotals() override; + void writeAfterTotals() override; + +protected: + void writeExtremesElement(const char * title, const Columns & columns, size_t row_num) override; + + void writeTotalsField(const IColumn & column, const IDataType & type, size_t row_num) override + { + return writeField(column, type, row_num); + } + + void writeTotalsFieldDelimiter() override; + +}; + +} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 30de38fedbd..27893674859 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -31,6 +31,9 @@ SRCS( Formats/Impl/JSONEachRowRowOutputFormat.cpp Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp Formats/Impl/JSONRowOutputFormat.cpp + Formats/Impl/JSONStringsEachRowRowInputFormat.cpp + Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp + Formats/Impl/JSONStringsRowOutputFormat.cpp Formats/Impl/MarkdownRowOutputFormat.cpp Formats/Impl/MsgPackRowInputFormat.cpp Formats/Impl/MsgPackRowOutputFormat.cpp From c25a99aaf58108651149930db5ef86e1313120c4 Mon Sep 17 00:00:00 2001 From: hcz Date: Tue, 1 Sep 2020 17:47:37 +0800 Subject: [PATCH 024/298] Add tests --- .../01446_JSONStringsEachRow.reference | 47 ++++++++++++++ .../0_stateless/01446_JSONStringsEachRow.sql | 63 +++++++++++++++++++ .../0_stateless/01447_JSONStrings.reference | 43 +++++++++++++ .../queries/0_stateless/01447_JSONStrings.sql | 8 +++ 4 files changed, 161 insertions(+) create mode 100644 tests/queries/0_stateless/01446_JSONStringsEachRow.reference create mode 100644 tests/queries/0_stateless/01446_JSONStringsEachRow.sql create mode 100644 tests/queries/0_stateless/01447_JSONStrings.reference create mode 100644 tests/queries/0_stateless/01447_JSONStrings.sql diff --git a/tests/queries/0_stateless/01446_JSONStringsEachRow.reference b/tests/queries/0_stateless/01446_JSONStringsEachRow.reference new file mode 100644 index 00000000000..0b05f050b29 --- /dev/null +++ b/tests/queries/0_stateless/01446_JSONStringsEachRow.reference @@ -0,0 +1,47 @@ +1 +["1", "a"] +["2", "b"] +["3", "c"] +2 +["a", "1"] +["b", "1"] +["c", "1"] +3 +["value", "name"] +["UInt8", "String"] +["1", "a"] +["2", "b"] +["3", "c"] +4 +["name", "c"] +["String", "UInt64"] +["a", "1"] +["b", "1"] +["c", "1"] + +["", "3"] +5 +["first", "1", "2", "0"] +["second", "2", "0", "6"] +6 +["first", "1", "2", "0"] +["second", "2", "0", "6"] +7 +["16", "[15,16,17]", "['first','second','third']"] +8 +["first", "1", "2", "0"] +["second", "2", "0", "6"] +9 +["first", "1", "2", "0"] +["second", "2", "0", "6"] +10 +["first", "1", "16", "8"] +["second", "2", "32", "8"] +11 +["v1", "v2", "v3", "v4"] +["String", "UInt8", "UInt16", "UInt8"] +["", "2", "3", "1"] +12 +["v1", "n.id", "n.name"] +["UInt8", 
"Array(UInt8)", "Array(String)"] +["16", "[15,16,17]", "['first','second','third']"] diff --git a/tests/queries/0_stateless/01446_JSONStringsEachRow.sql b/tests/queries/0_stateless/01446_JSONStringsEachRow.sql new file mode 100644 index 00000000000..f461b217fe4 --- /dev/null +++ b/tests/queries/0_stateless/01446_JSONStringsEachRow.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table_2; +SELECT 1; +/* Check JSONStringsEachRow Output */ +CREATE TABLE test_table (value UInt8, name String) ENGINE = MergeTree() ORDER BY value; +INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c'); +SELECT * FROM test_table FORMAT JSONStringsEachRow; +SELECT 2; +/* Check Totals */ +SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONStringsEachRow; +SELECT 3; +/* Check JSONStringsEachRowWithNamesAndTypes Output */ +SELECT * FROM test_table FORMAT JSONStringsEachRowWithNamesAndTypes; +SELECT 4; +/* Check Totals */ +SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONStringsEachRowWithNamesAndTypes; +DROP TABLE IF EXISTS test_table; +SELECT 5; +/* Check JSONStringsEachRow Input */ +CREATE TABLE test_table (v1 String, v2 UInt8, v3 DEFAULT v2 * 16, v4 UInt8 DEFAULT 8) ENGINE = MergeTree() ORDER BY v2; +INSERT INTO test_table FORMAT JSONStringsEachRow ["first", "1", "2", "NULL"] ["second", "2", "null", "6"]; +SELECT * FROM test_table FORMAT JSONStringsEachRow; +TRUNCATE TABLE test_table; +SELECT 6; +/* Check input_format_null_as_default = 1 */ +SET input_format_null_as_default = 1; +INSERT INTO test_table FORMAT JSONStringsEachRow ["first", "1", "2", "ᴺᵁᴸᴸ"] ["second", "2", "null", "6"]; +SELECT * FROM test_table FORMAT JSONStringsEachRow; +TRUNCATE TABLE test_table; +SELECT 7; +/* Check Nested */ +CREATE TABLE test_table_2 (v1 UInt8, n Nested(id UInt8, name String)) ENGINE = MergeTree() ORDER BY v1; +INSERT INTO test_table_2 FORMAT JSONStringsEachRow ["16", "[15, 16, 17]", "['first', 'second', 'third']"]; +SELECT * FROM test_table_2 FORMAT JSONStringsEachRow; +TRUNCATE TABLE test_table_2; +SELECT 8; +/* Check JSONStringsEachRowWithNamesAndTypes Output */ +SET input_format_null_as_default = 0; +INSERT INTO test_table FORMAT JSONStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"]["second", "2", "null", "6"]; +SELECT * FROM test_table FORMAT JSONStringsEachRow; +TRUNCATE TABLE test_table; +SELECT 9; +/* Check input_format_null_as_default = 1 */ +SET input_format_null_as_default = 1; +INSERT INTO test_table FORMAT JSONStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"] ["second", "2", "null", "6"]; +SELECT * FROM test_table FORMAT JSONStringsEachRow; +SELECT 10; +/* Check Header */ +TRUNCATE TABLE test_table; +SET input_format_skip_unknown_fields = 1; +INSERT INTO test_table FORMAT JSONStringsEachRowWithNamesAndTypes ["v1", "v2", "invalid_column"]["String", "UInt8", "UInt8"]["first", "1", "32"]["second", "2", "64"]; +SELECT * FROM test_table FORMAT JSONStringsEachRow; +SELECT 11; +TRUNCATE TABLE test_table; +INSERT INTO test_table FORMAT JSONStringsEachRowWithNamesAndTypes ["v4", "v2", "v3"]["UInt8", "UInt8", "UInt16"]["1", "2", "3"] +SELECT * FROM test_table FORMAT JSONStringsEachRowWithNamesAndTypes; +SELECT 12; +/* Check Nested */ +INSERT INTO test_table_2 FORMAT JSONStringsEachRowWithNamesAndTypes ["v1", "n.id", "n.name"]["UInt8", "Array(UInt8)", 
"Array(String)"]["16", "[15, 16, 17]", "['first', 'second', 'third']"]; +SELECT * FROM test_table_2 FORMAT JSONStringsEachRowWithNamesAndTypes; + +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table_2; diff --git a/tests/queries/0_stateless/01447_JSONStrings.reference b/tests/queries/0_stateless/01447_JSONStrings.reference new file mode 100644 index 00000000000..58af593dc77 --- /dev/null +++ b/tests/queries/0_stateless/01447_JSONStrings.reference @@ -0,0 +1,43 @@ +{ + "meta": + [ + { + "name": "1", + "type": "UInt8" + }, + { + "name": "'a'", + "type": "String" + }, + { + "name": "[1, 2, 3]", + "type": "Array(UInt8)" + }, + { + "name": "tuple(1, 'a')", + "type": "Tuple(UInt8, String)" + }, + { + "name": "NULL", + "type": "Nullable(Nothing)" + }, + { + "name": "nan", + "type": "Float64" + } + ], + + "data": + [ + ["1", "a", "[1,2,3]", "(1,'a')", "ᴺᵁᴸᴸ", "nan"] + ], + + "rows": 1, + + "statistics": + { + "elapsed": 0.00068988, + "rows_read": 1, + "bytes_read": 1 + } +} diff --git a/tests/queries/0_stateless/01447_JSONStrings.sql b/tests/queries/0_stateless/01447_JSONStrings.sql new file mode 100644 index 00000000000..7d89f0f5087 --- /dev/null +++ b/tests/queries/0_stateless/01447_JSONStrings.sql @@ -0,0 +1,8 @@ +SELECT + 1, + 'a', + [1, 2, 3], + (1, 'a'), + null, + nan +FORMAT JSONStrings; From 6d37c9d2a68ca3f6ae39c9bc0bb99424d7fc236e Mon Sep 17 00:00:00 2001 From: hcz Date: Tue, 1 Sep 2020 17:48:03 +0800 Subject: [PATCH 025/298] Update docs about formats --- docs/en/interfaces/formats.md | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 9d3965b4a9c..9c7c2dda8dc 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -26,7 +26,10 @@ The supported formats are: | [VerticalRaw](#verticalraw) | ✗ | ✔ | | [JSON](#json) | ✗ | ✔ | | [JSONCompact](#jsoncompact) | ✗ | ✔ | +| [JSONStrings](#jsonstrings) | ✗ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | | [TSKV](#tskv) | ✔ | ✔ | | [Pretty](#pretty) | ✗ | ✔ | | [PrettyCompact](#prettycompact) | ✗ | ✔ | @@ -470,7 +473,7 @@ See also the [JSONEachRow](#jsoneachrow) format. ## JSONCompact {#jsoncompact} -Differs from JSON only in that data rows are output in arrays, not in objects. +Differs from JSON only in that data rows are output in arrays of any element type, not in objects. Example: @@ -514,17 +517,26 @@ Example: This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). See also the `JSONEachRow` format. -## JSONEachRow {#jsoneachrow} +## JSONStrings {#jsonstrings} -When using this format, ClickHouse outputs rows as separated, newline-delimited JSON objects, but the data as a whole is not valid JSON. +Differs from JSON and JSONCompact only in that data rows are output in arrays of strings. + +This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). +See also the `JSONEachRow` format. + +## JSONEachRow {#jsoneachrow} +## JSONCompactEachRow {#jsoncompacteachrow} +## JSONStringsEachRow {#jsonstringseachrow} + +When using these formats, ClickHouse outputs rows as separated, newline-delimited JSON values, but the data as a whole is not valid JSON. 
``` json
-{"SearchPhrase":"curtain designs","count()":"1064"}
-{"SearchPhrase":"baku","count()":"1000"}
-{"SearchPhrase":"","count()":"8267016"}
+{"some_int":42,"some_str":"hello","some_tuple":[1,"a"]} // JSONEachRow
+[42,"hello",[1,"a"]] // JSONCompactEachRow
+["42","hello","(1,'a')"] // JSONStringsEachRow
```

-When inserting the data, you should provide a separate JSON object for each row.
+When inserting the data, you should provide a separate JSON value for each row.

 ### Inserting Data {#inserting-data}

From babd3beec09054d1dc4b1b8a35cf30da013f05af Mon Sep 17 00:00:00 2001
From: hcz
Date: Tue, 1 Sep 2020 17:48:19 +0800
Subject: [PATCH 026/298] Fix nullable data parsing

---
 src/DataTypes/DataTypeNullable.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp
index 847047850fd..3318196b951 100644
--- a/src/DataTypes/DataTypeNullable.cpp
+++ b/src/DataTypes/DataTypeNullable.cpp
@@ -308,7 +308,10 @@ ReturnType DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer
     const DataTypePtr & nested_data_type)
 {
     return safeDeserialize(column, *nested_data_type,
-        [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); },
+        [&istr]
+        {
+            return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr);
+        },
         [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); });
 }

@@ -316,7 +319,11 @@ void DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
 {
     safeDeserialize(column, *nested_data_type,
-        [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); },
+        [&istr]
+        {
+            return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr)
+                || checkStringByFirstCharacterAndAssertTheRest("ᴺᵁᴸᴸ", istr);
+        },
         [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); });
 }

From f0dc5a30853ff4b40d0097f07068fbb0f49eb714 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 1 Sep 2020 13:49:53 +0300
Subject: [PATCH 027/298] First working test

---
 src/DataStreams/TTLBlockInputStream.cpp       | 28 +++++++++++++
 src/DataStreams/TTLBlockInputStream.h         |  2 +
 src/Interpreters/MutationsInterpreter.cpp     |  8 ++++
 src/Storages/MergeTree/MergeTreeData.cpp      | 10 +++++
 .../MergeTree/MergeTreeDataMergerMutator.cpp  | 41 +++++++++++++------
 .../MergeTree/MergeTreeDataWriter.cpp         |  7 +++-
 .../MergeTree/registerStorageMergeTree.cpp    |  3 ++
 src/Storages/StorageInMemoryMetadata.cpp      |  5 ++-
 src/Storages/TTLDescription.cpp               | 11 +++++
 .../01465_ttl_recompression.reference         | 10 +++++
 .../0_stateless/01465_ttl_recompression.sql   | 32 +++++++++++++++
 11 files changed, 142 insertions(+), 15 deletions(-)
 create mode 100644 tests/queries/0_stateless/01465_ttl_recompression.reference
 create mode 100644 tests/queries/0_stateless/01465_ttl_recompression.sql

diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index 6d80e784c03..e1586286678 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -134,6 +134,7 @@ Block TTLBlockInputStream::readImpl()
     removeValuesWithExpiredColumnTTL(block);

     updateMovesTTL(block);
+    updateRecompressionTTL(block);

     return block;
 }
@@ -395,6 +396,33 @@ void
TTLBlockInputStream::updateMovesTTL(Block & block) block.erase(column); } + +void TTLBlockInputStream::updateRecompressionTTL(Block & block) +{ + std::vector columns_to_remove; + for (const auto & ttl_entry : metadata_snapshot->getRecompressionTTLs()) + { + auto & new_ttl_info = new_ttl_infos.recompression_ttl[ttl_entry.result_column]; + + if (!block.has(ttl_entry.result_column)) + { + columns_to_remove.push_back(ttl_entry.result_column); + ttl_entry.expression->execute(block); + } + + const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get(); + + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); + new_ttl_info.update(cur_ttl); + } + } + + for (const String & column : columns_to_remove) + block.erase(column); +} + UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind) { if (const ColumnUInt16 * column_date = typeid_cast(column)) diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h index 3f37f35426c..18670021ec9 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/DataStreams/TTLBlockInputStream.h @@ -78,6 +78,8 @@ private: /// Updates TTL for moves void updateMovesTTL(Block & block); + void updateRecompressionTTL(Block & block); + UInt32 getTimestampByIndex(const IColumn * column, size_t ind); bool isTTLExpired(time_t ttl) const; }; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 9d35b339d94..3a397cb9b5a 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -533,8 +533,16 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) /// Special step to recalculate affected indices and TTL expressions. stages.emplace_back(context); for (const auto & column : unchanged_columns) + { + std::cerr << "ADDING UNCHANGED COLUMN TO STAGE:" << column << std::endl; stages.back().column_to_updated.emplace( column, std::make_shared(column)); + std::cerr << "OUTPUT COLUMNS:" << stages.back().output_columns.size() << std::endl; + for (const auto & col : stages.back().output_columns) + { + std::cerr << "OUTPUT COLUMN:" << col << std::endl; + } + } } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b721cf4afbf..536d72d327a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3064,8 +3064,10 @@ CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_c auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); + std::cerr << "RECOMPRESSION ENTRIES SIZE:" << recompression_ttl_entries.size() << std::endl; for (auto ttl_entry_it = recompression_ttl_entries.begin(); ttl_entry_it != recompression_ttl_entries.end(); ++ttl_entry_it) { + std::cerr << "RECOMPRESSION TTL SIZE:" << ttl_infos.recompression_ttl.size() << std::endl; auto ttl_info_it = ttl_infos.recompression_ttl.find(ttl_entry_it->result_column); /// Prefer TTL rule which went into action last. 
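        /// (Among the recompression TTL entries checked here, the one whose ttl_info has the
        /// greatest `max` timestamp is remembered in `best_entry_it`, and its codec is returned below.)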
if (ttl_info_it != ttl_infos.recompression_ttl.end() @@ -3078,7 +3080,15 @@ CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_c } if (max_max_ttl) + { + std::cerr << "BEST ENTRY FOUND, MAX MAX:" << max_max_ttl << std::endl; + std::cerr << "RECOMPRESSION IS NULLPTR:" << (best_entry_it->recompression_codec == nullptr) << std::endl; return CompressionCodecFactory::instance().get(best_entry_it->recompression_codec, {}); + } + else + { + std::cerr << "NOT FOUND NEW RECOMPRESSION\n"; + } return global_context.chooseCompressionCodec( part_size_compressed, diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 8cece66dafb..9a77115e777 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -659,9 +659,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// (which is locked in shared mode when input streams are created) and when inserting new data /// the order is reverse. This annoys TSan even though one lock is locked in shared mode and thus /// deadlock is impossible. - auto compression_codec = data.global_context.chooseCompressionCodec( - merge_entry->total_size_bytes_compressed, - static_cast (merge_entry->total_size_bytes_compressed) / data.getTotalActiveSizeInBytes()); + auto compression_codec = data.getCompressionCodecForPart(merge_entry->total_size_bytes_compressed, new_data_part->ttl_infos, time_of_merge); /// TODO: Should it go through IDisk interface? String rows_sources_file_path; @@ -1082,15 +1080,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor auto disk = new_data_part->volume->getDisk(); String new_part_tmp_path = new_data_part->getFullRelativePath(); - /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex - /// (which is locked in data.getTotalActiveSizeInBytes()) - /// (which is locked in shared mode when input streams are created) and when inserting new data - /// the order is reverse. This annoys TSan even though one lock is locked in shared mode and thus - /// deadlock is impossible. - auto compression_codec = context.chooseCompressionCodec( - source_part->getBytesOnDisk(), - static_cast(source_part->getBytesOnDisk()) / data.getTotalActiveSizeInBytes()); - disk->createDirectories(new_part_tmp_path); /// Don't change granularity type while mutating subset of columns @@ -1100,11 +1089,27 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor bool need_remove_expired_values = false; if (in && shouldExecuteTTL(metadata_snapshot, in->getHeader().getNamesAndTypesList().getNames(), commands_for_part)) + { + std::cerr << "GOING TO MATERIALIZE TTL\n"; need_remove_expired_values = true; + } + else + { + std::cerr << "NOT GOING TO MATERIALIZE TTL\n"; + std::cerr << "IN IS NULL:" << (in == nullptr) << std::endl; + } /// All columns from part are changed and may be some more that were missing before in part if (!isWidePart(source_part) || (interpreter && interpreter->isAffectingAllColumns())) { + std::cerr << "MUTATING ALL PART COLUMNS\n"; + /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex + /// (which is locked in data.getTotalActiveSizeInBytes()) + /// (which is locked in shared mode when input streams are created) and when inserting new data + /// the order is reverse. 
This annoys TSan even though one lock is locked in shared mode and thus + /// deadlock is impossible. + auto compression_codec = data.getCompressionCodecForPart(source_part->getBytesOnDisk(), source_part->ttl_infos, time_of_mutation); + auto part_indices = getIndicesForNewDataPart(metadata_snapshot->getSecondaryIndices(), for_file_renames); mutateAllPartColumns( new_data_part, @@ -1121,6 +1126,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor } else /// TODO: check that we modify only non-key columns in this case. { + + std::cerr << "MUTATING SOME PART COLUMNS\n"; /// We will modify only some of the columns. Other columns and key values can be copied as-is. auto indices_to_recalc = getIndicesToRecalculate(in, updated_header.getNamesAndTypesList(), metadata_snapshot, context); @@ -1128,7 +1135,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor NameToNameVector files_to_rename = collectFilesForRenames(source_part, for_file_renames, mrk_extension); if (need_remove_expired_values) + { files_to_skip.insert("ttl.txt"); + } + for (const auto & name : files_to_skip) + { + std::cerr << "SKIPPING " << name << std::endl; + } /// Create hardlinks for unchanged files for (auto it = disk->iterateDirectory(source_part->getFullRelativePath()); it->isValid(); it->next()) @@ -1157,8 +1170,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor new_data_part->checksums = source_part->checksums; + auto compression_codec = source_part->default_codec; + if (in) { + std::cerr << "HEADER:" << updated_header.dumpStructure() << std::endl; + std::cerr << "IN HEADER:" << in->getHeader().dumpStructure() << std::endl; mutateSomePartColumns( source_part, metadata_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 92bf5345d5a..5115666066a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -13,6 +13,7 @@ #include #include +#include namespace ProfileEvents { @@ -234,8 +235,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & ttl_entry : move_ttl_entries) updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); + time_t current_time = time(nullptr); NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); - ReservationPtr reservation = data.reserveSpacePreferringTTLRules(expected_size, move_ttl_infos, time(nullptr)); + ReservationPtr reservation = data.reserveSpacePreferringTTLRules(expected_size, move_ttl_infos, current_time); VolumePtr volume = data.getStoragePolicy()->getVolume(0); auto new_data_part = data.createPart( @@ -306,7 +308,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. 
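    /// (With the change below, the codec may instead come from a matching RECOMPRESS
    /// TTL rule, since getCompressionCodecForPart consults the part's ttl_infos.)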
- auto compression_codec = data.global_context.chooseCompressionCodec(0, 0); + auto compression_codec = data.getCompressionCodecForPart(0, new_data_part->ttl_infos, current_time); + std::cerr << "SELECTED CODEC:" << queryToString(compression_codec->getCodecDesc()) << std::endl; const auto & index_factory = MergeTreeIndexFactory::instance(); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 4526b0d4f9b..8706c1f3b37 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -558,8 +558,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.sampling_key = KeyDescription::getKeyFromAST(args.storage_def->sample_by->ptr(), metadata.columns, args.context); if (args.storage_def->ttl_table) + { + std::cerr << "Parsing table ttl in description\n"; metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( args.storage_def->ttl_table->ptr(), metadata.columns, args.context, metadata.primary_key); + } if (args.query.columns_list && args.query.columns_list->indices) for (auto & index : args.query.columns_list->indices->children) diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index f611c1ec95d..f410fa34f59 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -124,7 +124,7 @@ TTLTableDescription StorageInMemoryMetadata::getTableTTLs() const bool StorageInMemoryMetadata::hasAnyTableTTL() const { - return hasAnyMoveTTL() || hasRowsTTL(); + return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL(); } TTLColumnsDescription StorageInMemoryMetadata::getColumnTTLs() const @@ -207,6 +207,9 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet } } + for (const auto & entry : getRecompressionTTLs()) + add_dependent_columns(entry.expression, required_ttl_columns); + for (const auto & [name, entry] : getColumnTTLs()) { if (add_dependent_columns(entry.expression, required_ttl_columns)) diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 656baf39971..ca5ea714dd9 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -89,6 +89,7 @@ TTLDescription::TTLDescription(const TTLDescription & other) , aggregate_descriptions(other.aggregate_descriptions) , destination_type(other.destination_type) , destination_name(other.destination_name) + , recompression_codec(other.recompression_codec) { if (other.expression) expression = std::make_shared(*other.expression); @@ -125,6 +126,12 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) aggregate_descriptions = other.aggregate_descriptions; destination_type = other.destination_type; destination_name = other.destination_name; + + if (other.recompression_codec) + recompression_codec = other.recompression_codec->clone(); + else + recompression_codec.reset(); + return * this; } @@ -266,6 +273,7 @@ TTLDescription TTLDescription::getTTLFromAST( } else if (ttl_element->mode == TTLMode::RECOMPRESS) { + std::cerr << "GOT INTO RECOMPRESS\n"; result.recompression_codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( ttl_element->recompression_codec, {}, !context.getSettingsRef().allow_suspicious_codecs); @@ -283,6 +291,7 @@ 
TTLTableDescription::TTLTableDescription(const TTLTableDescription & other)
     : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
     , rows_ttl(other.rows_ttl)
     , move_ttl(other.move_ttl)
+    , recompression_ttl(other.recompression_ttl)
 {
 }

@@ -298,6 +307,7 @@ TTLTableDescription & TTLTableDescription::operator=(const TTLTableDescription &

     rows_ttl = other.rows_ttl;
     move_ttl = other.move_ttl;
+    recompression_ttl = other.recompression_ttl;

     return *this;
 }
@@ -327,6 +337,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
     }
     else if (ttl.mode == TTLMode::RECOMPRESS)
     {
+        std::cerr << "GOT RECOMPRESSION TTL\n";
         result.recompression_ttl.emplace_back(std::move(ttl));
     }
     else
diff --git a/tests/queries/0_stateless/01465_ttl_recompression.reference b/tests/queries/0_stateless/01465_ttl_recompression.reference
new file mode 100644
index 00000000000..2f1a2ea40b1
--- /dev/null
+++ b/tests/queries/0_stateless/01465_ttl_recompression.reference
@@ -0,0 +1,10 @@
+3000
+1_1_1_0 LZ4
+2_2_2_0 ZSTD(17)
+3_3_3_0 LZ4HC(10)
+1_1_1_0_4 LZ4
+2_2_2_0_4 ZSTD(17)
+3_3_3_0_4 LZ4HC(10)
+1_1_1_1_4 LZ4
+2_2_2_1_4 ZSTD(12)
+3_3_3_1_4 ZSTD(12)
diff --git a/tests/queries/0_stateless/01465_ttl_recompression.sql b/tests/queries/0_stateless/01465_ttl_recompression.sql
new file mode 100644
index 00000000000..0c72000c624
--- /dev/null
+++ b/tests/queries/0_stateless/01465_ttl_recompression.sql
@@ -0,0 +1,32 @@
+DROP TABLE IF EXISTS recompression_table;
+
+CREATE TABLE recompression_table
+(
+    dt DateTime,
+    key UInt64,
+    value String
+
+) ENGINE MergeTree()
+ORDER BY tuple()
+PARTITION BY key
+TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), dt + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10));
+
+INSERT INTO recompression_table SELECT now(), 1, toString(number) from numbers(1000);
+
+INSERT INTO recompression_table SELECT now() - INTERVAL 2 MONTH, 2, toString(number) from numbers(1000, 1000);
+
+INSERT INTO recompression_table SELECT now() - INTERVAL 2 YEAR, 3, toString(number) from numbers(2000, 1000);
+
+SELECT COUNT() FROM recompression_table;
+
+SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table' and active = 1 and database = currentDatabase() ORDER BY name;
+
+ALTER TABLE recompression_table MODIFY TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(12)) SETTINGS mutations_sync = 2;
+
+SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table' and active = 1 and database = currentDatabase() ORDER BY name;
+
+OPTIMIZE TABLE recompression_table FINAL;
+
+SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table' and active = 1 and database = currentDatabase() ORDER BY name;
+
+DROP TABLE IF EXISTS recompression_table;
From c37a19f7b095a8f05ab1e9b65c6181e0e1fb6605 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 1 Sep 2020 14:23:38 +0300
Subject: [PATCH 028/298] Better

---
 src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 9 ---------
 src/Storages/MergeTree/MergeTreeSettings.h            | 2 ++
 .../0_stateless/01465_ttl_recompression.reference     | 2 ++
 tests/queries/0_stateless/01465_ttl_recompression.sql | 8 ++++++++
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 9a77115e777..f46fb7a79ef 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -1126,8 +1126,6 @@
MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
 }
 else /// TODO: check that we modify only non-key columns in this case.
 {
-
-    std::cerr << "MUTATING SOME PART COLUMNS\n";
     /// We will modify only some of the columns. Other columns and key values can be copied as-is.
     auto indices_to_recalc = getIndicesToRecalculate(in, updated_header.getNamesAndTypesList(), metadata_snapshot, context);
@@ -1138,11 +1136,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
     {
         files_to_skip.insert("ttl.txt");
     }
-    for (const auto & name : files_to_skip)
-    {
-        std::cerr << "SKIPPING " << name << std::endl;
-    }
-
     /// Create hardlinks for unchanged files
     for (auto it = disk->iterateDirectory(source_part->getFullRelativePath()); it->isValid(); it->next())
     {
@@ -1174,8 +1167,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor

     if (in)
     {
-        std::cerr << "HEADER:" << updated_header.dumpStructure() << std::endl;
-        std::cerr << "IN HEADER:" << in->getHeader().dumpStructure() << std::endl;
         mutateSomePartColumns(
             source_part,
             metadata_snapshot,
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 085c441aa90..6ac262ed35a 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -33,8 +33,10 @@ struct Settings;
     M(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \
     M(UInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
     M(UInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
+    M(UInt64, max_replicated_recompressions_in_queue, 1, "How many tasks of recompressing parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
     M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \
     M(UInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \
+    M(UInt64, number_of_free_entries_in_pool_to_execute_ttl_recompression, 10, "When there is less than specified number of free entries in pool, do not execute part recompression according to TTL. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \
     M(Seconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \
     M(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \
     M(Seconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc.
How many seconds before failing to acquire table locks.", 0) \ diff --git a/tests/queries/0_stateless/01465_ttl_recompression.reference b/tests/queries/0_stateless/01465_ttl_recompression.reference index 2f1a2ea40b1..c03c003d5b8 100644 --- a/tests/queries/0_stateless/01465_ttl_recompression.reference +++ b/tests/queries/0_stateless/01465_ttl_recompression.reference @@ -1,7 +1,9 @@ +CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(17)), dt + toIntervalYear(1) RECOMPRESS CODEC(LZ4HC(10))\nSETTINGS index_granularity = 8192 3000 1_1_1_0 LZ4 2_2_2_0 ZSTD(17) 3_3_3_0 LZ4HC(10) +CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(12))\nSETTINGS index_granularity = 8192 1_1_1_0_4 LZ4 2_2_2_0_4 ZSTD(17) 3_3_3_0_4 LZ4HC(10) diff --git a/tests/queries/0_stateless/01465_ttl_recompression.sql b/tests/queries/0_stateless/01465_ttl_recompression.sql index 0c72000c624..92233f2d5cb 100644 --- a/tests/queries/0_stateless/01465_ttl_recompression.sql +++ b/tests/queries/0_stateless/01465_ttl_recompression.sql @@ -11,6 +11,10 @@ ORDER BY tuple() PARTITION BY key TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), dt + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10)); +SHOW CREATE TABLE recompression_table; + +SYSTEM STOP TTL MERGES recompression_table; + INSERT INTO recompression_table SELECT now(), 1, toString(number) from numbers(1000); INSERT INTO recompression_table SELECT now() - INTERVAL 2 MONTH, 2, toString(number) from numbers(1000, 1000); @@ -23,8 +27,12 @@ SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompre ALTER TABLE recompression_table MODIFY TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(12)) SETTINGS mutations_sync = 2; +SHOW CREATE TABLE recompression_table; + SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table' and active = 1 and database = currentDatabase() ORDER BY name; +SYSTEM START TTL MERGES recompression_table; + OPTIMIZE TABLE recompression_table FINAL; SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table' and active = 1 and database = currentDatabase() ORDER BY name; From 3cadc9033ae63d7faa851b1707b3c6f9ce1a36aa Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 1 Sep 2020 18:26:49 +0300 Subject: [PATCH 029/298] fsyncs for metadata files of part --- .../MergeTree/IMergeTreeDataPartWriter.h | 2 +- .../MergeTreeDataPartWriterOnDisk.cpp | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 13 +++++++-- .../MergeTree/MergedBlockOutputStream.h | 3 +- utils/durability-test/create_sync.sql | 1 + utils/durability-test/durability-test.sh | 28 ++++++++++--------- utils/durability-test/insert_sync.sql | 1 + 7 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 utils/durability-test/create_sync.sql mode change 100644 => 100755 utils/durability-test/durability-test.sh create mode 100644 utils/durability-test/insert_sync.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 4d3602e732e..4a42a58a65b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -52,7 +52,7 @@ public: virtual void initPrimaryIndex() {} virtual void 
finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) = 0; - virtual void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & /* checksums */, bool /* sync */) {} + virtual void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & /* checksums */, bool /* sync */) {} virtual void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & /* checksums */, bool /* sync */) {} Columns releaseIndexColumns(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index dbe41144573..8295b881d87 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -332,7 +332,7 @@ void MergeTreeDataPartWriterOnDisk::finishPrimaryIndexSerialization( checksums.files["primary.idx"].file_size = index_stream->count(); checksums.files["primary.idx"].file_hash = index_stream->getHash(); if (sync) - index_stream->sync(); + index_file_stream->sync(); index_stream = nullptr; } } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index fdef5d69688..bdc6bade259 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -111,7 +111,7 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( part_columns = *total_columns_list; if (new_part->isStoredOnDisk()) - finalizePartOnDisk(new_part, part_columns, checksums); + finalizePartOnDisk(new_part, part_columns, checksums, sync); new_part->setColumns(part_columns); new_part->rows_count = rows_count; @@ -126,7 +126,8 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( void MergedBlockOutputStream::finalizePartOnDisk( const MergeTreeData::MutableDataPartPtr & new_part, NamesAndTypesList & part_columns, - MergeTreeData::DataPart::Checksums & checksums) + MergeTreeData::DataPart::Checksums & checksums, + bool sync) { if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { @@ -143,6 +144,8 @@ void MergedBlockOutputStream::finalizePartOnDisk( count_out_hashing.next(); checksums.files["count.txt"].file_size = count_out_hashing.count(); checksums.files["count.txt"].file_hash = count_out_hashing.getHash(); + if (sync) + count_out->sync(); } if (!new_part->ttl_infos.empty()) @@ -153,6 +156,8 @@ void MergedBlockOutputStream::finalizePartOnDisk( new_part->ttl_infos.write(out_hashing); checksums.files["ttl.txt"].file_size = out_hashing.count(); checksums.files["ttl.txt"].file_hash = out_hashing.getHash(); + if (sync) + out->sync(); } removeEmptyColumnsFromPart(new_part, part_columns, checksums); @@ -161,12 +166,16 @@ void MergedBlockOutputStream::finalizePartOnDisk( /// Write a file with a description of columns. auto out = volume->getDisk()->writeFile(part_path + "columns.txt", 4096); part_columns.writeText(*out); + if (sync) + out->sync(); } { /// Write file with checksums. 
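        /// (As with count.txt and ttl.txt above, this file is also fsynced below when `sync` is set.)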
auto out = volume->getDisk()->writeFile(part_path + "checksums.txt", 4096); checksums.write(*out); + if (sync) + out->sync(); } } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 0b500b93f01..87ff9dd1ded 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -59,7 +59,8 @@ private: void finalizePartOnDisk( const MergeTreeData::MutableDataPartPtr & new_part, NamesAndTypesList & part_columns, - MergeTreeData::DataPart::Checksums & checksums); + MergeTreeData::DataPart::Checksums & checksums, + bool sync); private: NamesAndTypesList columns_list; diff --git a/utils/durability-test/create_sync.sql b/utils/durability-test/create_sync.sql new file mode 100644 index 00000000000..2cc88d2c943 --- /dev/null +++ b/utils/durability-test/create_sync.sql @@ -0,0 +1 @@ +CREATE TABLE test_sync (a Int, s String) ENGINE = MergeTree ORDER BY a SETTINGS fsync_after_insert = 1, min_compressed_bytes_to_fsync_after_merge = 1; diff --git a/utils/durability-test/durability-test.sh b/utils/durability-test/durability-test.sh old mode 100644 new mode 100755 index 1f47c900f49..c7f8936ec95 --- a/utils/durability-test/durability-test.sh +++ b/utils/durability-test/durability-test.sh @@ -17,12 +17,12 @@ fi function run() { - sshpass -p $PASSWORD ssh -p $SSH_PORT root@localhost "$1" + sshpass -p $PASSWORD ssh -p $SSH_PORT root@localhost "$1" 2>/dev/null } function copy() { - sshpass -p $PASSWORD scp -r -P $SSH_PORT $1 root@localhost:$2 + sshpass -p $PASSWORD scp -r -P $SSH_PORT $1 root@localhost:$2 2>/dev/null } function wait_vm_for_start() @@ -50,8 +50,8 @@ function wait_clickhouse_for_start() { echo "Waiting until ClickHouse started..." started=0 - for i in {0..15}; do - run "clickhouse client --query 'select 1'" + for i in {0..30}; do + run "clickhouse client --query 'select 1'" > /dev/null if [ $? 
-eq 0 ]; then started=1 break @@ -70,7 +70,7 @@ echo "Downloading image" curl -O $URL/$IMAGE qemu-img resize $IMAGE +10G -virt-customize -a $IMAGE --root-password password:$PASSWORD +virt-customize -a $IMAGE --root-password password:$PASSWORD > /dev/null 2>&1 virt-copy-in -a $IMAGE sshd_config /etc/ssh echo "Starting VM" @@ -93,8 +93,8 @@ if [[ -z $CLICKHOUSE_CONFIG_DIR ]]; then CLICKHOUSE_CONFIG_DIR=/etc/clickhouse-server fi -echo "Using ClickHouse binary: " $CLICKHOUSE_BINARY -echo "Using ClickHouse config from: " $CLICKHOUSE_CONFIG_DIR +echo "Using ClickHouse binary:" $CLICKHOUSE_BINARY +echo "Using ClickHouse config from:" $CLICKHOUSE_CONFIG_DIR copy $CLICKHOUSE_BINARY /usr/bin copy $CLICKHOUSE_CONFIG_DIR /etc @@ -104,23 +104,19 @@ echo "Prepared VM" echo "Starting ClickHouse" run "clickhouse server --config-file=/etc/clickhouse-server/config.xml > clickhouse-server.log 2>&1" & - wait_clickhouse_for_start -echo "Started ClickHouse" - query=`cat $CREATE_QUERY` echo "Executing query:" $query run "clickhouse client --query '$query'" query=`cat $INSERT_QUERY` echo "Will run in a loop query: " $query -run "clickhouse benchmark <<< '$query'" & +run "clickhouse benchmark <<< '$query' -c 8" & echo "Running queries" pid=`pidof qemu-system-x86_64` -sec=$(( (RANDOM % 3) + 25 )) - +sec=$(( (RANDOM % 5) + 25 )) ms=$(( RANDOM % 1000 )) echo "Will kill VM in $sec.$ms sec" @@ -130,6 +126,8 @@ kill -9 $pid echo "Restarting" +sleep 5s + ./startup.exp > qemu.log 2>&1 & wait_vm_for_start @@ -137,10 +135,12 @@ run "rm -r *data/system" run "clickhouse server --config-file=/etc/clickhouse-server/config.xml > clickhouse-server.log 2>&1" & wait_clickhouse_for_start +pid=`pidof qemu-system-x86_64` result=`run "grep $TABLE_NAME clickhouse-server.log | grep 'Caught exception while loading metadata'"` if [[ -n $result ]]; then echo "FAIL. Can't attach table:" echo $result + kill -9 $pid exit 1 fi @@ -148,7 +148,9 @@ result=`run "grep $TABLE_NAME clickhouse-server.log | grep 'Considering to remov if [[ -n $result ]]; then echo "FAIL. 
Have broken parts:" echo $result + kill -9 $pid exit 1 fi +kill -9 $pid echo OK diff --git a/utils/durability-test/insert_sync.sql b/utils/durability-test/insert_sync.sql new file mode 100644 index 00000000000..a1ad2ff4ea5 --- /dev/null +++ b/utils/durability-test/insert_sync.sql @@ -0,0 +1 @@ +INSERT INTO test_sync SELECT number, toString(number) FROM numbers(10) From ac5877e601714450a369062abbf80f84485bc6f5 Mon Sep 17 00:00:00 2001 From: hcz Date: Wed, 2 Sep 2020 00:58:39 +0800 Subject: [PATCH 030/298] Fix tests --- tests/queries/0_stateless/01447_JSONStrings.reference | 9 +-------- tests/queries/0_stateless/01447_JSONStrings.sql | 2 ++ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01447_JSONStrings.reference b/tests/queries/0_stateless/01447_JSONStrings.reference index 58af593dc77..1c6f073c0d0 100644 --- a/tests/queries/0_stateless/01447_JSONStrings.reference +++ b/tests/queries/0_stateless/01447_JSONStrings.reference @@ -32,12 +32,5 @@ ["1", "a", "[1,2,3]", "(1,'a')", "ᴺᵁᴸᴸ", "nan"] ], - "rows": 1, - - "statistics": - { - "elapsed": 0.00068988, - "rows_read": 1, - "bytes_read": 1 - } + "rows": 1 } diff --git a/tests/queries/0_stateless/01447_JSONStrings.sql b/tests/queries/0_stateless/01447_JSONStrings.sql index 7d89f0f5087..45fc4a56d7a 100644 --- a/tests/queries/0_stateless/01447_JSONStrings.sql +++ b/tests/queries/0_stateless/01447_JSONStrings.sql @@ -1,3 +1,5 @@ +SET output_format_write_statistics = 0; + SELECT 1, 'a', From 26d75f76026303b6f3769ab4ea39ff639ebe836a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 2 Sep 2020 01:25:10 +0300 Subject: [PATCH 031/298] do fsync for WAL --- src/Storages/MergeTree/MergeTreeSettings.h | 2 ++ .../MergeTree/MergeTreeWriteAheadLog.cpp | 32 +++++++++++++++++-- .../MergeTree/MergeTreeWriteAheadLog.h | 10 +++++- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 1341526c38b..edf03710974 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,8 @@ struct Settings; M(UInt64, min_compressed_bytes_to_fsync_after_fetch, 0, "Minimal number of compressed bytes to do fsync for part after fetch (0 - disabled)", 0) \ M(Bool, fsync_after_insert, false, "Do fsync for every inserted part. Significantly decreases performance of inserts, not recommended to use with wide parts.", 0) \ M(Bool, fsync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ + M(UInt64, write_ahead_log_bytes_to_fsync, 100ULL * 1024 * 1024, "Amount of bytes, accumulated in WAL to do fsync.", 0) \ + M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Interval in milliseconds after which fsync for WAL is being done.", 0) \ \ /** Inserts settings. 
*/ \ M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index eda8579c76a..6f220fc7d5d 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -16,17 +17,23 @@ namespace ErrorCodes extern const int CORRUPTED_DATA; } - MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( - const MergeTreeData & storage_, + MergeTreeData & storage_, const DiskPtr & disk_, const String & name_) : storage(storage_) , disk(disk_) , name(name_) , path(storage.getRelativeDataPath() + name_) + , pool(storage.global_context.getSchedulePool()) { init(); + sync_task = pool.createTask("MergeTreeWriteAheadLog::sync", [this] + { + std::lock_guard lock(write_mutex); + out->sync(); + sync_scheduled = false; + }); } void MergeTreeWriteAheadLog::init() @@ -38,6 +45,7 @@ void MergeTreeWriteAheadLog::init() block_out = std::make_unique(*out, 0, Block{}); min_block_number = std::numeric_limits::max(); max_block_number = -1; + bytes_at_last_sync = 0; } void MergeTreeWriteAheadLog::addPart(const Block & block, const String & part_name) @@ -53,6 +61,7 @@ void MergeTreeWriteAheadLog::addPart(const Block & block, const String & part_na writeStringBinary(part_name, *out); block_out->write(block); block_out->flush(); + sync(lock); auto max_wal_bytes = storage.getSettings()->write_ahead_log_max_bytes; if (out->count() > max_wal_bytes) @@ -66,6 +75,7 @@ void MergeTreeWriteAheadLog::dropPart(const String & part_name) writeIntBinary(static_cast(0), *out); writeIntBinary(static_cast(ActionType::DROP_PART), *out); writeStringBinary(part_name, *out); + sync(lock); } void MergeTreeWriteAheadLog::rotate(const std::lock_guard &) @@ -175,6 +185,24 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor return result; } +void MergeTreeWriteAheadLog::sync(const std::lock_guard &) +{ + size_t bytes_to_sync = storage.getSettings()->write_ahead_log_bytes_to_fsync; + time_t time_to_sync = storage.getSettings()->write_ahead_log_interval_ms_to_fsync; + size_t current_bytes = out->count(); + + if (bytes_to_sync && current_bytes - bytes_at_last_sync > bytes_to_sync) + { + sync_task->schedule(); + bytes_at_last_sync = current_bytes; + } + else if (time_to_sync && !sync_scheduled) + { + sync_task->scheduleAfter(time_to_sync); + sync_scheduled = true; + } +} + std::optional MergeTreeWriteAheadLog::tryParseMinMaxBlockNumber(const String & filename) { diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 2cc3c2b4181..43abf3c04be 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace DB @@ -31,7 +32,7 @@ public: constexpr static auto WAL_FILE_EXTENSION = ".bin"; constexpr static auto DEFAULT_WAL_FILE_NAME = "wal.bin"; - MergeTreeWriteAheadLog(const MergeTreeData & storage_, const DiskPtr & disk_, + MergeTreeWriteAheadLog(MergeTreeData & storage_, const DiskPtr & disk_, const String & name = DEFAULT_WAL_FILE_NAME); void addPart(const Block & block, const String & part_name); @@ -44,6 +45,7 @@ public: private: void init(); void rotate(const std::lock_guard & lock); + void sync(const std::lock_guard 
& lock); const MergeTreeData & storage; DiskPtr disk; @@ -56,6 +58,12 @@ private: Int64 min_block_number = std::numeric_limits::max(); Int64 max_block_number = -1; + BackgroundSchedulePool & pool; + BackgroundSchedulePoolTaskHolder sync_task; + + size_t bytes_at_last_sync = 0; + bool sync_scheduled = false; + mutable std::mutex write_mutex; }; From a80c1adee81631f770f642ad4430a8ff44ff46af Mon Sep 17 00:00:00 2001 From: hcz Date: Wed, 2 Sep 2020 12:05:02 +0800 Subject: [PATCH 032/298] Add JSONCompactStrings formats --- docs/en/interfaces/formats.md | 288 +++++++++++------- src/Formats/FormatFactory.cpp | 6 - .../Impl/JSONCompactEachRowRowInputFormat.cpp | 49 ++- .../Impl/JSONCompactEachRowRowInputFormat.h | 11 +- .../JSONCompactEachRowRowOutputFormat.cpp | 37 ++- .../Impl/JSONCompactEachRowRowOutputFormat.h | 9 +- .../Impl/JSONCompactRowOutputFormat.cpp | 30 +- .../Formats/Impl/JSONCompactRowOutputFormat.h | 10 +- .../Impl/JSONEachRowRowInputFormat.cpp | 42 ++- .../Formats/Impl/JSONEachRowRowInputFormat.h | 9 +- .../Impl/JSONEachRowRowOutputFormat.cpp | 32 +- .../Formats/Impl/JSONEachRowRowOutputFormat.h | 10 +- ...JSONEachRowWithProgressRowOutputFormat.cpp | 11 +- .../Formats/Impl/JSONRowOutputFormat.cpp | 44 ++- .../Formats/Impl/JSONRowOutputFormat.h | 9 +- .../Impl/JSONStringsEachRowRowInputFormat.cpp | 245 --------------- .../Impl/JSONStringsEachRowRowInputFormat.h | 54 ---- .../JSONStringsEachRowRowOutputFormat.cpp | 117 ------- .../Impl/JSONStringsEachRowRowOutputFormat.h | 45 --- .../Impl/JSONStringsRowOutputFormat.cpp | 93 ------ .../Formats/Impl/JSONStringsRowOutputFormat.h | 43 --- .../0_stateless/01446_JSONStringsEachRow.sql | 63 ---- .../01446_json_strings_each_row.reference | 22 ++ .../01446_json_strings_each_row.sql | 38 +++ .../0_stateless/01447_json_strings.reference | 43 +++ ...JSONStrings.sql => 01447_json_strings.sql} | 0 ...8_json_compact_strings_each_row.reference} | 0 .../01448_json_compact_strings_each_row.sql | 63 ++++ ...e => 01449_json_compact_strings.reference} | 0 .../01449_json_compact_strings.sql | 10 + 30 files changed, 621 insertions(+), 812 deletions(-) delete mode 100644 src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.cpp delete mode 100644 src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.h delete mode 100644 src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp delete mode 100644 src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.h delete mode 100644 src/Processors/Formats/Impl/JSONStringsRowOutputFormat.cpp delete mode 100644 src/Processors/Formats/Impl/JSONStringsRowOutputFormat.h delete mode 100644 tests/queries/0_stateless/01446_JSONStringsEachRow.sql create mode 100644 tests/queries/0_stateless/01446_json_strings_each_row.reference create mode 100644 tests/queries/0_stateless/01446_json_strings_each_row.sql create mode 100644 tests/queries/0_stateless/01447_json_strings.reference rename tests/queries/0_stateless/{01447_JSONStrings.sql => 01447_json_strings.sql} (100%) rename tests/queries/0_stateless/{01446_JSONStringsEachRow.reference => 01448_json_compact_strings_each_row.reference} (100%) create mode 100644 tests/queries/0_stateless/01448_json_compact_strings_each_row.sql rename tests/queries/0_stateless/{01447_JSONStrings.reference => 01449_json_compact_strings.reference} (100%) create mode 100644 tests/queries/0_stateless/01449_json_compact_strings.sql diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 9c7c2dda8dc..bfe5b6218e4 100644 --- a/docs/en/interfaces/formats.md 
+++ b/docs/en/interfaces/formats.md
@@ -10,45 +10,51 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
 
 The supported formats are:
 
-| Format                                                          | Input | Output |
-|-----------------------------------------------------------------|-------|--------|
-| [TabSeparated](#tabseparated)                                   | ✔     | ✔      |
-| [TabSeparatedRaw](#tabseparatedraw)                             | ✔     | ✔      |
-| [TabSeparatedWithNames](#tabseparatedwithnames)                 | ✔     | ✔      |
-| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔     | ✔      |
-| [Template](#format-template)                                    | ✔     | ✔      |
-| [TemplateIgnoreSpaces](#templateignorespaces)                   | ✔     | ✗      |
-| [CSV](#csv)                                                     | ✔     | ✔      |
-| [CSVWithNames](#csvwithnames)                                   | ✔     | ✔      |
-| [CustomSeparated](#format-customseparated)                      | ✔     | ✔      |
-| [Values](#data-format-values)                                   | ✔     | ✔      |
-| [Vertical](#vertical)                                           | ✗     | ✔      |
-| [VerticalRaw](#verticalraw)                                     | ✗     | ✔      |
-| [JSON](#json)                                                   | ✗     | ✔      |
-| [JSONCompact](#jsoncompact)                                     | ✗     | ✔      |
-| [JSONStrings](#jsonstrings)                                     | ✗     | ✔      |
-| [JSONEachRow](#jsoneachrow)                                     | ✔     | ✔      |
-| [JSONCompactEachRow](#jsoncompacteachrow)                       | ✔     | ✔      |
-| [JSONStringsEachRow](#jsonstringseachrow)                       | ✔     | ✔      |
-| [TSKV](#tskv)                                                   | ✔     | ✔      |
-| [Pretty](#pretty)                                               | ✗     | ✔      |
-| [PrettyCompact](#prettycompact)                                 | ✗     | ✔      |
-| [PrettyCompactMonoBlock](#prettycompactmonoblock)               | ✗     | ✔      |
-| [PrettyNoEscapes](#prettynoescapes)                             | ✗     | ✔      |
-| [PrettySpace](#prettyspace)                                     | ✗     | ✔      |
-| [Protobuf](#protobuf)                                           | ✔     | ✔      |
-| [Avro](#data-format-avro)                                       | ✔     | ✔      |
-| [AvroConfluent](#data-format-avro-confluent)                    | ✔     | ✗      |
-| [Parquet](#data-format-parquet)                                 | ✔     | ✔      |
-| [Arrow](#data-format-arrow)                                     | ✔     | ✔      |
-| [ArrowStream](#data-format-arrow-stream)                        | ✔     | ✔      |
-| [ORC](#data-format-orc)                                         | ✔     | ✗      |
-| [RowBinary](#rowbinary)                                         | ✔     | ✔      |
-| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes)       | ✔     | ✔      |
-| [Native](#native)                                               | ✔     | ✔      |
-| [Null](#null)                                                   | ✗     | ✔      |
-| [XML](#xml)                                                     | ✗     | ✔      |
-| [CapnProto](#capnproto)                                         | ✔     | ✗      |
+| Format                                                                                    | Input | Output |
+|-------------------------------------------------------------------------------------------|-------|--------|
+| [TabSeparated](#tabseparated)                                                             | ✔     | ✔      |
+| [TabSeparatedRaw](#tabseparatedraw)                                                       | ✔     | ✔      |
+| [TabSeparatedWithNames](#tabseparatedwithnames)                                           | ✔     | ✔      |
+| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes)                           | ✔     | ✔      |
+| [Template](#format-template)                                                              | ✔     | ✔      |
+| [TemplateIgnoreSpaces](#templateignorespaces)                                             | ✔     | ✗      |
+| [CSV](#csv)                                                                               | ✔     | ✔      |
+| [CSVWithNames](#csvwithnames)                                                             | ✔     | ✔      |
+| [CustomSeparated](#format-customseparated)                                                | ✔     | ✔      |
+| [Values](#data-format-values)                                                             | ✔     | ✔      |
+| [Vertical](#vertical)                                                                     | ✗     | ✔      |
+| [VerticalRaw](#verticalraw)                                                               | ✗     | ✔      |
+| [JSON](#json)                                                                             | ✗     | ✔      |
+| [JSONStrings](#jsonstrings)                                                               | ✗     | ✔      |
+| [JSONCompact](#jsoncompact)                                                               | ✗     | ✔      |
+| [JSONCompactStrings](#jsoncompactstrings)                                                 | ✗     | ✔      |
+| [JSONEachRow](#jsoneachrow)                                                               | ✔     | ✔      |
+| [JSONEachRowWithProgress](#jsoneachrowwithprogress)                                       | ✗     | ✔      |
+| [JSONStringsEachRow](#jsonstringseachrow)                                                 | ✔     | ✔      |
+| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress)                         | ✗     | ✔      |
+| [JSONCompactEachRow](#jsoncompacteachrow)                                                 | ✔     | ✔      |
+| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes)               | ✔     | ✔      |
+| [JSONCompactStringsEachRow](#jsoncompactstringseachrow)                                   | ✔     | ✔      |
+| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔     | ✔      |
+| [TSKV](#tskv)                                                                             | ✔     | ✔      |
+| [Pretty](#pretty)                                                                         | ✗     | ✔      |
+| [PrettyCompact](#prettycompact)                                                           | ✗     | ✔      |
+| [PrettyCompactMonoBlock](#prettycompactmonoblock)                                         | ✗     | ✔      |
+| [PrettyNoEscapes](#prettynoescapes)                                                       | ✗     | ✔      |
+| 
[PrettySpace](#prettyspace)                                                               | ✗     | ✔      |
+| [Protobuf](#protobuf)                                                                     | ✔     | ✔      |
+| [Avro](#data-format-avro)                                                                 | ✔     | ✔      |
+| [AvroConfluent](#data-format-avro-confluent)                                              | ✔     | ✗      |
+| [Parquet](#data-format-parquet)                                                           | ✔     | ✔      |
+| [Arrow](#data-format-arrow)                                                               | ✔     | ✔      |
+| [ArrowStream](#data-format-arrow-stream)                                                  | ✔     | ✔      |
+| [ORC](#data-format-orc)                                                                   | ✔     | ✗      |
+| [RowBinary](#rowbinary)                                                                   | ✔     | ✔      |
+| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes)                                 | ✔     | ✔      |
+| [Native](#native)                                                                         | ✔     | ✔      |
+| [Null](#null)                                                                             | ✗     | ✔      |
+| [XML](#xml)                                                                               | ✗     | ✔      |
+| [CapnProto](#capnproto)                                                                   | ✔     | ✗      |
 
 You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
 
@@ -395,62 +401,41 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA
     "meta":
     [
         {
-            "name": "SearchPhrase",
+            "name": "'hello'",
             "type": "String"
         },
         {
-            "name": "c",
+            "name": "multiply(42, number)",
             "type": "UInt64"
+        },
+        {
+            "name": "range(5)",
+            "type": "Array(UInt8)"
         }
    ],
 
     "data":
     [
         {
-            "SearchPhrase": "",
-            "c": "8267016"
+            "'hello'": "hello",
+            "multiply(42, number)": "0",
+            "range(5)": [0,1,2,3,4]
         },
         {
-            "SearchPhrase": "bathroom interior design",
-            "c": "2166"
+            "'hello'": "hello",
+            "multiply(42, number)": "42",
+            "range(5)": [0,1,2,3,4]
         },
         {
-            "SearchPhrase": "yandex",
-            "c": "1655"
-        },
-        {
-            "SearchPhrase": "spring 2014 fashion",
-            "c": "1549"
-        },
-        {
-            "SearchPhrase": "freeform photos",
-            "c": "1480"
+            "'hello'": "hello",
+            "multiply(42, number)": "84",
+            "range(5)": [0,1,2,3,4]
         }
     ],
 
-    "totals":
-    {
-        "SearchPhrase": "",
-        "c": "8873898"
-    },
+    "rows": 3,
 
-    "extremes":
-    {
-        "min":
-        {
-            "SearchPhrase": "",
-            "c": "1480"
-        },
-        "max":
-        {
-            "SearchPhrase": "",
-            "c": "8267016"
-        }
-    },
-
-    "rows": 5,
-
-    "rows_before_limit_at_least": 141137
+    "rows_before_limit_at_least": 3
 }
 ```
 
@@ -471,73 +456,166 @@ ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `n
 
 See also the [JSONEachRow](#jsoneachrow) format.
 
-## JSONCompact {#jsoncompact}
+## JSONStrings {#jsonstrings}
 
-Differs from JSON only in that data rows are output in arrays of any element type, not in objects.
+Differs from JSON only in that data fields are output as strings, not as typed JSON values.
 
 Example:
 
-``` json
+```json
 {
     "meta":
     [
        {
-            "name": "SearchPhrase",
+            "name": "'hello'",
            "type": "String"
        },
        {
-            "name": "c",
+            "name": "multiply(42, number)",
            "type": "UInt64"
+        },
+        {
+            "name": "range(5)",
+            "type": "Array(UInt8)"
        }
    ],
 
    "data":
    [
-        ["", "8267016"],
-        ["bathroom interior design", "2166"],
-        ["yandex", "1655"],
-        ["fashion trends spring 2014", "1549"],
-        ["freeform photo", "1480"]
+        {
+            "'hello'": "hello",
+            "multiply(42, number)": "0",
+            "range(5)": "[0,1,2,3,4]"
+        },
+        {
+            "'hello'": "hello",
+            "multiply(42, number)": "42",
+            "range(5)": "[0,1,2,3,4]"
+        },
+        {
+            "'hello'": "hello",
+            "multiply(42, number)": "84",
+            "range(5)": "[0,1,2,3,4]"
+        }
    ],
 
-    "totals": ["","8873898"],
+    "rows": 3,
 
-    "extremes":
-    {
-        "min": ["","1480"],
-        "max": ["","8267016"]
-    },
-
-    "rows": 5,
-
-    "rows_before_limit_at_least": 141137
+    "rows_before_limit_at_least": 3
 }
 ```
 
-This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
-See also the `JSONEachRow` format.
+## JSONCompact {#jsoncompact}
+## JSONCompactStrings {#jsoncompactstrings}
 
-## JSONStrings {#jsonstrings}
+Differs from JSON only in that data rows are output in arrays, not in objects. 
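+
+For reference, the two samples below can be reproduced with a query of roughly the following shape (the query is an assumption reconstructed here from the column names in the sample output; swap `JSONCompactStrings` into the `FORMAT` clause to get the second variant):
+
+``` sql
+SELECT 'hello', multiply(42, number), range(5)
+FROM numbers(3)
+FORMAT JSONCompact
+```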
-Differs from JSON and JSONCompact only in that data rows are output in arrays of strings.
+Example:
 
-This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
-See also the `JSONEachRow` format.
+``` json
+// JSONCompact
+{
+    "meta":
+    [
+        {
+            "name": "'hello'",
+            "type": "String"
+        },
+        {
+            "name": "multiply(42, number)",
+            "type": "UInt64"
+        },
+        {
+            "name": "range(5)",
+            "type": "Array(UInt8)"
+        }
+    ],
+
+    "data":
+    [
+        ["hello", "0", [0,1,2,3,4]],
+        ["hello", "42", [0,1,2,3,4]],
+        ["hello", "84", [0,1,2,3,4]]
+    ],
+
+    "rows": 3,
+
+    "rows_before_limit_at_least": 3
+}
+```
+
+```json
+// JSONCompactStrings
+{
+    "meta":
+    [
+        {
+            "name": "'hello'",
+            "type": "String"
+        },
+        {
+            "name": "multiply(42, number)",
+            "type": "UInt64"
+        },
+        {
+            "name": "range(5)",
+            "type": "Array(UInt8)"
+        }
+    ],
+
+    "data":
+    [
+        ["hello", "0", "[0,1,2,3,4]"],
+        ["hello", "42", "[0,1,2,3,4]"],
+        ["hello", "84", "[0,1,2,3,4]"]
+    ],
+
+    "rows": 3,
+
+    "rows_before_limit_at_least": 3
+}
+```
 
 ## JSONEachRow {#jsoneachrow}
+## JSONStringsEachRow {#jsonstringseachrow}
 ## JSONCompactEachRow {#jsoncompacteachrow}
-## JSONStringsEachRow {#jsonstringseachrow}
+## JSONCompactStringsEachRow {#jsoncompactstringseachrow}
 
 When using these formats, ClickHouse outputs rows as separated, newline-delimited JSON values, but the data as a whole is not valid JSON.
 
 ``` json
 {"some_int":42,"some_str":"hello","some_tuple":[1,"a"]} // JSONEachRow
 [42,"hello",[1,"a"]] // JSONCompactEachRow
-["42","hello","(2,'a')"] // JSONStringsEachRow
+["42","hello","(1,'a')"] // JSONCompactStringsEachRow
 ```
 
 When inserting the data, you should provide a separate JSON value for each row.
 
+## JSONEachRowWithProgress {#jsoneachrowwithprogress}
+## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress}
+
+Differs from JSONEachRow/JSONStringsEachRow in that ClickHouse will also yield progress information as JSON objects.
+
+```json
+{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}}
+{"row":{"'hello'":"hello","multiply(42, number)":"42","range(5)":[0,1,2,3,4]}}
+{"row":{"'hello'":"hello","multiply(42, number)":"84","range(5)":[0,1,2,3,4]}}
+{"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}}
+```
+
+## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes}
+## JSONCompactStringsEachRowWithNamesAndTypes {#jsoncompactstringseachrowwithnamesandtypes}
+
+Differs from JSONCompactEachRow/JSONCompactStringsEachRow in that the column names and types are written as the first two rows. 
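+
+The sample below can likewise be reproduced with a query of roughly this shape (again an assumption reconstructed from the sample output, given only as an illustration):
+
+``` sql
+SELECT 'hello', multiply(42, number), range(5)
+FROM numbers(3)
+FORMAT JSONCompactEachRowWithNamesAndTypes
+```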
+ +```json +["'hello'", "multiply(42, number)", "range(5)"] +["String", "UInt64", "Array(UInt8)"] +["hello", "0", [0,1,2,3,4]] +["hello", "42", [0,1,2,3,4]] +["hello", "84", [0,1,2,3,4]] +``` + ### Inserting Data {#inserting-data} ``` sql diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index cb378fbea96..871098e00c0 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -352,8 +352,6 @@ void registerInputFormatProcessorJSONEachRow(FormatFactory & factory); void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory); void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory); void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory); -void registerInputFormatProcessorJSONStringsEachRow(FormatFactory & factory); -void registerOutputFormatProcessorJSONStringsEachRow(FormatFactory & factory); void registerInputFormatProcessorProtobuf(FormatFactory & factory); void registerOutputFormatProcessorProtobuf(FormatFactory & factory); void registerInputFormatProcessorTemplate(FormatFactory & factory); @@ -380,7 +378,6 @@ void registerOutputFormatProcessorVertical(FormatFactory & factory); void registerOutputFormatProcessorJSON(FormatFactory & factory); void registerOutputFormatProcessorJSONCompact(FormatFactory & factory); void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory); -void registerOutputFormatProcessorJSONStrings(FormatFactory & factory); void registerOutputFormatProcessorXML(FormatFactory & factory); void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); void registerOutputFormatProcessorNull(FormatFactory & factory); @@ -421,8 +418,6 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorJSONEachRow(*this); registerInputFormatProcessorJSONCompactEachRow(*this); registerOutputFormatProcessorJSONCompactEachRow(*this); - registerInputFormatProcessorJSONStringsEachRow(*this); - registerOutputFormatProcessorJSONStringsEachRow(*this); registerInputFormatProcessorProtobuf(*this); registerOutputFormatProcessorProtobuf(*this); registerInputFormatProcessorTemplate(*this); @@ -449,7 +444,6 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorJSON(*this); registerOutputFormatProcessorJSONCompact(*this); registerOutputFormatProcessorJSONEachRowWithProgress(*this); - registerOutputFormatProcessorJSONStrings(*this); registerOutputFormatProcessorXML(*this); registerOutputFormatProcessorODBCDriver2(*this); registerOutputFormatProcessorNull(*this); diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 82e3cb795bf..eb697ce5318 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -19,8 +20,9 @@ JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat(ReadBuffer & const Block & header_, Params params_, const FormatSettings & format_settings_, - bool with_names_) - : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), with_names(with_names_) + bool with_names_, + bool yield_strings_) + : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), with_names(with_names_), yield_strings(yield_strings_) { const auto & sample = getPort().getHeader(); size_t num_columns = sample.columns(); @@ -200,10 +202,25 @@ void 
JSONCompactEachRowRowInputFormat::readField(size_t index, MutableColumns & { read_columns[index] = true; const auto & type = data_types[index]; - if (format_settings.null_as_default && !type->isNullable()) - read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type); + + if (yield_strings) + { + // notice: null_as_default on "null" strings is not supported + + String str; + readJSONString(str, in); + + ReadBufferFromString buf(str); + + type->deserializeAsWholeText(*columns[index], buf, format_settings); + } else - type->deserializeAsTextJSON(*columns[index], in, format_settings); + { + if (format_settings.null_as_default && !type->isNullable()) + read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type); + else + type->deserializeAsTextJSON(*columns[index], in, format_settings); + } } catch (Exception & e) { @@ -225,7 +242,7 @@ void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory) IRowInputFormat::Params params, const FormatSettings & settings) { - return std::make_shared(buf, sample, std::move(params), settings, false); + return std::make_shared(buf, sample, std::move(params), settings, false, false); }); factory.registerInputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", []( @@ -234,7 +251,25 @@ void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory) IRowInputFormat::Params params, const FormatSettings & settings) { - return std::make_shared(buf, sample, std::move(params), settings, true); + return std::make_shared(buf, sample, std::move(params), settings, true, false); + }); + + factory.registerInputFormatProcessor("JSONCompactStringsEachRow", []( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, std::move(params), settings, false, true); + }); + + factory.registerInputFormatProcessor("JSONCompactStringsEachRowWithNamesAndTypes", []( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, std::move(params), settings, true, true); }); } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 5c864ebc751..593f297108c 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -12,12 +12,18 @@ namespace DB class ReadBuffer; -/** A stream for reading data in JSONCompactEachRow and JSONCompactEachRowWithNamesAndTypes formats +/** A stream for reading data in JSONCompactEachRow- formats */ class JSONCompactEachRowRowInputFormat : public IRowInputFormat { public: - JSONCompactEachRowRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_, bool with_names_); + JSONCompactEachRowRowInputFormat( + ReadBuffer & in_, + const Block & header_, + Params params_, + const FormatSettings & format_settings_, + bool with_names_, + bool yield_strings_); String getName() const override { return "JSONCompactEachRowRowInputFormat"; } @@ -49,6 +55,7 @@ private: std::vector names_of_columns; bool with_names; + bool yield_strings; }; } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp index e155dcb4247..ab8fd164c3c 100644 --- 
a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -12,8 +12,9 @@ JSONCompactEachRowRowOutputFormat::JSONCompactEachRowRowOutputFormat(WriteBuffer const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_, - bool with_names_) - : IRowOutputFormat(header_, out_, callback), settings(settings_), with_names(with_names_) + bool with_names_, + bool yield_strings_) + : IRowOutputFormat(header_, out_, callback), settings(settings_), with_names(with_names_), yield_strings(yield_strings_) { const auto & sample = getPort(PortKind::Main).getHeader(); NamesAndTypesList columns(sample.getNamesAndTypesList()); @@ -23,7 +24,15 @@ JSONCompactEachRowRowOutputFormat::JSONCompactEachRowRowOutputFormat(WriteBuffer void JSONCompactEachRowRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num) { - type.serializeAsTextJSON(column, row_num, out, settings); + if (yield_strings) + { + WriteBufferFromOwnString buf; + + type.serializeAsText(column, row_num, buf, settings); + writeJSONString(buf.str(), out, settings); + } + else + type.serializeAsTextJSON(column, row_num, out, settings); } @@ -97,7 +106,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory) FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, callback, format_settings, false); + return std::make_shared(buf, sample, callback, format_settings, false, false); }); factory.registerOutputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", []( @@ -106,7 +115,25 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory) FormatFactory::WriteCallback callback, const FormatSettings &format_settings) { - return std::make_shared(buf, sample, callback, format_settings, true); + return std::make_shared(buf, sample, callback, format_settings, true, false); + }); + + factory.registerOutputFormatProcessor("JSONCompactStringsEachRow", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback callback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, callback, format_settings, false, true); + }); + + factory.registerOutputFormatProcessor("JSONCompactStringsEachRowWithNamesAndTypes", []( + WriteBuffer &buf, + const Block &sample, + FormatFactory::WriteCallback callback, + const FormatSettings &format_settings) + { + return std::make_shared(buf, sample, callback, format_settings, true, true); }); } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h index a7857a82d2d..56936783e78 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h @@ -15,7 +15,13 @@ namespace DB class JSONCompactEachRowRowOutputFormat : public IRowOutputFormat { public: - JSONCompactEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_, bool with_names); + JSONCompactEachRowRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + FormatFactory::WriteCallback callback, + const FormatSettings & settings_, + bool with_names_, + bool yield_strings_); String getName() const override { return "JSONCompactEachRowRowOutputFormat"; } @@ -41,5 +47,6 @@ private: NamesAndTypes fields; bool with_names; + bool 
yield_strings; }; } diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp index 7e56a4643da..c36942cff09 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp @@ -8,15 +8,28 @@ namespace DB { JSONCompactRowOutputFormat::JSONCompactRowOutputFormat( - WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_) - : JSONRowOutputFormat(out_, header, callback, settings_) + WriteBuffer & out_, + const Block & header, + FormatFactory::WriteCallback callback, + const FormatSettings & settings_, + bool yield_strings_) + : JSONRowOutputFormat(out_, header, callback, settings_, yield_strings_) { } void JSONCompactRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num) { - type.serializeAsTextJSON(column, row_num, *ostr, settings); + if (yield_strings) + { + WriteBufferFromOwnString buf; + + type.serializeAsText(column, row_num, buf, settings); + writeJSONString(buf.str(), *ostr, settings); + } + else + type.serializeAsTextJSON(column, row_num, *ostr, settings); + ++field_number; } @@ -83,7 +96,16 @@ void registerOutputFormatProcessorJSONCompact(FormatFactory & factory) FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, callback, format_settings); + return std::make_shared(buf, sample, callback, format_settings, false); + }); + + factory.registerOutputFormatProcessor("JSONCompactStrings", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback callback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, callback, format_settings, true); }); } diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h index f4002f74287..6585016c44f 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h @@ -11,12 +11,17 @@ namespace DB struct FormatSettings; -/** The stream for outputting data in the JSONCompact format. +/** The stream for outputting data in the JSONCompact- formats. 
*/ class JSONCompactRowOutputFormat : public JSONRowOutputFormat { public: - JSONCompactRowOutputFormat(WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_); + JSONCompactRowOutputFormat( + WriteBuffer & out_, + const Block & header, + FormatFactory::WriteCallback callback, + const FormatSettings & settings_, + bool yield_strings_); String getName() const override { return "JSONCompactRowOutputFormat"; } @@ -37,7 +42,6 @@ protected: } void writeTotalsFieldDelimiter() override; - }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 6350db3b211..9ba82fbb009 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -29,8 +30,12 @@ enum JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( - ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_) - : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) + ReadBuffer & in_, + const Block & header_, + Params params_, + const FormatSettings & format_settings_, + bool yield_strings_) + : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()), yield_strings(yield_strings_) { /// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it. skipBOMIfExists(in); @@ -138,10 +143,25 @@ void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns { seen_columns[index] = read_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; - if (format_settings.null_as_default && !type->isNullable()) - read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type); + + if (yield_strings) + { + // notice: null_as_default on "null" strings is not supported + + String str; + readJSONString(str, in); + + ReadBufferFromString buf(str); + + type->deserializeAsWholeText(*columns[index], buf, format_settings); + } else - type->deserializeAsTextJSON(*columns[index], in, format_settings); + { + if (format_settings.null_as_default && !type->isNullable()) + read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type); + else + type->deserializeAsTextJSON(*columns[index], in, format_settings); + } } catch (Exception & e) { @@ -318,13 +338,23 @@ void registerInputFormatProcessorJSONEachRow(FormatFactory & factory) IRowInputFormat::Params params, const FormatSettings & settings) { - return std::make_shared(buf, sample, std::move(params), settings); + return std::make_shared(buf, sample, std::move(params), settings, false); + }); + + factory.registerInputFormatProcessor("JSONStringsEachRow", []( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, std::move(params), settings, true); }); } void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory) { factory.registerFileSegmentationEngine("JSONEachRow", &fileSegmentationEngineJSONEachRowImpl); + factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRowImpl); } } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h 
b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index a0a4b735a3e..29a6ce6ecb8 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -20,7 +20,12 @@ class ReadBuffer; class JSONEachRowRowInputFormat : public IRowInputFormat { public: - JSONEachRowRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_); + JSONEachRowRowInputFormat( + ReadBuffer & in_, + const Block & header_, + Params params_, + const FormatSettings & format_settings_, + bool yield_strings_); String getName() const override { return "JSONEachRowRowInputFormat"; } @@ -75,6 +80,8 @@ private: bool data_in_square_brackets = false; bool allow_new_rows = true; + + bool yield_strings; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp index 910a9710de3..069499d99c1 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp @@ -8,8 +8,13 @@ namespace DB { -JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_) - : IRowOutputFormat(header_, out_, callback), settings(settings_) +JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + FormatFactory::WriteCallback callback, + const FormatSettings & settings_, + bool yield_strings_) + : IRowOutputFormat(header_, out_, callback), settings(settings_), yield_strings(yield_strings_) { const auto & sample = getPort(PortKind::Main).getHeader(); size_t columns = sample.columns(); @@ -27,7 +32,17 @@ void JSONEachRowRowOutputFormat::writeField(const IColumn & column, const IDataT { writeString(fields[field_number], out); writeChar(':', out); - type.serializeAsTextJSON(column, row_num, out, settings); + + if (yield_strings) + { + WriteBufferFromOwnString buf; + + type.serializeAsText(column, row_num, buf, settings); + writeJSONString(buf.str(), out, settings); + } + else + type.serializeAsTextJSON(column, row_num, out, settings); + ++field_number; } @@ -59,7 +74,16 @@ void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory) FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, callback, format_settings); + return std::make_shared(buf, sample, callback, format_settings, false); + }); + + factory.registerOutputFormatProcessor("JSONStringsEachRow", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback callback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, callback, format_settings, true); }); } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h index d2b6937cd01..5346a1ab19f 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h @@ -15,7 +15,12 @@ namespace DB class JSONEachRowRowOutputFormat : public IRowOutputFormat { public: - JSONEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_); + JSONEachRowRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + FormatFactory::WriteCallback callback, + const FormatSettings & settings_, + bool 
yield_strings_); String getName() const override { return "JSONEachRowRowOutputFormat"; } @@ -35,6 +40,9 @@ private: Names fields; FormatSettings settings; + +protected: + bool yield_strings; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp index a611b5a129b..35720df9672 100644 --- a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp @@ -36,7 +36,16 @@ void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factor FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, callback, format_settings); + return std::make_shared(buf, sample, callback, format_settings, false); + }); + + factory.registerOutputFormatProcessor("JSONStringsEachRowWithProgress", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback callback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, callback, format_settings, true); }); } diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index b3255f2894e..7dd7eb9953a 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -7,8 +7,13 @@ namespace DB { -JSONRowOutputFormat::JSONRowOutputFormat(WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_) - : IRowOutputFormat(header, out_, callback), settings(settings_) +JSONRowOutputFormat::JSONRowOutputFormat( + WriteBuffer & out_, + const Block & header, + FormatFactory::WriteCallback callback, + const FormatSettings & settings_, + bool yield_strings_) + : IRowOutputFormat(header, out_, callback), settings(settings_), yield_strings(yield_strings_) { const auto & sample = getPort(PortKind::Main).getHeader(); NamesAndTypesList columns(sample.getNamesAndTypesList()); @@ -71,7 +76,17 @@ void JSONRowOutputFormat::writeField(const IColumn & column, const IDataType & t writeCString("\t\t\t", *ostr); writeString(fields[field_number].name, *ostr); writeCString(": ", *ostr); - type.serializeAsTextJSON(column, row_num, *ostr, settings); + + if (yield_strings) + { + WriteBufferFromOwnString buf; + + type.serializeAsText(column, row_num, buf, settings); + writeJSONString(buf.str(), *ostr, settings); + } + else + type.serializeAsTextJSON(column, row_num, *ostr, settings); + ++field_number; } @@ -80,7 +95,17 @@ void JSONRowOutputFormat::writeTotalsField(const IColumn & column, const IDataTy writeCString("\t\t", *ostr); writeString(fields[field_number].name, *ostr); writeCString(": ", *ostr); - type.serializeAsTextJSON(column, row_num, *ostr, settings); + + if (yield_strings) + { + WriteBufferFromOwnString buf; + + type.serializeAsText(column, row_num, buf, settings); + writeJSONString(buf.str(), *ostr, settings); + } + else + type.serializeAsTextJSON(column, row_num, *ostr, settings); + ++field_number; } @@ -249,7 +274,16 @@ void registerOutputFormatProcessorJSON(FormatFactory & factory) FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, callback, format_settings); + return std::make_shared(buf, sample, callback, format_settings, false); + }); + + factory.registerOutputFormatProcessor("JSONStrings", []( + WriteBuffer & buf, + const Block & 
sample, + FormatFactory::WriteCallback callback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, callback, format_settings, true); }); } diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.h b/src/Processors/Formats/Impl/JSONRowOutputFormat.h index f9aea3a3e8b..4e9cceb717e 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.h @@ -16,7 +16,12 @@ namespace DB class JSONRowOutputFormat : public IRowOutputFormat { public: - JSONRowOutputFormat(WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_); + JSONRowOutputFormat( + WriteBuffer & out_, + const Block & header, + FormatFactory::WriteCallback callback, + const FormatSettings & settings_, + bool yield_strings_); String getName() const override { return "JSONRowOutputFormat"; } @@ -78,6 +83,8 @@ protected: Progress progress; Stopwatch watch; FormatSettings settings; + + bool yield_strings; }; } diff --git a/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.cpp deleted file mode 100644 index fff44a204fb..00000000000 --- a/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.cpp +++ /dev/null @@ -1,245 +0,0 @@ -#include -#include - -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int INCORRECT_DATA; - extern const int CANNOT_READ_ALL_DATA; -} - - -JSONStringsEachRowRowInputFormat::JSONStringsEachRowRowInputFormat(ReadBuffer & in_, - const Block & header_, - Params params_, - const FormatSettings & format_settings_, - bool with_names_) - : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), with_names(with_names_) -{ - const auto & sample = getPort().getHeader(); - size_t num_columns = sample.columns(); - - data_types.resize(num_columns); - column_indexes_by_names.reserve(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const auto & column_info = sample.getByPosition(i); - - data_types[i] = column_info.type; - column_indexes_by_names.emplace(column_info.name, i); - } -} - -void JSONStringsEachRowRowInputFormat::resetParser() -{ - IRowInputFormat::resetParser(); - column_indexes_for_input_fields.clear(); - not_seen_columns.clear(); -} - -void JSONStringsEachRowRowInputFormat::readPrefix() -{ - /// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it. 
- skipBOMIfExists(in); - - if (with_names) - { - size_t num_columns = getPort().getHeader().columns(); - read_columns.assign(num_columns, false); - - assertChar('[', in); - do - { - skipWhitespaceIfAny(in); - String column_name; - readJSONString(column_name, in); - addInputColumn(column_name); - skipWhitespaceIfAny(in); - } - while (checkChar(',', in)); - assertChar(']', in); - skipEndOfLine(); - - /// Type checking - assertChar('[', in); - for (size_t i = 0; i < column_indexes_for_input_fields.size(); ++i) - { - skipWhitespaceIfAny(in); - String data_type; - readJSONString(data_type, in); - - if (column_indexes_for_input_fields[i] && - data_types[*column_indexes_for_input_fields[i]]->getName() != data_type) - { - throw Exception( - "Type of '" + getPort().getHeader().getByPosition(*column_indexes_for_input_fields[i]).name - + "' must be " + data_types[*column_indexes_for_input_fields[i]]->getName() + - ", not " + data_type, - ErrorCodes::INCORRECT_DATA - ); - } - - if (i != column_indexes_for_input_fields.size() - 1) - assertChar(',', in); - skipWhitespaceIfAny(in); - } - assertChar(']', in); - } - else - { - size_t num_columns = getPort().getHeader().columns(); - read_columns.assign(num_columns, true); - column_indexes_for_input_fields.resize(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - column_indexes_for_input_fields[i] = i; - } - } - - for (size_t i = 0; i < read_columns.size(); ++i) - { - if (!read_columns[i]) - { - not_seen_columns.emplace_back(i); - } - } -} - -void JSONStringsEachRowRowInputFormat::addInputColumn(const String & column_name) -{ - names_of_columns.emplace_back(column_name); - - const auto column_it = column_indexes_by_names.find(column_name); - if (column_it == column_indexes_by_names.end()) - { - if (format_settings.skip_unknown_fields) - { - column_indexes_for_input_fields.push_back(std::nullopt); - return; - } - - throw Exception( - "Unknown field found in JSONStringsEachRow header: '" + column_name + "' " + - "at position " + std::to_string(column_indexes_for_input_fields.size()) + - "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", - ErrorCodes::INCORRECT_DATA - ); - } - - const auto column_index = column_it->second; - - if (read_columns[column_index]) - throw Exception("Duplicate field found while parsing JSONStringsEachRow header: " + column_name, ErrorCodes::INCORRECT_DATA); - - read_columns[column_index] = true; - column_indexes_for_input_fields.emplace_back(column_index); -} - -bool JSONStringsEachRowRowInputFormat::readRow(DB::MutableColumns &columns, DB::RowReadExtension &ext) -{ - skipEndOfLine(); - - if (in.eof()) - return false; - - size_t num_columns = columns.size(); - - read_columns.assign(num_columns, false); - - assertChar('[', in); - for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) - { - const auto & table_column = column_indexes_for_input_fields[file_column]; - if (table_column) - { - readField(*table_column, columns); - } - else - { - skipJSONField(in, StringRef(names_of_columns[file_column])); - } - - skipWhitespaceIfAny(in); - if (in.eof()) - throw Exception("Unexpected end of stream while parsing JSONStringsEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); - if (file_column + 1 != column_indexes_for_input_fields.size()) - { - assertChar(',', in); - skipWhitespaceIfAny(in); - } - } - assertChar(']', in); - - for (const auto & name : not_seen_columns) - columns[name]->insertDefault(); - - ext.read_columns = read_columns; - return 
true; -} - -void JSONStringsEachRowRowInputFormat::skipEndOfLine() -{ - skipWhitespaceIfAny(in); - if (!in.eof() && (*in.position() == ',' || *in.position() == ';')) - ++in.position(); - - skipWhitespaceIfAny(in); -} - -void JSONStringsEachRowRowInputFormat::readField(size_t index, MutableColumns & columns) -{ - try - { - read_columns[index] = true; - const auto & type = data_types[index]; - - String str; - readJSONString(str, in); - - ReadBufferFromString buf(str); - - type->deserializeAsWholeText(*columns[index], buf, format_settings); - } - catch (Exception & e) - { - e.addMessage("(while read the value of key " + getPort().getHeader().getByPosition(index).name + ")"); - throw; - } -} - -void JSONStringsEachRowRowInputFormat::syncAfterError() -{ - skipToUnescapedNextLineOrEOF(in); -} - -void registerInputFormatProcessorJSONStringsEachRow(FormatFactory & factory) -{ - factory.registerInputFormatProcessor("JSONStringsEachRow", []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, std::move(params), settings, false); - }); - - factory.registerInputFormatProcessor("JSONStringsEachRowWithNamesAndTypes", []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, std::move(params), settings, true); - }); -} - -} diff --git a/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.h deleted file mode 100644 index ec0a0f7bad9..00000000000 --- a/src/Processors/Formats/Impl/JSONStringsEachRowRowInputFormat.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#pragma once - -#include -#include -#include -#include - -namespace DB -{ - -class ReadBuffer; - -/** A stream for reading data in JSONStringsEachRow and JSONStringsEachRowWithNamesAndTypes formats -*/ -class JSONStringsEachRowRowInputFormat : public IRowInputFormat -{ -public: - JSONStringsEachRowRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_, bool with_names_); - - String getName() const override { return "JSONStringsEachRowRowInputFormat"; } - - - void readPrefix() override; - bool readRow(MutableColumns & columns, RowReadExtension & ext) override; - bool allowSyncAfterError() const override { return true; } - void syncAfterError() override; - void resetParser() override; - -private: - void addInputColumn(const String & column_name); - void skipEndOfLine(); - void readField(size_t index, MutableColumns & columns); - - const FormatSettings format_settings; - - using IndexesMap = std::unordered_map; - IndexesMap column_indexes_by_names; - - using OptionalIndexes = std::vector>; - OptionalIndexes column_indexes_for_input_fields; - - DataTypes data_types; - std::vector read_columns; - std::vector not_seen_columns; - - /// This is for the correct exceptions in skipping unknown fields. 
- std::vector names_of_columns; - - bool with_names; -}; - -} diff --git a/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp deleted file mode 100644 index 75007ea236e..00000000000 --- a/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - - -JSONStringsEachRowRowOutputFormat::JSONStringsEachRowRowOutputFormat(WriteBuffer & out_, - const Block & header_, - FormatFactory::WriteCallback callback, - const FormatSettings & settings_, - bool with_names_) - : IRowOutputFormat(header_, out_, callback), settings(settings_), with_names(with_names_) -{ - const auto & sample = getPort(PortKind::Main).getHeader(); - NamesAndTypesList columns(sample.getNamesAndTypesList()); - fields.assign(columns.begin(), columns.end()); -} - - -void JSONStringsEachRowRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num) -{ - WriteBufferFromOwnString buf; - - type.serializeAsText(column, row_num, buf, settings); - writeJSONString(buf.str(), out, settings); -} - - -void JSONStringsEachRowRowOutputFormat::writeFieldDelimiter() -{ - writeCString(", ", out); -} - - -void JSONStringsEachRowRowOutputFormat::writeRowStartDelimiter() -{ - writeChar('[', out); -} - - -void JSONStringsEachRowRowOutputFormat::writeRowEndDelimiter() -{ - writeCString("]\n", out); -} - -void JSONStringsEachRowRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) -{ - writeChar('\n', out); - size_t num_columns = columns.size(); - writeChar('[', out); - for (size_t i = 0; i < num_columns; ++i) - { - if (i != 0) - JSONStringsEachRowRowOutputFormat::writeFieldDelimiter(); - - JSONStringsEachRowRowOutputFormat::writeField(*columns[i], *types[i], row_num); - } - writeCString("]\n", out); -} - -void JSONStringsEachRowRowOutputFormat::writePrefix() -{ - if (with_names) - { - writeChar('[', out); - for (size_t i = 0; i < fields.size(); ++i) - { - writeChar('\"', out); - writeString(fields[i].name, out); - writeChar('\"', out); - if (i != fields.size() - 1) - writeCString(", ", out); - } - writeCString("]\n[", out); - for (size_t i = 0; i < fields.size(); ++i) - { - writeJSONString(fields[i].type->getName(), out, settings); - if (i != fields.size() - 1) - writeCString(", ", out); - } - writeCString("]\n", out); - } -} - -void JSONStringsEachRowRowOutputFormat::consumeTotals(DB::Chunk chunk) -{ - if (with_names) - IRowOutputFormat::consumeTotals(std::move(chunk)); -} - -void registerOutputFormatProcessorJSONStringsEachRow(FormatFactory & factory) -{ - factory.registerOutputFormatProcessor("JSONStringsEachRow", []( - WriteBuffer & buf, - const Block & sample, - FormatFactory::WriteCallback callback, - const FormatSettings & format_settings) - { - return std::make_shared(buf, sample, callback, format_settings, false); - }); - - factory.registerOutputFormatProcessor("JSONStringsEachRowWithNamesAndTypes", []( - WriteBuffer &buf, - const Block &sample, - FormatFactory::WriteCallback callback, - const FormatSettings &format_settings) - { - return std::make_shared(buf, sample, callback, format_settings, true); - }); -} - - -} diff --git a/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.h deleted file mode 100644 index 1d43a333da1..00000000000 --- a/src/Processors/Formats/Impl/JSONStringsEachRowRowOutputFormat.h +++ /dev/null @@ -1,45 +0,0 @@ 
-#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -/** The stream for outputting data in JSON format, by object per line. - * Does not validate UTF-8. - */ -class JSONStringsEachRowRowOutputFormat : public IRowOutputFormat -{ -public: - JSONStringsEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_, bool with_names); - - String getName() const override { return "JSONStringsEachRowRowOutputFormat"; } - - void writePrefix() override; - - void writeBeforeTotals() override {} - void writeTotals(const Columns & columns, size_t row_num) override; - void writeAfterTotals() override {} - - void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; - void writeFieldDelimiter() override; - void writeRowStartDelimiter() override; - void writeRowEndDelimiter() override; - -protected: - void consumeTotals(Chunk) override; - /// No extremes. - void consumeExtremes(Chunk) override {} - -private: - FormatSettings settings; - - NamesAndTypes fields; - - bool with_names; -}; -} diff --git a/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.cpp deleted file mode 100644 index 6ccb315f73f..00000000000 --- a/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include - -#include - - -namespace DB -{ - -JSONStringsRowOutputFormat::JSONStringsRowOutputFormat( - WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_) - : JSONRowOutputFormat(out_, header, callback, settings_) -{ -} - - -void JSONStringsRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num) -{ - WriteBufferFromOwnString buf; - - type.serializeAsText(column, row_num, buf, settings); - writeJSONString(buf.str(), *ostr, settings); - ++field_number; -} - - -void JSONStringsRowOutputFormat::writeFieldDelimiter() -{ - writeCString(", ", *ostr); -} - -void JSONStringsRowOutputFormat::writeTotalsFieldDelimiter() -{ - writeCString(",", *ostr); -} - - -void JSONStringsRowOutputFormat::writeRowStartDelimiter() -{ - writeCString("\t\t[", *ostr); -} - - -void JSONStringsRowOutputFormat::writeRowEndDelimiter() -{ - writeChar(']', *ostr); - field_number = 0; - ++row_count; -} - -void JSONStringsRowOutputFormat::writeBeforeTotals() -{ - writeCString(",\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"totals\": [", *ostr); -} - -void JSONStringsRowOutputFormat::writeAfterTotals() -{ - writeChar(']', *ostr); -} - -void JSONStringsRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) -{ - writeCString("\t\t\"", *ostr); - writeCString(title, *ostr); - writeCString("\": [", *ostr); - - size_t extremes_columns = columns.size(); - for (size_t i = 0; i < extremes_columns; ++i) - { - if (i != 0) - writeTotalsFieldDelimiter(); - - writeField(*columns[i], *types[i], row_num); - } - - writeChar(']', *ostr); -} - -void registerOutputFormatProcessorJSONStrings(FormatFactory & factory) -{ - factory.registerOutputFormatProcessor("JSONStrings", []( - WriteBuffer & buf, - const Block & sample, - FormatFactory::WriteCallback callback, - const FormatSettings & format_settings) - { - return std::make_shared(buf, sample, callback, format_settings); - }); -} - -} diff --git a/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.h 
b/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.h deleted file mode 100644 index b221bc9ee36..00000000000 --- a/src/Processors/Formats/Impl/JSONStringsRowOutputFormat.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -struct FormatSettings; - -/** The stream for outputting data in the JSONStrings format. - */ -class JSONStringsRowOutputFormat : public JSONRowOutputFormat -{ -public: - JSONStringsRowOutputFormat(WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_); - - String getName() const override { return "JSONStringsRowOutputFormat"; } - - void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; - void writeFieldDelimiter() override; - void writeRowStartDelimiter() override; - void writeRowEndDelimiter() override; - - void writeBeforeTotals() override; - void writeAfterTotals() override; - -protected: - void writeExtremesElement(const char * title, const Columns & columns, size_t row_num) override; - - void writeTotalsField(const IColumn & column, const IDataType & type, size_t row_num) override - { - return writeField(column, type, row_num); - } - - void writeTotalsFieldDelimiter() override; - -}; - -} diff --git a/tests/queries/0_stateless/01446_JSONStringsEachRow.sql b/tests/queries/0_stateless/01446_JSONStringsEachRow.sql deleted file mode 100644 index f461b217fe4..00000000000 --- a/tests/queries/0_stateless/01446_JSONStringsEachRow.sql +++ /dev/null @@ -1,63 +0,0 @@ -DROP TABLE IF EXISTS test_table; -DROP TABLE IF EXISTS test_table_2; -SELECT 1; -/* Check JSONStringsEachRow Output */ -CREATE TABLE test_table (value UInt8, name String) ENGINE = MergeTree() ORDER BY value; -INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c'); -SELECT * FROM test_table FORMAT JSONStringsEachRow; -SELECT 2; -/* Check Totals */ -SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONStringsEachRow; -SELECT 3; -/* Check JSONStringsEachRowWithNamesAndTypes Output */ -SELECT * FROM test_table FORMAT JSONStringsEachRowWithNamesAndTypes; -SELECT 4; -/* Check Totals */ -SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONStringsEachRowWithNamesAndTypes; -DROP TABLE IF EXISTS test_table; -SELECT 5; -/* Check JSONStringsEachRow Input */ -CREATE TABLE test_table (v1 String, v2 UInt8, v3 DEFAULT v2 * 16, v4 UInt8 DEFAULT 8) ENGINE = MergeTree() ORDER BY v2; -INSERT INTO test_table FORMAT JSONStringsEachRow ["first", "1", "2", "NULL"] ["second", "2", "null", "6"]; -SELECT * FROM test_table FORMAT JSONStringsEachRow; -TRUNCATE TABLE test_table; -SELECT 6; -/* Check input_format_null_as_default = 1 */ -SET input_format_null_as_default = 1; -INSERT INTO test_table FORMAT JSONStringsEachRow ["first", "1", "2", "ᴺᵁᴸᴸ"] ["second", "2", "null", "6"]; -SELECT * FROM test_table FORMAT JSONStringsEachRow; -TRUNCATE TABLE test_table; -SELECT 7; -/* Check Nested */ -CREATE TABLE test_table_2 (v1 UInt8, n Nested(id UInt8, name String)) ENGINE = MergeTree() ORDER BY v1; -INSERT INTO test_table_2 FORMAT JSONStringsEachRow ["16", "[15, 16, 17]", "['first', 'second', 'third']"]; -SELECT * FROM test_table_2 FORMAT JSONStringsEachRow; -TRUNCATE TABLE test_table_2; -SELECT 8; -/* Check JSONStringsEachRowWithNamesAndTypes Output */ -SET input_format_null_as_default = 0; -INSERT INTO test_table FORMAT JSONStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", 
"v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"]["second", "2", "null", "6"]; -SELECT * FROM test_table FORMAT JSONStringsEachRow; -TRUNCATE TABLE test_table; -SELECT 9; -/* Check input_format_null_as_default = 1 */ -SET input_format_null_as_default = 1; -INSERT INTO test_table FORMAT JSONStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"] ["second", "2", "null", "6"]; -SELECT * FROM test_table FORMAT JSONStringsEachRow; -SELECT 10; -/* Check Header */ -TRUNCATE TABLE test_table; -SET input_format_skip_unknown_fields = 1; -INSERT INTO test_table FORMAT JSONStringsEachRowWithNamesAndTypes ["v1", "v2", "invalid_column"]["String", "UInt8", "UInt8"]["first", "1", "32"]["second", "2", "64"]; -SELECT * FROM test_table FORMAT JSONStringsEachRow; -SELECT 11; -TRUNCATE TABLE test_table; -INSERT INTO test_table FORMAT JSONStringsEachRowWithNamesAndTypes ["v4", "v2", "v3"]["UInt8", "UInt8", "UInt16"]["1", "2", "3"] -SELECT * FROM test_table FORMAT JSONStringsEachRowWithNamesAndTypes; -SELECT 12; -/* Check Nested */ -INSERT INTO test_table_2 FORMAT JSONStringsEachRowWithNamesAndTypes ["v1", "n.id", "n.name"]["UInt8", "Array(UInt8)", "Array(String)"]["16", "[15, 16, 17]", "['first', 'second', 'third']"]; -SELECT * FROM test_table_2 FORMAT JSONStringsEachRowWithNamesAndTypes; - -DROP TABLE IF EXISTS test_table; -DROP TABLE IF EXISTS test_table_2; diff --git a/tests/queries/0_stateless/01446_json_strings_each_row.reference b/tests/queries/0_stateless/01446_json_strings_each_row.reference new file mode 100644 index 00000000000..84d41095b77 --- /dev/null +++ b/tests/queries/0_stateless/01446_json_strings_each_row.reference @@ -0,0 +1,22 @@ +1 +{"value":"1","name":"a"} +{"value":"2","name":"b"} +{"value":"3","name":"c"} +2 +{"name":"a","c":"1"} +{"name":"b","c":"1"} +{"name":"c","c":"1"} +3 +{"row":{"a":"1"}} +{"progress":{"read_rows":"1","read_bytes":"1","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}} +4 +{"row":{"a":"1"}} +{"progress":{"read_rows":"1","read_bytes":"1","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}} +5 +{"v1":"first","v2":"1","v3":"2","v4":"0"} +{"v1":"second","v2":"2","v3":"0","v4":"6"} +6 +{"v1":"first","v2":"1","v3":"2","v4":"0"} +{"v1":"second","v2":"2","v3":"0","v4":"6"} +7 +{"v1":"16","n.id":"[15,16,17]","n.name":"['first','second','third']"} diff --git a/tests/queries/0_stateless/01446_json_strings_each_row.sql b/tests/queries/0_stateless/01446_json_strings_each_row.sql new file mode 100644 index 00000000000..98bd3e3ab47 --- /dev/null +++ b/tests/queries/0_stateless/01446_json_strings_each_row.sql @@ -0,0 +1,38 @@ +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table_2; +SELECT 1; +/* Check JSONStringsEachRow Output */ +CREATE TABLE test_table (value UInt8, name String) ENGINE = MergeTree() ORDER BY value; +INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c'); +SELECT * FROM test_table FORMAT JSONStringsEachRow; +SELECT 2; +/* Check Totals */ +SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONStringsEachRow; +SELECT 3; +/* Check JSONStringsEachRowWithProgress Output */ +SELECT 1 as a FROM system.one FORMAT JSONStringsEachRowWithProgress; +SELECT 4; +/* Check Totals */ +SELECT 1 as a FROM system.one GROUP BY a WITH TOTALS ORDER BY a FORMAT JSONStringsEachRowWithProgress; +DROP TABLE IF EXISTS test_table; +SELECT 5; +/* Check JSONStringsEachRow Input */ +CREATE TABLE test_table (v1 String, v2 UInt8, v3 
DEFAULT v2 * 16, v4 UInt8 DEFAULT 8) ENGINE = MergeTree() ORDER BY v2; +INSERT INTO test_table FORMAT JSONStringsEachRow {"v1": "first", "v2": "1", "v3": "2", "v4": "NULL"} {"v1": "second", "v2": "2", "v3": "null", "v4": "6"}; +SELECT * FROM test_table FORMAT JSONStringsEachRow; +TRUNCATE TABLE test_table; +SELECT 6; +/* Check input_format_null_as_default = 1 */ +SET input_format_null_as_default = 1; +INSERT INTO test_table FORMAT JSONStringsEachRow {"v1": "first", "v2": "1", "v3": "2", "v4": "ᴺᵁᴸᴸ"} {"v1": "second", "v2": "2", "v3": "null", "v4": "6"}; +SELECT * FROM test_table FORMAT JSONStringsEachRow; +TRUNCATE TABLE test_table; +SELECT 7; +/* Check Nested */ +CREATE TABLE test_table_2 (v1 UInt8, n Nested(id UInt8, name String)) ENGINE = MergeTree() ORDER BY v1; +INSERT INTO test_table_2 FORMAT JSONStringsEachRow {"v1": "16", "n.id": "[15, 16, 17]", "n.name": "['first', 'second', 'third']"}; +SELECT * FROM test_table_2 FORMAT JSONStringsEachRow; +TRUNCATE TABLE test_table_2; + +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table_2; diff --git a/tests/queries/0_stateless/01447_json_strings.reference b/tests/queries/0_stateless/01447_json_strings.reference new file mode 100644 index 00000000000..ab88e2f3696 --- /dev/null +++ b/tests/queries/0_stateless/01447_json_strings.reference @@ -0,0 +1,43 @@ +{ + "meta": + [ + { + "name": "1", + "type": "UInt8" + }, + { + "name": "'a'", + "type": "String" + }, + { + "name": "[1, 2, 3]", + "type": "Array(UInt8)" + }, + { + "name": "tuple(1, 'a')", + "type": "Tuple(UInt8, String)" + }, + { + "name": "NULL", + "type": "Nullable(Nothing)" + }, + { + "name": "nan", + "type": "Float64" + } + ], + + "data": + [ + { + "1": "1", + "'a'": "a", + "[1, 2, 3]": "[1,2,3]", + "tuple(1, 'a')": "(1,'a')", + "NULL": "ᴺᵁᴸᴸ", + "nan": "nan" + } + ], + + "rows": 1 +} diff --git a/tests/queries/0_stateless/01447_JSONStrings.sql b/tests/queries/0_stateless/01447_json_strings.sql similarity index 100% rename from tests/queries/0_stateless/01447_JSONStrings.sql rename to tests/queries/0_stateless/01447_json_strings.sql diff --git a/tests/queries/0_stateless/01446_JSONStringsEachRow.reference b/tests/queries/0_stateless/01448_json_compact_strings_each_row.reference similarity index 100% rename from tests/queries/0_stateless/01446_JSONStringsEachRow.reference rename to tests/queries/0_stateless/01448_json_compact_strings_each_row.reference diff --git a/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql b/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql new file mode 100644 index 00000000000..c271de88434 --- /dev/null +++ b/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table_2; +SELECT 1; +/* Check JSONCompactStringsEachRow Output */ +CREATE TABLE test_table (value UInt8, name String) ENGINE = MergeTree() ORDER BY value; +INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c'); +SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; +SELECT 2; +/* Check Totals */ +SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactStringsEachRow; +SELECT 3; +/* Check JSONCompactStringsEachRowWithNamesAndTypes Output */ +SELECT * FROM test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes; +SELECT 4; +/* Check Totals */ +SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactStringsEachRowWithNamesAndTypes; +DROP TABLE IF EXISTS test_table; 
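-- For orientation (an illustration, not part of the test): the "strings" formats
-- serialize every value as a JSON string; the compact variants exercised below
-- emit one JSON array per row instead of one object per row. Assuming a table
-- with columns (value UInt8, name String) as created above, the row shapes are:
--   SELECT * FROM test_table FORMAT JSONStringsEachRow;        -- {"value":"1","name":"a"}
--   SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; -- ["1", "a"]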
+SELECT 5; +/* Check JSONCompactStringsEachRow Input */ +CREATE TABLE test_table (v1 String, v2 UInt8, v3 DEFAULT v2 * 16, v4 UInt8 DEFAULT 8) ENGINE = MergeTree() ORDER BY v2; +INSERT INTO test_table FORMAT JSONCompactStringsEachRow ["first", "1", "2", "NULL"] ["second", "2", "null", "6"]; +SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; +TRUNCATE TABLE test_table; +SELECT 6; +/* Check input_format_null_as_default = 1 */ +SET input_format_null_as_default = 1; +INSERT INTO test_table FORMAT JSONCompactStringsEachRow ["first", "1", "2", "ᴺᵁᴸᴸ"] ["second", "2", "null", "6"]; +SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; +TRUNCATE TABLE test_table; +SELECT 7; +/* Check Nested */ +CREATE TABLE test_table_2 (v1 UInt8, n Nested(id UInt8, name String)) ENGINE = MergeTree() ORDER BY v1; +INSERT INTO test_table_2 FORMAT JSONCompactStringsEachRow ["16", "[15, 16, 17]", "['first', 'second', 'third']"]; +SELECT * FROM test_table_2 FORMAT JSONCompactStringsEachRow; +TRUNCATE TABLE test_table_2; +SELECT 8; +/* Check JSONCompactStringsEachRowWithNamesAndTypes Output */ +SET input_format_null_as_default = 0; +INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"]["second", "2", "null", "6"]; +SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; +TRUNCATE TABLE test_table; +SELECT 9; +/* Check input_format_null_as_default = 1 */ +SET input_format_null_as_default = 1; +INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"] ["second", "2", "null", "6"]; +SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; +SELECT 10; +/* Check Header */ +TRUNCATE TABLE test_table; +SET input_format_skip_unknown_fields = 1; +INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "invalid_column"]["String", "UInt8", "UInt8"]["first", "1", "32"]["second", "2", "64"]; +SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; +SELECT 11; +TRUNCATE TABLE test_table; +INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v4", "v2", "v3"]["UInt8", "UInt8", "UInt16"]["1", "2", "3"] +SELECT * FROM test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes; +SELECT 12; +/* Check Nested */ +INSERT INTO test_table_2 FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "n.id", "n.name"]["UInt8", "Array(UInt8)", "Array(String)"]["16", "[15, 16, 17]", "['first', 'second', 'third']"]; +SELECT * FROM test_table_2 FORMAT JSONCompactStringsEachRowWithNamesAndTypes; + +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table_2; diff --git a/tests/queries/0_stateless/01447_JSONStrings.reference b/tests/queries/0_stateless/01449_json_compact_strings.reference similarity index 100% rename from tests/queries/0_stateless/01447_JSONStrings.reference rename to tests/queries/0_stateless/01449_json_compact_strings.reference diff --git a/tests/queries/0_stateless/01449_json_compact_strings.sql b/tests/queries/0_stateless/01449_json_compact_strings.sql new file mode 100644 index 00000000000..5b676e30347 --- /dev/null +++ b/tests/queries/0_stateless/01449_json_compact_strings.sql @@ -0,0 +1,10 @@ +SET output_format_write_statistics = 0; + +SELECT + 1, + 'a', + [1, 2, 3], + (1, 'a'), + null, + nan +FORMAT JSONCompactStrings; From b08056fa8c0f84670bab96b5643dd36850db0d8a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 2 Sep 2020 11:18:50 +0300 
Subject: [PATCH 033/298] Better selection of Merges with TTL --- src/Storages/MergeTree/MergeList.cpp | 2 ++ src/Storages/MergeTree/MergeList.h | 2 ++ .../MergeTree/MergeTreeDataMergerMutator.cpp | 32 +++++++++++++++---- .../MergeTree/MergeTreeDataMergerMutator.h | 7 ++-- src/Storages/MergeTree/MergeTreeSettings.h | 4 ++- src/Storages/MergeTree/MergeType.cpp | 27 ++++++++++++++++ src/Storages/MergeTree/MergeType.h | 17 ++++++++++ .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 13 ++++++++ .../MergeTree/ReplicatedMergeTreeLogEntry.h | 2 ++ .../MergeTree/ReplicatedMergeTreeQueue.cpp | 11 +++++-- .../MergeTree/ReplicatedMergeTreeQueue.h | 9 +++++- src/Storages/StorageMergeTree.cpp | 10 ++++-- src/Storages/StorageReplicatedMergeTree.cpp | 31 +++++++++++------- src/Storages/StorageReplicatedMergeTree.h | 3 +- src/Storages/System/StorageSystemMerges.cpp | 2 ++ 15 files changed, 145 insertions(+), 27 deletions(-) create mode 100644 src/Storages/MergeTree/MergeType.cpp create mode 100644 src/Storages/MergeTree/MergeType.h diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index e9d955f5395..5e7b7046c85 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -21,6 +21,7 @@ MergeListElement::MergeListElement(const std::string & database_, const std::str , result_data_version{future_part.part_info.getDataVersion()} , num_parts{future_part.parts.size()} , thread_id{getThreadId()} + , merge_type{toString(future_part.merge_type)} { for (const auto & source_part : future_part.parts) { @@ -70,6 +71,7 @@ MergeInfo MergeListElement::getInfo() const res.columns_written = columns_written.load(std::memory_order_relaxed); res.memory_usage = memory_tracker.get(); res.thread_id = thread_id; + res.merge_type = merge_type; for (const auto & source_part_name : source_part_names) res.source_part_names.emplace_back(source_part_name); diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 4ee8a75a868..e6ae0407ec0 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -45,6 +45,7 @@ struct MergeInfo UInt64 columns_written; UInt64 memory_usage; UInt64 thread_id; + std::string merge_type; }; struct FutureMergedMutatedPart; @@ -88,6 +89,7 @@ struct MergeListElement : boost::noncopyable UInt64 thread_id; + const std::string merge_type; MergeListElement(const std::string & database, const std::string & table, const FutureMergedMutatedPart & future_part); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 673ad02bfb6..a0ab7866402 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -158,15 +158,15 @@ MergeTreeDataMergerMutator::MergeTreeDataMergerMutator(MergeTreeData & data_, si } -UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge() +UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(MergeType merge_type) { size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed); - return getMaxSourcePartsSizeForMerge(background_pool_size, busy_threads_in_pool == 0 ? 0 : busy_threads_in_pool - 1); /// 1 is current thread + return getMaxSourcePartsSizeForMerge(background_pool_size, busy_threads_in_pool == 0 ? 
0 : busy_threads_in_pool - 1, merge_type); /// 1 is current thread } -UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used) +UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used, MergeType merge_type) { if (pool_used > pool_size) throw Exception("Logical error: invalid arguments passed to getMaxSourcePartsSize: pool_used > pool_size", ErrorCodes::LOGICAL_ERROR); @@ -178,14 +178,21 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_siz /// One entry is probably the entry where this function is executed. /// This will protect from bad settings. + + size_t lowering_setting; + if (merge_type == MergeType::TTL_DELETE) + lowering_setting = data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge_with_ttl; + else + lowering_setting = data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge; + UInt64 max_size = 0; - if (pool_used <= 1 || free_entries >= data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge) + if (pool_used <= 1 || free_entries >= lowering_setting) max_size = data_settings->max_bytes_to_merge_at_max_space_in_pool; else max_size = interpolateExponential( data_settings->max_bytes_to_merge_at_min_space_in_pool, data_settings->max_bytes_to_merge_at_max_space_in_pool, - static_cast(free_entries) / data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge); + static_cast(free_entries) / lowering_setting); return std::min(max_size, static_cast(data.getStoragePolicy()->getMaxUnreservedFreeSpace() / DISK_USAGE_COEFFICIENT_TO_SELECT)); } @@ -213,6 +220,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( bool aggressive, size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge_callback, + size_t max_total_size_to_merge_with_ttl, String * out_disable_reason) { MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector(); @@ -284,7 +292,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( current_time, data_settings->merge_with_ttl_timeout, data_settings->ttl_only_drop_parts); - parts_to_merge = merge_selector.select(partitions, max_total_size_to_merge); + + parts_to_merge = merge_selector.select(partitions, max_total_size_to_merge_with_ttl); + future_part.merge_type = MergeType::TTL_DELETE; } if (parts_to_merge.empty()) @@ -306,6 +316,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( *out_disable_reason = "There is no need to merge parts according to merge selector algorithm"; return false; } + future_part.merge_type = MergeType::NORMAL; } MergeTreeData::DataPartsVector parts; @@ -385,6 +396,12 @@ bool MergeTreeDataMergerMutator::selectAllPartsToMergeWithinPartition( LOG_DEBUG(log, "Selected {} parts from {} to {}", parts.size(), parts.front()->name, parts.back()->name); future_part.assign(std::move(parts)); + + if (final) + future_part.merge_type = MergeType::FINAL; + else + future_part.merge_type = MergeType::NORMAL; + available_disk_space -= required_disk_space; return true; } @@ -634,6 +651,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor new_data_part->partition.assign(future_part.getPartition()); new_data_part->is_temp = true; + if (future_part.merge_type == MergeType::TTL_DELETE && ttl_merges_blocker.isCancelled()) + throw Exception("Cancelled merging parts with expired TTL", ErrorCodes::ABORTED); + bool need_remove_expired_values = false; for (const auto & part : parts) new_data_part->ttl_infos.update(part->ttl_infos); 
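The size-lowering rule in the hunk above is easiest to see in isolation. Below is a minimal standalone sketch of the same policy, with illustrative constants and an assumed geometric interpolateExponential; the real values and helper live in MergeTreeSettings.h and elsewhere in the codebase.

// Sketch only: mirrors the lowering logic above with made-up defaults.
#include <algorithm>
#include <cmath>
#include <cstdint>

// Assumed behaviour of interpolateExponential: geometric interpolation
// from min_size to max_size as ratio goes from 0 to 1.
static uint64_t interpolateExponential(uint64_t min_size, uint64_t max_size, double ratio)
{
    double r = std::clamp(ratio, 0.0, 1.0);
    return static_cast<uint64_t>(min_size * std::pow(static_cast<double>(max_size) / min_size, r));
}

// The fewer free pool entries remain, the smaller the merges we allow.
// TTL merges use a higher threshold (14 vs 8 in this patch), so their
// allowed size starts shrinking earlier than for normal merges.
uint64_t maxSourcePartsSize(size_t pool_size, size_t pool_used, bool with_ttl)
{
    const uint64_t max_at_min_space = 1024 * 1024;                  // max_bytes_to_merge_at_min_space_in_pool
    const uint64_t max_at_max_space = 150ULL * 1024 * 1024 * 1024;  // max_bytes_to_merge_at_max_space_in_pool
    const size_t lowering = with_ttl ? 14 : 8;

    size_t free_entries = pool_size - pool_used;
    if (pool_used <= 1 || free_entries >= lowering)
        return max_at_max_space;

    return interpolateExponential(
        max_at_min_space, max_at_max_space,
        static_cast<double>(free_entries) / lowering);
}

The effect is that a nearly full background pool still admits small merges while long-running large merges (and TTL merges sooner than normal ones) are throttled first.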
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
index d5798fe3582..086a2a9cae2 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include <Storages/MergeTree/MergeType.h>

 namespace DB

@@ -22,6 +23,7 @@ struct FutureMergedMutatedPart
     MergeTreeDataPartType type;
     MergeTreePartInfo part_info;
     MergeTreeData::DataPartsVector parts;
+    MergeType merge_type = MergeType::NORMAL;

     const MergeTreePartition & getPartition() const { return parts.front()->partition; }

@@ -57,12 +59,12 @@ public:
     /** Get maximum total size of parts to do merge, at current moment of time.
      * It depends on number of free threads in background_pool and amount of free space in disk.
      */
-    UInt64 getMaxSourcePartsSizeForMerge();
+    UInt64 getMaxSourcePartsSizeForMerge(MergeType merge_type);

     /** For explicitly passed size of pool and number of used tasks.
      * This method could be used to calculate threshold depending on number of tasks in replication queue.
      */
-    UInt64 getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used);
+    UInt64 getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used, MergeType merge_type);

     /** Get maximum total size of parts to do mutation, at current moment of time.
      * It depends only on amount of free space in disk.
@@ -81,6 +83,7 @@ public:
         bool aggressive,
         size_t max_total_size_to_merge,
         const AllowedMergingPredicate & can_merge,
+        size_t max_total_size_to_merge_with_ttl,
         String * out_disable_reason = nullptr);

     /** Select all the parts in the specified partition for merge, if possible.
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 085c441aa90..e5707ff837c 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -33,8 +33,10 @@ struct Settings;
     M(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \
     M(UInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
     M(UInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
+    M(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
     M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \
     M(UInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \
+    M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge_with_ttl, 14, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge with TTL to process (or to put in queue).
This is to allow small merges to process - not filling the pool with long running merges.", 0) \ M(Seconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ M(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \ M(Seconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. How many seconds before failing to acquire table locks.", 0) \ @@ -83,7 +85,7 @@ struct Settings; M(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \ M(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \ M(UInt64, min_index_granularity_bytes, 1024, "Minimum amount of bytes in single granule.", 1024) \ - M(Int64, merge_with_ttl_timeout, 3600 * 24, "Minimal time in seconds, when merge with TTL can be repeated.", 0) \ + M(Int64, merge_with_ttl_timeout, 0, "Minimal time in seconds, when merge with TTL can be repeated.", 0) \ M(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \ M(Bool, write_final_mark, 1, "Write final mark after end of column (0 - disabled, do nothing if index_granularity_bytes=0)", 0) \ M(Bool, enable_mixed_granularity_parts, 1, "Enable parts with adaptive and non adaptive granularity", 0) \ diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp new file mode 100644 index 00000000000..b58a0de4093 --- /dev/null +++ b/src/Storages/MergeTree/MergeType.cpp @@ -0,0 +1,27 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +String toString(MergeType merge_type) +{ + switch (merge_type) + { + case MergeType::NORMAL: + return "NORMAL"; + case MergeType::FINAL: + return "FINAL"; + case MergeType::TTL_DELETE: + return "TTL_DELETE"; + } + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast(merge_type)); +} + +} diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h new file mode 100644 index 00000000000..5d9abaa61b3 --- /dev/null +++ b/src/Storages/MergeTree/MergeType.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace DB +{ + +enum class MergeType +{ + NORMAL, + FINAL, + TTL_DELETE, +}; + +String toString(MergeType merge_type); + +} diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index af6d980ad98..de8dd7f6097 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -36,6 +36,8 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const out << s << '\n'; out << "into\n" << new_part_name; out << "\ndeduplicate: " << deduplicate; + if (merge_type != MergeType::NORMAL) + out <<"\nmerge_type: " << static_cast(merge_type); break; case DROP_RANGE: @@ -149,7 +151,18 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) } in >> new_part_name; if (format_version >= 4) + { in >> "\ndeduplicate: " >> deduplicate; + in >> "\n"; + if (in.eof()) + trailing_newline_found = true; + else if (checkString("merge_type: ", in)) + { + UInt64 value; + in >> value; + merge_type = static_cast(value); + } + } } else if (type_str == "drop" || type_str == "detach") { diff --git 
a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index ae5fad0b83c..bea796ce015 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -79,6 +80,7 @@ struct ReplicatedMergeTreeLogEntryData Strings source_parts; bool deduplicate = false; /// Do deduplicate on merge + MergeType merge_type = MergeType::NORMAL; String column_name; String index_name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 8e2c3752212..c9b366a9ec8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1061,7 +1061,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( return false; } - UInt64 max_source_parts_size = entry.type == LogEntry::MERGE_PARTS ? merger_mutator.getMaxSourcePartsSizeForMerge() + UInt64 max_source_parts_size = entry.type == LogEntry::MERGE_PARTS ? merger_mutator.getMaxSourcePartsSizeForMerge(entry.merge_type) : merger_mutator.getMaxSourcePartSizeForMutation(); /** If there are enough free threads in background pool to do large merges (maximal size of merge is allowed), * then ignore value returned by getMaxSourcePartsSizeForMerge() and execute merge of any size, @@ -1312,21 +1312,26 @@ bool ReplicatedMergeTreeQueue::processEntry( } -std::pair ReplicatedMergeTreeQueue::countMergesAndPartMutations() const +ReplicatedMergeTreeQueue::OperationsInQueue ReplicatedMergeTreeQueue::countMergesAndPartMutations() const { std::lock_guard lock(state_mutex); size_t count_merges = 0; size_t count_mutations = 0; + size_t count_merges_with_ttl = 0; for (const auto & entry : queue) { if (entry->type == ReplicatedMergeTreeLogEntry::MERGE_PARTS) + { ++count_merges; + if (entry->merge_type == MergeType::TTL_DELETE) + ++count_merges_with_ttl; + } else if (entry->type == ReplicatedMergeTreeLogEntry::MUTATE_PART) ++count_mutations; } - return std::make_pair(count_merges, count_mutations); + return OperationsInQueue{count_merges, count_mutations, count_merges_with_ttl}; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 76f84da1ae8..c724701f1ff 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -46,6 +46,13 @@ private: } }; + struct OperationsInQueue + { + size_t merges = 0; + size_t mutations = 0; + size_t merges_with_ttl = 0; + }; + /// To calculate min_unprocessed_insert_time, max_processed_insert_time, for which the replica lag is calculated. using InsertsByTime = std::set; @@ -325,7 +332,7 @@ public: bool processEntry(std::function get_zookeeper, LogEntryPtr & entry, const std::function func); /// Count the number of merges and mutations of single parts in the queue. - std::pair countMergesAndPartMutations() const; + OperationsInQueue countMergesAndPartMutations() const; /// Count the total number of active mutations. 
size_t countMutations() const; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 7e4318a32f6..05f2f5254f0 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -650,9 +650,14 @@ bool StorageMergeTree::merge( if (partition_id.empty()) { - UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge(); + UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge(MergeType::NORMAL); + UInt64 max_source_parts_size_with_ttl = 0; + + if (!aggressive) + max_source_parts_size_with_ttl = merger_mutator.getMaxSourcePartsSizeForMerge(MergeType::TTL_DELETE); + if (max_source_parts_size > 0) - selected = merger_mutator.selectPartsToMerge(future_part, aggressive, max_source_parts_size, can_merge, out_disable_reason); + selected = merger_mutator.selectPartsToMerge(future_part, aggressive, max_source_parts_size, can_merge, max_source_parts_size_with_ttl, out_disable_reason); else if (out_disable_reason) *out_disable_reason = "Current value of max_source_parts_size is zero"; } @@ -724,6 +729,7 @@ bool StorageMergeTree::merge( try { + std::cerr << "FUTURE PART MERGE TYPE:" << toString(future_part.merge_type) << std::endl; new_part = merger_mutator.mergePartsToTemporaryPart( future_part, metadata_snapshot, *merge_entry, table_lock_holder, time(nullptr), merging_tagger->reserved_space, deduplicate); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 6058632d220..1c880c8c790 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2514,31 +2514,38 @@ void StorageReplicatedMergeTree::mergeSelectingTask() /// and in the same time, many small parts could be created and won't be merged. 
auto merges_and_mutations_queued = queue.countMergesAndPartMutations(); - size_t merges_and_mutations_sum = merges_and_mutations_queued.first + merges_and_mutations_queued.second; + size_t merges_and_mutations_sum = merges_and_mutations_queued.merges + merges_and_mutations_queued.mutations; if (merges_and_mutations_sum >= storage_settings_ptr->max_replicated_merges_in_queue) { LOG_TRACE(log, "Number of queued merges ({}) and part mutations ({})" " is greater than max_replicated_merges_in_queue ({}), so won't select new parts to merge or mutate.", - merges_and_mutations_queued.first, - merges_and_mutations_queued.second, + merges_and_mutations_queued.merges, + merges_and_mutations_queued.mutations, storage_settings_ptr->max_replicated_merges_in_queue); } else { UInt64 max_source_parts_size_for_merge = merger_mutator.getMaxSourcePartsSizeForMerge( - storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum); + storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum, MergeType::NORMAL); + + UInt64 max_source_parts_size_for_merge_with_ttl = 0; + if (merges_and_mutations_queued.merges_with_ttl < storage_settings_ptr->max_replicated_merges_with_ttl_in_queue) + max_source_parts_size_for_merge_with_ttl = merger_mutator.getMaxSourcePartsSizeForMerge( + storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum, MergeType::TTL_DELETE); + UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); FutureMergedMutatedPart future_merged_part; if (max_source_parts_size_for_merge > 0 && - merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, nullptr)) + merger_mutator.selectPartsToMerge(future_merged_part, false, + max_source_parts_size_for_merge, merge_pred, max_source_parts_size_for_merge_with_ttl, nullptr)) { create_result = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, - future_merged_part.name, future_merged_part.type, deduplicate, nullptr, merge_pred.getVersion()); + future_merged_part.name, future_merged_part.type, deduplicate, nullptr, merge_pred.getVersion(), future_merged_part.merge_type); } /// If there are many mutations in queue, it may happen, that we cannot enqueue enough merges to merge all new parts else if (max_source_part_size_for_mutation > 0 && queue.countMutations() > 0 - && merges_and_mutations_queued.second < storage_settings_ptr->max_replicated_mutations_in_queue) + && merges_and_mutations_queued.mutations < storage_settings_ptr->max_replicated_mutations_in_queue) { /// Choose a part to mutate. 
DataPartsVector data_parts = getDataPartsVector(); @@ -2617,7 +2624,8 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c const MergeTreeDataPartType & merged_part_type, bool deduplicate, ReplicatedMergeTreeLogEntryData * out_log_entry, - int32_t log_version) + int32_t log_version, + MergeType merge_type) { std::vector> exists_futures; exists_futures.reserve(parts.size()); @@ -2649,6 +2657,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c entry.source_replica = replica_name; entry.new_part_name = merged_name; entry.new_part_type = merged_part_type; + entry.merge_type = merge_type; entry.deduplicate = deduplicate; entry.create_time = time(nullptr); @@ -3584,7 +3593,7 @@ bool StorageReplicatedMergeTree::optimize( CreateMergeEntryResult create_result = createLogEntryToMergeParts( zookeeper, future_merged_part.parts, future_merged_part.name, future_merged_part.type, deduplicate, - &merge_entry, can_merge.getVersion()); + &merge_entry, can_merge.getVersion(), future_merged_part.merge_type); if (create_result == CreateMergeEntryResult::MissingPart) return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing"); @@ -3614,7 +3623,7 @@ bool StorageReplicatedMergeTree::optimize( if (!partition) { selected = merger_mutator.selectPartsToMerge( - future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, &disable_reason); + future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, 0, &disable_reason); } else { @@ -3639,7 +3648,7 @@ bool StorageReplicatedMergeTree::optimize( CreateMergeEntryResult create_result = createLogEntryToMergeParts( zookeeper, future_merged_part.parts, future_merged_part.name, future_merged_part.type, deduplicate, - &merge_entry, can_merge.getVersion()); + &merge_entry, can_merge.getVersion(), future_merged_part.merge_type); if (create_result == CreateMergeEntryResult::MissingPart) return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing"); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index e9395f20f3f..2bc9265331d 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -450,7 +450,8 @@ private: const MergeTreeDataPartType & merged_part_type, bool deduplicate, ReplicatedMergeTreeLogEntryData * out_log_entry, - int32_t log_version); + int32_t log_version, + MergeType merge_type); CreateMergeEntryResult createLogEntryToMutatePart( const IMergeTreeDataPart & part, diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index 39d22bd00ca..b3bd8f77a89 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -30,6 +30,7 @@ NamesAndTypesList StorageSystemMerges::getNamesAndTypes() {"columns_written", std::make_shared()}, {"memory_usage", std::make_shared()}, {"thread_id", std::make_shared()}, + {"merge_type", std::make_shared()}, }; } @@ -65,6 +66,7 @@ void StorageSystemMerges::fillData(MutableColumns & res_columns, const Context & res_columns[i++]->insert(merge.columns_written); res_columns[i++]->insert(merge.memory_usage); res_columns[i++]->insert(merge.thread_id); + res_columns[i++]->insert(merge.merge_type); } } From fbb37c37df6c428579130772151492209742008e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 2 Sep 2020 11:28:46 +0300 Subject: [PATCH 034/298] 
Simpler interface
---
 src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 10 +++++-----
 src/Storages/MergeTree/MergeTreeDataMergerMutator.h | 6 +++---
 src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +-
 src/Storages/StorageMergeTree.cpp | 4 ++--
 src/Storages/StorageReplicatedMergeTree.cpp | 4 ++--
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index a0ab7866402..31d566c4e0e 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -158,15 +158,15 @@ MergeTreeDataMergerMutator::MergeTreeDataMergerMutator(MergeTreeData & data_, si
 }

-UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(MergeType merge_type)
+UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(bool with_ttl) const
 {
     size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed);

-    return getMaxSourcePartsSizeForMerge(background_pool_size, busy_threads_in_pool == 0 ? 0 : busy_threads_in_pool - 1, merge_type); /// 1 is current thread
+    return getMaxSourcePartsSizeForMerge(background_pool_size, busy_threads_in_pool == 0 ? 0 : busy_threads_in_pool - 1, with_ttl); /// 1 is current thread
 }

-UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used, MergeType merge_type)
+UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used, bool with_ttl) const
 {
     if (pool_used > pool_size)
         throw Exception("Logical error: invalid arguments passed to getMaxSourcePartsSize: pool_used > pool_size", ErrorCodes::LOGICAL_ERROR);
@@ -180,7 +180,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_siz

     size_t lowering_setting;
-    if (merge_type == MergeType::TTL_DELETE)
+    if (with_ttl)
         lowering_setting = data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge_with_ttl;
     else
         lowering_setting = data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge;
@@ -198,7 +198,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_siz
 }

-UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation()
+UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation() const
 {
     const auto data_settings = data.getSettings();
     size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed);
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
index 086a2a9cae2..6b0e2e9be22 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
@@ -59,17 +59,17 @@ public:
     /** Get maximum total size of parts to do merge, at current moment of time.
      * It depends on number of free threads in background_pool and amount of free space in disk.
      */
-    UInt64 getMaxSourcePartsSizeForMerge(MergeType merge_type);
+    UInt64 getMaxSourcePartsSizeForMerge(bool with_ttl) const;

     /** For explicitly passed size of pool and number of used tasks.
      * This method could be used to calculate threshold depending on number of tasks in replication queue.
*/ - UInt64 getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used, MergeType merge_type); + UInt64 getMaxSourcePartsSizeForMerge(size_t pool_size, size_t pool_used, bool with_ttl) const; /** Get maximum total size of parts to do mutation, at current moment of time. * It depends only on amount of free space in disk. */ - UInt64 getMaxSourcePartSizeForMutation(); + UInt64 getMaxSourcePartSizeForMutation() const; /** Selects which parts to merge. Uses a lot of heuristics. * diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c9b366a9ec8..d1b4217401c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1061,7 +1061,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( return false; } - UInt64 max_source_parts_size = entry.type == LogEntry::MERGE_PARTS ? merger_mutator.getMaxSourcePartsSizeForMerge(entry.merge_type) + UInt64 max_source_parts_size = entry.type == LogEntry::MERGE_PARTS ? merger_mutator.getMaxSourcePartsSizeForMerge(entry.merge_type == MergeType::TTL_DELETE) : merger_mutator.getMaxSourcePartSizeForMutation(); /** If there are enough free threads in background pool to do large merges (maximal size of merge is allowed), * then ignore value returned by getMaxSourcePartsSizeForMerge() and execute merge of any size, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 05f2f5254f0..07e373ac93c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -650,11 +650,11 @@ bool StorageMergeTree::merge( if (partition_id.empty()) { - UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge(MergeType::NORMAL); + UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge(false); UInt64 max_source_parts_size_with_ttl = 0; if (!aggressive) - max_source_parts_size_with_ttl = merger_mutator.getMaxSourcePartsSizeForMerge(MergeType::TTL_DELETE); + max_source_parts_size_with_ttl = merger_mutator.getMaxSourcePartsSizeForMerge(true); if (max_source_parts_size > 0) selected = merger_mutator.selectPartsToMerge(future_part, aggressive, max_source_parts_size, can_merge, max_source_parts_size_with_ttl, out_disable_reason); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1c880c8c790..e01926d39d1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2526,12 +2526,12 @@ void StorageReplicatedMergeTree::mergeSelectingTask() else { UInt64 max_source_parts_size_for_merge = merger_mutator.getMaxSourcePartsSizeForMerge( - storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum, MergeType::NORMAL); + storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum, false); UInt64 max_source_parts_size_for_merge_with_ttl = 0; if (merges_and_mutations_queued.merges_with_ttl < storage_settings_ptr->max_replicated_merges_with_ttl_in_queue) max_source_parts_size_for_merge_with_ttl = merger_mutator.getMaxSourcePartsSizeForMerge( - storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum, MergeType::TTL_DELETE); + storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum, true); UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); From 06b38a4d44fc579e1635f201caf0babe1c7c74fe Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 2 
Sep 2020 13:31:31 +0300 Subject: [PATCH 035/298] Remove cerr --- src/Interpreters/MutationsInterpreter.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 3a397cb9b5a..ef95b25eb98 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -534,14 +534,14 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) stages.emplace_back(context); for (const auto & column : unchanged_columns) { - std::cerr << "ADDING UNCHANGED COLUMN TO STAGE:" << column << std::endl; + //std::cerr << "ADDING UNCHANGED COLUMN TO STAGE:" << column << std::endl; stages.back().column_to_updated.emplace( column, std::make_shared(column)); - std::cerr << "OUTPUT COLUMNS:" << stages.back().output_columns.size() << std::endl; - for (const auto & col : stages.back().output_columns) - { - std::cerr << "OUTPUT COLUMN:" << col << std::endl; - } + //std::cerr << "OUTPUT COLUMNS:" << stages.back().output_columns.size() << std::endl; + //for (const auto & col : stages.back().output_columns) + //{ + // std::cerr << "OUTPUT COLUMN:" << col << std::endl; + //} } } } From 128cb7ce22d9d763da462c5d41dbe90c237718f6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 2 Sep 2020 15:16:12 +0300 Subject: [PATCH 036/298] Don't select already selected parts --- src/Storages/MergeTree/MergeSelector.h | 14 ++--- src/Storages/MergeTree/MergeTreeData.cpp | 59 +++---------------- src/Storages/MergeTree/MergeTreeData.h | 3 - .../MergeTree/MergeTreeDataMergerMutator.cpp | 28 ++++----- .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 28 +++++++++ .../MergeTree/MergeTreeDataPartTTLInfo.h | 15 +++-- .../MergeTree/MergeTreePartsMover.cpp | 5 +- src/Storages/MergeTree/MergeType.cpp | 5 ++ src/Storages/MergeTree/MergeType.h | 2 + .../MergeTree/ReplicatedMergeTreeQueue.cpp | 4 +- src/Storages/MergeTree/TTLMergeSelector.cpp | 31 ++++++++-- src/Storages/MergeTree/TTLMergeSelector.h | 26 ++++++-- src/Storages/TTLDescription.cpp | 2 +- 13 files changed, 123 insertions(+), 99 deletions(-) diff --git a/src/Storages/MergeTree/MergeSelector.h b/src/Storages/MergeTree/MergeSelector.h index ae2c48fced1..285dc1a3660 100644 --- a/src/Storages/MergeTree/MergeSelector.h +++ b/src/Storages/MergeTree/MergeSelector.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace DB @@ -40,17 +42,9 @@ public: /// Opaque pointer to avoid dependencies (it is not possible to do forward declaration of typedef). const void * data; - /// Minimal time, when we need to delete some data from this part. - time_t min_delete_ttl; + MergeTreeDataPartTTLInfos ttl_infos; - /// Maximum time, when we will need to drop this part altogether because all rows in it are expired. - time_t max_delete_ttl; - - /// Minimal time, when we need to recompress this part. - time_t min_recompress_ttl; - - /// Maximum time, when we need to recompress this part. - time_t max_recompress_ttl; + ASTPtr compression_codec_desc; }; /// Parts are belong to partitions. Only parts within same partition could be merged. 
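With the selector's Part now carrying the whole MergeTreeDataPartTTLInfos, a delete-TTL selector can read its decision time straight from the part instead of the four scalar fields removed above. A minimal sketch under assumed names (a local struct, not the actual TTLDeleteMergeSelector):

// Illustrative only: how a delete-TTL selector keys off the per-part TTL infos.
#include <ctime>

struct PartTTLView
{
    time_t part_min_ttl = 0;  /// earliest moment any row in the part expires
    time_t part_max_ttl = 0;  /// moment by which the whole part is expired
};

// only_drop_parts == true: merge only when the entire part can be dropped,
// so key on the max TTL; otherwise key on the min TTL to prune rows early.
time_t deleteTTLForPart(const PartTTLView & ttl_infos, bool only_drop_parts)
{
    return only_drop_parts ? ttl_infos.part_max_ttl : ttl_infos.part_min_ttl;
}

Carrying compression_codec_desc in the Part serves the analogous purpose for recompression merges: the selector can skip parts whose codec already matches the TTL rule, which is what the isTTLAlreadySatisfied checks later in this patch are for.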
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 16a08b180f9..f535a040535 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -507,6 +507,7 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta if (new_table_ttl.definition_ast) { + std::cerr << "MOVE TTL SIZE:" << new_table_ttl.move_ttl.size() << std::endl; for (const auto & move_ttl : new_table_ttl.move_ttl) { if (!getDestinationForTTL(move_ttl)) @@ -2975,9 +2976,11 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_ { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); + auto metadata_snapshot = getInMemoryMetadataPtr(); ReservationPtr reservation; - auto ttl_entry = selectTTLEntryForTTLInfos(ttl_infos, time_of_move); + auto ttl_entry = selectTTLEntryForTTLInfos(metadata_snapshot->getMoveTTLs(), ttl_infos.moves_ttl, time_of_move, true); + if (ttl_entry) { SpacePtr destination_ptr = getDestinationForTTL(*ttl_entry); @@ -3031,64 +3034,16 @@ bool MergeTreeData::isPartInTTLDestination(const TTLDescription & ttl, const IMe return false; } -std::optional -MergeTreeData::selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const -{ - time_t max_max_ttl = 0; - TTLDescriptions::const_iterator best_entry_it; - auto metadata_snapshot = getInMemoryMetadataPtr(); - - const auto & move_ttl_entries = metadata_snapshot->getMoveTTLs(); - for (auto ttl_entry_it = move_ttl_entries.begin(); ttl_entry_it != move_ttl_entries.end(); ++ttl_entry_it) - { - auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry_it->result_column); - /// Prefer TTL rule which went into action last. - if (ttl_info_it != ttl_infos.moves_ttl.end() - && ttl_info_it->second.max <= time_of_move - && max_max_ttl <= ttl_info_it->second.max) - { - best_entry_it = ttl_entry_it; - max_max_ttl = ttl_info_it->second.max; - } - } - - return max_max_ttl ? *best_entry_it : std::optional(); -} - - CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const { - time_t max_max_ttl = 0; - TTLDescriptions::const_iterator best_entry_it; auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); - //std::cerr << "RECOMPRESSION ENTRIES SIZE:" << recompression_ttl_entries.size() << std::endl; - for (auto ttl_entry_it = recompression_ttl_entries.begin(); ttl_entry_it != recompression_ttl_entries.end(); ++ttl_entry_it) - { - //std::cerr << "RECOMPRESSION TTL SIZE:" << ttl_infos.recompression_ttl.size() << std::endl; - auto ttl_info_it = ttl_infos.recompression_ttl.find(ttl_entry_it->result_column); - /// Prefer TTL rule which went into action last. 
-        if (ttl_info_it != ttl_infos.recompression_ttl.end()
-            && ttl_info_it->second.max <= current_time
-            && max_max_ttl <= ttl_info_it->second.max)
-        {
-            best_entry_it = ttl_entry_it;
-            max_max_ttl = ttl_info_it->second.max;
-        }
-    }
+    auto best_ttl_entry = selectTTLEntryForTTLInfos(recompression_ttl_entries, ttl_infos.recompression_ttl, current_time, false);

-    if (max_max_ttl)
-    {
-        //std::cerr << "BEST ENTRY FOUND, MAX MAX:" << max_max_ttl << std::endl;
-        //std::cerr << "RECOMPRESSION IS NULLPTR:" << (best_entry_it->recompression_codec == nullptr) << std::endl;
-        return CompressionCodecFactory::instance().get(best_entry_it->recompression_codec, {});
-    }
-    //else
-    //{
-    //    std::cerr << "NOT FOUND NEW RECOMPRESSION\n";
-    //}
+    if (best_ttl_entry)
+        return CompressionCodecFactory::instance().get(best_ttl_entry->recompression_codec, {});

     return global_context.chooseCompressionCodec(
         part_size_compressed,
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index ab115927e1e..14cefe9af1d 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -667,9 +667,6 @@ public:
     ExpressionActionsPtr getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const;
     ExpressionActionsPtr getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const;

-    std::optional<TTLDescription> selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const;
-
-
     CompressionCodecPtr getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const;

     /// Limiting parallel sends per one table, used in DataPartsExchange
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index ce860a5b590..11bc6bbd46d 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -225,6 +225,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
 {
     MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector();
     const auto data_settings = data.getSettings();
+    auto metadata_snapshot = data.getInMemoryMetadataPtr();

     if (data_parts.empty())
     {
@@ -268,10 +269,8 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
         part_info.age = current_time - part->modification_time;
         part_info.level = part->info.level;
         part_info.data = &part;
-        part_info.min_delete_ttl = part->ttl_infos.part_min_ttl;
-        part_info.max_delete_ttl = part->ttl_infos.part_max_ttl;
-        part_info.min_recompress_ttl = part->ttl_infos.getMinRecompressionTTL();
-        part_info.max_recompress_ttl = part->ttl_infos.getMaxRecompressionTTL();
+        part_info.ttl_infos = part->ttl_infos;
+        part_info.compression_codec_desc = part->default_codec->getCodecDesc();

         partitions.back().emplace_back(part_info);

@@ -287,7 +286,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
     IMergeSelector::PartsInPartition parts_to_merge;

-    if (!ttl_merges_blocker.isCancelled())
+    if (!ttl_merges_blocker.isCancelled() && metadata_snapshot->hasAnyTTL())
     {
         TTLDeleteMergeSelector delete_ttl_selector(
             next_ttl_merge_times_by_partition,
             current_time,
             data_settings->merge_with_ttl_timeout,
             data_settings->ttl_only_drop_parts);

         parts_to_merge = delete_ttl_selector.select(partitions, max_total_size_to_merge_with_ttl);
         if (!parts_to_merge.empty())
             future_part.merge_type = MergeType::TTL_DELETE;
-        else
+        else if (metadata_snapshot->hasAnyRecompressionTTL())
         {
             TTLRecompressMergeSelector
recompress_ttl_selector( next_ttl_merge_times_by_partition, current_time, - data_settings->merge_with_ttl_timeout); + data_settings->merge_with_ttl_timeout, + metadata_snapshot->getRecompressionTTLs()); parts_to_merge = recompress_ttl_selector.select(partitions, max_total_size_to_merge_with_ttl); if (!parts_to_merge.empty()) @@ -665,7 +665,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor new_data_part->partition.assign(future_part.getPartition()); new_data_part->is_temp = true; - if (future_part.merge_type == MergeType::TTL_DELETE && ttl_merges_blocker.isCancelled()) + if (isTTLMergeType(future_part.merge_type) && ttl_merges_blocker.isCancelled()) throw Exception("Cancelled merging parts with expired TTL", ErrorCodes::ABORTED); bool need_remove_expired_values = false; @@ -840,8 +840,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor if (deduplicate) merged_stream = std::make_shared(merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, Names()); - if (need_remove_expired_values) - merged_stream = std::make_shared(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, false); + if (need_remove_expired_values || (future_part.merge_type == MergeType::FINAL && !ttl_merges_blocker.isCancelled())) + merged_stream = std::make_shared(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, future_part.merge_type == MergeType::FINAL); if (metadata_snapshot->hasSecondaryIndices()) @@ -1123,19 +1123,19 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor if (in && shouldExecuteTTL(metadata_snapshot, in->getHeader().getNamesAndTypesList().getNames(), commands_for_part)) { - std::cerr << "GOING TO MATERIALIZE TTL\n"; + //std::cerr << "GOING TO MATERIALIZE TTL\n"; need_remove_expired_values = true; } else { - std::cerr << "NOT GOING TO MATERIALIZE TTL\n"; - std::cerr << "IN IS NULL:" << (in == nullptr) << std::endl; + //std::cerr << "NOT GOING TO MATERIALIZE TTL\n"; + //std::cerr << "IN IS NULL:" << (in == nullptr) << std::endl; } /// All columns from part are changed and may be some more that were missing before in part if (!isWidePart(source_part) || (interpreter && interpreter->isAffectingAllColumns())) { - std::cerr << "MUTATING ALL PART COLUMNS\n"; + //std::cerr << "MUTATING ALL PART COLUMNS\n"; /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex /// (which is locked in data.getTotalActiveSizeInBytes()) /// (which is locked in shared mode when input streams are created) and when inserting new data diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 0664d3c5df0..42fc4be0fa5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -186,4 +186,32 @@ time_t MergeTreeDataPartTTLInfos::getMaxRecompressionTTL() const return max; } + +std::optional selectTTLEntryForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max) +{ + time_t best_ttl_time = 0; + TTLDescriptions::const_iterator best_entry_it; + for (auto ttl_entry_it = descriptions.begin(); ttl_entry_it != descriptions.end(); ++ttl_entry_it) + { + auto ttl_info_it = ttl_info_map.find(ttl_entry_it->result_column); + time_t ttl_time; + + if (use_max) + ttl_time = ttl_info_it->second.max; + else + ttl_time = ttl_info_it->second.min; + + /// Prefer TTL rule which went into 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
index 0f46b4f97e8..d0738053d1d 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
@@ -1,6 +1,7 @@
 #pragma once
 #include
 #include
+#include <Storages/TTLDescription.h>
 #include
 
@@ -30,11 +31,13 @@ struct MergeTreeDataPartTTLInfo
     }
 };
 
+/// Order is important as it would be serialized and hashed for checksums
+using TTLInfoMap = std::map<String, MergeTreeDataPartTTLInfo>;
+
 /// PartTTLInfo for all columns and table with minimal ttl for whole part
 struct MergeTreeDataPartTTLInfos
 {
-    /// Order is important as it would be serialized and hashed for checksums
-    std::map<String, MergeTreeDataPartTTLInfo> columns_ttl;
+    TTLInfoMap columns_ttl;
 
     MergeTreeDataPartTTLInfo table_ttl;
 
     /// `part_min_ttl` and `part_max_ttl` are TTLs which are used for selecting parts
@@ -42,11 +45,9 @@ struct MergeTreeDataPartTTLInfos
     time_t part_min_ttl = 0;
     time_t part_max_ttl = 0;
 
-    /// Order is important as it would be serialized and hashed for checksums
-    std::map<String, MergeTreeDataPartTTLInfo> moves_ttl;
+    TTLInfoMap moves_ttl;
 
-    /// Order is important as it would be serialized and hashed for checksums
-    std::map<String, MergeTreeDataPartTTLInfo> recompression_ttl;
+    TTLInfoMap recompression_ttl;
 
     time_t getMinRecompressionTTL() const;
     time_t getMaxRecompressionTTL() const;
@@ -70,4 +71,6 @@ struct MergeTreeDataPartTTLInfos
     }
 };
 
+std::optional<TTLDescription> selectTTLEntryForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max);
+
 }
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp
index e84ff418bc3..92ea745c5df 100644
--- a/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp
@@ -121,6 +121,8 @@ bool MergeTreePartsMover::selectPartsForMove(
 
     time_t time_of_move = time(nullptr);
 
+    auto metadata_snapshot = data->getInMemoryMetadataPtr();
+
     for (const auto & part : data_parts)
     {
         String reason;
@@ -128,7 +130,8 @@ bool MergeTreePartsMover::selectPartsForMove(
         if (!can_move(part, &reason))
             continue;
 
-        auto ttl_entry = data->selectTTLEntryForTTLInfos(part->ttl_infos, time_of_move);
+        auto ttl_entry = selectTTLEntryForTTLInfos(metadata_snapshot->getMoveTTLs(), part->ttl_infos.moves_ttl, time_of_move, true);
+
         auto to_insert = need_to_move.find(part->volume->getDisk());
         ReservationPtr reservation;
         if (ttl_entry)
diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp
index 69732877ad3..875a0a93f6b 100644
--- a/src/Storages/MergeTree/MergeType.cpp
+++ b/src/Storages/MergeTree/MergeType.cpp
@@ -26,4 +26,9 @@ String toString(MergeType merge_type)
     throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast<UInt64>(merge_type));
 }
 
+bool isTTLMergeType(MergeType merge_type)
+{
+    return merge_type == MergeType::TTL_DELETE || merge_type == MergeType::TTL_RECOMPRESS;
+}
+
 }
diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h
index 2bc88352bf2..0f4a0043c54 100644
--- a/src/Storages/MergeTree/MergeType.h
+++ b/src/Storages/MergeTree/MergeType.h
@@ -15,4 +15,6 @@ enum class MergeType
 
 String toString(MergeType merge_type);
 
+bool isTTLMergeType(MergeType merge_type);
+
 }
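The "order is important" comment above is why TTLInfoMap is an ordered std::map rather than a hash map: the TTL infos are serialized into the part and fed into its checksum, so iteration order must be deterministic across replicas. A self-contained sketch of the idea (the serialization format here is invented purely for illustration):

// std::map iterates in key order, so the serialized bytes, and hence any
// checksum over them, do not depend on insertion order.
#include <iostream>
#include <map>
#include <string>
#include <utility>

using TTLInfoMapSketch = std::map<std::string, std::pair<long, long>>;  // expression -> (min, max)

std::string serialize(const TTLInfoMapSketch & infos)
{
    std::string out;
    for (const auto & [expression, range] : infos)
        out += expression + ":" + std::to_string(range.first) + "-" + std::to_string(range.second) + ";";
    return out;
}

int main()
{
    // Insertion order differs, serialized output does not.
    TTLInfoMapSketch a{{"d + toIntervalSecond(10)", {100, 200}}, {"d + toIntervalSecond(5)", {50, 60}}};
    TTLInfoMapSketch b{{"d + toIntervalSecond(5)", {50, 60}}, {"d + toIntervalSecond(10)", {100, 200}}};
    std::cout << (serialize(a) == serialize(b)) << '\n';  // prints 1
}

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp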
b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d1b4217401c..206d8f93038 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1061,7 +1061,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( return false; } - UInt64 max_source_parts_size = entry.type == LogEntry::MERGE_PARTS ? merger_mutator.getMaxSourcePartsSizeForMerge(entry.merge_type == MergeType::TTL_DELETE) + UInt64 max_source_parts_size = entry.type == LogEntry::MERGE_PARTS ? merger_mutator.getMaxSourcePartsSizeForMerge(isTTLMergeType(entry.merge_type)) : merger_mutator.getMaxSourcePartSizeForMutation(); /** If there are enough free threads in background pool to do large merges (maximal size of merge is allowed), * then ignore value returned by getMaxSourcePartsSizeForMerge() and execute merge of any size, @@ -1324,7 +1324,7 @@ ReplicatedMergeTreeQueue::OperationsInQueue ReplicatedMergeTreeQueue::countMerge if (entry->type == ReplicatedMergeTreeLogEntry::MERGE_PARTS) { ++count_merges; - if (entry->merge_type == MergeType::TTL_DELETE) + if (isTTLMergeType(entry->merge_type)) ++count_merges_with_ttl; } else if (entry->type == ReplicatedMergeTreeLogEntry::MUTATE_PART) diff --git a/src/Storages/MergeTree/TTLMergeSelector.cpp b/src/Storages/MergeTree/TTLMergeSelector.cpp index 5c2d22ab11c..1bc5d563936 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.cpp +++ b/src/Storages/MergeTree/TTLMergeSelector.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -39,7 +40,7 @@ IMergeSelector::PartsInPartition ITTLMergeSelector::select( { time_t ttl = getTTLForPart(*part_it); - if (ttl && (partition_to_merge_index == -1 || ttl < partition_to_merge_min_ttl)) + if (ttl && !isTTLAlreadySatisfied(*part_it) && (partition_to_merge_index == -1 || ttl < partition_to_merge_min_ttl)) { partition_to_merge_min_ttl = ttl; partition_to_merge_index = i; @@ -59,7 +60,7 @@ IMergeSelector::PartsInPartition ITTLMergeSelector::select( { time_t ttl = getTTLForPart(*best_begin); - if (!ttl || ttl > current_time + if (!ttl || isTTLAlreadySatisfied(*best_begin) || ttl > current_time || (max_total_size_to_merge && total_size > max_total_size_to_merge)) { ++best_begin; @@ -77,7 +78,7 @@ IMergeSelector::PartsInPartition ITTLMergeSelector::select( { time_t ttl = getTTLForPart(*best_end); - if (!ttl || ttl > current_time + if (!ttl || isTTLAlreadySatisfied(*best_end) || ttl > current_time || (max_total_size_to_merge && total_size > max_total_size_to_merge)) break; @@ -93,12 +94,32 @@ IMergeSelector::PartsInPartition ITTLMergeSelector::select( time_t TTLDeleteMergeSelector::getTTLForPart(const IMergeSelector::Part & part) const { - return only_drop_parts ? part.max_delete_ttl : part.min_delete_ttl; + return only_drop_parts ? 
part.ttl_infos.part_max_ttl : part.ttl_infos.part_min_ttl; } time_t TTLRecompressMergeSelector::getTTLForPart(const IMergeSelector::Part & part) const { - return part.min_recompress_ttl; + return part.ttl_infos.getMinRecompressionTTL(); +} + +bool TTLRecompressMergeSelector::isTTLAlreadySatisfied(const IMergeSelector::Part & part) const +{ + if (recompression_ttls.empty()) + return false; + + auto ttl_description = selectTTLEntryForTTLInfos(recompression_ttls, part.ttl_infos.recompression_ttl, current_time, false); + + if (!ttl_description) + return true; + + auto ast_to_str = [](ASTPtr query) -> String + { + if (!query) + return ""; + return queryToString(query); + }; + + return ast_to_str(ttl_description->recompression_codec) == ast_to_str(part.compression_codec_desc); } } diff --git a/src/Storages/MergeTree/TTLMergeSelector.h b/src/Storages/MergeTree/TTLMergeSelector.h index a7380aa87c9..de4cbc11a57 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.h +++ b/src/Storages/MergeTree/TTLMergeSelector.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -21,9 +22,9 @@ public: using PartitionIdToTTLs = std::map; ITTLMergeSelector(PartitionIdToTTLs & merge_due_times_, time_t current_time_, Int64 merge_cooldown_time_) - : merge_due_times(merge_due_times_), - current_time(current_time_), - merge_cooldown_time(merge_cooldown_time_) + : current_time(current_time_) + , merge_due_times(merge_due_times_) + , merge_cooldown_time(merge_cooldown_time_) { } @@ -32,10 +33,13 @@ public: const size_t max_total_size_to_merge) override; virtual time_t getTTLForPart(const IMergeSelector::Part & part) const = 0; + virtual bool isTTLAlreadySatisfied(const IMergeSelector::Part & part) const = 0; + +protected: + time_t current_time; private: PartitionIdToTTLs & merge_due_times; - time_t current_time; Int64 merge_cooldown_time; }; @@ -51,6 +55,11 @@ public: time_t getTTLForPart(const IMergeSelector::Part & part) const override; + bool isTTLAlreadySatisfied(const IMergeSelector::Part &) const override + { + return false; + } + private: bool only_drop_parts; }; @@ -58,9 +67,16 @@ private: class TTLRecompressMergeSelector : public ITTLMergeSelector { public: - using ITTLMergeSelector::ITTLMergeSelector; + TTLRecompressMergeSelector(PartitionIdToTTLs & merge_due_times_, time_t current_time_, Int64 merge_cooldown_time_, const TTLDescriptions & recompression_ttls_) + : ITTLMergeSelector(merge_due_times_, current_time_, merge_cooldown_time_) + , recompression_ttls(recompression_ttls_) + {} time_t getTTLForPart(const IMergeSelector::Part & part) const override; + + bool isTTLAlreadySatisfied(const IMergeSelector::Part & part) const override; +private: + TTLDescriptions recompression_ttls; }; } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index bc634fe67bd..07173d61ece 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -328,7 +328,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( for (const auto & ttl_element_ptr : definition_ast->children) { auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key); - if (ttl.mode == TTLMode::DELETE) + if (ttl.mode == TTLMode::DELETE || ttl.mode == TTLMode::GROUP_BY) { if (seen_delete_ttl) throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); From dcbddbb8d04e7c9f16bc7555ac37a5fb0a60a382 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 2 Sep 2020 19:15:41 +0300 Subject: [PATCH 037/298] Add recompression ttls tests 
and fix bugs --- src/Storages/MergeTree/MergeTreeData.cpp | 3 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 21 +-- .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 11 +- .../MergeTree/MergeTreeDataPartTTLInfo.h | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 1 - src/Storages/MergeTree/MergeTreeSettings.cpp | 13 ++ src/Storages/MergeTree/MergeTreeSettings.h | 4 +- src/Storages/MergeTree/MergeType.cpp | 2 - src/Storages/MergeTree/MergeType.h | 1 - src/Storages/MergeTree/TTLMergeSelector.cpp | 2 + .../MergeTree/registerStorageMergeTree.cpp | 1 - src/Storages/StorageMergeTree.cpp | 1 - src/Storages/TTLDescription.cpp | 2 +- .../test_recompression_ttl/__init__.py | 0 .../configs/background_pool_config.xml | 9 ++ .../test_recompression_ttl/test.py | 131 ++++++++++++++++++ 16 files changed, 179 insertions(+), 25 deletions(-) create mode 100644 tests/integration/test_recompression_ttl/__init__.py create mode 100644 tests/integration/test_recompression_ttl/configs/background_pool_config.xml create mode 100644 tests/integration/test_recompression_ttl/test.py diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f535a040535..e5946619da5 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -507,7 +507,6 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta if (new_table_ttl.definition_ast) { - std::cerr << "MOVE TTL SIZE:" << new_table_ttl.move_ttl.size() << std::endl; for (const auto & move_ttl : new_table_ttl.move_ttl) { if (!getDestinationForTTL(move_ttl)) @@ -3040,6 +3039,8 @@ CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_c auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); + LOG_DEBUG(log, "RECOMPRESSION ENTRIES SIZE {}", recompression_ttl_entries.size()); + LOG_DEBUG(log, "TTL INFOS SIZE {}", ttl_infos.recompression_ttl.size()); auto best_ttl_entry = selectTTLEntryForTTLInfos(recompression_ttl_entries, ttl_infos.recompression_ttl, current_time, false); if (best_ttl_entry) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 11bc6bbd46d..7c849e2a457 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -234,6 +234,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( return false; } + //LOG_DEBUG(log, "SELECTING PARTS TO MERGE"); time_t current_time = std::time(nullptr); IMergeSelector::Partitions partitions; @@ -270,7 +271,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( part_info.level = part->info.level; part_info.data = ∂ part_info.ttl_infos = part->ttl_infos; - part_info.compression_codec_desc = part->default_codec->getCodecDesc(); + part_info.compression_codec_desc = part->default_codec->getFullCodecDesc(); partitions.back().emplace_back(part_info); @@ -288,6 +289,8 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( if (!ttl_merges_blocker.isCancelled() && metadata_snapshot->hasAnyTTL()) { + + //LOG_DEBUG(log, "SELECTING WITH TTL"); TTLDeleteMergeSelector delete_ttl_selector( next_ttl_merge_times_by_partition, current_time, @@ -299,6 +302,8 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( future_part.merge_type = MergeType::TTL_DELETE; else if (metadata_snapshot->hasAnyRecompressionTTL()) { + + //LOG_DEBUG(log, "SELECTING WITH RECOMPRESSION"); TTLRecompressMergeSelector 
recompress_ttl_selector(
                next_ttl_merge_times_by_partition,
                current_time,
@@ -307,7 +312,10 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
             parts_to_merge = recompress_ttl_selector.select(partitions, max_total_size_to_merge_with_ttl);
 
             if (!parts_to_merge.empty())
+            {
+                //LOG_DEBUG(log, "SELECTED PARTS: {}", parts_to_merge.size());
                 future_part.merge_type = MergeType::TTL_RECOMPRESS;
+            }
         }
     }
 
@@ -410,11 +418,7 @@ bool MergeTreeDataMergerMutator::selectAllPartsToMergeWithinPartition(
     LOG_DEBUG(log, "Selected {} parts from {} to {}", parts.size(), parts.front()->name, parts.back()->name);
     future_part.assign(std::move(parts));
-
-    if (final)
-        future_part.merge_type = MergeType::FINAL;
-    else
-        future_part.merge_type = MergeType::NORMAL;
+    future_part.merge_type = MergeType::NORMAL;
 
     available_disk_space -= required_disk_space;
     return true;
@@ -693,6 +697,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
     /// the order is reverse. This annoys TSan even though one lock is locked in shared mode and thus
     /// deadlock is impossible.
     auto compression_codec = data.getCompressionCodecForPart(merge_entry->total_size_bytes_compressed, new_data_part->ttl_infos, time_of_merge);
+    LOG_DEBUG(log, "CHOSEN CODEC {} FOR PART {}", queryToString(compression_codec->getCodecDesc()), new_data_part->name);
 
     /// TODO: Should it go through IDisk interface?
     String rows_sources_file_path;
@@ -840,8 +845,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
     if (deduplicate)
         merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, Names());
 
-    if (need_remove_expired_values || (future_part.merge_type == MergeType::FINAL && !ttl_merges_blocker.isCancelled()))
-        merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, future_part.merge_type == MergeType::FINAL);
+    if (need_remove_expired_values)
+        merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, false);
 
 
     if (metadata_snapshot->hasSecondaryIndices())
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
index 42fc4be0fa5..1cecb2672fb 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
@@ -73,15 +73,14 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in)
     }
     if (json.has("recompression"))
     {
-        const JSON & moves = json["recompression"];
-        for (auto move : moves) // NOLINT
+        const JSON & recompressions = json["recompression"];
+        for (auto recompression : recompressions) // NOLINT
         {
             MergeTreeDataPartTTLInfo ttl_info;
-            ttl_info.min = move["min"].getUInt();
-            ttl_info.max = move["max"].getUInt();
-            String expression = move["expression"].getString();
+            ttl_info.min = recompression["min"].getUInt();
+            ttl_info.max = recompression["max"].getUInt();
+            String expression = recompression["expression"].getString();
             recompression_ttl.emplace(expression, ttl_info);
-            updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
         }
     }
 }
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
index d0738053d1d..1176c036b8c 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
@@ -67,7 +67,7 @@ struct MergeTreeDataPartTTLInfos
 
     bool empty()
     {
-        return !part_min_ttl && moves_ttl.empty();
+        return !part_min_ttl && moves_ttl.empty() && 
recompression_ttl.empty(); } }; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 5115666066a..607535225a2 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -309,7 +309,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. auto compression_codec = data.getCompressionCodecForPart(0, new_data_part->ttl_infos, current_time); - std::cerr << "SELECTED CODEC:" << queryToString(compression_codec->getCodecDesc()) << std::endl; const auto & index_factory = MergeTreeIndexFactory::instance(); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 7f537ec330a..337cad224a4 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -100,6 +100,19 @@ void MergeTreeSettings::sanityCheck(const Settings & query_settings) const number_of_free_entries_in_pool_to_lower_max_size_of_merge, query_settings.background_pool_size); } + + if (number_of_free_entries_in_pool_to_lower_max_size_of_merge_with_ttl >= query_settings.background_pool_size) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of 'number_of_free_entries_in_pool_to_lower_max_size_of_merge_with_ttl' setting" + " ({}) (default values are defined in section of config.xml" + " or the value can be specified per table in SETTINGS section of CREATE TABLE query)" + " is greater or equals to the value of 'background_pool_size'" + " ({}) (the value is defined in users.xml for default profile)." + " This indicates incorrect configuration because the maximum size of merge with TTL will be always lowered.", + number_of_free_entries_in_pool_to_lower_max_size_of_merge_with_ttl, + query_settings.background_pool_size); + } + } } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index e5707ff837c..f2235bf94aa 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -33,10 +33,10 @@ struct Settings; M(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \ M(UInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ M(UInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ - M(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ + M(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). 
This is to allow small merges to process - not filling the pool with long running merges.", 0) \ M(UInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ - M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge_with_ttl, 14, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \ + M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge_with_ttl, 14, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running TTL merges.", 0) \ M(Seconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ M(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \ M(Seconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. How many seconds before failing to acquire table locks.", 0) \ diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp index 875a0a93f6b..b521d835b36 100644 --- a/src/Storages/MergeTree/MergeType.cpp +++ b/src/Storages/MergeTree/MergeType.cpp @@ -15,8 +15,6 @@ String toString(MergeType merge_type) { case MergeType::NORMAL: return "NORMAL"; - case MergeType::FINAL: - return "FINAL"; case MergeType::TTL_DELETE: return "TTL_DELETE"; case MergeType::TTL_RECOMPRESS: diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h index 0f4a0043c54..26fb172f463 100644 --- a/src/Storages/MergeTree/MergeType.h +++ b/src/Storages/MergeTree/MergeType.h @@ -8,7 +8,6 @@ namespace DB enum class MergeType { NORMAL, - FINAL, TTL_DELETE, TTL_RECOMPRESS, }; diff --git a/src/Storages/MergeTree/TTLMergeSelector.cpp b/src/Storages/MergeTree/TTLMergeSelector.cpp index 1bc5d563936..2e71f3c5401 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.cpp +++ b/src/Storages/MergeTree/TTLMergeSelector.cpp @@ -118,6 +118,8 @@ bool TTLRecompressMergeSelector::isTTLAlreadySatisfied(const IMergeSelector::Par return ""; return queryToString(query); }; + //LOG_DEBUG(&Poco::Logger::get("RECOMPRESS SELECTOR"), "PART CODEC: {}", ast_to_str(part.compression_codec_desc)); + //LOG_DEBUG(&Poco::Logger::get("RECOMPRESS SELECTOR"), "ENTRY CODEC: {}", ast_to_str(ttl_description->recompression_codec)); return ast_to_str(ttl_description->recompression_codec) == ast_to_str(part.compression_codec_desc); } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 8706c1f3b37..b0c422bd79f 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -559,7 +559,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->ttl_table) { - std::cerr << "Parsing table ttl in description\n"; metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( args.storage_def->ttl_table->ptr(), metadata.columns, args.context, metadata.primary_key); } diff --git 
a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 07e373ac93c..72dee939c9d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -729,7 +729,6 @@ bool StorageMergeTree::merge( try { - std::cerr << "FUTURE PART MERGE TYPE:" << toString(future_part.merge_type) << std::endl; new_part = merger_mutator.mergePartsToTemporaryPart( future_part, metadata_snapshot, *merge_entry, table_lock_holder, time(nullptr), merging_tagger->reserved_space, deduplicate); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 07173d61ece..8a212074027 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -273,7 +273,7 @@ TTLDescription TTLDescription::getTTLFromAST( } else if (ttl_element->mode == TTLMode::RECOMPRESS) { - std::cerr << "GOT INTO RECOMPRESS\n"; + //std::cerr << "GOT INTO RECOMPRESS\n"; result.recompression_codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( ttl_element->recompression_codec, {}, !context.getSettingsRef().allow_suspicious_codecs); diff --git a/tests/integration/test_recompression_ttl/__init__.py b/tests/integration/test_recompression_ttl/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_recompression_ttl/configs/background_pool_config.xml b/tests/integration/test_recompression_ttl/configs/background_pool_config.xml new file mode 100644 index 00000000000..e62a0105907 --- /dev/null +++ b/tests/integration/test_recompression_ttl/configs/background_pool_config.xml @@ -0,0 +1,9 @@ + + 1 + 0 + 0.0 + 0 + 1 + 1 + 0 + diff --git a/tests/integration/test_recompression_ttl/test.py b/tests/integration/test_recompression_ttl/test.py new file mode 100644 index 00000000000..2bf36d79f05 --- /dev/null +++ b/tests/integration/test_recompression_ttl/test.py @@ -0,0 +1,131 @@ +import time +import pytest + +import helpers.client as client +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/background_pool_config.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=['configs/background_pool_config.xml'], with_zookeeper=True) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + except Exception as ex: + print ex + + finally: + cluster.shutdown() + + +def wait_part_in_parts(node, table, part_name, retries=40): + for i in range(retries): + result = node.query("SELECT name FROM system.parts where name = '{}' and table = '{}'".format(part_name, table)) + if result: + return True + time.sleep(0.5) + else: + return False + + +def optimize_final_table_until_success(node, table_name, retries=40): + for i in range(retries): + try: + node.query("OPTIMIZE TABLE {} FINAL".format(table_name), settings={"optimize_throw_if_noop": "1"}) + return True + except: + time.sleep(0.5) + else: + return False + + +def wait_part_and_get_compression_codec(node, table, part_name, retries=40): + if wait_part_in_parts(node, table, part_name, retries): + return node.query("SELECT default_compression_codec FROM system.parts where name = '{}' and table = '{}'".format(part_name, table)).strip() + return None + + +def test_recompression_simple(started_cluster): + node1.query("CREATE TABLE table_for_recompression (d DateTime, key UInt64, data String) ENGINE MergeTree() ORDER BY tuple() TTL d + INTERVAL 10 SECOND RECOMPRESS 
CODEC(ZSTD(10))") + node1.query("INSERT INTO table_for_recompression VALUES (now(), 1, '1')") + + assert node1.query("SELECT default_compression_codec FROM system.parts where name = 'all_1_1_0'") == "LZ4\n" + + codec = wait_part_and_get_compression_codec(node1, "table_for_recompression", "all_1_1_1") + if not codec: + assert False, "Part all_1_1_1 doesn't appeared in system.parts" + + assert codec == "ZSTD(10)" + + if wait_part_in_parts(node1, "table_for_recompression", "all_1_1_2", retries=20): + assert False, "Redundant merge were assigned for part all_1_1_1 -> all_1_1_2" + + optimize_final_table_until_success(node1, "table_for_recompression") + + assert node1.query("SELECT default_compression_codec FROM system.parts where name = 'all_1_1_2'") == "ZSTD(10)\n" + + +def test_recompression_multiple_ttls(started_cluster): + node2.query("CREATE TABLE table_for_recompression (d DateTime, key UInt64, data String) ENGINE MergeTree() ORDER BY tuple() \ + TTL d + INTERVAL 5 SECOND RECOMPRESS CODEC(ZSTD(10)), \ + d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(11)), \ + d + INTERVAL 15 SECOND RECOMPRESS CODEC(ZSTD(12))") + + node2.query("INSERT INTO table_for_recompression VALUES (now(), 1, '1')") + + assert node2.query("SELECT default_compression_codec FROM system.parts where name = 'all_1_1_0'") == "LZ4\n" + + codec = wait_part_and_get_compression_codec(node2, "table_for_recompression", "all_1_1_1") + if not codec: + assert False, "Part all_1_1_1 doesn't appeared in system.parts" + + assert codec == "ZSTD(10)" + + codec = wait_part_and_get_compression_codec(node2, "table_for_recompression", "all_1_1_2") + if not codec: + assert False, "Part all_1_1_2 doesn't appeared in system.parts" + + assert codec == "ZSTD(11)" + + codec = wait_part_and_get_compression_codec(node2, "table_for_recompression", "all_1_1_3") + if not codec: + assert False, "Part all_1_1_3 doesn't appeared in system.parts" + + assert codec == "ZSTD(12)" + + if wait_part_in_parts(node2, "table_for_recompression", "all_1_1_4", retries=20): + assert False, "Redundant merge were assigned for part all_1_1_3 -> all_1_1_4" + + optimize_final_table_until_success(node2, "table_for_recompression") + + assert node2.query("SELECT default_compression_codec FROM system.parts where name = 'all_1_1_4'") == "ZSTD(12)\n" + + +def test_recompression_replicated(started_cluster): + for i, node in enumerate([node1, node2]): + node.query("CREATE TABLE recompression_replicated (d DateTime, key UInt64, data String) \ + ENGINE ReplicatedMergeTree('/test/rr', '{}') ORDER BY tuple() \ + TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(13))".format(i + 1)) + + node1.query("INSERT INTO recompression_replicated VALUES (now(), 1, '1')") + node2.query("SYSTEM SYNC REPLICA recompression_replicated", timeout=5) + + assert node1.query("SELECT default_compression_codec FROM system.parts where name = 'all_0_0_0' and table = 'recompression_replicated'") == "LZ4\n" + assert node2.query("SELECT default_compression_codec FROM system.parts where name = 'all_0_0_0' and table = 'recompression_replicated'") == "LZ4\n" + + codec1 = wait_part_and_get_compression_codec(node1, "recompression_replicated", "all_0_0_1") + if not codec1: + assert False, "Part all_0_0_1 doesn't appeared in system.parts on node1" + + codec2 = wait_part_and_get_compression_codec(node2, "recompression_replicated", "all_0_0_1") + if not codec2: + assert False, "Part all_0_0_1 doesn't appeared in system.parts on node2" + + assert codec1 == "ZSTD(13)" + assert codec2 == "ZSTD(13)" From 
672f239f5b57ce073e1f8a1417152dbc14606251 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 3 Sep 2020 10:34:53 +0300 Subject: [PATCH 038/298] Add settings to test --- tests/integration/test_recompression_ttl/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_recompression_ttl/test.py b/tests/integration/test_recompression_ttl/test.py index 2bf36d79f05..5ea0f91d495 100644 --- a/tests/integration/test_recompression_ttl/test.py +++ b/tests/integration/test_recompression_ttl/test.py @@ -52,7 +52,7 @@ def wait_part_and_get_compression_codec(node, table, part_name, retries=40): def test_recompression_simple(started_cluster): - node1.query("CREATE TABLE table_for_recompression (d DateTime, key UInt64, data String) ENGINE MergeTree() ORDER BY tuple() TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(10))") + node1.query("CREATE TABLE table_for_recompression (d DateTime, key UInt64, data String) ENGINE MergeTree() ORDER BY tuple() TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(10)) SETTINGS merge_with_ttl_timeout = 0") node1.query("INSERT INTO table_for_recompression VALUES (now(), 1, '1')") assert node1.query("SELECT default_compression_codec FROM system.parts where name = 'all_1_1_0'") == "LZ4\n" @@ -75,7 +75,7 @@ def test_recompression_multiple_ttls(started_cluster): node2.query("CREATE TABLE table_for_recompression (d DateTime, key UInt64, data String) ENGINE MergeTree() ORDER BY tuple() \ TTL d + INTERVAL 5 SECOND RECOMPRESS CODEC(ZSTD(10)), \ d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(11)), \ - d + INTERVAL 15 SECOND RECOMPRESS CODEC(ZSTD(12))") + d + INTERVAL 15 SECOND RECOMPRESS CODEC(ZSTD(12)) SETTINGS merge_with_ttl_timeout = 0") node2.query("INSERT INTO table_for_recompression VALUES (now(), 1, '1')") @@ -111,7 +111,7 @@ def test_recompression_replicated(started_cluster): for i, node in enumerate([node1, node2]): node.query("CREATE TABLE recompression_replicated (d DateTime, key UInt64, data String) \ ENGINE ReplicatedMergeTree('/test/rr', '{}') ORDER BY tuple() \ - TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(13))".format(i + 1)) + TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(13)) SETTINGS merge_with_ttl_timeout = 0".format(i + 1)) node1.query("INSERT INTO recompression_replicated VALUES (now(), 1, '1')") node2.query("SYSTEM SYNC REPLICA recompression_replicated", timeout=5) From f4c7ff03766ebf9f1f480ab2e8583db2b491f9b7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 3 Sep 2020 16:00:13 +0300 Subject: [PATCH 039/298] Add fixed size of Merge TTLS --- src/Storages/MergeTree/MergeList.cpp | 6 ++- src/Storages/MergeTree/MergeList.h | 4 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 25 ++++++++++- .../MergeTree/MergeTreeDataMergerMutator.h | 10 +++++ src/Storages/MergeTree/MergeTreeSettings.cpp | 13 ++++++ src/Storages/MergeTree/MergeTreeSettings.h | 2 + src/Storages/MergeTree/MergeType.cpp | 41 +++++++++++++++++++ src/Storages/MergeTree/MergeType.h | 20 +++++++++ .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 14 +++++++ .../MergeTree/ReplicatedMergeTreeLogEntry.h | 2 + .../MergeTree/ReplicatedMergeTreeQueue.cpp | 9 +++- .../MergeTree/ReplicatedMergeTreeQueue.h | 8 +++- src/Storages/StorageMergeTree.cpp | 15 ++++++- src/Storages/StorageReplicatedMergeTree.cpp | 26 +++++++----- src/Storages/StorageReplicatedMergeTree.h | 3 +- src/Storages/System/StorageSystemMerges.cpp | 5 +++ .../System/StorageSystemReplicationQueue.cpp | 6 +++ 17 files changed, 191 insertions(+), 18 deletions(-) create mode 100644 
src/Storages/MergeTree/MergeType.cpp
 create mode 100644 src/Storages/MergeTree/MergeType.h

diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index e9d955f5395..30324bd5d9e 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -15,12 +15,15 @@ namespace DB
 {
 
 MergeListElement::MergeListElement(const std::string & database_, const std::string & table_, const FutureMergedMutatedPart & future_part)
-    : database{database_}, table{table_}, partition_id{future_part.part_info.partition_id}
+    : database{database_}
+    , table{table_}
+    , partition_id{future_part.part_info.partition_id}
     , result_part_name{future_part.name}
     , result_part_path{future_part.path}
     , result_data_version{future_part.part_info.getDataVersion()}
     , num_parts{future_part.parts.size()}
     , thread_id{getThreadId()}
+    , merge_type{future_part.merge_type}
 {
     for (const auto & source_part : future_part.parts)
     {
@@ -70,6 +73,7 @@ MergeInfo MergeListElement::getInfo() const
     res.columns_written = columns_written.load(std::memory_order_relaxed);
     res.memory_usage = memory_tracker.get();
     res.thread_id = thread_id;
+    res.merge_type = toString(merge_type);
 
     for (const auto & source_part_name : source_part_names)
         res.source_part_names.emplace_back(source_part_name);
diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h
index 4ee8a75a868..0b41745a9ba 100644
--- a/src/Storages/MergeTree/MergeList.h
+++ b/src/Storages/MergeTree/MergeList.h
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <Storages/MergeTree/MergeType.h>
 #include
 #include
 #include
@@ -45,6 +46,7 @@ struct MergeInfo
     UInt64 columns_written;
     UInt64 memory_usage;
     UInt64 thread_id;
+    std::string merge_type;
 };
 
 struct FutureMergedMutatedPart;
@@ -87,7 +89,7 @@ struct MergeListElement : boost::noncopyable
     MemoryTracker * background_thread_memory_tracker_prev_parent = nullptr;
 
     UInt64 thread_id;
-
+    MergeType merge_type;
 
     MergeListElement(const std::string & database, const std::string & table, const FutureMergedMutatedPart & future_part);
 
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 673ad02bfb6..670f42d916d 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -208,11 +208,29 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation()
 }
 
+UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMergeWithTTL()
+{
+    const auto data_settings = data.getSettings();
+    size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed);
+
+    /// A data part can be stored on only one disk. Get the maximum reservable free space among all disks.
+ UInt64 disk_space = data.getStoragePolicy()->getMaxUnreservedFreeSpace(); + + /// Allow merges with TTL only if there are enough threads, leave free threads for regular merges + if (busy_threads_in_pool <= 1 + || background_pool_size - busy_threads_in_pool >= data_settings->number_of_free_entries_in_pool_to_execute_merge_with_ttl) + return static_cast(disk_space / DISK_USAGE_COEFFICIENT_TO_RESERVE); + + return 0; + +} + bool MergeTreeDataMergerMutator::selectPartsToMerge( FutureMergedMutatedPart & future_part, bool aggressive, size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge_callback, + size_t max_total_size_to_merge_with_ttl, String * out_disable_reason) { MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector(); @@ -284,7 +302,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( current_time, data_settings->merge_with_ttl_timeout, data_settings->ttl_only_drop_parts); - parts_to_merge = merge_selector.select(partitions, max_total_size_to_merge); + parts_to_merge = merge_selector.select(partitions, max_total_size_to_merge_with_ttl); + if (!parts_to_merge.empty()) + future_part.merge_type = MergeType::TTL_DELETE; } if (parts_to_merge.empty()) @@ -593,6 +613,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor if (merges_blocker.isCancelled()) throw Exception("Cancelled merging parts", ErrorCodes::ABORTED); + if (isTTLMergeType(future_part.merge_type) && ttl_merges_blocker.isCancelled()) + throw Exception("Cancelled merging parts with TTL", ErrorCodes::ABORTED); + const MergeTreeData::DataPartsVector & parts = future_part.parts; LOG_DEBUG(log, "Merging {} parts: from {} to {} into {}", parts.size(), parts.front()->name, parts.back()->name, future_part.type.toString()); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index d5798fe3582..a874c93e2f6 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -22,6 +23,7 @@ struct FutureMergedMutatedPart MergeTreeDataPartType type; MergeTreePartInfo part_info; MergeTreeData::DataPartsVector parts; + MergeType merge_type = MergeType::REGULAR; const MergeTreePartition & getPartition() const { return parts.front()->partition; } @@ -59,6 +61,13 @@ public: */ UInt64 getMaxSourcePartsSizeForMerge(); + /** Get maximum total size of parts to do merge with TTL, at current moment + * of time. If busy threads count is less than value specified by + * number_of_free_entries_in_pool_to_execute_merge_with_ttl than maximum + * size (available on disk) is allowed. + */ + UInt64 getMaxSourcePartsSizeForMergeWithTTL(); + /** For explicitly passed size of pool and number of used tasks. * This method could be used to calculate threshold depending on number of tasks in replication queue. */ @@ -81,6 +90,7 @@ public: bool aggressive, size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge, + size_t max_total_size_to_merge_with_ttl, String * out_disable_reason = nullptr); /** Select all the parts in the specified partition for merge, if possible. 
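As a rough illustration of the thread-budget rule implemented by getMaxSourcePartsSizeForMergeWithTTL() above, here is a standalone sketch. The 1.1 reserve coefficient and the pool numbers are assumptions for illustration, not values taken from this patch:

// Model of the pool-budget rule: a TTL merge of unlimited size is allowed
// only while enough pool entries stay free for regular merges.
#include <cstdint>
#include <iostream>

constexpr double DISK_USAGE_COEFFICIENT_TO_RESERVE = 1.1;  // assumed value

uint64_t maxSourcePartsSizeForTTLMerge(
    uint64_t pool_size,
    uint64_t busy_threads,
    uint64_t free_entries_required,
    uint64_t max_unreserved_disk_space)
{
    // Mirrors the condition above: the pool is nearly idle, or at least
    // `free_entries_required` slots remain free for regular merges.
    if (busy_threads <= 1 || pool_size - busy_threads >= free_entries_required)
        return static_cast<uint64_t>(max_unreserved_disk_space / DISK_USAGE_COEFFICIENT_TO_RESERVE);
    return 0;  // too few free threads: schedule no TTL merge for now
}

int main()
{
    // With a 16-thread pool and 12 required free entries (the default of
    // number_of_free_entries_in_pool_to_execute_merge_with_ttl), TTL merges
    // stop being scheduled once more than 4 threads are busy.
    for (uint64_t busy : {0ULL, 1ULL, 4ULL, 5ULL, 15ULL})
        std::cout << busy << " busy -> " << maxSourcePartsSizeForTTLMerge(16, busy, 12, 1ULL << 30) << '\n';
}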
diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 7f537ec330a..44504bdec84 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -100,6 +100,19 @@ void MergeTreeSettings::sanityCheck(const Settings & query_settings) const number_of_free_entries_in_pool_to_lower_max_size_of_merge, query_settings.background_pool_size); } + + if (number_of_free_entries_in_pool_to_execute_merge_with_ttl >= query_settings.background_pool_size) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of 'number_of_free_entries_in_pool_to_execute_merge_with_ttl' setting" + " ({}) (default values are defined in section of config.xml" + " or the value can be specified per table in SETTINGS section of CREATE TABLE query)" + " is greater or equals to the value of 'background_pool_size'" + " ({}) (the value is defined in users.xml for default profile)." + " This indicates incorrect configuration because TTL cannot work with these settings.", + number_of_free_entries_in_pool_to_execute_merge_with_ttl, + query_settings.background_pool_size); + } + } } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 085c441aa90..06fc21b24c3 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -33,8 +33,10 @@ struct Settings; M(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \ M(UInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ M(UInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ + M(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \ M(UInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ + M(UInt64, number_of_free_entries_in_pool_to_execute_merge_with_ttl, 12, "When there is less than specified number of free entries in pool, do not execute merge with TTL. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ M(Seconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ M(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \ M(Seconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. 
How many seconds before failing to acquire table locks.", 0) \ diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp new file mode 100644 index 00000000000..d31197e8d4c --- /dev/null +++ b/src/Storages/MergeTree/MergeType.cpp @@ -0,0 +1,41 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + +MergeType checkAndGetMergeType(UInt64 merge_type) +{ + if (merge_type == static_cast(MergeType::REGULAR)) + return MergeType::REGULAR; + else if (merge_type == static_cast(MergeType::TTL_DELETE)) + return MergeType::TTL_DELETE; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast(merge_type)); +} + +String toString(MergeType merge_type) +{ + switch (merge_type) + { + case MergeType::REGULAR: + return "REGULAR"; + case MergeType::TTL_DELETE: + return "TTL_DELETE"; + } + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast(merge_type)); +} + +bool isTTLMergeType(MergeType merge_type) +{ + return merge_type == MergeType::TTL_DELETE; +} + +} diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h new file mode 100644 index 00000000000..20fd7cd24af --- /dev/null +++ b/src/Storages/MergeTree/MergeType.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ + +enum class MergeType +{ + REGULAR, + TTL_DELETE, +}; + +MergeType checkAndGetMergeType(UInt64 merge_type); + +String toString(MergeType merge_type); + +bool isTTLMergeType(MergeType merge_type); + +} diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index af6d980ad98..d95ae6b729d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -36,6 +36,8 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const out << s << '\n'; out << "into\n" << new_part_name; out << "\ndeduplicate: " << deduplicate; + if (merge_type != MergeType::REGULAR) + out <<"\nmerge_type: " << static_cast(merge_type); break; case DROP_RANGE: @@ -148,8 +150,20 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) source_parts.push_back(s); } in >> new_part_name; + if (format_version >= 4) + { in >> "\ndeduplicate: " >> deduplicate; + in >> "\n"; + if (in.eof()) + trailing_newline_found = true; + else if (checkString("merge_type: ", in)) + { + UInt64 value; + in >> value; + merge_type = checkAndGetMergeType(value); + } + } } else if (type_str == "drop" || type_str == "detach") { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index ae5fad0b83c..2f5d038291b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -79,6 +80,7 @@ struct ReplicatedMergeTreeLogEntryData Strings source_parts; bool deduplicate = false; /// Do deduplicate on merge + MergeType merge_type = MergeType::REGULAR; String column_name; String index_name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 8e2c3752212..61d53c60128 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1312,21 +1312,26 @@ bool ReplicatedMergeTreeQueue::processEntry( } -std::pair 
ReplicatedMergeTreeQueue::countMergesAndPartMutations() const +ReplicatedMergeTreeQueue::OperationsInQueue ReplicatedMergeTreeQueue::countMergesAndPartMutations() const { std::lock_guard lock(state_mutex); size_t count_merges = 0; size_t count_mutations = 0; + size_t count_merges_with_ttl = 0; for (const auto & entry : queue) { if (entry->type == ReplicatedMergeTreeLogEntry::MERGE_PARTS) + { ++count_merges; + if (isTTLMergeType(entry->merge_type)) + ++count_merges_with_ttl; + } else if (entry->type == ReplicatedMergeTreeLogEntry::MUTATE_PART) ++count_mutations; } - return std::make_pair(count_merges, count_mutations); + return OperationsInQueue{count_merges, count_mutations, count_merges_with_ttl}; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 76f84da1ae8..c024cd53c0b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -49,6 +49,12 @@ private: /// To calculate min_unprocessed_insert_time, max_processed_insert_time, for which the replica lag is calculated. using InsertsByTime = std::set; + struct OperationsInQueue + { + size_t merges = 0; + size_t mutations = 0; + size_t merges_with_ttl = 0; + }; StorageReplicatedMergeTree & storage; MergeTreeDataFormatVersion format_version; @@ -325,7 +331,7 @@ public: bool processEntry(std::function get_zookeeper, LogEntryPtr & entry, const std::function func); /// Count the number of merges and mutations of single parts in the queue. - std::pair countMergesAndPartMutations() const; + OperationsInQueue countMergesAndPartMutations() const; /// Count the total number of active mutations. size_t countMutations() const; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 7e4318a32f6..729263c3aaa 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -651,8 +651,21 @@ bool StorageMergeTree::merge( if (partition_id.empty()) { UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge(); + UInt64 max_source_parts_size_with_ttl = merger_mutator.getMaxSourcePartsSizeForMergeWithTTL(); + + /// TTL requirements is much more strict than for regular merge, so + /// if regular not possible, than merge with ttl is not also not + /// possible. if (max_source_parts_size > 0) - selected = merger_mutator.selectPartsToMerge(future_part, aggressive, max_source_parts_size, can_merge, out_disable_reason); + { + selected = merger_mutator.selectPartsToMerge( + future_part, + aggressive, + max_source_parts_size, + can_merge, + max_source_parts_size_with_ttl, + out_disable_reason); + } else if (out_disable_reason) *out_disable_reason = "Current value of max_source_parts_size is zero"; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 6058632d220..a5b293fd30b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2514,13 +2514,13 @@ void StorageReplicatedMergeTree::mergeSelectingTask() /// and in the same time, many small parts could be created and won't be merged. 
auto merges_and_mutations_queued = queue.countMergesAndPartMutations(); - size_t merges_and_mutations_sum = merges_and_mutations_queued.first + merges_and_mutations_queued.second; + size_t merges_and_mutations_sum = merges_and_mutations_queued.merges + merges_and_mutations_queued.mutations; if (merges_and_mutations_sum >= storage_settings_ptr->max_replicated_merges_in_queue) { LOG_TRACE(log, "Number of queued merges ({}) and part mutations ({})" " is greater than max_replicated_merges_in_queue ({}), so won't select new parts to merge or mutate.", - merges_and_mutations_queued.first, - merges_and_mutations_queued.second, + merges_and_mutations_queued.merges, + merges_and_mutations_queued.mutations, storage_settings_ptr->max_replicated_merges_in_queue); } else @@ -2529,16 +2529,20 @@ void StorageReplicatedMergeTree::mergeSelectingTask() storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum); UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); + UInt64 max_source_part_size_for_merge_with_ttl = 0; + if (merges_and_mutations_queued.merges_with_ttl < storage_settings_ptr->max_replicated_merges_with_ttl_in_queue) + max_source_part_size_for_merge_with_ttl = merger_mutator.getMaxSourcePartsSizeForMergeWithTTL(); + FutureMergedMutatedPart future_merged_part; if (max_source_parts_size_for_merge > 0 && - merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, nullptr)) + merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, max_source_part_size_for_merge_with_ttl, nullptr)) { create_result = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, - future_merged_part.name, future_merged_part.type, deduplicate, nullptr, merge_pred.getVersion()); + future_merged_part.name, future_merged_part.type, deduplicate, nullptr, merge_pred.getVersion(), future_merged_part.merge_type); } /// If there are many mutations in queue, it may happen, that we cannot enqueue enough merges to merge all new parts else if (max_source_part_size_for_mutation > 0 && queue.countMutations() > 0 - && merges_and_mutations_queued.second < storage_settings_ptr->max_replicated_mutations_in_queue) + && merges_and_mutations_queued.mutations < storage_settings_ptr->max_replicated_mutations_in_queue) { /// Choose a part to mutate. 
DataPartsVector data_parts = getDataPartsVector(); @@ -2617,7 +2621,8 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c const MergeTreeDataPartType & merged_part_type, bool deduplicate, ReplicatedMergeTreeLogEntryData * out_log_entry, - int32_t log_version) + int32_t log_version, + MergeType merge_type) { std::vector> exists_futures; exists_futures.reserve(parts.size()); @@ -2650,6 +2655,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c entry.new_part_name = merged_name; entry.new_part_type = merged_part_type; entry.deduplicate = deduplicate; + entry.merge_type = merge_type; entry.create_time = time(nullptr); for (const auto & part : parts) @@ -3584,7 +3590,7 @@ bool StorageReplicatedMergeTree::optimize( CreateMergeEntryResult create_result = createLogEntryToMergeParts( zookeeper, future_merged_part.parts, future_merged_part.name, future_merged_part.type, deduplicate, - &merge_entry, can_merge.getVersion()); + &merge_entry, can_merge.getVersion(), future_merged_part.merge_type); if (create_result == CreateMergeEntryResult::MissingPart) return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing"); @@ -3614,7 +3620,7 @@ bool StorageReplicatedMergeTree::optimize( if (!partition) { selected = merger_mutator.selectPartsToMerge( - future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, &disable_reason); + future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, &disable_reason); } else { @@ -3639,7 +3645,7 @@ bool StorageReplicatedMergeTree::optimize( CreateMergeEntryResult create_result = createLogEntryToMergeParts( zookeeper, future_merged_part.parts, future_merged_part.name, future_merged_part.type, deduplicate, - &merge_entry, can_merge.getVersion()); + &merge_entry, can_merge.getVersion(), future_merged_part.merge_type); if (create_result == CreateMergeEntryResult::MissingPart) return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing"); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index e9395f20f3f..2bc9265331d 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -450,7 +450,8 @@ private: const MergeTreeDataPartType & merged_part_type, bool deduplicate, ReplicatedMergeTreeLogEntryData * out_log_entry, - int32_t log_version); + int32_t log_version, + MergeType merge_type); CreateMergeEntryResult createLogEntryToMutatePart( const IMergeTreeDataPart & part, diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index 39d22bd00ca..3b9e39c1ef8 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -30,6 +30,7 @@ NamesAndTypesList StorageSystemMerges::getNamesAndTypes() {"columns_written", std::make_shared()}, {"memory_usage", std::make_shared()}, {"thread_id", std::make_shared()}, + {"merge_type", std::make_shared()}, }; } @@ -65,6 +66,10 @@ void StorageSystemMerges::fillData(MutableColumns & res_columns, const Context & res_columns[i++]->insert(merge.columns_written); res_columns[i++]->insert(merge.memory_usage); res_columns[i++]->insert(merge.thread_id); + if (!merge.is_mutation) + res_columns[i++]->insert(merge.merge_type); + else + res_columns[i++]->insertDefault(); } } diff --git 
a/src/Storages/System/StorageSystemReplicationQueue.cpp b/src/Storages/System/StorageSystemReplicationQueue.cpp index f5e43abada0..f04d8759507 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -42,6 +42,7 @@ NamesAndTypesList StorageSystemReplicationQueue::getNamesAndTypes() { "num_postponed", std::make_shared() }, { "postpone_reason", std::make_shared() }, { "last_postpone_time", std::make_shared() }, + { "merge_type", std::make_shared() }, }; } @@ -145,6 +146,11 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, const res_columns[col_num++]->insert(entry.num_postponed); res_columns[col_num++]->insert(entry.postpone_reason); res_columns[col_num++]->insert(UInt64(entry.last_postpone_time)); + + if (entry.type == ReplicatedMergeTreeLogEntryData::Type::MERGE_PARTS) + res_columns[col_num++]->insert(toString(entry.merge_type)); + else + res_columns[col_num++]->insertDefault(); } } } From 13248a744b4409460cf458e075d37a27e9d9f3ef Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 3 Sep 2020 16:02:24 +0300 Subject: [PATCH 040/298] Fix bug in parts selection --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 32 +++++-- .../test_concurrent_ttl_merges/__init__.py | 0 .../configs/fast_background_pool.xml | 9 ++ .../test_concurrent_ttl_merges/test.py | 83 +++++++++++++++++++ 4 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 tests/integration/test_concurrent_ttl_merges/__init__.py create mode 100644 tests/integration/test_concurrent_ttl_merges/configs/fast_background_pool.xml create mode 100644 tests/integration/test_concurrent_ttl_merges/test.py diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 670f42d916d..4c57fbc01ff 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -250,8 +250,20 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( const String * prev_partition_id = nullptr; /// Previous part only in boundaries of partition frame const MergeTreeData::DataPartPtr * prev_part = nullptr; + for (const MergeTreeData::DataPartPtr & part : data_parts) { + const String & partition_id = part->info.partition_id; + + if (!prev_partition_id || partition_id != *prev_partition_id) + { + if (partitions.empty() || !partitions.back().empty()) + partitions.emplace_back(); + /// New partition frame. + prev_partition_id = &partition_id; + prev_part = nullptr; + } + /// Check predicate only for first part in each partition. if (!prev_part) { @@ -262,15 +274,19 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( if (!can_merge_callback(nullptr, part, nullptr)) continue; } - - const String & partition_id = part->info.partition_id; - if (!prev_partition_id || partition_id != *prev_partition_id || (prev_part && !can_merge_callback(*prev_part, part, nullptr))) + else { - if (partitions.empty() || !partitions.back().empty()) - partitions.emplace_back(); - /// New partition frame. 
- prev_partition_id = &partition_id; - prev_part = nullptr; + /// If we cannot merge with the previous part, we have to start a new parts + /// interval (in the same partition) + if (!can_merge_callback(*prev_part, part, nullptr)) + { + /// Starting a new interval in the same partition + if (!partitions.back().empty()) + partitions.emplace_back(); + + /// Now we have no previous part, but it affects only logging + prev_part = nullptr; + } + } IMergeSelector::Part part_info; diff --git a/tests/integration/test_concurrent_ttl_merges/__init__.py b/tests/integration/test_concurrent_ttl_merges/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_concurrent_ttl_merges/configs/fast_background_pool.xml b/tests/integration/test_concurrent_ttl_merges/configs/fast_background_pool.xml new file mode 100644 index 00000000000..e62a0105907 --- /dev/null +++ b/tests/integration/test_concurrent_ttl_merges/configs/fast_background_pool.xml @@ -0,0 +1,9 @@ + + 1 + 0 + 0.0 + 0 + 1 + 1 + 0 + diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py new file mode 100644 index 00000000000..9da56f8d3e0 --- /dev/null +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -0,0 +1,83 @@ +import time +import pytest + +import helpers.client as client +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.test_tools import assert_eq_with_retry + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/fast_background_pool.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=['configs/fast_background_pool.xml'], with_zookeeper=True) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def count_ttl_merges_in_queue(node, table): + result = node.query("SELECT count() FROM system.replication_queue WHERE merge_type = 'TTL_DELETE' and table = '{}'".format(table)) + if not result: + return 0 + return int(result.strip()) + + +def count_regular_merges_in_queue(node, table): + result = node.query("SELECT count() FROM system.replication_queue WHERE merge_type = 'REGULAR' and table = '{}'".format(table)) + if not result: + return 0 + return int(result.strip()) + + +def count_ttl_merges_in_background_pool(node, table): + result = node.query("SELECT count() FROM system.merges WHERE merge_type = 'TTL_DELETE' and table = '{}'".format(table)) + if not result: + return 0 + return int(result.strip()) + + +def count_regular_merges_in_background_pool(node, table): + result = node.query("SELECT count() FROM system.merges WHERE merge_type = 'REGULAR' and table = '{}'".format(table)) + if not result: + return 0 + return int(result.strip()) + + +def count_running_mutations(node, table): + result = node.query("SELECT count() FROM system.merges WHERE table = '{}' and is_mutation=1".format(table)) + if not result: + return 0 + return int(result.strip()) + + +def test_no_ttl_merges_in_busy_pool(started_cluster): + node1.query("CREATE TABLE test_ttl (d DateTime, key UInt64, data UInt64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY key TTL d + INTERVAL 1 + sleepEachRow(1) MONTH SETTINGS merge_with_ttl_timeout = 0, number_of_free_entries_in_pool_to_execute_mutation = 0") + + node1.query("SYSTEM STOP TTL MERGES") + + for i in range(1, 7): + node1.query("INSERT INTO test_ttl SELECT now() - INTERVAL 1 MONTH + number - 1, {}, number FROM 
numbers(5)".format(i)) + + node1.query("ALTER TABLE test_ttl UPDATE data = data + 1 WHERE sleepEachRow(1) = 0") + + while count_running_mutations(node1, "test_ttl") < 6: + print "Mutations count", count_running_mutations(node1, "test_ttl") + assert count_ttl_merges_in_background_pool(node1, "test_ttl") == 0 + time.sleep(0.5) + + node1.query("SYSTEM START TTL MERGES") + + while count_running_mutations(node1, "test_ttl") == 6: + print "Mutations count after start TTL", count_running_mutations(node1, "test_ttl") + assert node1.query("SELECT count() FROM test_ttl") == "30\n" + time.sleep(0.5) + + assert_eq_with_retry(node1, "SELECT COUNT() FROM test_ttl", "0") From d5da58918e8245652269381df4e70eed346be2b8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 4 Sep 2020 01:04:46 +0300 Subject: [PATCH 041/298] create less compressed streams while writing compact parts --- src/Compression/CompressionCodecDelta.cpp | 6 ++++ src/Compression/CompressionCodecDelta.h | 3 ++ .../CompressionCodecDoubleDelta.cpp | 6 ++++ src/Compression/CompressionCodecDoubleDelta.h | 3 ++ src/Compression/CompressionCodecGorilla.cpp | 6 ++++ src/Compression/CompressionCodecGorilla.h | 3 ++ src/Compression/CompressionCodecLZ4.cpp | 5 ++++ src/Compression/CompressionCodecLZ4.h | 2 ++ src/Compression/CompressionCodecMultiple.cpp | 6 ++++ src/Compression/CompressionCodecMultiple.h | 3 ++ src/Compression/CompressionCodecNone.cpp | 5 ++++ src/Compression/CompressionCodecNone.h | 3 ++ src/Compression/CompressionCodecT64.cpp | 7 +++++ src/Compression/CompressionCodecT64.h | 2 ++ src/Compression/CompressionCodecZSTD.cpp | 5 ++++ src/Compression/CompressionCodecZSTD.h | 3 ++ src/Compression/ICompressionCodec.cpp | 7 +++++ src/Compression/ICompressionCodec.h | 5 ++++ .../MergeTreeDataPartWriterCompact.cpp | 29 ++++++++++++++----- .../MergeTreeDataPartWriterCompact.h | 16 +++++----- 20 files changed, 110 insertions(+), 15 deletions(-) diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp index 51bd19f646b..dc866e527d6 100644 --- a/src/Compression/CompressionCodecDelta.cpp +++ b/src/Compression/CompressionCodecDelta.cpp @@ -36,6 +36,12 @@ ASTPtr CompressionCodecDelta::getCodecDesc() const return makeASTFunction("Delta", literal); } +void CompressionCodecDelta::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); + hash.update(delta_bytes_size); +} + namespace { diff --git a/src/Compression/CompressionCodecDelta.h b/src/Compression/CompressionCodecDelta.h index 5c3979e063e..a192fab051a 100644 --- a/src/Compression/CompressionCodecDelta.h +++ b/src/Compression/CompressionCodecDelta.h @@ -14,7 +14,10 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 157e2df1a3f..dd2e95a916d 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -339,6 +339,12 @@ ASTPtr CompressionCodecDoubleDelta::getCodecDesc() const return std::make_shared("DoubleDelta"); } +void CompressionCodecDoubleDelta::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); + hash.update(data_bytes_size); +} + UInt32 
CompressionCodecDoubleDelta::getMaxCompressedDataSize(UInt32 uncompressed_size) const { const auto result = 2 // common header diff --git a/src/Compression/CompressionCodecDoubleDelta.h b/src/Compression/CompressionCodecDoubleDelta.h index a2690d24414..30ef086077d 100644 --- a/src/Compression/CompressionCodecDoubleDelta.h +++ b/src/Compression/CompressionCodecDoubleDelta.h @@ -100,7 +100,10 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecGorilla.cpp b/src/Compression/CompressionCodecGorilla.cpp index 042835f4a32..3d08734fe91 100644 --- a/src/Compression/CompressionCodecGorilla.cpp +++ b/src/Compression/CompressionCodecGorilla.cpp @@ -254,6 +254,12 @@ ASTPtr CompressionCodecGorilla::getCodecDesc() const return std::make_shared("Gorilla"); } +void CompressionCodecGorilla::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); + hash.update(data_bytes_size); +} + UInt32 CompressionCodecGorilla::getMaxCompressedDataSize(UInt32 uncompressed_size) const { const auto result = 2 // common header diff --git a/src/Compression/CompressionCodecGorilla.h b/src/Compression/CompressionCodecGorilla.h index 523add0700f..df0f329dc31 100644 --- a/src/Compression/CompressionCodecGorilla.h +++ b/src/Compression/CompressionCodecGorilla.h @@ -97,7 +97,10 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index cf3622cd702..1370349d68d 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ -35,6 +35,11 @@ ASTPtr CompressionCodecLZ4::getCodecDesc() const return std::make_shared("LZ4"); } +void CompressionCodecLZ4::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + UInt32 CompressionCodecLZ4::getMaxCompressedDataSize(UInt32 uncompressed_size) const { return LZ4_COMPRESSBOUND(uncompressed_size); diff --git a/src/Compression/CompressionCodecLZ4.h b/src/Compression/CompressionCodecLZ4.h index 2f19af08185..229e25481e6 100644 --- a/src/Compression/CompressionCodecLZ4.h +++ b/src/Compression/CompressionCodecLZ4.h @@ -18,6 +18,8 @@ public: UInt32 getAdditionalSizeAtTheEndOfBuffer() const override { return LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER; } + void updateHash(SipHash & hash) const override; + protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index 868df90825e..77f0fc132fe 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -37,6 +37,12 @@ ASTPtr CompressionCodecMultiple::getCodecDesc() const return result; } +void CompressionCodecMultiple::updateHash(SipHash & hash) const +{ + for (const auto & codec : codecs) + codec->updateHash(hash); +} + UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_size) const { 
UInt32 compressed_size = uncompressed_size; diff --git a/src/Compression/CompressionCodecMultiple.h b/src/Compression/CompressionCodecMultiple.h index cd50d3250e3..6bac189bdf7 100644 --- a/src/Compression/CompressionCodecMultiple.h +++ b/src/Compression/CompressionCodecMultiple.h @@ -19,7 +19,10 @@ public: static std::vector getCodecsBytesFromData(const char * source); + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const override; diff --git a/src/Compression/CompressionCodecNone.cpp b/src/Compression/CompressionCodecNone.cpp index 50c19b2b547..f727c4b4860 100644 --- a/src/Compression/CompressionCodecNone.cpp +++ b/src/Compression/CompressionCodecNone.cpp @@ -17,6 +17,11 @@ ASTPtr CompressionCodecNone::getCodecDesc() const return std::make_shared("NONE"); } +void CompressionCodecNone::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_size, char * dest) const { memcpy(dest, source, source_size); diff --git a/src/Compression/CompressionCodecNone.h b/src/Compression/CompressionCodecNone.h index ed604063198..370ef301694 100644 --- a/src/Compression/CompressionCodecNone.h +++ b/src/Compression/CompressionCodecNone.h @@ -15,7 +15,10 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index 16462e50ebd..30972a5fe1f 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -646,6 +646,13 @@ ASTPtr CompressionCodecT64::getCodecDesc() const return makeASTFunction("T64", literal); } +void CompressionCodecT64::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); + hash.update(type_idx); + hash.update(variant); +} + void registerCodecT64(CompressionCodecFactory & factory) { auto reg_func = [&](const ASTPtr & arguments, DataTypePtr type) -> CompressionCodecPtr diff --git a/src/Compression/CompressionCodecT64.h b/src/Compression/CompressionCodecT64.h index 11efbea0955..9671eb81ce1 100644 --- a/src/Compression/CompressionCodecT64.h +++ b/src/Compression/CompressionCodecT64.h @@ -35,6 +35,8 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: UInt32 doCompressData(const char * src, UInt32 src_size, char * dst) const override; void doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecZSTD.cpp b/src/Compression/CompressionCodecZSTD.cpp index ab48580533e..3b317884fec 100644 --- a/src/Compression/CompressionCodecZSTD.cpp +++ b/src/Compression/CompressionCodecZSTD.cpp @@ -32,6 +32,11 @@ ASTPtr CompressionCodecZSTD::getCodecDesc() const return makeASTFunction("ZSTD", literal); } +void CompressionCodecZSTD::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + UInt32 CompressionCodecZSTD::getMaxCompressedDataSize(UInt32 uncompressed_size) const { return ZSTD_compressBound(uncompressed_size); 
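Taken together, the updateHash overrides above give every codec pipeline a comparable identity: hashing the codec description alone would not be enough, because codecs such as Delta, DoubleDelta, Gorilla and T64 also fold type-derived parameters (delta_bytes_size, data_bytes_size, type_idx and variant) into the hash. A minimal sketch of the intended contract (the helper name sameCodecPipeline is illustrative only, not part of the patch):

bool sameCodecPipeline(const ICompressionCodec & lhs, const ICompressionCodec & rhs)
{
    /// getHash() folds the codec AST and any type-derived parameters into a
    /// single SipHash-based UInt64, so equal hashes mean the two pipelines
    /// are interchangeable and may share one compressed stream.
    return lhs.getHash() == rhs.getHash();
}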
diff --git a/src/Compression/CompressionCodecZSTD.h b/src/Compression/CompressionCodecZSTD.h index 2ad893083c3..3bfb6bb1d4d 100644 --- a/src/Compression/CompressionCodecZSTD.h +++ b/src/Compression/CompressionCodecZSTD.h @@ -21,7 +21,10 @@ public: UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index 4aafc298658..5de015b2680 100644 --- a/src/Compression/ICompressionCodec.cpp +++ b/src/Compression/ICompressionCodec.cpp @@ -35,6 +35,13 @@ ASTPtr ICompressionCodec::getFullCodecDesc() const return result; } +UInt64 ICompressionCodec::getHash() const +{ + SipHash hash; + updateHash(hash); + return hash.get64(); +} + UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) const { assert(source != nullptr && dest != nullptr); diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index fa1f73ce4dd..8f72ba55200 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -36,6 +37,10 @@ public: /// "CODEC(LZ4,LZ4HC(5))" ASTPtr getFullCodecDesc() const; + /// Hash that depends on the codec AST and optional parameters like data type + virtual void updateHash(SipHash & hash) const = 0; + UInt64 getHash() const; + /// Compressed bytes from uncompressed source to dest. Dest should preallocate memory UInt32 compress(const char * source, UInt32 source_size, char * dest) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 5e311e0a2f9..70beaec5e5e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -29,9 +29,18 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( , marks(*marks_file) { const auto & storage_columns = metadata_snapshot->getColumns(); + + /// Create compressed stream for every different codec. 
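+    /// Columns whose codecs produce equal hashes (see ICompressionCodec::getHash)
+    /// will share a single compressed stream instead of opening one per column.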
+ std::unordered_map streams_by_codec; for (const auto & column : columns_list) - compressed_streams[column.name] = std::make_unique( - plain_hashing, storage_columns.getCodecOrDefault(column.name, default_codec)); + { + auto codec = storage_columns.getCodecOrDefault(column.name, default_codec); + auto & stream = streams_by_codec[codec->getHash()]; + if (!stream) + stream = std::make_shared(plain_hashing, codec); + + compressed_streams.push_back(stream); + } } void MergeTreeDataPartWriterCompact::write( @@ -101,14 +110,15 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block) if (rows_to_write) data_written = true; - for (const auto & column : columns_list) + auto name_and_type = columns_list.begin(); + for (size_t i = 0; i < columns_list.size(); ++i, ++name_and_type) { - auto & stream = compressed_streams[column.name]; + auto & stream = compressed_streams[i]; writeIntBinary(plain_hashing.count(), marks); writeIntBinary(stream->hashing_buf.offset(), marks); - writeColumnSingleGranule(block.getByName(column.name), current_row, rows_to_write); + writeColumnSingleGranule(block.getByName(name_and_type->name), stream, current_row, rows_to_write); /// Write one compressed block per column in granule for more optimal reading. stream->hashing_buf.next(); @@ -133,12 +143,15 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block) next_mark = from_mark; } -void MergeTreeDataPartWriterCompact::writeColumnSingleGranule(const ColumnWithTypeAndName & column, size_t from_row, size_t number_of_rows) const +void MergeTreeDataPartWriterCompact::writeColumnSingleGranule( + const ColumnWithTypeAndName & column, + const CompressedStreamPtr & stream, + size_t from_row, size_t number_of_rows) const { IDataType::SerializeBinaryBulkStatePtr state; IDataType::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.getter = [this, &column](IDataType::SubstreamPath) -> WriteBuffer * { return &compressed_streams.at(column.name)->hashing_buf; }; + serialize_settings.getter = [&stream](IDataType::SubstreamPath) -> WriteBuffer * { return &stream->hashing_buf; }; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; @@ -213,7 +226,7 @@ void MergeTreeDataPartWriterCompact::addToChecksums(MergeTreeDataPartChecksums & size_t uncompressed_size = 0; CityHash_v1_0_2::uint128 uncompressed_hash{0, 0}; - for (const auto & [_, stream] : compressed_streams) + for (const auto & stream : compressed_streams) { uncompressed_size += stream->hashing_buf.count(); auto stream_hash = stream->hashing_buf.getHash(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 6206d6e867e..2f24c515fb3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -26,12 +26,6 @@ protected: void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override; private: - /// Write single granule of one column (rows between 2 marks) - void writeColumnSingleGranule( - const ColumnWithTypeAndName & column, - size_t from_row, - size_t number_of_rows) const; - void writeBlock(const Block & block); void addToChecksums(MergeTreeDataPartChecksums & checksumns); @@ -67,11 +61,19 @@ private: : compressed_buf(buf, codec), hashing_buf(compressed_buf) {} }; - std::unordered_map> compressed_streams; + using CompressedStreamPtr = std::shared_ptr; + std::vector compressed_streams; /// marks 
-> marks_file std::unique_ptr marks_file; HashingWriteBuffer marks; + + /// Write single granule of one column (rows between 2 marks) + void writeColumnSingleGranule( + const ColumnWithTypeAndName & column, + const CompressedStreamPtr & stream, + size_t from_row, + size_t number_of_rows) const; }; } From 6883ee7eea30329c48c25c83bee341b4fdd886d7 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 4 Sep 2020 01:38:17 +0300 Subject: [PATCH 042/298] create less compressed streams while writing compact parts --- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp | 5 +---- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h | 5 +++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 70beaec5e5e..22df2abecf3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -29,9 +29,6 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( , marks(*marks_file) { const auto & storage_columns = metadata_snapshot->getColumns(); - - /// Create compressed stream for every different codec. - std::unordered_map streams_by_codec; for (const auto & column : columns_list) { auto codec = storage_columns.getCodecOrDefault(column.name, default_codec); @@ -226,7 +223,7 @@ void MergeTreeDataPartWriterCompact::addToChecksums(MergeTreeDataPartChecksums & size_t uncompressed_size = 0; CityHash_v1_0_2::uint128 uncompressed_hash{0, 0}; - for (const auto & stream : compressed_streams) + for (const auto & [_, stream] : streams_by_codec) { uncompressed_size += stream->hashing_buf.count(); auto stream_hash = stream->hashing_buf.getHash(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 2f24c515fb3..a121554f4be 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -62,6 +62,11 @@ private: }; using CompressedStreamPtr = std::shared_ptr; + + /// Create compressed stream for every different codec. + std::unordered_map streams_by_codec; + + /// For better performance save pointer to stream by every column. 
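+    /// (These per-column pointers alias the shared streams owned by
+    /// streams_by_codec; no additional buffers are allocated.)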
std::vector compressed_streams; /// marks -> marks_file From 23b9677879a2a0618b35032439650ec08e760c57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 4 Sep 2020 08:46:58 +0300 Subject: [PATCH 043/298] Added a script to import git repository to ClickHouse --- src/Common/ShellCommand.cpp | 4 + src/IO/ReadBufferFromFile.cpp | 3 + src/IO/WriteBufferFromFile.cpp | 3 + utils/CMakeLists.txt | 1 + utils/git-to-clickhouse/CMakeLists.txt | 2 + utils/git-to-clickhouse/git-to-clickhouse.cpp | 638 ++++++++++++++++++ 6 files changed, 651 insertions(+) create mode 100644 utils/git-to-clickhouse/CMakeLists.txt create mode 100644 utils/git-to-clickhouse/git-to-clickhouse.cpp diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 53ab2301a0a..127f95fef06 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -186,6 +186,10 @@ int ShellCommand::tryWait() { wait_called = true; + in.close(); + out.close(); + err.close(); + LOG_TRACE(getLogger(), "Will wait for shell command pid {}", pid); int status = 0; diff --git a/src/IO/ReadBufferFromFile.cpp b/src/IO/ReadBufferFromFile.cpp index 40f69625e68..226615c757e 100644 --- a/src/IO/ReadBufferFromFile.cpp +++ b/src/IO/ReadBufferFromFile.cpp @@ -77,6 +77,9 @@ ReadBufferFromFile::~ReadBufferFromFile() void ReadBufferFromFile::close() { + if (fd < 0) + return; + if (0 != ::close(fd)) throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index b59a110edb4..4ade2e2c971 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -92,6 +92,9 @@ WriteBufferFromFile::~WriteBufferFromFile() /// Close file before destruction of object. void WriteBufferFromFile::close() { + if (fd < 0) + return; + next(); if (0 != ::close(fd)) diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 0dd95388e7d..dd03afe9fb8 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -29,6 +29,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (convert-month-partitioned-parts) add_subdirectory (checksum-for-compressed-block) add_subdirectory (wal-dump) + add_subdirectory (git-to-clickhouse) endif () if (ENABLE_CODE_QUALITY) diff --git a/utils/git-to-clickhouse/CMakeLists.txt b/utils/git-to-clickhouse/CMakeLists.txt new file mode 100644 index 00000000000..0e46b68d471 --- /dev/null +++ b/utils/git-to-clickhouse/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable (git-to-clickhouse git-to-clickhouse.cpp) +target_link_libraries(git-to-clickhouse PRIVATE dbms boost::program_options) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp new file mode 100644 index 00000000000..42920328ad7 --- /dev/null +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -0,0 +1,638 @@ +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + +enum class LineType +{ + Empty, + Comment, + Punct, + Code, +}; + +void writeText(LineType type, WriteBuffer & out) +{ + switch (type) + { + case LineType::Empty: writeString("Empty", out); break; + case LineType::Comment: writeString("Comment", out); break; + case LineType::Punct: writeString("Punct", out); break; + case LineType::Code: writeString("Code", out); break; + } +} + +struct LineChange +{ + int8_t sign{}; /// 1 if added, -1 if deleted + uint16_t 
line_number_old{}; + uint16_t line_number_new{}; + uint16_t hunk_num{}; /// ordinal number of hunk in diff, starting with 0 + uint16_t hunk_start_line_number_old{}; + uint16_t hunk_start_line_number_new{}; + std::string hunk_context; /// The context (like a line with function name) as it is calculated by git + std::string line; /// Line content without leading whitespaces + uint8_t indent{}; /// The number of leading whitespaces or tabs * 4 + LineType line_type{}; + + void setLineInfo(std::string full_line) + { + indent = 0; + + const char * pos = full_line.data(); + const char * end = pos + full_line.size(); + + while (pos < end) + { + if (*pos == ' ') + ++indent; + else if (*pos == '\t') + indent += 4; + else + break; + ++pos; + } + + line.assign(pos, end); + + if (pos == end) + { + line_type = LineType::Empty; + } + else if (pos + 1 < end + && ((pos[0] == '/' && pos[1] == '/') + || (pos[0] == '*' && pos[1] == ' '))) /// This is not precise. + { + line_type = LineType::Comment; + } + else + { + while (pos < end) + { + if (isAlphaNumericASCII(*pos)) + { + line_type = LineType::Code; + break; + } + ++pos; + } + if (pos == end) + line_type = LineType::Punct; + } + } + + void writeTextWithoutNewline(WriteBuffer & out) const + { + writeText(sign, out); + writeChar('\t', out); + writeText(line_number_old, out); + writeChar('\t', out); + writeText(line_number_new, out); + writeChar('\t', out); + writeText(hunk_num, out); + writeChar('\t', out); + writeText(hunk_start_line_number_old, out); + writeChar('\t', out); + writeText(hunk_start_line_number_new, out); + writeChar('\t', out); + writeText(hunk_context, out); + writeChar('\t', out); + writeText(line, out); + writeChar('\t', out); + writeText(indent, out); + writeChar('\t', out); + writeText(line_type, out); + } +}; + +using LineChanges = std::vector; + +enum class FileChangeType +{ + Add, + Delete, + Modify, + Rename, + Copy, + Type, +}; + +void writeText(FileChangeType type, WriteBuffer & out) +{ + switch (type) + { + case FileChangeType::Add: writeString("Add", out); break; + case FileChangeType::Delete: writeString("Delete", out); break; + case FileChangeType::Modify: writeString("Modify", out); break; + case FileChangeType::Rename: writeString("Rename", out); break; + case FileChangeType::Copy: writeString("Copy", out); break; + case FileChangeType::Type: writeString("Type", out); break; + } +} + +struct FileChange +{ + FileChangeType change_type{}; + std::string new_file_path; + std::string old_file_path; + uint16_t lines_added{}; + uint16_t lines_deleted{}; + uint16_t hunks_added{}; + uint16_t hunks_removed{}; + uint16_t hunks_changed{}; + + void writeTextWithoutNewline(WriteBuffer & out) const + { + writeText(change_type, out); + writeChar('\t', out); + writeText(new_file_path, out); + writeChar('\t', out); + writeText(old_file_path, out); + writeChar('\t', out); + writeText(lines_added, out); + writeChar('\t', out); + writeText(lines_deleted, out); + writeChar('\t', out); + writeText(hunks_added, out); + writeChar('\t', out); + writeText(hunks_removed, out); + writeChar('\t', out); + writeText(hunks_changed, out); + } +}; + +struct FileChangeAndLineChanges +{ + FileChange file_change; + LineChanges line_changes; +}; + +struct Commit +{ + std::string hash; + std::string author_name; + std::string author_email; + time_t time{}; + std::string message; + uint32_t files_added{}; + uint32_t files_deleted{}; + uint32_t files_renamed{}; + uint32_t files_modified{}; + uint32_t lines_added{}; + uint32_t lines_deleted{}; + uint32_t 
hunks_added{}; + uint32_t hunks_removed{}; + uint32_t hunks_changed{}; + + void writeTextWithoutNewline(WriteBuffer & out) const + { + writeText(hash, out); + writeChar('\t', out); + writeText(author_name, out); + writeChar('\t', out); + writeText(author_email, out); + writeChar('\t', out); + writeText(time, out); + writeChar('\t', out); + writeText(message, out); + writeChar('\t', out); + writeText(files_added, out); + writeChar('\t', out); + writeText(files_deleted, out); + writeChar('\t', out); + writeText(files_renamed, out); + writeChar('\t', out); + writeText(files_modified, out); + writeChar('\t', out); + writeText(lines_added, out); + writeChar('\t', out); + writeText(lines_deleted, out); + writeChar('\t', out); + writeText(hunks_added, out); + writeChar('\t', out); + writeText(hunks_removed, out); + writeChar('\t', out); + writeText(hunks_changed, out); + } +}; + + +void skipUntilWhitespace(ReadBuffer & buf) +{ + while (!buf.eof()) + { + char * next_pos = find_first_symbols<'\t', '\n', ' '>(buf.position(), buf.buffer().end()); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == ' ') + return; + } +} + +void skipUntilNextLine(ReadBuffer & buf) +{ + while (!buf.eof()) + { + char * next_pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end()); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == '\n') + { + ++buf.position(); + return; + } + } +} + +void readStringUntilNextLine(std::string & s, ReadBuffer & buf) +{ + s.clear(); + while (!buf.eof()) + { + char * next_pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end()); + s.append(buf.position(), next_pos - buf.position()); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == '\n') + { + ++buf.position(); + return; + } + } +} + + +struct Result +{ + WriteBufferFromFile commits{"commits.tsv"}; + WriteBufferFromFile file_changes{"file_changes.tsv"}; + WriteBufferFromFile line_changes{"line_changes.tsv"}; +}; + + +void processCommit(std::string hash, Result & result) +{ + std::string command = fmt::format( + "git show --raw --pretty='format:%at%x09%aN%x09%aE%x0A%s%x00' --patch --unified=0 {}", + hash); + + std::cerr << command << "\n"; + + auto commit_info = ShellCommand::execute(command); + auto & in = commit_info->out; + + Commit commit; + commit.hash = hash; + + readText(commit.time, in); + assertChar('\t', in); + readText(commit.author_name, in); + assertChar('\t', in); + readText(commit.author_email, in); + assertChar('\n', in); + readNullTerminated(commit.message, in); + + std::cerr << fmt::format("{}\t{}\n", toString(LocalDateTime(commit.time)), commit.message); + + if (!in.eof()) + assertChar('\n', in); + + /// File changes in form + /// :100644 100644 b90fe6bb94 3ffe4c380f M src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp + /// :100644 100644 828dedf6b5 828dedf6b5 R100 dbms/src/Functions/GeoUtils.h dbms/src/Functions/PolygonUtils.h + + std::map file_changes; + + while (checkChar(':', in)) + { + FileChange file_change; + + for (size_t i = 0; i < 4; ++i) + { + skipUntilWhitespace(in); + skipWhitespaceIfAny(in); + } + + char change_type; + readChar(change_type, in); + + int confidence; + switch (change_type) + { + case 'A': + file_change.change_type = FileChangeType::Add; + ++commit.files_added; + break; + case 'D': + file_change.change_type = FileChangeType::Delete; + ++commit.files_deleted; + break; + case 'M': 
+ file_change.change_type = FileChangeType::Modify; + ++commit.files_modified; + break; + case 'R': + file_change.change_type = FileChangeType::Rename; + ++commit.files_renamed; + readText(confidence, in); + break; + case 'C': + file_change.change_type = FileChangeType::Copy; + readText(confidence, in); + break; + case 'T': + file_change.change_type = FileChangeType::Type; + break; + default: + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected file change type: {}", change_type); + } + + skipWhitespaceIfAny(in); + + if (change_type == 'R' || change_type == 'C') + { + readText(file_change.old_file_path, in); + skipWhitespaceIfAny(in); + readText(file_change.new_file_path, in); + } + else + { + readText(file_change.new_file_path, in); + } + + assertChar('\n', in); + + file_changes.emplace( + file_change.new_file_path, + FileChangeAndLineChanges{ file_change, {} }); + } + + if (!in.eof()) + { + assertChar('\n', in); + + /// Diffs for every file in form of + /// --- a/src/Storages/StorageReplicatedMergeTree.cpp + /// +++ b/src/Storages/StorageReplicatedMergeTree.cpp + /// @@ -1387,2 +1387 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) + /// - table_lock, entry.create_time, reserved_space, entry.deduplicate, + /// - entry.force_ttl); + /// + table_lock, entry.create_time, reserved_space, entry.deduplicate); + + std::string old_file_path; + std::string new_file_path; + FileChangeAndLineChanges * file_change_and_line_changes = nullptr; + LineChange line_change; + + while (!in.eof()) + { + if (checkString("@@ ", in)) + { + if (!file_change_and_line_changes) + { + auto file_name = new_file_path.empty() ? old_file_path : new_file_path; + auto it = file_changes.find(file_name); + if (file_changes.end() == it) + std::cerr << fmt::format("Warning: skipping bad file name {}\n", file_name); + else + file_change_and_line_changes = &it->second; + } + + if (file_change_and_line_changes) + { + uint16_t old_lines = 1; + uint16_t new_lines = 1; + + assertChar('-', in); + readText(line_change.hunk_start_line_number_old, in); + if (checkChar(',', in)) + readText(old_lines, in); + + assertString(" +", in); + readText(line_change.hunk_start_line_number_new, in); + if (checkChar(',', in)) + readText(new_lines, in); + + assertString(" @@", in); + if (checkChar(' ', in)) + readStringUntilNextLine(line_change.hunk_context, in); + else + assertChar('\n', in); + + ++line_change.hunk_num; + line_change.line_number_old = line_change.hunk_start_line_number_old; + line_change.line_number_new = line_change.hunk_start_line_number_new; + + if (old_lines && new_lines) + { + ++commit.hunks_changed; + ++file_change_and_line_changes->file_change.hunks_changed; + } + else if (old_lines) + { + ++commit.hunks_removed; + ++file_change_and_line_changes->file_change.hunks_removed; + } + else if (new_lines) + { + ++commit.hunks_added; + ++file_change_and_line_changes->file_change.hunks_added; + } + } + } + else if (checkChar('-', in)) + { + if (checkString("-- ", in)) + { + if (checkString("a/", in)) + { + readStringUntilNextLine(old_file_path, in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else if (checkString("/dev/null", in)) + { + old_file_path.clear(); + assertChar('\n', in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else + skipUntilNextLine(in); /// Actually it can be the line in diff. Skip it for simplicity. 
+ } + else + { + if (file_change_and_line_changes) + { + ++commit.lines_deleted; + + line_change.sign = -1; + readStringUntilNextLine(line_change.line, in); + line_change.setLineInfo(line_change.line); + + file_change_and_line_changes->line_changes.push_back(line_change); + ++line_change.line_number_old; + } + } + } + else if (checkChar('+', in)) + { + if (checkString("++ ", in)) + { + if (checkString("b/", in)) + { + readStringUntilNextLine(new_file_path, in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else if (checkString("/dev/null", in)) + { + new_file_path.clear(); + assertChar('\n', in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else + skipUntilNextLine(in); /// Actually it can be the line in diff. Skip it for simplicity. + } + else + { + if (file_change_and_line_changes) + { + ++commit.lines_added; + + line_change.sign = 1; + readStringUntilNextLine(line_change.line, in); + line_change.setLineInfo(line_change.line); + + file_change_and_line_changes->line_changes.push_back(line_change); + ++line_change.line_number_new; + } + } + } + else + { + skipUntilNextLine(in); + } + } + } + + /// Write the result + + /// commits table + { + auto & out = result.commits; + + commit.writeTextWithoutNewline(out); + writeChar('\n', out); + } + + for (const auto & elem : file_changes) + { + const FileChange & file_change = elem.second.file_change; + + /// file_changes table + { + auto & out = result.file_changes; + + file_change.writeTextWithoutNewline(out); + writeChar('\t', out); + commit.writeTextWithoutNewline(out); + writeChar('\n', out); + } + + /// line_changes table + for (const auto & line_change : elem.second.line_changes) + { + auto & out = result.line_changes; + + line_change.writeTextWithoutNewline(out); + writeChar('\t', out); + file_change.writeTextWithoutNewline(out); + writeChar('\t', out); + commit.writeTextWithoutNewline(out); + writeChar('\n', out); + } + } +} + + +void processLog() +{ + Result result; + + std::string command = "git log --no-merges --pretty=%H"; + std::cerr << command << "\n"; + auto git_log = ShellCommand::execute(command); + + auto & in = git_log->out; + while (!in.eof()) + { + std::string hash; + readString(hash, in); + assertChar('\n', in); + + std::cerr << fmt::format("Processing commit {}\n", hash); + processCommit(std::move(hash), result); + } +} + + +} + +int main(int /*argc*/, char ** /*argv*/) +try +{ + using namespace DB; + +/* boost::program_options::options_description desc("Allowed options"); + desc.add_options()("help,h", "produce help message"); + + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + + if (options.count("help") || argc != 2) + { + std::cout << "Usage: " << argv[0] << std::endl; + std::cout << desc << std::endl; + return 1; + }*/ + + processLog(); + return 0; +} +catch (...) 
+{ + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; + throw; +} From 338a6e20f60bb21c99ee2c4f261d96bc55ec4b97 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 4 Sep 2020 09:12:16 +0300 Subject: [PATCH 044/298] Added a script to import git repository to ClickHouse --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index 42920328ad7..314bba0d5b4 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -16,6 +16,101 @@ #include +/** How to use: + +DROP DATABASE IF EXISTS git; +CREATE DATABASE git; + +CREATE TABLE git.commits +( + hash String, + author_name LowCardinality(String), + author_email LowCardinality(String), + time DateTime, + message String, + files_added UInt32, + files_deleted UInt32, + files_renamed UInt32, + files_modified UInt32, + lines_added UInt32, + lines_deleted UInt32, + hunks_added UInt32, + hunks_removed UInt32, + hunks_changed UInt32 +) ENGINE = MergeTree ORDER BY time; + +CREATE TABLE git.file_changes +( + change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), + new_file_path LowCardinality(String), + old_file_path LowCardinality(String), + lines_added UInt16, + lines_deleted UInt16, + hunks_added UInt16, + hunks_removed UInt16, + hunks_changed UInt16, + + commit_hash String, + author_name LowCardinality(String), + author_email LowCardinality(String), + time DateTime, + commit_message String, + commit_files_added UInt32, + commit_files_deleted UInt32, + commit_files_renamed UInt32, + commit_files_modified UInt32, + commit_lines_added UInt32, + commit_lines_deleted UInt32, + commit_hunks_added UInt32, + commit_hunks_removed UInt32, + commit_hunks_changed UInt32 +) ENGINE = MergeTree ORDER BY time; + +CREATE TABLE git.line_changes +( + sign Int8, + line_number_old UInt16, + line_number_new UInt16, + hunk_num UInt16, + hunk_start_line_number_old UInt16, + hunk_start_line_number_new UInt16, + hunk_context LowCardinality(String), + line LowCardinality(String), + indent UInt8, + line_type Enum('Empty' = 0, 'Comment' = 1, 'Punct' = 2, 'Code' = 3), + + file_change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), + new_file_path LowCardinality(String), + old_file_path LowCardinality(String), + file_lines_added UInt16, + file_lines_deleted UInt16, + file_hunks_added UInt16, + file_hunks_removed UInt16, + file_hunks_changed UInt16, + + commit_hash String, + author_name LowCardinality(String), + author_email LowCardinality(String), + time DateTime, + commit_message String, + commit_files_added UInt32, + commit_files_deleted UInt32, + commit_files_renamed UInt32, + commit_files_modified UInt32, + commit_lines_added UInt32, + commit_lines_deleted UInt32, + commit_hunks_added UInt32, + commit_hunks_removed UInt32, + commit_hunks_changed UInt32 +) ENGINE = MergeTree ORDER BY time; + +clickhouse-client --query "INSERT INTO git.commits FORMAT TSV" < commits.tsv +clickhouse-client --query "INSERT INTO git.file_changes FORMAT TSV" < file_changes.tsv +clickhouse-client --query "INSERT INTO git.line_changes FORMAT TSV" < line_changes.tsv + + */ + + namespace DB { @@ -495,6 +590,7 @@ void processCommit(std::string hash, Result & result) if (file_change_and_line_changes) { ++commit.lines_deleted; + ++file_change_and_line_changes->file_change.lines_deleted; line_change.sign = 
-1; readStringUntilNextLine(line_change.line, in); @@ -530,6 +626,7 @@ void processCommit(std::string hash, Result & result) if (file_change_and_line_changes) { ++commit.lines_added; + ++file_change_and_line_changes->file_change.lines_added; line_change.sign = 1; readStringUntilNextLine(line_change.line, in); From 61ecaebcb1b8a306bfda2fec90a20171427d2164 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 09:55:19 +0300 Subject: [PATCH 045/298] Simplify settings for TTL merges --- src/Storages/MergeTree/MergeList.h | 14 ++++++++++- .../MergeTree/MergeTreeDataMergerMutator.cpp | 25 +++---------------- .../MergeTree/MergeTreeDataMergerMutator.h | 9 +------ src/Storages/MergeTree/MergeTreeSettings.cpp | 13 ---------- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/StorageMergeTree.cpp | 6 +++-- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++++---- 7 files changed, 28 insertions(+), 51 deletions(-) diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 0b41745a9ba..5af71b88341 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -132,6 +132,7 @@ class MergeList mutable std::mutex mutex; container_t merges; + std::atomic merges_with_ttl_counter = 0; public: using Entry = MergeListEntry; using EntryPtr = std::unique_ptr; @@ -140,7 +141,9 @@ public: EntryPtr insert(Args &&... args) { std::lock_guard lock{mutex}; - return std::make_unique(*this, merges.emplace(merges.end(), std::forward(args)...)); + auto entry = std::make_unique(*this, merges.emplace(merges.end(), std::forward(args)...)); + merges_with_ttl_counter += (*entry)->merge_type == MergeType::TTL_DELETE; + return entry; } info_container_t get() const @@ -163,12 +166,21 @@ public: merge_element.is_cancelled = true; } } + + size_t getExecutingMergesWithTTLCount() const + { + return merges_with_ttl_counter; + } }; inline MergeListEntry::~MergeListEntry() { std::lock_guard lock{list.mutex}; + + if (it->merge_type == MergeType::TTL_DELETE) + list.merges_with_ttl_counter--; + list.merges.erase(it); } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index ac86f483694..d25bb5224cf 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -207,34 +207,17 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation() return 0; } - -UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMergeWithTTL() -{ - const auto data_settings = data.getSettings(); - size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed); - - /// DataPart can be store only at one disk. Get maximum reservable free space at all disks. 
- UInt64 disk_space = data.getStoragePolicy()->getMaxUnreservedFreeSpace(); - - /// Allow merges with TTL only if there are enough threads, leave free threads for regular merges - if (busy_threads_in_pool <= 1 - || background_pool_size - busy_threads_in_pool >= data_settings->number_of_free_entries_in_pool_to_execute_merge_with_ttl) - return static_cast(disk_space / DISK_USAGE_COEFFICIENT_TO_RESERVE); - - return 0; - -} - bool MergeTreeDataMergerMutator::selectPartsToMerge( FutureMergedMutatedPart & future_part, bool aggressive, size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge_callback, - size_t max_total_size_to_merge_with_ttl, + bool merge_with_ttl_allowed, String * out_disable_reason) { MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector(); const auto data_settings = data.getSettings(); + auto metadata_snapshot = data.getInMemoryMetadataPtr(); if (data_parts.empty()) { @@ -311,7 +294,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( IMergeSelector::PartsRange parts_to_merge; - if (!ttl_merges_blocker.isCancelled()) + if (metadata_snapshot->hasAnyTTL() && merge_with_ttl_allowed && !ttl_merges_blocker.isCancelled()) { TTLMergeSelector merge_selector( next_ttl_merge_times_by_partition, @@ -319,7 +302,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( data_settings->merge_with_ttl_timeout, data_settings->ttl_only_drop_parts); - parts_to_merge = merge_selector.select(parts_ranges, max_total_size_to_merge_with_ttl); + parts_to_merge = merge_selector.select(parts_ranges, max_total_size_to_merge); if (!parts_to_merge.empty()) future_part.merge_type = MergeType::TTL_DELETE; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index a874c93e2f6..492807fe39a 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -61,13 +61,6 @@ public: */ UInt64 getMaxSourcePartsSizeForMerge(); - /** Get maximum total size of parts to do merge with TTL, at current moment - * of time. If busy threads count is less than value specified by - * number_of_free_entries_in_pool_to_execute_merge_with_ttl than maximum - * size (available on disk) is allowed. - */ - UInt64 getMaxSourcePartsSizeForMergeWithTTL(); - /** For explicitly passed size of pool and number of used tasks. * This method could be used to calculate threshold depending on number of tasks in replication queue. */ @@ -90,7 +83,7 @@ public: bool aggressive, size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge, - size_t max_total_size_to_merge_with_ttl, + bool merge_with_ttl_allowed, String * out_disable_reason = nullptr); /** Select all the parts in the specified partition for merge, if possible. 
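With getMaxSourcePartsSizeForMergeWithTTL gone, TTL merges are now gated by a plain boolean computed by the caller from the MergeList counter added above, instead of a separate size limit. A simplified sketch of that gate, assuming the names introduced in this patch (the helper itself is illustrative; the real checks live in StorageMergeTree::merge and the replicated merge-selecting task):

bool mayAssignMergeWithTTL(const MergeList & merge_list, const MergeTreeSettings & settings)
{
    /// MergeList keeps an atomic counter of currently executing TTL merges,
    /// so this check is cheap and takes no locks.
    return merge_list.getExecutingMergesWithTTLCount() < settings.max_number_of_merges_with_ttl_in_pool;
}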
diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 44504bdec84..7f537ec330a 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -100,19 +100,6 @@ void MergeTreeSettings::sanityCheck(const Settings & query_settings) const number_of_free_entries_in_pool_to_lower_max_size_of_merge, query_settings.background_pool_size); } - - if (number_of_free_entries_in_pool_to_execute_merge_with_ttl >= query_settings.background_pool_size) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of 'number_of_free_entries_in_pool_to_execute_merge_with_ttl' setting" - " ({}) (default values are defined in section of config.xml" - " or the value can be specified per table in SETTINGS section of CREATE TABLE query)" - " is greater or equals to the value of 'background_pool_size'" - " ({}) (the value is defined in users.xml for default profile)." - " This indicates incorrect configuration because TTL cannot work with these settings.", - number_of_free_entries_in_pool_to_execute_merge_with_ttl, - query_settings.background_pool_size); - } - } } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 06fc21b24c3..80236d227ba 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -36,7 +36,7 @@ struct Settings; M(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \ M(UInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ - M(UInt64, number_of_free_entries_in_pool_to_execute_merge_with_ttl, 12, "When there is less than specified number of free entries in pool, do not execute merge with TTL. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ + M(UInt64, max_number_of_merges_with_ttl_in_pool, 2, "When there are more than the specified number of merges with TTL in the pool, do not assign a new merge with TTL. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ M(Seconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ M(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \ M(Seconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. 
How many seconds before failing to acquire table locks.", 0) \ diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 729263c3aaa..d6cce2e98ae 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -627,11 +627,13 @@ bool StorageMergeTree::merge( { auto table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); auto metadata_snapshot = getInMemoryMetadataPtr(); + auto data_settings = getSettings(); FutureMergedMutatedPart future_part; /// You must call destructor with unlocked `currently_processing_in_background_mutex`. std::optional merging_tagger; + auto & merge_list = global_context.getMergeList(); { std::unique_lock lock(currently_processing_in_background_mutex); @@ -651,7 +653,7 @@ bool StorageMergeTree::merge( if (partition_id.empty()) { UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge(); - UInt64 max_source_parts_size_with_ttl = merger_mutator.getMaxSourcePartsSizeForMergeWithTTL(); + bool merge_with_ttl_allowed = merge_list.getExecutingMergesWithTTLCount() < data_settings->max_number_of_merges_with_ttl_in_pool; /// TTL requirements are much stricter than for a regular merge, so /// if a regular merge is not possible, then a merge with TTL is also not @@ -663,7 +665,7 @@ bool StorageMergeTree::merge( aggressive, max_source_parts_size, can_merge, - max_source_parts_size_with_ttl, + merge_with_ttl_allowed, out_disable_reason); } else if (out_disable_reason) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a5b293fd30b..6823d6c3129 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2525,17 +2525,17 @@ void StorageReplicatedMergeTree::mergeSelectingTask() } else { + const auto & merge_list = global_context.getMergeList(); UInt64 max_source_parts_size_for_merge = merger_mutator.getMaxSourcePartsSizeForMerge( storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum); UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); - UInt64 max_source_part_size_for_merge_with_ttl = 0; - if (merges_and_mutations_queued.merges_with_ttl < storage_settings_ptr->max_replicated_merges_with_ttl_in_queue) - max_source_part_size_for_merge_with_ttl = merger_mutator.getMaxSourcePartsSizeForMergeWithTTL(); + bool merge_with_ttl_allowed = merges_and_mutations_queued.merges_with_ttl < storage_settings_ptr->max_replicated_merges_with_ttl_in_queue && + merge_list.getExecutingMergesWithTTLCount() < storage_settings_ptr->max_number_of_merges_with_ttl_in_pool; FutureMergedMutatedPart future_merged_part; if (max_source_parts_size_for_merge > 0 && - merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, max_source_part_size_for_merge_with_ttl, nullptr)) + merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, merge_with_ttl_allowed, nullptr)) { create_result = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, future_merged_part.name, future_merged_part.type, deduplicate, nullptr, merge_pred.getVersion(), future_merged_part.merge_type); @@ -3620,7 +3620,7 @@ bool StorageReplicatedMergeTree::optimize( if (!partition) { selected = merger_mutator.selectPartsToMerge( - future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, 
storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, &disable_reason); + future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, true, &disable_reason); } else { From 82c56349a5413311a4de51718567a776207d0c4f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 13:08:09 +0300 Subject: [PATCH 046/298] Some comments --- src/Storages/MergeTree/MergeTreeData.cpp | 5 ++ src/Storages/MergeTree/MergeTreeData.h | 5 ++ src/Storages/MergeTree/MergeType.h | 12 ++- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 29 +++++- src/Storages/StorageMergeTree.cpp | 9 +- src/Storages/StorageReplicatedMergeTree.cpp | 4 +- .../test_concurrent_ttl_merges/test.py | 88 +++++++++++++++++-- 7 files changed, 135 insertions(+), 17 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b6a495161f5..5daecdbb3ef 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3666,4 +3666,9 @@ NamesAndTypesList MergeTreeData::getVirtuals() const }; } +size_t MergeTreeData::getTotalMergesWithTTLInMergeList() const +{ + return global_context.getMergeList().getExecutingMergesWithTTLCount(); +} + } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e088a1c098b..205700ecd64 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -646,6 +646,11 @@ public: /// TTL rule. bool isPartInTTLDestination(const TTLDescription & ttl, const IMergeTreeDataPart & part) const; + /// Get the total count of merges with TTL in the MergeList (system.merges) for all + /// tables (not only the current table). + /// Method is cheap and doesn't require any locks. + size_t getTotalMergesWithTTLInMergeList() const; + using WriteAheadLogPtr = std::shared_ptr; WriteAheadLogPtr getWriteAheadLog(); diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h index 20fd7cd24af..27efe1a8539 100644 --- a/src/Storages/MergeTree/MergeType.h +++ b/src/Storages/MergeTree/MergeType.h @@ -5,16 +5,24 @@ namespace DB { +/// Type of merge. Used to control the number of merges of different types during merge +/// assignment. Also allows applying special logic during the merge process +/// (mergePartsToTemporaryPart). Stored in FutureMergedMutatedPart and +/// ReplicatedMergeTreeLogEntry. +/// +/// Order is important, don't try to change it. enum class MergeType { - REGULAR, - TTL_DELETE, + REGULAR = 1, + TTL_DELETE = 2, }; +/// Check a merge_type parsed from a raw int and get the enum value. MergeType checkAndGetMergeType(UInt64 merge_type); String toString(MergeType merge_type); +/// Check whether this merge was assigned due to TTL bool isTTLMergeType(MergeType merge_type); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 61d53c60128..5fd15547ea4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1070,7 +1070,34 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( * because the leader replica does not assign merges of greater size (except OPTIMIZE PARTITION and OPTIMIZE FINAL). 
*/
    const auto data_settings = data.getSettings();
-    bool ignore_max_size = (entry.type == LogEntry::MERGE_PARTS) && (max_source_parts_size == data_settings->max_bytes_to_merge_at_max_space_in_pool);
+    bool ignore_max_size = false;
+    if (entry.type == LogEntry::MERGE_PARTS)
+    {
+        ignore_max_size = max_source_parts_size == data_settings->max_bytes_to_merge_at_max_space_in_pool;
+
+        if (isTTLMergeType(entry.merge_type))
+        {
+            if (merger_mutator.ttl_merges_blocker.isCancelled())
+            {
+                String reason = "Not executing log entry for part " + entry.new_part_name + " because merges with TTL are cancelled now.";
+                LOG_DEBUG(log, reason);
+                out_postpone_reason = reason;
+                return false;
+            }
+            size_t total_merges_with_ttl = data.getTotalMergesWithTTLInMergeList();
+            if (total_merges_with_ttl >= data_settings->max_number_of_merges_with_ttl_in_pool)
+            {
+                const char * format_str = "Not executing log entry for part {}"
+                    " because {} merges with TTL are already executing, maximum {}.";
+                LOG_DEBUG(log, format_str, entry.new_part_name, total_merges_with_ttl,
+                    data_settings->max_number_of_merges_with_ttl_in_pool);
+
+                out_postpone_reason = fmt::format(format_str, entry.new_part_name, total_merges_with_ttl,
+                    data_settings->max_number_of_merges_with_ttl_in_pool);
+                return false;
+            }
+        }
+    }
 
     if (!ignore_max_size && sum_parts_size_in_bytes > max_source_parts_size)
     {
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index d6cce2e98ae..347474753dc 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -633,7 +633,7 @@ bool StorageMergeTree::merge(
     /// You must call destructor with unlocked `currently_processing_in_background_mutex`.
     std::optional merging_tagger;
 
-    auto & merge_list = global_context.getMergeList();
+    MergeList::EntryPtr merge_entry;
 
     {
         std::unique_lock lock(currently_processing_in_background_mutex);
@@ -653,7 +653,7 @@ bool StorageMergeTree::merge(
         if (partition_id.empty())
         {
             UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge();
-            bool merge_with_ttl_allowed = merge_list.getExecutingMergesWithTTLCount() < data_settings->max_number_of_merges_with_ttl_in_pool;
+            bool merge_with_ttl_allowed = getTotalMergesWithTTLInMergeList() < data_settings->max_number_of_merges_with_ttl_in_pool;
 
             /// TTL requirements are much stricter than for a regular merge, so
             /// if a regular merge is not possible, then a merge with TTL is not possible either
@@ -716,11 +716,10 @@ bool StorageMergeTree::merge(
         }
 
         merging_tagger.emplace(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part.parts), *this, false);
+        auto table_id = getStorageID();
+        merge_entry = global_context.getMergeList().insert(table_id.database_name, table_id.table_name, future_part);
     }
 
-    auto table_id = getStorageID();
-    MergeList::EntryPtr merge_entry = global_context.getMergeList().insert(table_id.database_name, table_id.table_name, future_part);
-
     /// Logging
     Stopwatch stopwatch;
     MutableDataPartPtr new_part;
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 6823d6c3129..13438821ba6 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1377,6 +1377,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry)
             + backQuote(entry.new_part_name), ErrorCodes::BAD_DATA_PART_NAME);
     }
     future_merged_part.updatePath(*this, reserved_space);
+    future_merged_part.merge_type = entry.merge_type;
 
     auto table_id = getStorageID();
     MergeList::EntryPtr
merge_entry = global_context.getMergeList().insert(table_id.database_name, table_id.table_name, future_merged_part); @@ -2525,13 +2526,12 @@ void StorageReplicatedMergeTree::mergeSelectingTask() } else { - const auto & merge_list = global_context.getMergeList(); UInt64 max_source_parts_size_for_merge = merger_mutator.getMaxSourcePartsSizeForMerge( storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum); UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); bool merge_with_ttl_allowed = merges_and_mutations_queued.merges_with_ttl < storage_settings_ptr->max_replicated_merges_with_ttl_in_queue && - merge_list.getExecutingMergesWithTTLCount() < storage_settings_ptr->max_number_of_merges_with_ttl_in_pool; + getTotalMergesWithTTLInMergeList() < storage_settings_ptr->max_number_of_merges_with_ttl_in_pool; FutureMergedMutatedPart future_merged_part; if (max_source_parts_size_for_merge > 0 && diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index a82da2a1a8b..1ca303a6dcc 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -30,13 +30,6 @@ def count_ttl_merges_in_queue(node, table): return int(result.strip()) -def count_regular_merges_in_queue(node, table): - result = node.query("SELECT count() FROM system.replication_queue WHERE merge_type = 'REGULAR' and table = '{}'".format(table)) - if not result: - return 0 - return int(result.strip()) - - def count_ttl_merges_in_background_pool(node, table): result = node.query("SELECT count() FROM system.merges WHERE merge_type = 'TTL_DELETE' and table = '{}'".format(table)) if not result: @@ -84,3 +77,84 @@ def test_no_ttl_merges_in_busy_pool(started_cluster): time.sleep(0.5) assert_eq_with_retry(node1, "SELECT COUNT() FROM test_ttl", "0") + + +def test_limited_ttl_merges_in_empty_pool(started_cluster): + node1.query("CREATE TABLE test_ttl_v2 (d DateTime, key UInt64, data UInt64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY key TTL d + INTERVAL 1 MONTH SETTINGS merge_with_ttl_timeout = 0") + + node1.query("SYSTEM STOP TTL MERGES") + + for i in range(100): + node1.query("INSERT INTO test_ttl_v2 SELECT now() - INTERVAL 1 MONTH, {}, number FROM numbers(1)".format(i)) + + assert node1.query("SELECT COUNT() FROM test_ttl_v2") == "100\n" + + node1.query("SYSTEM START TTL MERGES") + + merges_with_ttl_count = set({}) + while True: + merges_with_ttl_count.add(count_ttl_merges_in_background_pool(node1, "test_ttl_v2")) + time.sleep(0.01) + if node1.query("SELECT COUNT() FROM test_ttl_v2") == "0\n": + break + + assert max(merges_with_ttl_count) <= 2 + + +def test_limited_ttl_merges_in_empty_pool_replicated(started_cluster): + node1.query("CREATE TABLE replicated_ttl (d DateTime, key UInt64, data UInt64) ENGINE = ReplicatedMergeTree('/test/t', '1') ORDER BY tuple() PARTITION BY key TTL d + INTERVAL 1 MONTH SETTINGS merge_with_ttl_timeout = 0") + + node1.query("SYSTEM STOP TTL MERGES") + + for i in range(100): + node1.query("INSERT INTO replicated_ttl SELECT now() - INTERVAL 1 MONTH, {}, number FROM numbers(1)".format(i)) + + assert node1.query("SELECT COUNT() FROM replicated_ttl") == "100\n" + + node1.query("SYSTEM START TTL MERGES") + + merges_with_ttl_count = set({}) + entries_with_ttl_count = set({}) + while True: + merges_with_ttl_count.add(count_ttl_merges_in_background_pool(node1, "replicated_ttl")) + 
entries_with_ttl_count.add(count_ttl_merges_in_queue(node1, "replicated_ttl"))
+        time.sleep(0.01)
+        if node1.query("SELECT COUNT() FROM replicated_ttl") == "0\n":
+            break
+
+    assert max(merges_with_ttl_count) <= 2
+    assert max(entries_with_ttl_count) <= 1
+
+
+def test_limited_ttl_merges_two_replicas(started_cluster):
+    # Actually this test is quite fast and often we cannot catch any merges.
+    # To check for sure, just add some sleeps in mergePartsToTemporaryPart.
+    node1.query("CREATE TABLE replicated_ttl_2 (d DateTime, key UInt64, data UInt64) ENGINE = ReplicatedMergeTree('/test/t2', '1') ORDER BY tuple() PARTITION BY key TTL d + INTERVAL 1 MONTH SETTINGS merge_with_ttl_timeout = 0")
+    node2.query("CREATE TABLE replicated_ttl_2 (d DateTime, key UInt64, data UInt64) ENGINE = ReplicatedMergeTree('/test/t2', '2') ORDER BY tuple() PARTITION BY key TTL d + INTERVAL 1 MONTH SETTINGS merge_with_ttl_timeout = 0")
+
+    node1.query("SYSTEM STOP TTL MERGES")
+    node2.query("SYSTEM STOP TTL MERGES")
+
+    for i in range(100):
+        node1.query("INSERT INTO replicated_ttl_2 SELECT now() - INTERVAL 1 MONTH, {}, number FROM numbers(10000)".format(i))
+
+    node2.query("SYSTEM SYNC REPLICA replicated_ttl_2", timeout=10)
+    assert node1.query("SELECT COUNT() FROM replicated_ttl_2") == "1000000\n"
+    assert node2.query("SELECT COUNT() FROM replicated_ttl_2") == "1000000\n"
+
+    node1.query("SYSTEM START TTL MERGES")
+    node2.query("SYSTEM START TTL MERGES")
+
+    merges_with_ttl_count_node1 = set({})
+    merges_with_ttl_count_node2 = set({})
+    while True:
+        merges_with_ttl_count_node1.add(count_ttl_merges_in_background_pool(node1, "replicated_ttl_2"))
+        merges_with_ttl_count_node2.add(count_ttl_merges_in_background_pool(node2, "replicated_ttl_2"))
+        if node1.query("SELECT COUNT() FROM replicated_ttl_2") == "0\n" and node2.query("SELECT COUNT() FROM replicated_ttl_2") == "0\n":
+            break
+
+    # Both replicas can assign merges with TTL. If one performs better than
+    # the other, the slow replica may have several merges in its queue, so we
+    # don't check the queue entries here.
+    assert max(merges_with_ttl_count_node1) <= 2
+    assert max(merges_with_ttl_count_node2) <= 2

From 69b31ab90dab9916c5b0c3200a3bf49168368f1b Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 4 Sep 2020 13:29:55 +0300
Subject: [PATCH 047/298] More comments

---
 src/Storages/MergeTree/MergeType.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h
index 27efe1a8539..7ade9ddddd5 100644
--- a/src/Storages/MergeTree/MergeType.h
+++ b/src/Storages/MergeTree/MergeType.h
@@ -13,7 +13,9 @@ namespace DB
 /// Order is important, don't try to change it.
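 ///
 /// Editorial sketch (not part of this patch): the numeric values are what gets
 /// serialized into the replication log as a "merge_type: <N>" field, so parsing
 /// must round-trip them exactly. Assuming checkAndGetMergeType() throws
 /// NOT_IMPLEMENTED on unknown values, as implemented later in this series:
 ///
 ///     UInt64 raw = static_cast<UInt64>(MergeType::TTL_DELETE);  // written as "merge_type: 2"
 ///     MergeType parsed = checkAndGetMergeType(raw);             // throws on unknown input
 ///     assert(parsed == MergeType::TTL_DELETE);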
enum class MergeType { + /// Just regular merge REGULAR = 1, + /// Merge assigned to delete some data from parts (with TTLMergeSelector) TTL_DELETE = 2, }; From 6f5ba4d8e51e75f4e6ab0c39dbf6b18f12daa58e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 13:31:29 +0300 Subject: [PATCH 048/298] Fix ya.make --- src/Storages/ya.make | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 3054d5b9cc7..894085e8b91 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -84,6 +84,7 @@ SRCS( MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp MergeTree/MergeTreeWhereOptimizer.cpp MergeTree/MergeTreeWriteAheadLog.cpp + MergeTree/MergeType.cpp MergeTree/registerStorageMergeTree.cpp MergeTree/ReplicatedMergeTreeAddress.cpp MergeTree/ReplicatedMergeTreeAltersSequence.cpp From e42d0f60da6c228ac7b896fd3504dc7e500d68b4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 14:27:27 +0300 Subject: [PATCH 049/298] Fix several bugs --- src/Storages/MergeTree/MergeList.h | 5 +++-- src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp | 8 +++++--- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 5af71b88341..d0b01913058 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -142,7 +142,8 @@ public: { std::lock_guard lock{mutex}; auto entry = std::make_unique(*this, merges.emplace(merges.end(), std::forward(args)...)); - merges_with_ttl_counter += (*entry)->merge_type == MergeType::TTL_DELETE; + if (isTTLMergeType((*entry)->merge_type)) + ++merges_with_ttl_counter; return entry; } @@ -179,7 +180,7 @@ inline MergeListEntry::~MergeListEntry() std::lock_guard lock{list.mutex}; if (it->merge_type == MergeType::TTL_DELETE) - list.merges_with_ttl_counter--; + --list.merges_with_ttl_counter; list.merges.erase(it); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index d95ae6b729d..b79717fc54c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -154,15 +154,17 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) if (format_version >= 4) { in >> "\ndeduplicate: " >> deduplicate; + + /// Trying to be more backward compatible in >> "\n"; - if (in.eof()) - trailing_newline_found = true; - else if (checkString("merge_type: ", in)) + if (checkString("merge_type: ", in)) { UInt64 value; in >> value; merge_type = checkAndGetMergeType(value); } + else + trailing_newline_found = true; } } else if (type_str == "drop" || type_str == "detach") diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 13438821ba6..ad845537139 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3620,7 +3620,7 @@ bool StorageReplicatedMergeTree::optimize( if (!partition) { selected = merger_mutator.selectPartsToMerge( - future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, true, &disable_reason); + future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, false, &disable_reason); } else { From 68913eab62ea2ac13a2021ce9548b9b3e987f922 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 4 Sep 2020 15:48:55 +0300 Subject: [PATCH 050/298] 
better reading from compact parts with different codecs
---
 .../CachedCompressedReadBuffer.cpp            | 19 +----
 src/Compression/CachedCompressedReadBuffer.h  |  6 +-
 src/Compression/CompressedReadBufferBase.cpp  | 46 +++++++---
 src/Compression/CompressedReadBufferBase.h    |  5 +-
 .../CompressedReadBufferFromFile.cpp          | 12 +--
 .../CompressedReadBufferFromFile.h            |  6 +-
 .../MergeTree/MergeTreeReaderCompact.cpp      | 84 +++++++++----------
 .../MergeTree/MergeTreeReaderCompact.h        | 20 +----
 .../01375_compact_parts_codecs.reference      |  3 +
 .../01375_compact_parts_codecs.sql            |  6 ++
 10 files changed, 104 insertions(+), 103 deletions(-)

diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp
index 218925f8eae..3fb45ab0948 100644
--- a/src/Compression/CachedCompressedReadBuffer.cpp
+++ b/src/Compression/CachedCompressedReadBuffer.cpp
@@ -12,7 +12,6 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int SEEK_POSITION_OUT_OF_BOUND;
-    extern const int LOGICAL_ERROR;
 }
 
 
@@ -20,9 +19,8 @@ void CachedCompressedReadBuffer::initInput()
 {
     if (!file_in)
     {
-        file_in_holder = file_in_creator();
-        file_in = file_in_holder.get();
-        compressed_in = file_in;
+        file_in = file_in_creator();
+        compressed_in = file_in.get();
 
         if (profile_callback)
             file_in->setProfileCallback(profile_callback, clock_type);
@@ -74,19 +72,10 @@ bool CachedCompressedReadBuffer::nextImpl()
 }
 
 CachedCompressedReadBuffer::CachedCompressedReadBuffer(
-    const std::string & path_, ReadBufferFromFileBase * file_in_, UncompressedCache * cache_)
-    : ReadBuffer(nullptr, 0), file_in(file_in_), cache(cache_), path(path_), file_pos(0)
-{
-    if (file_in == nullptr)
-        throw Exception("Neither file_in nor file_in_creator is initialized in CachedCompressedReadBuffer", ErrorCodes::LOGICAL_ERROR);
-
-    compressed_in = file_in;
-}
-
-CachedCompressedReadBuffer::CachedCompressedReadBuffer(
-    const std::string & path_, std::function()> file_in_creator_, UncompressedCache * cache_)
+    const std::string & path_, std::function()> file_in_creator_, UncompressedCache * cache_, bool allow_different_codecs_)
    : ReadBuffer(nullptr, 0), file_in_creator(std::move(file_in_creator_)), cache(cache_), path(path_), file_pos(0)
 {
+    allow_different_codecs = allow_different_codecs_;
 }
 
 void CachedCompressedReadBuffer::seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block)
diff --git a/src/Compression/CachedCompressedReadBuffer.h b/src/Compression/CachedCompressedReadBuffer.h
index 5debdc006cc..c2338f6f841 100644
--- a/src/Compression/CachedCompressedReadBuffer.h
+++ b/src/Compression/CachedCompressedReadBuffer.h
@@ -21,9 +21,8 @@ class CachedCompressedReadBuffer : public CompressedReadBufferBase, public ReadB
 {
 private:
     std::function()> file_in_creator;
-    std::unique_ptr file_in_holder;
-    ReadBufferFromFileBase * file_in = nullptr;
     UncompressedCache * cache;
+    std::unique_ptr file_in;
 
     const std::string path;
     size_t file_pos;
@@ -39,8 +38,7 @@ private:
     clockid_t clock_type {};
 
 public:
-    CachedCompressedReadBuffer(const std::string & path_, ReadBufferFromFileBase * file_in_, UncompressedCache * cache_);
-    CachedCompressedReadBuffer(const std::string & path, std::function()> file_in_creator, UncompressedCache * cache_);
+    CachedCompressedReadBuffer(const std::string & path, std::function()> file_in_creator, UncompressedCache * cache_, bool allow_different_codecs_ = false);
 
     void seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block);
 
diff --git a/src/Compression/CompressedReadBufferBase.cpp
b/src/Compression/CompressedReadBufferBase.cpp index a05b5cd7f64..be2f697e1b3 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -105,13 +105,24 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, uint8_t method = ICompressionCodec::readMethod(own_compressed_buffer.data()); if (!codec) + { codec = CompressionCodecFactory::instance().get(method); + } else if (method != codec->getMethodByte()) - throw Exception("Data compressed with different methods, given method byte 0x" - + getHexUIntLowercase(method) - + ", previous method byte 0x" - + getHexUIntLowercase(codec->getMethodByte()), - ErrorCodes::CANNOT_DECOMPRESS); + { + if (allow_different_codecs) + { + codec = CompressionCodecFactory::instance().get(method); + } + else + { + throw Exception("Data compressed with different methods, given method byte 0x" + + getHexUIntLowercase(method) + + ", previous method byte 0x" + + getHexUIntLowercase(codec->getMethodByte()), + ErrorCodes::CANNOT_DECOMPRESS); + } + } size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(own_compressed_buffer.data()); size_decompressed = ICompressionCodec::readDecompressedBlockSize(own_compressed_buffer.data()); @@ -163,21 +174,32 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s uint8_t method = ICompressionCodec::readMethod(compressed_buffer); if (!codec) + { codec = CompressionCodecFactory::instance().get(method); + } else if (codec->getMethodByte() != method) - throw Exception("Data compressed with different methods, given method byte " - + getHexUIntLowercase(method) - + ", previous method byte " - + getHexUIntLowercase(codec->getMethodByte()), - ErrorCodes::CANNOT_DECOMPRESS); + { + if (allow_different_codecs) + { + codec = CompressionCodecFactory::instance().get(method); + } + else + { + throw Exception("Data compressed with different methods, given method byte " + + getHexUIntLowercase(method) + + ", previous method byte " + + getHexUIntLowercase(codec->getMethodByte()), + ErrorCodes::CANNOT_DECOMPRESS); + } + } codec->decompress(compressed_buffer, size_compressed_without_checksum, to); } /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. -CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in) - : compressed_in(in), own_compressed_buffer(0) +CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_) + : compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_) { } diff --git a/src/Compression/CompressedReadBufferBase.h b/src/Compression/CompressedReadBufferBase.h index f44140dcd04..71dc5274d5b 100644 --- a/src/Compression/CompressedReadBufferBase.h +++ b/src/Compression/CompressedReadBufferBase.h @@ -26,6 +26,9 @@ protected: /// Don't checksum on decompressing. bool disable_checksum = false; + /// Allow reading data, compressed by different codecs from one file. + bool allow_different_codecs; + /// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need. /// Returns number of compressed bytes read. size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum); @@ -34,7 +37,7 @@ protected: public: /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. 
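    /// Editorial sketch (not part of this patch): the intended call pattern for the
    /// new flag. Readers of compact parts, where one data file can interleave blocks
    /// compressed by different per-column codecs, construct the buffer with
    /// allow_different_codecs_ = true, e.g.
    ///
    ///     CompressedReadBufferFromFile buf(std::move(file_in), /* allow_different_codecs_ = */ true);
    ///
    /// With the default of false, a codec change mid-stream still throws
    /// CANNOT_DECOMPRESS, preserving the old strict behaviour.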
- CompressedReadBufferBase(ReadBuffer * in = nullptr); + CompressedReadBufferBase(ReadBuffer * in = nullptr, bool allow_different_codecs_ = false); ~CompressedReadBufferBase(); /** Disable checksums. diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 852194bb81e..f3fa2d6bc10 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -36,26 +36,22 @@ bool CompressedReadBufferFromFile::nextImpl() return true; } -CompressedReadBufferFromFile::CompressedReadBufferFromFile(ReadBufferFromFileBase & file_in_) - : BufferWithOwnMemory(0), file_in(file_in_) -{ - compressed_in = &file_in; -} - -CompressedReadBufferFromFile::CompressedReadBufferFromFile(std::unique_ptr buf) +CompressedReadBufferFromFile::CompressedReadBufferFromFile(std::unique_ptr buf, bool allow_different_codecs_) : BufferWithOwnMemory(0), p_file_in(std::move(buf)), file_in(*p_file_in) { compressed_in = &file_in; + allow_different_codecs = allow_different_codecs_; } CompressedReadBufferFromFile::CompressedReadBufferFromFile( - const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, size_t buf_size) + const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, size_t buf_size, bool allow_different_codecs_) : BufferWithOwnMemory(0) , p_file_in(createReadBufferFromFileBase(path, estimated_size, aio_threshold, mmap_threshold, buf_size)) , file_in(*p_file_in) { compressed_in = &file_in; + allow_different_codecs = allow_different_codecs_; } diff --git a/src/Compression/CompressedReadBufferFromFile.h b/src/Compression/CompressedReadBufferFromFile.h index 1de28062e41..166b2595ef9 100644 --- a/src/Compression/CompressedReadBufferFromFile.h +++ b/src/Compression/CompressedReadBufferFromFile.h @@ -28,11 +28,11 @@ private: bool nextImpl() override; public: - CompressedReadBufferFromFile(ReadBufferFromFileBase & buf); - CompressedReadBufferFromFile(std::unique_ptr buf); + CompressedReadBufferFromFile(std::unique_ptr buf, bool allow_different_codecs_ = false); CompressedReadBufferFromFile( - const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, bool allow_different_codecs_ = false); void seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 93dc8372565..87b3f0a4329 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -73,31 +73,41 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( buffer_size = settings.max_read_buffer_size; const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; - file_in = data_part->volume->getDisk()->readFile( - full_data_path, buffer_size, 0, - settings.min_bytes_to_use_direct_io, - settings.min_bytes_to_use_mmap_io); - - auto full_path = fullPath(data_part->volume->getDisk(), full_data_path); - for (const auto & column : columns) + if (uncompressed_cache) { - std::unique_ptr cached_buffer; - std::unique_ptr non_cached_buffer; - if (uncompressed_cache) - { - cached_buffer = std::make_unique(full_path, file_in.get(), 
uncompressed_cache); - if (profile_callback_) - cached_buffer->setProfileCallback(profile_callback_, clock_type_); - } - else - { - non_cached_buffer = std::make_unique(*file_in); - if (profile_callback_) - non_cached_buffer->setProfileCallback(profile_callback_, clock_type_); - } + auto buffer = std::make_unique( + fullPath(data_part->volume->getDisk(), full_data_path), + [this, full_data_path, buffer_size]() + { + return data_part->volume->getDisk()->readFile( + full_data_path, + buffer_size, + 0, + settings.min_bytes_to_use_direct_io, + settings.min_bytes_to_use_mmap_io); + }, + uncompressed_cache, + /* allow_different_codecs = */ true); - auto column_from_part = getColumnFromPart(column); - column_streams[column_from_part.name] = ColumnStream{std::move(cached_buffer), std::move(non_cached_buffer)}; + if (profile_callback_) + buffer->setProfileCallback(profile_callback_, clock_type_); + + cached_buffer = std::move(buffer); + data_buffer = cached_buffer.get(); + } + else + { + auto buffer = + std::make_unique( + data_part->volume->getDisk()->readFile( + full_data_path, buffer_size, 0, settings.min_bytes_to_use_direct_io, settings.min_bytes_to_use_mmap_io), + /* allow_different_codecs = */ true); + + if (profile_callback_) + buffer->setProfileCallback(profile_callback_, clock_type_); + + non_cached_buffer = std::move(buffer); + data_buffer = non_cached_buffer.get(); } } catch (...) @@ -192,16 +202,15 @@ void MergeTreeReaderCompact::readData( const String & name, IColumn & column, const IDataType & type, size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets) { - auto & stream = column_streams[name]; if (!isContinuousReading(from_mark, column_position)) - seekToMark(stream, from_mark, column_position); + seekToMark(from_mark, column_position); auto buffer_getter = [&](const IDataType::SubstreamPath & substream_path) -> ReadBuffer * { if (only_offsets && (substream_path.size() != 1 || substream_path[0].type != IDataType::Substream::ArraySizes)) return nullptr; - return stream.data_buffer; + return data_buffer; }; IDataType::DeserializeBinaryBulkSettings deserialize_settings; @@ -221,15 +230,15 @@ void MergeTreeReaderCompact::readData( } -void MergeTreeReaderCompact::seekToMark(ColumnStream & stream, size_t row_index, size_t column_index) +void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index) { MarkInCompressedFile mark = marks_loader.getMark(row_index, column_index); try { - if (stream.cached_buffer) - stream.cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); - if (stream.non_cached_buffer) - stream.non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); + if (cached_buffer) + cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); + if (non_cached_buffer) + non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); } catch (Exception & e) { @@ -241,7 +250,6 @@ void MergeTreeReaderCompact::seekToMark(ColumnStream & stream, size_t row_index, } } - bool MergeTreeReaderCompact::isContinuousReading(size_t mark, size_t column_position) { if (!last_read_granule) @@ -251,18 +259,6 @@ bool MergeTreeReaderCompact::isContinuousReading(size_t mark, size_t column_posi || (mark == last_mark + 1 && column_position == 0 && last_column == data_part->getColumns().size() - 1); } -MergeTreeReaderCompact::ColumnStream::ColumnStream( - std::unique_ptr cached_buffer_, - std::unique_ptr non_cached_buffer_) - : 
cached_buffer(std::move(cached_buffer_)) - , non_cached_buffer(std::move(non_cached_buffer_)) -{ - if (cached_buffer) - data_buffer = cached_buffer.get(); - else - data_buffer = non_cached_buffer.get(); -} - namespace { diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index ec765adbf0e..9ef88716579 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -39,21 +39,9 @@ public: private: bool isContinuousReading(size_t mark, size_t column_position); - std::unique_ptr file_in; - - struct ColumnStream - { - std::unique_ptr cached_buffer; - std::unique_ptr non_cached_buffer; - ReadBuffer * data_buffer; - - ColumnStream() = default; - ColumnStream( - std::unique_ptr cached_buffer_, - std::unique_ptr non_cached_buffer_); - }; - - std::unordered_map column_streams; + ReadBuffer * data_buffer; + std::unique_ptr cached_buffer; + std::unique_ptr non_cached_buffer; MergeTreeMarksLoader marks_loader; @@ -66,7 +54,7 @@ private: size_t next_mark = 0; std::optional> last_read_granule; - void seekToMark(ColumnStream & stream, size_t row_index, size_t column_index); + void seekToMark(size_t row_index, size_t column_index); void readData(const String & name, IColumn & column, const IDataType & type, size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets = false); diff --git a/tests/queries/0_stateless/01375_compact_parts_codecs.reference b/tests/queries/0_stateless/01375_compact_parts_codecs.reference index 982c45a26e3..24b3e22d9a6 100644 --- a/tests/queries/0_stateless/01375_compact_parts_codecs.reference +++ b/tests/queries/0_stateless/01375_compact_parts_codecs.reference @@ -1,3 +1,6 @@ 12000 11890 +499500 499500 999 11965 11890 +499500 499500 999 5858 11890 +499500 499500 999 diff --git a/tests/queries/0_stateless/01375_compact_parts_codecs.sql b/tests/queries/0_stateless/01375_compact_parts_codecs.sql index 467745c6fa2..4b285f5bcc1 100644 --- a/tests/queries/0_stateless/01375_compact_parts_codecs.sql +++ b/tests/queries/0_stateless/01375_compact_parts_codecs.sql @@ -8,6 +8,8 @@ SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts WHERE table = 'codecs' AND database = currentDatabase(); +SELECT sum(id), sum(val), max(s) FROM codecs; + DROP TABLE codecs; CREATE TABLE codecs (id UInt32 CODEC(NONE), val UInt32 CODEC(NONE), s String CODEC(NONE)) @@ -18,6 +20,8 @@ SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts WHERE table = 'codecs' AND database = currentDatabase(); +SELECT sum(id), sum(val), max(s) FROM codecs; + DROP TABLE codecs; CREATE TABLE codecs (id UInt32, val UInt32 CODEC(Delta, ZSTD), s String CODEC(ZSTD)) @@ -28,4 +32,6 @@ SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts WHERE table = 'codecs' AND database = currentDatabase(); +SELECT sum(id), sum(val), max(s) FROM codecs; + DROP TABLE codecs; From ea7168580bfa630a2b985a1aee15161d1d1a56c3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 16:55:07 +0300 Subject: [PATCH 051/298] Fixes --- src/Storages/MergeTree/MergeList.h | 2 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/MergeTree/MergeType.cpp | 2 ++ src/Storages/StorageReplicatedMergeTree.cpp | 10 ++++++++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index d0b01913058..4d080ff3569 100644 --- a/src/Storages/MergeTree/MergeList.h +++ 
b/src/Storages/MergeTree/MergeList.h
@@ -179,7 +179,7 @@ inline MergeListEntry::~MergeListEntry()
     {
         std::lock_guard lock{list.mutex};
 
-        if (it->merge_type == MergeType::TTL_DELETE)
+        if (isTTLMergeType(it->merge_type))
             --list.merges_with_ttl_counter;
 
         list.merges.erase(it);
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 692e07d9884..c4d8e7bd11f 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -54,6 +54,7 @@ struct Settings;
    M(UInt64, min_replicated_logs_to_keep, 10, "Keep about this number of last records in ZooKeeper log, even if they are obsolete. It doesn't affect work of tables: used only to diagnose ZooKeeper log before cleaning.", 0) \
    M(Seconds, prefer_fetch_merged_part_time_threshold, 3600, "If time passed after replication log entry creation exceeds this threshold and sum size of parts is greater than \"prefer_fetch_merged_part_size_threshold\", prefer fetching merged part from replica instead of doing merge locally. To speed up very long merges.", 0) \
    M(UInt64, prefer_fetch_merged_part_size_threshold, 10ULL * 1024 * 1024 * 1024, "If sum size of parts exceeds this threshold and time passed after replication log entry creation is greater than \"prefer_fetch_merged_part_time_threshold\", prefer fetching merged part from replica instead of doing merge locally. To speed up very long merges.", 0) \
+    M(Seconds, try_fetch_recompressed_part_timeout, 7200, "Recompression is slow in most cases, so we don't start a merge with recompression until this timeout and instead try to fetch the recompressed part from the replica which was assigned this merge with recompression.", 0) \
    M(Bool, always_fetch_merged_part, 0, "If true, replica never merge parts and always download merged parts from other replicas.", 0) \
    M(UInt64, max_suspicious_broken_parts, 10, "Max broken parts, if more - deny automatic deletion.", 0) \
    M(UInt64, max_files_to_modify_in_alter_columns, 75, "Not apply ALTER if number of files for modification(deletion, addition) more than this.", 0) \
diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp
index 7f1495b14b3..e622eb33e31 100644
--- a/src/Storages/MergeTree/MergeType.cpp
+++ b/src/Storages/MergeTree/MergeType.cpp
@@ -15,6 +15,8 @@ MergeType checkAndGetMergeType(UInt64 merge_type)
         return MergeType::REGULAR;
     else if (merge_type == static_cast(MergeType::TTL_DELETE))
         return MergeType::TTL_DELETE;
+    else if (merge_type == static_cast(MergeType::TTL_RECOMPRESS))
+        return MergeType::TTL_RECOMPRESS;
 
     throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast(merge_type));
 }
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index be03d541ac8..24ffa2bc410 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1308,6 +1308,16 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry)
         return false;
     }
 
+    if (entry.merge_type == MergeType::TTL_RECOMPRESS &&
+        (time(nullptr) - entry.create_time) <= storage_settings_ptr->try_fetch_recompressed_part_timeout.totalSeconds() &&
+        entry.source_replica != replica_name)
+    {
+        LOG_INFO(log, "Will try to fetch part {} until '{}' because this part is assigned to a recompression merge.
" + "Source replica {} will try to merge this part first", entry.new_part_name, + LocalDateTime(entry.create_time + storage_settings_ptr->try_fetch_recompressed_part_timeout.totalSeconds()), entry.source_replica); + return false; + } + DataPartsVector parts; bool have_all_parts = true; for (const String & name : entry.source_parts) From fecb2f13115a1776e5fb9b1cd0f1c3f91e2c5ca5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 17:08:43 +0300 Subject: [PATCH 052/298] Less copypaste --- src/DataStreams/TTLBlockInputStream.cpp | 34 +++++-------------- src/DataStreams/TTLBlockInputStream.h | 3 ++ src/Storages/MergeTree/MergeTreeData.cpp | 2 -- .../MergeTree/MergeTreeDataMergerMutator.cpp | 14 ++------ .../MergeTree/MergeTreeDataMergerMutator.h | 4 ++- src/Storages/MergeTree/MergeTreeSettings.h | 3 +- 6 files changed, 19 insertions(+), 41 deletions(-) diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index e1586286678..85d9c7fead2 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -370,13 +370,12 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) block.erase(column); } -void TTLBlockInputStream::updateMovesTTL(Block & block) +void TTLBlockInputStream::updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map) { std::vector columns_to_remove; - for (const auto & ttl_entry : metadata_snapshot->getMoveTTLs()) + for (const auto & ttl_entry : descriptions) { - auto & new_ttl_info = new_ttl_infos.moves_ttl[ttl_entry.result_column]; - + auto & new_ttl_info = ttl_info_map[ttl_entry.result_column]; if (!block.has(ttl_entry.result_column)) { columns_to_remove.push_back(ttl_entry.result_column); @@ -396,31 +395,14 @@ void TTLBlockInputStream::updateMovesTTL(Block & block) block.erase(column); } +void TTLBlockInputStream::updateMovesTTL(Block & block) +{ + updateTTLWithDescriptions(block, metadata_snapshot->getMoveTTLs(), new_ttl_infos.moves_ttl); +} void TTLBlockInputStream::updateRecompressionTTL(Block & block) { - std::vector columns_to_remove; - for (const auto & ttl_entry : metadata_snapshot->getRecompressionTTLs()) - { - auto & new_ttl_info = new_ttl_infos.recompression_ttl[ttl_entry.result_column]; - - if (!block.has(ttl_entry.result_column)) - { - columns_to_remove.push_back(ttl_entry.result_column); - ttl_entry.expression->execute(block); - } - - const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get(); - - for (size_t i = 0; i < block.rows(); ++i) - { - UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); - new_ttl_info.update(cur_ttl); - } - } - - for (const String & column : columns_to_remove) - block.erase(column); + updateTTLWithDescriptions(block, metadata_snapshot->getRecompressionTTLs(), new_ttl_infos.recompression_ttl); } UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind) diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h index 18670021ec9..774b413ed1b 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/DataStreams/TTLBlockInputStream.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -75,6 +76,8 @@ private: /// Finalize agg_result into result_columns void finalizeAggregates(MutableColumns & result_columns); + void updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map); + /// Updates TTL for moves void updateMovesTTL(Block & block); diff 
--git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e876ecb8b34..8ba00f29d9d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3039,8 +3039,6 @@ CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_c auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); - LOG_DEBUG(log, "RECOMPRESSION ENTRIES SIZE {}", recompression_ttl_entries.size()); - LOG_DEBUG(log, "TTL INFOS SIZE {}", ttl_infos.recompression_ttl.size()); auto best_ttl_entry = selectTTLEntryForTTLInfos(recompression_ttl_entries, ttl_infos.recompression_ttl, current_time, false); if (best_ttl_entry) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 9c104d042d9..ddad80e1b76 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -225,7 +225,6 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( return false; } - //LOG_DEBUG(log, "SELECTING PARTS TO MERGE"); time_t current_time = std::time(nullptr); IMergeSelector::PartsRanges parts_ranges; @@ -296,10 +295,8 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( if (metadata_snapshot->hasAnyTTL() && merge_with_ttl_allowed && !ttl_merges_blocker.isCancelled()) { - - //LOG_DEBUG(log, "SELECTING WITH TTL"); TTLDeleteMergeSelector delete_ttl_selector( - next_ttl_merge_times_by_partition, + next_delete_ttl_merge_times_by_partition, current_time, data_settings->merge_with_ttl_timeout, data_settings->ttl_only_drop_parts); @@ -309,20 +306,15 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( future_part.merge_type = MergeType::TTL_DELETE; else if (metadata_snapshot->hasAnyRecompressionTTL()) { - - //LOG_DEBUG(log, "SELECTING WITH RECOMPRESSION"); TTLRecompressMergeSelector recompress_ttl_selector( - next_ttl_merge_times_by_partition, + next_recompress_ttl_merge_times_by_partition, current_time, - data_settings->merge_with_ttl_timeout, + data_settings->merge_with_recompression_ttl_timeout, metadata_snapshot->getRecompressionTTLs()); parts_to_merge = recompress_ttl_selector.select(parts_ranges, max_total_size_to_merge); if (!parts_to_merge.empty()) - { - //LOG_DEBUG(log, "SELECTED PARTS: {}", parts_to_merge.size()); future_part.merge_type = MergeType::TTL_RECOMPRESS; - } } } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 6ecfef728d2..5f6b9246d68 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -251,8 +251,10 @@ private: time_t disk_space_warning_time = 0; /// Stores the next TTL merge due time for each partition (used only by TTLMergeSelector) - ITTLMergeSelector::PartitionIdToTTLs next_ttl_merge_times_by_partition; + ITTLMergeSelector::PartitionIdToTTLs next_delete_ttl_merge_times_by_partition; + /// Stores the next TTL merge due time for each partition (used only by TTLMergeSelector) + ITTLMergeSelector::PartitionIdToTTLs next_recompress_ttl_merge_times_by_partition; /// Performing TTL merges independently for each partition guarantees that /// there is only a limited number of TTL merges and no partition stores data, that is too stale }; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c4d8e7bd11f..5e1d85e54d5 100644 --- 
a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -86,7 +86,8 @@ struct Settings; M(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \ M(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \ M(UInt64, min_index_granularity_bytes, 1024, "Minimum amount of bytes in single granule.", 1024) \ - M(Int64, merge_with_ttl_timeout, 0, "Minimal time in seconds, when merge with TTL can be repeated.", 0) \ + M(Int64, merge_with_ttl_timeout, 3600 * 24, "Minimal time in seconds, when merge with delete TTL can be repeated.", 0) \ + M(Int64, merge_with_recompression_ttl_timeout, 3600 * 24, "Minimal time in seconds, when merge with recompression TTL can be repeated.", 0) \ M(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \ M(Bool, write_final_mark, 1, "Write final mark after end of column (0 - disabled, do nothing if index_granularity_bytes=0)", 0) \ M(Bool, enable_mixed_granularity_parts, 1, "Enable parts with adaptive and non adaptive granularity", 0) \ From 21de2f54f69d45007cd40fb42d28fb96193f14d5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 17:13:57 +0300 Subject: [PATCH 053/298] Fix tests --- tests/integration/test_recompression_ttl/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_recompression_ttl/test.py b/tests/integration/test_recompression_ttl/test.py index 5ea0f91d495..a581dd24e43 100644 --- a/tests/integration/test_recompression_ttl/test.py +++ b/tests/integration/test_recompression_ttl/test.py @@ -52,7 +52,7 @@ def wait_part_and_get_compression_codec(node, table, part_name, retries=40): def test_recompression_simple(started_cluster): - node1.query("CREATE TABLE table_for_recompression (d DateTime, key UInt64, data String) ENGINE MergeTree() ORDER BY tuple() TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(10)) SETTINGS merge_with_ttl_timeout = 0") + node1.query("CREATE TABLE table_for_recompression (d DateTime, key UInt64, data String) ENGINE MergeTree() ORDER BY tuple() TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(10)) SETTINGS merge_with_recompression_ttl_timeout = 0") node1.query("INSERT INTO table_for_recompression VALUES (now(), 1, '1')") assert node1.query("SELECT default_compression_codec FROM system.parts where name = 'all_1_1_0'") == "LZ4\n" @@ -75,7 +75,7 @@ def test_recompression_multiple_ttls(started_cluster): node2.query("CREATE TABLE table_for_recompression (d DateTime, key UInt64, data String) ENGINE MergeTree() ORDER BY tuple() \ TTL d + INTERVAL 5 SECOND RECOMPRESS CODEC(ZSTD(10)), \ d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(11)), \ - d + INTERVAL 15 SECOND RECOMPRESS CODEC(ZSTD(12)) SETTINGS merge_with_ttl_timeout = 0") + d + INTERVAL 15 SECOND RECOMPRESS CODEC(ZSTD(12)) SETTINGS merge_with_recompression_ttl_timeout = 0") node2.query("INSERT INTO table_for_recompression VALUES (now(), 1, '1')") @@ -111,7 +111,7 @@ def test_recompression_replicated(started_cluster): for i, node in enumerate([node1, node2]): node.query("CREATE TABLE recompression_replicated (d DateTime, key UInt64, data String) \ ENGINE ReplicatedMergeTree('/test/rr', '{}') ORDER BY tuple() \ - TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(13)) SETTINGS merge_with_ttl_timeout = 0".format(i + 1)) + TTL d + INTERVAL 10 SECOND RECOMPRESS CODEC(ZSTD(13)) SETTINGS 
merge_with_recompression_ttl_timeout = 0".format(i + 1)) node1.query("INSERT INTO recompression_replicated VALUES (now(), 1, '1')") node2.query("SYSTEM SYNC REPLICA recompression_replicated", timeout=5) From f48d654d3515b2e8d9160c02ac038dbe08132fbd Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 17:15:34 +0300 Subject: [PATCH 054/298] Less garbage --- src/Interpreters/MutationsInterpreter.cpp | 8 -------- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 1 - 2 files changed, 9 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index ef95b25eb98..9d35b339d94 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -533,16 +533,8 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) /// Special step to recalculate affected indices and TTL expressions. stages.emplace_back(context); for (const auto & column : unchanged_columns) - { - //std::cerr << "ADDING UNCHANGED COLUMN TO STAGE:" << column << std::endl; stages.back().column_to_updated.emplace( column, std::make_shared(column)); - //std::cerr << "OUTPUT COLUMNS:" << stages.back().output_columns.size() << std::endl; - //for (const auto & col : stages.back().output_columns) - //{ - // std::cerr << "OUTPUT COLUMN:" << col << std::endl; - //} - } } } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index ddad80e1b76..977c6faace7 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -706,7 +706,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// the order is reverse. This annoys TSan even though one lock is locked in shared mode and thus /// deadlock is impossible. auto compression_codec = data.getCompressionCodecForPart(merge_entry->total_size_bytes_compressed, new_data_part->ttl_infos, time_of_merge); - LOG_DEBUG(log, "CHOOSEN CODEC {} FOR PART {}", queryToString(compression_codec->getCodecDesc()), new_data_part->name); /// TODO: Should it go through IDisk interface? String rows_sources_file_path; From c933f72adb8716316df8b37200fa0dd6f9e1a2e1 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Fri, 4 Sep 2020 17:17:27 +0300 Subject: [PATCH 055/298] Disable S3 requests processing during context shutdown to speed up termination process. --- src/Disks/IDisk.h | 3 +++ src/Disks/S3/DiskS3.cpp | 5 +++++ src/Disks/S3/DiskS3.h | 2 ++ src/Interpreters/Context.cpp | 7 +++++++ 4 files changed, 17 insertions(+) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 53dc4999dc4..17de6db3487 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -183,6 +183,9 @@ public: /// Return disk type - "local", "s3", etc. virtual const String getType() const = 0; + /// Invoked when Global Context is shutdown. + virtual void shutdown() { } + private: /// Returns executor to perform asynchronous operations. 
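    /// Editorial sketch (not part of this patch): the shutdown() hook above defaults
    /// to a no-op on purpose, so existing IDisk implementations need no changes; only
    /// disks with in-flight background I/O override it. For example, the DiskS3
    /// override added in this same commit boils down to:
    ///
    ///     void DiskS3::shutdown() { client->DisableRequestProcessing(); }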
Executor & getExecutor() { return *executor; } diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 5aa57518c83..cff7cc3429a 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -746,4 +746,9 @@ void DiskS3::setReadOnly(const String & path) Poco::File(metadata_path + path).setReadOnly(true); } +void DiskS3::shutdown() +{ + client->DisableRequestProcessing(); +} + } diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 34f00af6439..db352feb063 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -102,6 +102,8 @@ public: const String getType() const override { return "s3"; } + void shutdown() override; + private: bool tryReserve(UInt64 bytes); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1318f0331c4..3f65fdb8de0 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1973,6 +1973,13 @@ void Context::reloadConfig() const void Context::shutdown() { + auto disks = getDisksMap(); + for (auto & [disk_name, disk] : disks) + { + LOG_INFO(shared->log, "Shutdown disk {}", disk_name); + disk->shutdown(); + } + shared->shutdown(); } From 79ef72178eceb8e9ae85999226b55a805f8c46e5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 17:18:49 +0300 Subject: [PATCH 056/298] Less garbage --- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 977c6faace7..b88d80dc41f 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1135,20 +1135,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor bool need_remove_expired_values = false; if (in && shouldExecuteTTL(metadata_snapshot, in->getHeader().getNamesAndTypesList().getNames(), commands_for_part)) - { - //std::cerr << "GOING TO MATERIALIZE TTL\n"; need_remove_expired_values = true; - } - else - { - //std::cerr << "NOT GOING TO MATERIALIZE TTL\n"; - //std::cerr << "IN IS NULL:" << (in == nullptr) << std::endl; - } /// All columns from part are changed and may be some more that were missing before in part if (!isWidePart(source_part) || (interpreter && interpreter->isAffectingAllColumns())) { - //std::cerr << "MUTATING ALL PART COLUMNS\n"; /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex /// (which is locked in data.getTotalActiveSizeInBytes()) /// (which is locked in shared mode when input streams are created) and when inserting new data @@ -1179,9 +1170,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor NameToNameVector files_to_rename = collectFilesForRenames(source_part, for_file_renames, mrk_extension); if (need_remove_expired_values) - { files_to_skip.insert("ttl.txt"); - } /// Create hardlinks for unchanged files for (auto it = disk->iterateDirectory(source_part->getFullRelativePath()); it->isValid(); it->next()) { From 2d1042614eb74a33d97df90ba0f6a198e58ec5e9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 17:19:41 +0300 Subject: [PATCH 057/298] Less comments --- src/Storages/TTLDescription.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 8a212074027..7f55badf819 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -273,7 +273,6 @@ TTLDescription 
TTLDescription::getTTLFromAST( } else if (ttl_element->mode == TTLMode::RECOMPRESS) { - //std::cerr << "GOT INTO RECOMPRESS\n"; result.recompression_codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( ttl_element->recompression_codec, {}, !context.getSettingsRef().allow_suspicious_codecs); @@ -337,7 +336,6 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( } else if (ttl.mode == TTLMode::RECOMPRESS) { - //std::cerr << "GOT RECOMPRESSIOn TTL\n"; result.recompression_ttl.emplace_back(std::move(ttl)); } else From 37a2bd0bfdf189814e00dbf36dd37c135ee1d81a Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Sep 2020 17:23:32 +0300 Subject: [PATCH 058/298] less debug --- src/Storages/MergeTree/TTLMergeSelector.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/MergeTree/TTLMergeSelector.cpp b/src/Storages/MergeTree/TTLMergeSelector.cpp index fccd0f28f84..bb7c001eae1 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.cpp +++ b/src/Storages/MergeTree/TTLMergeSelector.cpp @@ -118,8 +118,6 @@ bool TTLRecompressMergeSelector::isTTLAlreadySatisfied(const IMergeSelector::Par return ""; return queryToString(query); }; - //LOG_DEBUG(&Poco::Logger::get("RECOMPRESS SELECTOR"), "PART CODEC: {}", ast_to_str(part.compression_codec_desc)); - //LOG_DEBUG(&Poco::Logger::get("RECOMPRESS SELECTOR"), "ENTRY CODEC: {}", ast_to_str(ttl_description->recompression_codec)); return ast_to_str(ttl_description->recompression_codec) == ast_to_str(part.compression_codec_desc); } From c73bb980024a43bcfdbdd633d3fb00a9e3099258 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 4 Sep 2020 18:07:17 +0300 Subject: [PATCH 059/298] fix clang-tidy --- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 22df2abecf3..ab064689f47 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -143,7 +143,7 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block) void MergeTreeDataPartWriterCompact::writeColumnSingleGranule( const ColumnWithTypeAndName & column, const CompressedStreamPtr & stream, - size_t from_row, size_t number_of_rows) const + size_t from_row, size_t number_of_rows) { IDataType::SerializeBinaryBulkStatePtr state; IDataType::SerializeBinaryBulkSettings serialize_settings; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index a121554f4be..fecf5ce40e8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -28,7 +28,7 @@ protected: private: void writeBlock(const Block & block); - void addToChecksums(MergeTreeDataPartChecksums & checksumns); + void addToChecksums(MergeTreeDataPartChecksums & checksums); Block header; @@ -74,11 +74,11 @@ private: HashingWriteBuffer marks; /// Write single granule of one column (rows between 2 marks) - void writeColumnSingleGranule( + static void writeColumnSingleGranule( const ColumnWithTypeAndName & column, const CompressedStreamPtr & stream, size_t from_row, - size_t number_of_rows) const; + size_t number_of_rows); }; } From dbeeb7d141e8c9fa8a6c208ba827cd86d1ad75cb Mon Sep 17 00:00:00 2001 From: yulu86 Date: Sat, 5 Sep 
2020 01:52:23 +0800 Subject: [PATCH 060/298] optimize chinese tutorial docs to make it more human readable --- docs/zh/getting-started/tutorial.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md index 38d5a586806..a17adb2d163 100644 --- a/docs/zh/getting-started/tutorial.md +++ b/docs/zh/getting-started/tutorial.md @@ -1,6 +1,4 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 12 toc_title: "\u6559\u7A0B" --- @@ -9,25 +7,25 @@ toc_title: "\u6559\u7A0B" ## 从本教程中可以期待什么? {#what-to-expect-from-this-tutorial} -通过本教程,您将学习如何设置一个简单的ClickHouse集群。 它会很小,但容错和可扩展。 然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。 +通过本教程,您将学习如何设置一个简单的ClickHouse集群。 它会很小,但却是容错和可扩展的。 然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。 ## 单节点设置 {#single-node-setup} -为了推迟分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。 ClickHouse通常是从安装 [黛布](install.md#install-from-deb-packages) 或 [rpm](install.md#from-rpm-packages) 包,但也有 [替代办法](install.md#from-docker-image) 对于不支持它们的操作系统。 +为了推迟分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。 ClickHouse通常是从[deb](install.md#install-from-deb-packages) 或 [rpm](install.md#from-rpm-packages) 包安装,但对于不支持它们的操作系统也有 [替代方法](install.md#from-docker-image) 。 -例如,您选择了 `deb` 包和执行: +例如,您选择了从 `deb` 包安装,执行: ``` bash {% include 'install/deb.sh' %} ``` -我们在安装的软件包中有什么: +在我们安装的软件中包含这些包: -- `clickhouse-client` 包包含 [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 应用程序,交互式ClickHouse控制台客户端。 -- `clickhouse-common` 包包含一个ClickHouse可执行文件。 -- `clickhouse-server` 包包含要作为服务器运行ClickHouse的配置文件。 +- `clickhouse-client` 包,包含 [clickhouse-client](../interfaces/cli.md) 应用程序,它是交互式ClickHouse控制台客户端。 +- `clickhouse-common` 包,包含一个ClickHouse可执行文件。 +- `clickhouse-server` 包,包含要作为服务端运行的ClickHouse配置文件。 -服务器配置文件位于 `/etc/clickhouse-server/`. 在进一步讨论之前,请注意 `` 元素in `config.xml`. Path确定数据存储的位置,因此应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`. 如果你想调整配置,直接编辑并不方便 `config.xml` 文件,考虑到它可能会在未来的软件包更新中被重写。 复盖配置元素的推荐方法是创建 [在配置文件。d目录](../operations/configuration-files.md) 它作为 “patches” 要配置。xml +服务端配置文件位于 `/etc/clickhouse-server/`。在进一步讨论之前,请注意 `config.xml`文件中的`` 元素. Path决定了数据存储的位置,因此该位置应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`。如果你想调整配置,直接编辑并不方便 `config.xml` 文件,考虑到它可能会在未来的软件包更新中被重写。 复盖配置元素的推荐方法是创建 [在配置文件。d目录](../operations/configuration-files.md) 它作为 “patches” 要配置。xml 你可能已经注意到了, `clickhouse-server` 安装包后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务器的方式取决于您的init系统,通常情况下,它是: From 9d11d4dd449a5bf3507e090e32243657bec7d0d5 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Fri, 4 Sep 2020 21:45:22 +0300 Subject: [PATCH 061/298] Variable inline. 
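A sketch of the resulting loop (illustration only, not in the original commit
message; assumes getDisksMap() returns the map by value):

    for (auto & [disk_name, disk] : getDisksMap())  // temporary map is bound by
        disk->shutdown();                           // the range-for itself

The temporary returned by getDisksMap() is lifetime-extended over the whole
range-based for loop, so dropping the named local does not introduce a dangling
reference.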
--- src/Interpreters/Context.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 3f65fdb8de0..ce6b622d9ef 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1973,8 +1973,7 @@ void Context::reloadConfig() const void Context::shutdown() { - auto disks = getDisksMap(); - for (auto & [disk_name, disk] : disks) + for (auto & [disk_name, disk] : getDisksMap()) { LOG_INFO(shared->log, "Shutdown disk {}", disk_name); disk->shutdown(); From 439bcab6d66d28127feac8cffcde10b627bd5513 Mon Sep 17 00:00:00 2001 From: yulu86 Date: Sat, 5 Sep 2020 10:36:35 +0800 Subject: [PATCH 062/298] update chinese tutorial to make it more human readable --- docs/zh/getting-started/tutorial.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md index a17adb2d163..43c7ed0ec59 100644 --- a/docs/zh/getting-started/tutorial.md +++ b/docs/zh/getting-started/tutorial.md @@ -25,9 +25,9 @@ toc_title: "\u6559\u7A0B" - `clickhouse-common` 包,包含一个ClickHouse可执行文件。 - `clickhouse-server` 包,包含要作为服务端运行的ClickHouse配置文件。 -服务端配置文件位于 `/etc/clickhouse-server/`。在进一步讨论之前,请注意 `config.xml`文件中的`` 元素. Path决定了数据存储的位置,因此该位置应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`。如果你想调整配置,直接编辑并不方便 `config.xml` 文件,考虑到它可能会在未来的软件包更新中被重写。 复盖配置元素的推荐方法是创建 [在配置文件。d目录](../operations/configuration-files.md) 它作为 “patches” 要配置。xml +服务端配置文件位于 `/etc/clickhouse-server/`。 在进一步讨论之前,请注意 `config.xml`文件中的`` 元素. Path决定了数据存储的位置,因此该位置应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`。 如果你想调整配置,考虑到它可能会在未来的软件包更新中被重写,直接编辑`config.xml` 文件并不方便。 推荐的方法是在[配置文件](../operations/configuration-files.md)目录创建文件,作为config.xml文件的“补丁”,用以复写配置元素。 -你可能已经注意到了, `clickhouse-server` 安装包后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务器的方式取决于您的init系统,通常情况下,它是: +你可能已经注意到了, `clickhouse-server` 安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样: ``` bash sudo service clickhouse-server start @@ -39,13 +39,13 @@ sudo service clickhouse-server start sudo /etc/init.d/clickhouse-server start ``` -服务器日志的默认位置是 `/var/log/clickhouse-server/`. 服务器已准备好处理客户端连接一旦它记录 `Ready for connections` 消息 +服务端日志的默认位置是 `/var/log/clickhouse-server/`。当服务端在日志中记录 `Ready for connections` 消息,即表示服务端已准备好处理客户端连接。 -一旦 `clickhouse-server` 正在运行我们可以利用 `clickhouse-client` 连接到服务器并运行一些测试查询,如 `SELECT "Hello, world!";`. +一旦 `clickhouse-server` 启动并运行,我们可以利用 `clickhouse-client` 连接到服务端,并运行一些测试查询,如 `SELECT "Hello, world!";`.
-Clickhouse-客户端的快速提示 +Clickhouse-client的快速提示 交互模式: From 7b95e56e8c902578f8fcebc5d9edeccce1eb35ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Sep 2020 03:09:40 +0300 Subject: [PATCH 063/298] Advancements --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 187 +++++++++++++----- 1 file changed, 133 insertions(+), 54 deletions(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index 314bba0d5b4..d6264a63978 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -1,6 +1,11 @@ #include #include #include +#include +#include +#include + +#include #include @@ -16,7 +21,8 @@ #include -/** How to use: +static constexpr auto documentation = R"( +Prepare the database by executing the following queries: DROP DATABASE IF EXISTS git; CREATE DATABASE git; @@ -44,11 +50,11 @@ CREATE TABLE git.file_changes change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), new_file_path LowCardinality(String), old_file_path LowCardinality(String), - lines_added UInt16, - lines_deleted UInt16, - hunks_added UInt16, - hunks_removed UInt16, - hunks_changed UInt16, + lines_added UInt32, + lines_deleted UInt32, + hunks_added UInt32, + hunks_removed UInt32, + hunks_changed UInt32, commit_hash String, author_name LowCardinality(String), @@ -69,11 +75,11 @@ CREATE TABLE git.file_changes CREATE TABLE git.line_changes ( sign Int8, - line_number_old UInt16, - line_number_new UInt16, - hunk_num UInt16, - hunk_start_line_number_old UInt16, - hunk_start_line_number_new UInt16, + line_number_old UInt32, + line_number_new UInt32, + hunk_num UInt32, + hunk_start_line_number_old UInt32, + hunk_start_line_number_new UInt32, hunk_context LowCardinality(String), line LowCardinality(String), indent UInt8, @@ -82,11 +88,11 @@ CREATE TABLE git.line_changes file_change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), new_file_path LowCardinality(String), old_file_path LowCardinality(String), - file_lines_added UInt16, - file_lines_deleted UInt16, - file_hunks_added UInt16, - file_hunks_removed UInt16, - file_hunks_changed UInt16, + file_lines_added UInt32, + file_lines_deleted UInt32, + file_hunks_added UInt32, + file_hunks_removed UInt32, + file_hunks_changed UInt32, commit_hash String, author_name LowCardinality(String), @@ -104,12 +110,15 @@ CREATE TABLE git.line_changes commit_hunks_changed UInt32 ) ENGINE = MergeTree ORDER BY time; +Insert the data with the following commands: + clickhouse-client --query "INSERT INTO git.commits FORMAT TSV" < commits.tsv clickhouse-client --query "INSERT INTO git.file_changes FORMAT TSV" < file_changes.tsv clickhouse-client --query "INSERT INTO git.line_changes FORMAT TSV" < line_changes.tsv - */ +)"; +namespace po = boost::program_options; namespace DB { @@ -141,11 +150,11 @@ void writeText(LineType type, WriteBuffer & out) struct LineChange { int8_t sign{}; /// 1 if added, -1 if deleted - uint16_t line_number_old{}; - uint16_t line_number_new{}; - uint16_t hunk_num{}; /// ordinal number of hunk in diff, starting with 0 - uint16_t hunk_start_line_number_old{}; - uint16_t hunk_start_line_number_new{}; + uint32_t line_number_old{}; + uint32_t line_number_new{}; + uint32_t hunk_num{}; /// ordinal number of hunk in diff, starting with 0 + uint32_t hunk_start_line_number_old{}; + uint32_t hunk_start_line_number_new{}; std::string hunk_context; /// The context (like a line with function name) as it is 
calculated by git std::string line; /// Line content without leading whitespaces uint8_t indent{}; /// The number of leading whitespaces or tabs * 4 @@ -251,11 +260,11 @@ struct FileChange FileChangeType change_type{}; std::string new_file_path; std::string old_file_path; - uint16_t lines_added{}; - uint16_t lines_deleted{}; - uint16_t hunks_added{}; - uint16_t hunks_removed{}; - uint16_t hunks_changed{}; + uint32_t lines_added{}; + uint32_t lines_deleted{}; + uint32_t hunks_added{}; + uint32_t hunks_removed{}; + uint32_t hunks_changed{}; void writeTextWithoutNewline(WriteBuffer & out) const { @@ -395,13 +404,38 @@ struct Result }; -void processCommit(std::string hash, Result & result) +struct Options +{ + bool skip_commits_without_parents = true; + std::optional skip_paths; + std::unordered_set skip_commits; + size_t diff_size_limit = 0; + + Options(const po::variables_map & options) + { + skip_commits_without_parents = options["skip-commits-without-parents"].as(); + if (options.count("skip-paths")) + { + skip_paths.emplace(options["skip-paths"].as()); + } + if (options.count("skip-commit")) + { + auto vec = options["skip-commit"].as>(); + skip_commits.insert(vec.begin(), vec.end()); + } + diff_size_limit = options["diff-size-limit"].as(); + } +}; + + +void processCommit( + const Options & options, size_t commit_num, size_t total_commits, std::string hash, Result & result) { std::string command = fmt::format( - "git show --raw --pretty='format:%at%x09%aN%x09%aE%x0A%s%x00' --patch --unified=0 {}", + "git show --raw --pretty='format:%at%x09%aN%x09%aE%x09%P%x0A%s%x00' --patch --unified=0 {}", hash); - std::cerr << command << "\n"; + //std::cerr << command << "\n"; auto commit_info = ShellCommand::execute(command); auto & in = commit_info->out; @@ -414,10 +448,23 @@ void processCommit(std::string hash, Result & result) readText(commit.author_name, in); assertChar('\t', in); readText(commit.author_email, in); + assertChar('\t', in); + std::string parent_hash; + readString(parent_hash, in); assertChar('\n', in); readNullTerminated(commit.message, in); - std::cerr << fmt::format("{}\t{}\n", toString(LocalDateTime(commit.time)), commit.message); + std::string message_to_print = commit.message; + std::replace_if(message_to_print.begin(), message_to_print.end(), [](char c){ return std::iscntrl(c); }, ' '); + + fmt::print("{}% {} {} {}\n", + commit_num * 100 / total_commits, toString(LocalDateTime(commit.time)), hash, message_to_print); + + if (options.skip_commits_without_parents && commit_num != 0 && parent_hash.empty()) + { + std::cerr << "Warning: skipping commit without parents\n"; + return; + } if (!in.eof()) assertChar('\n', in); @@ -487,9 +534,12 @@ void processCommit(std::string hash, Result & result) assertChar('\n', in); - file_changes.emplace( - file_change.new_file_path, - FileChangeAndLineChanges{ file_change, {} }); + if (!(options.skip_paths && re2_st::RE2::PartialMatch(file_change.new_file_path, *options.skip_paths))) + { + file_changes.emplace( + file_change.new_file_path, + FileChangeAndLineChanges{ file_change, {} }); + } } if (!in.eof()) @@ -517,16 +567,14 @@ void processCommit(std::string hash, Result & result) { auto file_name = new_file_path.empty() ? 
old_file_path : new_file_path; auto it = file_changes.find(file_name); - if (file_changes.end() == it) - std::cerr << fmt::format("Warning: skipping bad file name {}\n", file_name); - else + if (file_changes.end() != it) file_change_and_line_changes = &it->second; } if (file_change_and_line_changes) { - uint16_t old_lines = 1; - uint16_t new_lines = 1; + uint32_t old_lines = 1; + uint32_t new_lines = 1; assertChar('-', in); readText(line_change.hunk_start_line_number_old, in); @@ -644,6 +692,9 @@ void processCommit(std::string hash, Result & result) } } + if (commit.lines_added + commit.lines_deleted > options.diff_size_limit) + return; + /// Write the result /// commits table @@ -684,14 +735,20 @@ void processCommit(std::string hash, Result & result) } -void processLog() +void processLog(const Options & options) { Result result; - std::string command = "git log --no-merges --pretty=%H"; - std::cerr << command << "\n"; + std::string command = "git log --reverse --no-merges --pretty=%H"; + fmt::print("{}\n", command); auto git_log = ShellCommand::execute(command); + /// Collect hashes in memory. This is inefficient but allows to display beautiful progress. + /// The number of commits is in order of single millions for the largest repositories, + /// so don't care about potential waste of ~100 MB of memory. + + std::vector hashes; + auto & in = git_log->out; while (!in.eof()) { @@ -699,33 +756,55 @@ void processLog() readString(hash, in); assertChar('\n', in); - std::cerr << fmt::format("Processing commit {}\n", hash); - processCommit(std::move(hash), result); + if (!options.skip_commits.count(hash)) + hashes.emplace_back(std::move(hash)); + } + + size_t num_commits = hashes.size(); + fmt::print("Total {} commits to process.\n", num_commits); + + for (size_t i = 0; i < num_commits; ++i) + { + processCommit(options, i, num_commits, hashes[i], result); } } } -int main(int /*argc*/, char ** /*argv*/) +int main(int argc, char ** argv) try { using namespace DB; -/* boost::program_options::options_description desc("Allowed options"); - desc.add_options()("help,h", "produce help message"); + po::options_description desc("Allowed options"); + desc.add_options() + ("help,h", "produce help message") + ("skip-commits-without-parents", po::value()->default_value(true), + "Skip commits without parents (except the initial commit)." + " These commits are usually erroneous but they can make sense in very rare cases.") + ("skip-paths", po::value(), + "Skip paths that matches regular expression (re2 syntax).") + ("skip-commit", po::value>(), + "Skip commit with specified hash. 
The option can be specified multiple times.") + ("diff-size-limit", po::value()->default_value(0), + "Skip commits whose diff size (number of added + removed lines) is larger than specified threshold") + ; - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + po::variables_map options; + po::store(boost::program_options::parse_command_line(argc, argv, desc), options); - if (options.count("help") || argc != 2) + if (options.count("help")) { - std::cout << "Usage: " << argv[0] << std::endl; - std::cout << desc << std::endl; + std::cout << documentation << '\n' + << "Usage: " << argv[0] << '\n' + << desc << '\n' + << "\nExample:\n" + << "\n./git-to-clickhouse --diff-size-limit 100000 --skip-paths '^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/'\n"; return 1; - }*/ + } - processLog(); + processLog(options); return 0; } catch (...) From abe836a584aeaf71b0ba04b8c8cc670385519e94 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Sep 2020 03:13:39 +0300 Subject: [PATCH 064/298] Remove emails as they are mostly useless --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index d6264a63978..9203efb0043 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -30,8 +30,7 @@ CREATE DATABASE git; CREATE TABLE git.commits ( hash String, - author_name LowCardinality(String), - author_email LowCardinality(String), + author LowCardinality(String), time DateTime, message String, files_added UInt32, @@ -57,8 +56,7 @@ CREATE TABLE git.file_changes hunks_changed UInt32, commit_hash String, - author_name LowCardinality(String), - author_email LowCardinality(String), + author LowCardinality(String), time DateTime, commit_message String, commit_files_added UInt32, @@ -95,8 +93,7 @@ CREATE TABLE git.line_changes file_hunks_changed UInt32, commit_hash String, - author_name LowCardinality(String), - author_email LowCardinality(String), + author LowCardinality(String), time DateTime, commit_message String, commit_files_added UInt32, @@ -295,8 +292,7 @@ struct FileChangeAndLineChanges struct Commit { std::string hash; - std::string author_name; - std::string author_email; + std::string author; time_t time{}; std::string message; uint32_t files_added{}; @@ -313,9 +309,7 @@ struct Commit { writeText(hash, out); writeChar('\t', out); - writeText(author_name, out); - writeChar('\t', out); - writeText(author_email, out); + writeText(author, out); writeChar('\t', out); writeText(time, out); writeChar('\t', out); @@ -445,9 +439,7 @@ void processCommit( readText(commit.time, in); assertChar('\t', in); - readText(commit.author_name, in); - assertChar('\t', in); - readText(commit.author_email, in); + readText(commit.author, in); assertChar('\t', in); std::string parent_hash; readString(parent_hash, in); From 09978decbdf40c95e7cd8855ad804a2ad31cc09d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Sep 2020 03:47:00 +0300 Subject: [PATCH 065/298] Adjustments --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 52 +++++++++++++------ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp 
b/utils/git-to-clickhouse/git-to-clickhouse.cpp index 9203efb0043..a81bc6679a7 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -47,8 +48,9 @@ CREATE TABLE git.commits CREATE TABLE git.file_changes ( change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), - new_file_path LowCardinality(String), - old_file_path LowCardinality(String), + path LowCardinality(String), + old_path LowCardinality(String), + file_extension LowCardinality(String), lines_added UInt32, lines_deleted UInt32, hunks_added UInt32, @@ -84,8 +86,9 @@ CREATE TABLE git.line_changes line_type Enum('Empty' = 0, 'Comment' = 1, 'Punct' = 2, 'Code' = 3), file_change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), - new_file_path LowCardinality(String), - old_file_path LowCardinality(String), + path LowCardinality(String), + old_path LowCardinality(String), + file_extension LowCardinality(String), file_lines_added UInt32, file_lines_deleted UInt32, file_hunks_added UInt32, @@ -255,8 +258,9 @@ void writeText(FileChangeType type, WriteBuffer & out) struct FileChange { FileChangeType change_type{}; - std::string new_file_path; - std::string old_file_path; + std::string path; + std::string old_path; + std::string file_extension; uint32_t lines_added{}; uint32_t lines_deleted{}; uint32_t hunks_added{}; @@ -267,9 +271,11 @@ struct FileChange { writeText(change_type, out); writeChar('\t', out); - writeText(new_file_path, out); + writeText(path, out); writeChar('\t', out); - writeText(old_file_path, out); + writeText(old_path, out); + writeChar('\t', out); + writeText(file_extension, out); writeChar('\t', out); writeText(lines_added, out); writeChar('\t', out); @@ -422,11 +428,20 @@ struct Options }; +/// Rough snapshot of repository calculated by application of diffs. It's used to calculate blame info. 
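An aside on the snapshot idea introduced here: conceptually it is a per-file sequence where element i remembers which commit last produced line i, updated by replaying each diff in order. A deliberately naive sketch of that concept (illustrative only, not the data structure the tool ends up using):

#include <cstdint>
#include <string>
#include <vector>

/// Naive per-file blame snapshot: lines[i] holds the hash of the commit
/// that last touched line i + 1 (diffs number lines from 1).
struct NaiveFileBlame
{
    std::vector<std::string> lines;

    void addLine(uint32_t num, const std::string & commit_hash)
    {
        if (num == 0)
            return; /// diffs use 1-based line numbers
        if (num > lines.size() + 1)
            lines.resize(num - 1); /// pad lines we have not seen yet
        lines.insert(lines.begin() + (num - 1), commit_hash);
    }

    void removeLine(uint32_t num)
    {
        if (num >= 1 && num <= lines.size())
            lines.erase(lines.begin() + (num - 1));
    }

    const std::string * find(uint32_t num) const
    {
        return (num >= 1 && num <= lines.size()) ? &lines[num - 1] : nullptr;
    }
};

Middle-of-vector insertions are O(n) per edit; a later patch in this series switches to a std::list with a moving cursor, which stays cheap because hunks visit line numbers in increasing order.
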
+struct File +{ + std::vector lines; +}; + +using Snapshot = std::map; + + void processCommit( - const Options & options, size_t commit_num, size_t total_commits, std::string hash, Result & result) + const Options & options, size_t commit_num, size_t total_commits, std::string hash, Snapshot & /*snapshot*/, Result & result) { std::string command = fmt::format( - "git show --raw --pretty='format:%at%x09%aN%x09%aE%x09%P%x0A%s%x00' --patch --unified=0 {}", + "git show --raw --pretty='format:%at%x09%aN%x09%P%x0A%s%x00' --patch --unified=0 {}", hash); //std::cerr << command << "\n"; @@ -515,21 +530,23 @@ void processCommit( if (change_type == 'R' || change_type == 'C') { - readText(file_change.old_file_path, in); + readText(file_change.old_path, in); skipWhitespaceIfAny(in); - readText(file_change.new_file_path, in); + readText(file_change.path, in); } else { - readText(file_change.new_file_path, in); + readText(file_change.path, in); } + file_change.file_extension = std::filesystem::path(file_change.path).extension(); + assertChar('\n', in); - if (!(options.skip_paths && re2_st::RE2::PartialMatch(file_change.new_file_path, *options.skip_paths))) + if (!(options.skip_paths && re2_st::RE2::PartialMatch(file_change.path, *options.skip_paths))) { file_changes.emplace( - file_change.new_file_path, + file_change.path, FileChangeAndLineChanges{ file_change, {} }); } } @@ -755,9 +772,10 @@ void processLog(const Options & options) size_t num_commits = hashes.size(); fmt::print("Total {} commits to process.\n", num_commits); + Snapshot snapshot; for (size_t i = 0; i < num_commits; ++i) { - processCommit(options, i, num_commits, hashes[i], result); + processCommit(options, i, num_commits, hashes[i], snapshot, result); } } @@ -792,7 +810,7 @@ try << "Usage: " << argv[0] << '\n' << desc << '\n' << "\nExample:\n" - << "\n./git-to-clickhouse --diff-size-limit 100000 --skip-paths '^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/'\n"; + << "\n./git-to-clickhouse --diff-size-limit 100000 --skip-paths 'generated\\.cpp|^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/'\n"; return 1; } From d1f1326a1370abd5d837864d02851ef1b3b20745 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Sep 2020 07:02:17 +0300 Subject: [PATCH 066/298] Concurrent processing + history --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 411 ++++++++++++------ 1 file changed, 283 insertions(+), 128 deletions(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index a81bc6679a7..6686c1ac480 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -80,11 +82,17 @@ CREATE TABLE git.line_changes hunk_num UInt32, hunk_start_line_number_old UInt32, hunk_start_line_number_new UInt32, + hunk_lines_added UInt32, + hunk_lines_deleted UInt32, hunk_context LowCardinality(String), line LowCardinality(String), indent UInt8, line_type Enum('Empty' = 0, 'Comment' = 1, 'Punct' = 2, 'Code' = 3), + prev_commit_hash String, + prev_author LowCardinality(String), + prev_time DateTime, + file_change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), path LowCardinality(String), old_path 
LowCardinality(String), @@ -128,6 +136,112 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } + +struct Commit +{ + std::string hash; + std::string author; + LocalDateTime time{}; + std::string message; + uint32_t files_added{}; + uint32_t files_deleted{}; + uint32_t files_renamed{}; + uint32_t files_modified{}; + uint32_t lines_added{}; + uint32_t lines_deleted{}; + uint32_t hunks_added{}; + uint32_t hunks_removed{}; + uint32_t hunks_changed{}; + + void writeTextWithoutNewline(WriteBuffer & out) const + { + writeText(hash, out); + writeChar('\t', out); + writeText(author, out); + writeChar('\t', out); + writeText(time, out); + writeChar('\t', out); + writeText(message, out); + writeChar('\t', out); + writeText(files_added, out); + writeChar('\t', out); + writeText(files_deleted, out); + writeChar('\t', out); + writeText(files_renamed, out); + writeChar('\t', out); + writeText(files_modified, out); + writeChar('\t', out); + writeText(lines_added, out); + writeChar('\t', out); + writeText(lines_deleted, out); + writeChar('\t', out); + writeText(hunks_added, out); + writeChar('\t', out); + writeText(hunks_removed, out); + writeChar('\t', out); + writeText(hunks_changed, out); + } +}; + + +enum class FileChangeType +{ + Add, + Delete, + Modify, + Rename, + Copy, + Type, +}; + +void writeText(FileChangeType type, WriteBuffer & out) +{ + switch (type) + { + case FileChangeType::Add: writeString("Add", out); break; + case FileChangeType::Delete: writeString("Delete", out); break; + case FileChangeType::Modify: writeString("Modify", out); break; + case FileChangeType::Rename: writeString("Rename", out); break; + case FileChangeType::Copy: writeString("Copy", out); break; + case FileChangeType::Type: writeString("Type", out); break; + } +} + +struct FileChange +{ + FileChangeType change_type{}; + std::string path; + std::string old_path; + std::string file_extension; + uint32_t lines_added{}; + uint32_t lines_deleted{}; + uint32_t hunks_added{}; + uint32_t hunks_removed{}; + uint32_t hunks_changed{}; + + void writeTextWithoutNewline(WriteBuffer & out) const + { + writeText(change_type, out); + writeChar('\t', out); + writeText(path, out); + writeChar('\t', out); + writeText(old_path, out); + writeChar('\t', out); + writeText(file_extension, out); + writeChar('\t', out); + writeText(lines_added, out); + writeChar('\t', out); + writeText(lines_deleted, out); + writeChar('\t', out); + writeText(hunks_added, out); + writeChar('\t', out); + writeText(hunks_removed, out); + writeChar('\t', out); + writeText(hunks_changed, out); + } +}; + + enum class LineType { Empty, @@ -155,10 +269,15 @@ struct LineChange uint32_t hunk_num{}; /// ordinal number of hunk in diff, starting with 0 uint32_t hunk_start_line_number_old{}; uint32_t hunk_start_line_number_new{}; + uint32_t hunk_lines_added{}; + uint32_t hunk_lines_deleted{}; std::string hunk_context; /// The context (like a line with function name) as it is calculated by git std::string line; /// Line content without leading whitespaces uint8_t indent{}; /// The number of leading whitespaces or tabs * 4 LineType line_type{}; + std::string prev_commit_hash; + std::string prev_author; + LocalDateTime prev_time{}; void setLineInfo(std::string full_line) { @@ -220,6 +339,10 @@ struct LineChange writeChar('\t', out); writeText(hunk_start_line_number_new, out); writeChar('\t', out); + writeText(hunk_lines_added, out); + writeChar('\t', out); + writeText(hunk_lines_deleted, out); + writeChar('\t', out); writeText(hunk_context, out); writeChar('\t', out); 
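A note on the serialization convention running through these writeTextWithoutNewline methods: each one emits a single tab-separated row that clickhouse-client later loads with INSERT ... FORMAT TSV. A stripped-down illustration with plain iostreams (hypothetical row shape; note that the real writeText also escapes tabs and newlines, which operator<< does not):

#include <cstdint>
#include <iostream>
#include <string>

/// Emit one TSV row: values separated by '\t', one row per line.
void writeRow(std::ostream & out, const std::string & hash, const std::string & author, uint32_t lines_added)
{
    out << hash << '\t' << author << '\t' << lines_added << '\n';
}

int main()
{
    writeRow(std::cout, "abc123", "Alice", 42);
}
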
writeText(line, out); @@ -227,120 +350,17 @@ struct LineChange writeText(indent, out); writeChar('\t', out); writeText(line_type, out); + writeChar('\t', out); + writeText(prev_commit_hash, out); + writeChar('\t', out); + writeText(prev_author, out); + writeChar('\t', out); + writeText(prev_time, out); } }; using LineChanges = std::vector; -enum class FileChangeType -{ - Add, - Delete, - Modify, - Rename, - Copy, - Type, -}; - -void writeText(FileChangeType type, WriteBuffer & out) -{ - switch (type) - { - case FileChangeType::Add: writeString("Add", out); break; - case FileChangeType::Delete: writeString("Delete", out); break; - case FileChangeType::Modify: writeString("Modify", out); break; - case FileChangeType::Rename: writeString("Rename", out); break; - case FileChangeType::Copy: writeString("Copy", out); break; - case FileChangeType::Type: writeString("Type", out); break; - } -} - -struct FileChange -{ - FileChangeType change_type{}; - std::string path; - std::string old_path; - std::string file_extension; - uint32_t lines_added{}; - uint32_t lines_deleted{}; - uint32_t hunks_added{}; - uint32_t hunks_removed{}; - uint32_t hunks_changed{}; - - void writeTextWithoutNewline(WriteBuffer & out) const - { - writeText(change_type, out); - writeChar('\t', out); - writeText(path, out); - writeChar('\t', out); - writeText(old_path, out); - writeChar('\t', out); - writeText(file_extension, out); - writeChar('\t', out); - writeText(lines_added, out); - writeChar('\t', out); - writeText(lines_deleted, out); - writeChar('\t', out); - writeText(hunks_added, out); - writeChar('\t', out); - writeText(hunks_removed, out); - writeChar('\t', out); - writeText(hunks_changed, out); - } -}; - -struct FileChangeAndLineChanges -{ - FileChange file_change; - LineChanges line_changes; -}; - -struct Commit -{ - std::string hash; - std::string author; - time_t time{}; - std::string message; - uint32_t files_added{}; - uint32_t files_deleted{}; - uint32_t files_renamed{}; - uint32_t files_modified{}; - uint32_t lines_added{}; - uint32_t lines_deleted{}; - uint32_t hunks_added{}; - uint32_t hunks_removed{}; - uint32_t hunks_changed{}; - - void writeTextWithoutNewline(WriteBuffer & out) const - { - writeText(hash, out); - writeChar('\t', out); - writeText(author, out); - writeChar('\t', out); - writeText(time, out); - writeChar('\t', out); - writeText(message, out); - writeChar('\t', out); - writeText(files_added, out); - writeChar('\t', out); - writeText(files_deleted, out); - writeChar('\t', out); - writeText(files_renamed, out); - writeChar('\t', out); - writeText(files_modified, out); - writeChar('\t', out); - writeText(lines_added, out); - writeChar('\t', out); - writeText(lines_deleted, out); - writeChar('\t', out); - writeText(hunks_added, out); - writeChar('\t', out); - writeText(hunks_removed, out); - writeChar('\t', out); - writeText(hunks_changed, out); - } -}; - void skipUntilWhitespace(ReadBuffer & buf) { @@ -407,13 +427,15 @@ struct Result struct Options { bool skip_commits_without_parents = true; + size_t threads = 1; std::optional skip_paths; std::unordered_set skip_commits; - size_t diff_size_limit = 0; + std::optional diff_size_limit; Options(const po::variables_map & options) { skip_commits_without_parents = options["skip-commits-without-parents"].as(); + threads = options["threads"].as(); if (options.count("skip-paths")) { skip_paths.emplace(options["skip-paths"].as()); @@ -423,36 +445,123 @@ struct Options auto vec = options["skip-commit"].as>(); skip_commits.insert(vec.begin(), vec.end()); } 
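The Options constructor in this hunk pulls typed values out of a boost::program_options::variables_map. For readers unfamiliar with the library, a minimal end-to-end sketch of that flow (the option names here are illustrative):

#include <boost/program_options.hpp>
#include <cstddef>
#include <iostream>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("Allowed options");
    desc.add_options()
        ("help,h", "produce help message")
        ("threads", po::value<size_t>()->default_value(1), "number of threads");

    po::variables_map options;
    po::store(po::parse_command_line(argc, argv, desc), options);

    if (options.count("help"))
    {
        std::cout << desc << '\n';
        return 1;
    }

    std::cout << "threads: " << options["threads"].as<size_t>() << '\n';
}
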
- diff_size_limit = options["diff-size-limit"].as(); + if (options.count("diff-size-limit")) + { + diff_size_limit = options["diff-size-limit"].as(); + } } }; /// Rough snapshot of repository calculated by application of diffs. It's used to calculate blame info. -struct File +struct FileBlame { - std::vector lines; + using Lines = std::list; + Lines lines; + Lines::iterator it; + size_t current_idx = 1; + + FileBlame() + { + it = lines.begin(); + } + + FileBlame & operator=(const FileBlame & rhs) + { + lines = rhs.lines; + it = lines.begin(); + current_idx = 1; + return *this; + } + + FileBlame(const FileBlame & rhs) + { + *this = rhs; + } + + void walk(uint32_t num) + { + if (current_idx < num) + { + while (current_idx < num && it != lines.end()) + { + ++current_idx; + ++it; + } + } + else if (current_idx > num) + { + --current_idx; + --it; + } + } + + const Commit * find(uint32_t num) + { + walk(num); + + if (current_idx == num && it != lines.end()) + return &*it; + return {}; + } + + void addLine(uint32_t num, Commit commit) + { + walk(num); + + while (it == lines.end() && current_idx < num) + { + lines.emplace_back(); + ++current_idx; + } + if (it == lines.end()) + { + lines.emplace_back(); + --it; + } + + lines.insert(it, commit); + } + + void removeLine(uint32_t num) + { + walk(num); + + if (current_idx == num) + it = lines.erase(it); + } }; -using Snapshot = std::map; +using Snapshot = std::map; + +struct FileChangeAndLineChanges +{ + FileChangeAndLineChanges(FileChange file_change_) : file_change(file_change_) {} + + FileChange file_change; + LineChanges line_changes; + + std::map deleted_lines; +}; void processCommit( - const Options & options, size_t commit_num, size_t total_commits, std::string hash, Snapshot & /*snapshot*/, Result & result) + std::unique_ptr & commit_info, + const Options & options, + size_t commit_num, + size_t total_commits, + std::string hash, + Snapshot & snapshot, + Result & result) { - std::string command = fmt::format( - "git show --raw --pretty='format:%at%x09%aN%x09%P%x0A%s%x00' --patch --unified=0 {}", - hash); - - //std::cerr << command << "\n"; - - auto commit_info = ShellCommand::execute(command); auto & in = commit_info->out; Commit commit; commit.hash = hash; - readText(commit.time, in); + time_t commit_time; + readText(commit_time, in); + commit.time = commit_time; assertChar('\t', in); readText(commit.author, in); assertChar('\t', in); @@ -465,7 +574,7 @@ void processCommit( std::replace_if(message_to_print.begin(), message_to_print.end(), [](char c){ return std::iscntrl(c); }, ' '); fmt::print("{}% {} {} {}\n", - commit_num * 100 / total_commits, toString(LocalDateTime(commit.time)), hash, message_to_print); + commit_num * 100 / total_commits, toString(commit.time), hash, message_to_print); if (options.skip_commits_without_parents && commit_num != 0 && parent_hash.empty()) { @@ -533,6 +642,8 @@ void processCommit( readText(file_change.old_path, in); skipWhitespaceIfAny(in); readText(file_change.path, in); + + snapshot[file_change.path] = snapshot[file_change.old_path]; } else { @@ -547,7 +658,7 @@ void processCommit( { file_changes.emplace( file_change.path, - FileChangeAndLineChanges{ file_change, {} }); + FileChangeAndLineChanges(file_change)); } } @@ -601,6 +712,9 @@ void processCommit( else assertChar('\n', in); + line_change.hunk_lines_added = new_lines; + line_change.hunk_lines_deleted = old_lines; + ++line_change.hunk_num; line_change.line_number_old = line_change.hunk_start_line_number_old; line_change.line_number_new = 
line_change.hunk_start_line_number_new; @@ -653,6 +767,16 @@ void processCommit( readStringUntilNextLine(line_change.line, in); line_change.setLineInfo(line_change.line); + FileBlame & file_snapshot = snapshot[old_file_path]; + if (const Commit * prev_commit = file_snapshot.find(line_change.line_number_old)) + { + line_change.prev_commit_hash = prev_commit->hash; + line_change.prev_author = prev_commit->author; + line_change.prev_time = prev_commit->time; + file_change_and_line_changes->deleted_lines[line_change.line_number_old] = *prev_commit; + file_snapshot.removeLine(line_change.line_number_old); + } + file_change_and_line_changes->line_changes.push_back(line_change); ++line_change.line_number_old; } @@ -689,6 +813,16 @@ void processCommit( readStringUntilNextLine(line_change.line, in); line_change.setLineInfo(line_change.line); + FileBlame & file_snapshot = snapshot[new_file_path]; + if (file_change_and_line_changes->deleted_lines.count(line_change.line_number_new)) + { + const auto & prev_commit = file_change_and_line_changes->deleted_lines[line_change.line_number_new]; + line_change.prev_commit_hash = prev_commit.hash; + line_change.prev_author = prev_commit.author; + line_change.prev_time = prev_commit.time; + } + file_snapshot.addLine(line_change.line_number_new, commit); + file_change_and_line_changes->line_changes.push_back(line_change); ++line_change.line_number_new; } @@ -701,7 +835,7 @@ void processCommit( } } - if (commit.lines_added + commit.lines_deleted > options.diff_size_limit) + if (options.diff_size_limit && commit.lines_added + commit.lines_deleted > *options.diff_size_limit) return; /// Write the result @@ -744,6 +878,16 @@ void processCommit( } +auto gitShow(const std::string & hash) +{ + std::string command = fmt::format( + "git show --raw --pretty='format:%at%x09%aN%x09%P%x0A%s%x00' --patch --unified=0 {}", + hash); + + return ShellCommand::execute(command); +} + + void processLog(const Options & options) { Result result; @@ -772,10 +916,19 @@ void processLog(const Options & options) size_t num_commits = hashes.size(); fmt::print("Total {} commits to process.\n", num_commits); + /// Will run multiple processes in parallel + size_t num_threads = options.threads; + + std::vector> show_commands(num_threads); + for (size_t i = 0; i < num_commits && i < num_threads; ++i) + show_commands[i] = gitShow(hashes[i]); + Snapshot snapshot; for (size_t i = 0; i < num_commits; ++i) { - processCommit(options, i, num_commits, hashes[i], snapshot, result); + processCommit(show_commands[i % num_threads], options, i, num_commits, hashes[i], snapshot, result); + if (i + num_threads < num_commits) + show_commands[i % num_threads] = gitShow(hashes[i + num_threads]); } } @@ -797,8 +950,10 @@ try "Skip paths that matches regular expression (re2 syntax).") ("skip-commit", po::value>(), "Skip commit with specified hash. 
The option can be specified multiple times.") - ("diff-size-limit", po::value()->default_value(0), + ("diff-size-limit", po::value(), "Skip commits whose diff size (number of added + removed lines) is larger than specified threshold") + ("threads", po::value()->default_value(std::thread::hardware_concurrency()), + "Number of threads to interact with git") ; po::variables_map options; From 3f29453c02ef3d3716927d81258218516b183d7b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Sep 2020 09:38:48 +0300 Subject: [PATCH 067/298] Roughly working blame --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 179 ++++++++++++++---- 1 file changed, 137 insertions(+), 42 deletions(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index 6686c1ac480..c1c27a82812 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include +#include #include #include #include @@ -427,19 +429,26 @@ struct Result struct Options { bool skip_commits_without_parents = true; + bool skip_commits_with_duplicate_diffs = true; size_t threads = 1; std::optional skip_paths; + std::optional skip_commits_with_messages; std::unordered_set skip_commits; std::optional diff_size_limit; Options(const po::variables_map & options) { skip_commits_without_parents = options["skip-commits-without-parents"].as(); + skip_commits_with_duplicate_diffs = options["skip-commits-with-duplicate-diffs"].as(); threads = options["threads"].as(); if (options.count("skip-paths")) { skip_paths.emplace(options["skip-paths"].as()); } + if (options.count("skip-commits-with-messages")) + { + skip_commits_with_messages.emplace(options["skip-commits-with-messages"].as()); + } if (options.count("skip-commit")) { auto vec = options["skip-commit"].as>(); @@ -481,15 +490,12 @@ struct FileBlame void walk(uint32_t num) { - if (current_idx < num) + while (current_idx < num && it != lines.end()) { - while (current_idx < num && it != lines.end()) - { - ++current_idx; - ++it; - } + ++current_idx; + ++it; } - else if (current_idx > num) + while (current_idx > num) { --current_idx; --it; @@ -500,6 +506,8 @@ struct FileBlame { walk(num); +// std::cerr << "current_idx: " << current_idx << ", num: " << num << "\n"; + if (current_idx == num && it != lines.end()) return &*it; return {}; @@ -514,20 +522,17 @@ struct FileBlame lines.emplace_back(); ++current_idx; } - if (it == lines.end()) - { - lines.emplace_back(); - --it; - } - lines.insert(it, commit); + it = lines.insert(it, commit); } void removeLine(uint32_t num) { +// std::cerr << "Removing line " << num << ", current_idx: " << current_idx << "\n"; + walk(num); - if (current_idx == num) + if (current_idx == num && it != lines.end()) it = lines.erase(it); } }; @@ -540,10 +545,10 @@ struct FileChangeAndLineChanges FileChange file_change; LineChanges line_changes; - - std::map deleted_lines; }; +using DiffHashes = std::unordered_set; + void processCommit( std::unique_ptr & commit_info, @@ -552,6 +557,7 @@ void processCommit( size_t total_commits, std::string hash, Snapshot & snapshot, + DiffHashes & diff_hashes, Result & result) { auto & in = commit_info->out; @@ -570,6 +576,9 @@ void processCommit( assertChar('\n', in); readNullTerminated(commit.message, in); + if (options.skip_commits_with_messages && re2_st::RE2::PartialMatch(commit.message, *options.skip_commits_with_messages)) + return; + 
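The early return added above filters whole commits by message via RE2::PartialMatch. A self-contained sketch of the same check (hypothetical function name; stock re2 is used here in place of the bundled re2_st variant, which has the same API):

#include <re2/re2.h>
#include <string>

bool shouldSkipCommitMessage(const std::string & message)
{
    /// Example pattern taken from the tool's own usage example below.
    static const RE2 pattern("^Merge branch ");
    return RE2::PartialMatch(message, pattern);
}

Compiling the RE2 once into a static const object is the usual idiom, since pattern compilation is far more expensive than matching.
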
std::string message_to_print = commit.message; std::replace_if(message_to_print.begin(), message_to_print.end(), [](char c){ return std::iscntrl(c); }, ' '); @@ -643,7 +652,10 @@ void processCommit( skipWhitespaceIfAny(in); readText(file_change.path, in); - snapshot[file_change.path] = snapshot[file_change.old_path]; +// std::cerr << "Move from " << file_change.old_path << " to " << file_change.path << "\n"; + + if (file_change.path != file_change.old_path) + snapshot[file_change.path] = snapshot[file_change.old_path]; } else { @@ -706,6 +718,9 @@ void processCommit( if (checkChar(',', in)) readText(new_lines, in); + if (line_change.hunk_start_line_number_new == 0) + line_change.hunk_start_line_number_new = 1; + assertString(" @@", in); if (checkChar(' ', in)) readStringUntilNextLine(line_change.hunk_context, in); @@ -767,16 +782,6 @@ void processCommit( readStringUntilNextLine(line_change.line, in); line_change.setLineInfo(line_change.line); - FileBlame & file_snapshot = snapshot[old_file_path]; - if (const Commit * prev_commit = file_snapshot.find(line_change.line_number_old)) - { - line_change.prev_commit_hash = prev_commit->hash; - line_change.prev_author = prev_commit->author; - line_change.prev_time = prev_commit->time; - file_change_and_line_changes->deleted_lines[line_change.line_number_old] = *prev_commit; - file_snapshot.removeLine(line_change.line_number_old); - } - file_change_and_line_changes->line_changes.push_back(line_change); ++line_change.line_number_old; } @@ -813,16 +818,6 @@ void processCommit( readStringUntilNextLine(line_change.line, in); line_change.setLineInfo(line_change.line); - FileBlame & file_snapshot = snapshot[new_file_path]; - if (file_change_and_line_changes->deleted_lines.count(line_change.line_number_new)) - { - const auto & prev_commit = file_change_and_line_changes->deleted_lines[line_change.line_number_new]; - line_change.prev_commit_hash = prev_commit.hash; - line_change.prev_author = prev_commit.author; - line_change.prev_time = prev_commit.time; - } - file_snapshot.addLine(line_change.line_number_new, commit); - file_change_and_line_changes->line_changes.push_back(line_change); ++line_change.line_number_new; } @@ -838,6 +833,99 @@ void processCommit( if (options.diff_size_limit && commit.lines_added + commit.lines_deleted > *options.diff_size_limit) return; + /// Calculate hash of diff and skip duplicates + if (options.skip_commits_with_duplicate_diffs) + { + SipHash hasher; + + for (auto & elem : file_changes) + { + hasher.update(elem.second.file_change.change_type); + hasher.update(elem.second.file_change.old_path.size()); + hasher.update(elem.second.file_change.old_path); + hasher.update(elem.second.file_change.path.size()); + hasher.update(elem.second.file_change.path); + + hasher.update(elem.second.line_changes.size()); + for (auto & line_change : elem.second.line_changes) + { + hasher.update(line_change.sign); + hasher.update(line_change.line_number_old); + hasher.update(line_change.line_number_new); + hasher.update(line_change.indent); + hasher.update(line_change.line.size()); + hasher.update(line_change.line); + } + } + + UInt128 hash_of_diff; + hasher.get128(hash_of_diff.low, hash_of_diff.high); + + if (!diff_hashes.insert(hash_of_diff).second) + return; + } + + /// Update snapshot and blame info + + for (auto & elem : file_changes) + { +// std::cerr << elem.first << "\n"; + + FileBlame & file_snapshot = snapshot[elem.first]; + std::unordered_map deleted_lines; + + /// Obtain blame info from previous state of the snapshot + + for (auto & 
line_change : elem.second.line_changes) + { + if (line_change.sign == -1) + { + if (const Commit * prev_commit = file_snapshot.find(line_change.line_number_old); + prev_commit && prev_commit->time <= commit.time) + { + line_change.prev_commit_hash = prev_commit->hash; + line_change.prev_author = prev_commit->author; + line_change.prev_time = prev_commit->time; + deleted_lines[line_change.line_number_old] = *prev_commit; + } + else + { + // std::cerr << "Did not find line " << line_change.line_number_old << " from file " << elem.first << ": " << line_change.line << "\n"; + } + } + else if (line_change.sign == 1) + { + uint32_t this_line_in_prev_commit = line_change.hunk_start_line_number_old + + (line_change.line_number_new - line_change.hunk_start_line_number_new); + + if (deleted_lines.count(this_line_in_prev_commit)) + { + const auto & prev_commit = deleted_lines[this_line_in_prev_commit]; + if (prev_commit.time <= commit.time) + { + line_change.prev_commit_hash = prev_commit.hash; + line_change.prev_author = prev_commit.author; + line_change.prev_time = prev_commit.time; + } + } + } + } + + /// Update the snapshot + + for (const auto & line_change : elem.second.line_changes) + { + if (line_change.sign == -1) + { + file_snapshot.removeLine(line_change.line_number_new); + } + else if (line_change.sign == 1) + { + file_snapshot.addLine(line_change.line_number_new, commit); + } + } + } + /// Write the result /// commits table @@ -881,7 +969,7 @@ void processCommit( auto gitShow(const std::string & hash) { std::string command = fmt::format( - "git show --raw --pretty='format:%at%x09%aN%x09%P%x0A%s%x00' --patch --unified=0 {}", + "git show --raw --pretty='format:%ct%x09%aN%x09%P%x0A%s%x00' --patch --unified=0 {}", hash); return ShellCommand::execute(command); @@ -924,9 +1012,11 @@ void processLog(const Options & options) show_commands[i] = gitShow(hashes[i]); Snapshot snapshot; + DiffHashes diff_hashes; + for (size_t i = 0; i < num_commits; ++i) { - processCommit(show_commands[i % num_threads], options, i, num_commits, hashes[i], snapshot, result); + processCommit(show_commands[i % num_threads], options, i, num_commits, hashes[i], snapshot, diff_hashes, result); if (i + num_threads < num_commits) show_commands[i % num_threads] = gitShow(hashes[i + num_threads]); } @@ -946,10 +1036,15 @@ try ("skip-commits-without-parents", po::value()->default_value(true), "Skip commits without parents (except the initial commit)." " These commits are usually erroneous but they can make sense in very rare cases.") - ("skip-paths", po::value(), - "Skip paths that matches regular expression (re2 syntax).") + ("skip-commits-with-duplicate-diffs", po::value()->default_value(true), + "Skip commits with duplicate diffs." + " These commits are usually results of cherry-pick or merge after rebase.") ("skip-commit", po::value>(), "Skip commit with specified hash. 
The option can be specified multiple times.") + ("skip-paths", po::value(), + "Skip paths that matches regular expression (re2 syntax).") + ("skip-commits-with-messages", po::value(), + "Skip commits whose messages matches regular expression (re2 syntax).") ("diff-size-limit", po::value(), "Skip commits whose diff size (number of added + removed lines) is larger than specified threshold") ("threads", po::value()->default_value(std::thread::hardware_concurrency()), @@ -965,7 +1060,7 @@ try << "Usage: " << argv[0] << '\n' << desc << '\n' << "\nExample:\n" - << "\n./git-to-clickhouse --diff-size-limit 100000 --skip-paths 'generated\\.cpp|^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/'\n"; + << "\n./git-to-clickhouse --diff-size-limit 100000 --skip-paths 'generated\\.cpp|^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/' --skip-commits-with-messages '^Merge branch '\n"; return 1; } From 99c33612d65c627bbb9fc31d9d97906195d3cf53 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Sep 2020 10:29:58 +0300 Subject: [PATCH 068/298] Better diagnostics --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index c1c27a82812..6b29708ead3 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -582,7 +582,7 @@ void processCommit( std::string message_to_print = commit.message; std::replace_if(message_to_print.begin(), message_to_print.end(), [](char c){ return std::iscntrl(c); }, ' '); - fmt::print("{}% {} {} {}\n", + std::cerr << fmt::format("{}% {} {} {}\n", commit_num * 100 / total_commits, toString(commit.time), hash, message_to_print); if (options.skip_commits_without_parents && commit_num != 0 && parent_hash.empty()) From 3ec9656aa21a3142d2898b7d259a4740a6691fd2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Sep 2020 10:38:39 +0300 Subject: [PATCH 069/298] Slightly more robust --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index 6b29708ead3..f3653bb282f 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -568,12 +568,10 @@ void processCommit( time_t commit_time; readText(commit_time, in); commit.time = commit_time; - assertChar('\t', in); - readText(commit.author, in); - assertChar('\t', in); + assertChar('\0', in); + readNullTerminated(commit.author, in); std::string parent_hash; - readString(parent_hash, in); - assertChar('\n', in); + readNullTerminated(parent_hash, in); readNullTerminated(commit.message, in); if (options.skip_commits_with_messages && re2_st::RE2::PartialMatch(commit.message, *options.skip_commits_with_messages)) @@ -969,7 +967,7 @@ void processCommit( auto gitShow(const std::string & hash) { std::string command = fmt::format( - "git show --raw --pretty='format:%ct%x09%aN%x09%P%x0A%s%x00' --patch --unified=0 {}", + "git show --raw --pretty='format:%ct%x00%aN%x00%P%x00%s%x00' --patch --unified=0 {}", hash); return ShellCommand::execute(command); From 
25ca5e91bd0f3074c8d7d0874e125d2dcc611889 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Sep 2020 16:09:32 -0700 Subject: [PATCH 070/298] AsynchronousMetricLog - add event_time_microseconds column --- src/Core/Field.h | 3 ++- src/Interpreters/AsynchronousMetricLog.cpp | 17 +++++++++++++---- src/Interpreters/AsynchronousMetricLog.h | 1 + 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/Core/Field.h b/src/Core/Field.h index 0bfdf597543..8973d106c0b 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -768,7 +768,8 @@ T & Field::get() // Disregard signedness when converting between int64 types. constexpr Field::Types::Which target = TypeToEnum>::value; assert(target == which - || (isInt64FieldType(target) && isInt64FieldType(which))); + || (isInt64FieldType(target) && isInt64FieldType(which)) + || target == Field::Types::Decimal64 /* DateTime64 fields */); #endif ValueType * MAY_ALIAS ptr = reinterpret_cast(&storage); diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index e4415773655..d2c81c9dfc5 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -13,10 +14,11 @@ Block AsynchronousMetricLogElement::createBlock() { ColumnsWithTypeAndName columns; - columns.emplace_back(std::make_shared(), "event_date"); - columns.emplace_back(std::make_shared(), "event_time"); - columns.emplace_back(std::make_shared(), "name"); - columns.emplace_back(std::make_shared(), "value"); + columns.emplace_back(std::make_shared(), "event_date"); + columns.emplace_back(std::make_shared(), "event_time"); + columns.emplace_back(std::make_shared(6), "event_time_microseconds"); + columns.emplace_back(std::make_shared(), "name"); + columns.emplace_back(std::make_shared(), "value"); return Block(columns); } @@ -28,6 +30,7 @@ void AsynchronousMetricLogElement::appendToBlock(MutableColumns & columns) const columns[column_idx++]->insert(event_date); columns[column_idx++]->insert(event_time); + columns[column_idx++]->insert(event_time_microseconds); columns[column_idx++]->insert(metric_name); columns[column_idx++]->insert(value); } @@ -38,6 +41,11 @@ inline UInt64 time_in_milliseconds(std::chrono::time_point(timepoint.time_since_epoch()).count(); } +inline UInt64 time_in_microseconds(std::chrono::time_point timepoint) +{ + return std::chrono::duration_cast(timepoint.time_since_epoch()).count(); +} + inline UInt64 time_in_seconds(std::chrono::time_point timepoint) { @@ -50,6 +58,7 @@ void AsynchronousMetricLog::addValues(const AsynchronousMetricValues & values) const auto now = std::chrono::system_clock::now(); element.event_time = time_in_seconds(now); + element.event_time_microseconds = time_in_microseconds(now); element.event_date = DateLUT::instance().toDayNum(element.event_time); for (const auto & [key, value] : values) diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index b7d6aab95b6..0c02244246e 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -22,6 +22,7 @@ struct AsynchronousMetricLogElement { UInt16 event_date; time_t event_time; + UInt64 event_time_microseconds; std::string metric_name; double value; From 6a5b885ac1167dd8bba4a0c8b091289dd4c1e79e Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Sep 2020 16:17:28 -0700 Subject: [PATCH 071/298] AsynchronousMetricLog - add tests for event_time_microseconds column 
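Context for the tests below: the helper added in the previous patch converts a std::chrono time_point to whole microseconds since the Unix epoch, and that UInt64 is what lands in the DateTime64(6) column (internally a Decimal64 with scale 6, which is why the Field.h assertion above had to admit Decimal64). A standalone sketch of the conversion:

#include <chrono>
#include <cstdint>
#include <iostream>

int main()
{
    const auto now = std::chrono::system_clock::now();
    /// Whole microseconds since the Unix epoch, as stored in DateTime64(6).
    const uint64_t us = std::chrono::duration_cast<std::chrono::microseconds>(
        now.time_since_epoch()).count();
    std::cout << "seconds: " << us / 1000000
              << " microsecond fraction: " << us % 1000000 << '\n';
}
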
--- .../0_stateless/01473_event_time_microseconds.reference | 2 ++ tests/queries/0_stateless/01473_event_time_microseconds.sql | 5 +++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/01473_event_time_microseconds.reference create mode 100644 tests/queries/0_stateless/01473_event_time_microseconds.sql diff --git a/tests/queries/0_stateless/01473_event_time_microseconds.reference b/tests/queries/0_stateless/01473_event_time_microseconds.reference new file mode 100644 index 00000000000..6c3b6ec5c6c --- /dev/null +++ b/tests/queries/0_stateless/01473_event_time_microseconds.reference @@ -0,0 +1,2 @@ +'01473_asynchronous_metric_log_event_start_time_milliseconds_test' +ok \ No newline at end of file diff --git a/tests/queries/0_stateless/01473_event_time_microseconds.sql b/tests/queries/0_stateless/01473_event_time_microseconds.sql new file mode 100644 index 00000000000..af38cf4ca70 --- /dev/null +++ b/tests/queries/0_stateless/01473_event_time_microseconds.sql @@ -0,0 +1,5 @@ +set log_queries = 1; + +select '01473_asynchronous_metric_log_event_start_time_milliseconds_test'; +system flush logs; +SELECT If((select count(event_time_microseconds) from system.asynchronous_metric_log) > 0, 'ok', 'fail'); -- success From 1c1f50c6b665b96d7ba5742a60c770081299213d Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Sep 2020 16:19:38 -0700 Subject: [PATCH 072/298] MetricLog - add event_time_microseconds column --- src/Interpreters/MetricLog.cpp | 14 +++++++++++--- src/Interpreters/MetricLog.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 69fcc4917b9..ce5d5793b87 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -11,9 +12,10 @@ Block MetricLogElement::createBlock() { ColumnsWithTypeAndName columns_with_type_and_name; - columns_with_type_and_name.emplace_back(std::make_shared(), "event_date"); - columns_with_type_and_name.emplace_back(std::make_shared(), "event_time"); - columns_with_type_and_name.emplace_back(std::make_shared(), "milliseconds"); + columns_with_type_and_name.emplace_back(std::make_shared(), "event_date"); + columns_with_type_and_name.emplace_back(std::make_shared(), "event_time"); + columns_with_type_and_name.emplace_back(std::make_shared(6), "event_time_microseconds"); + columns_with_type_and_name.emplace_back(std::make_shared(), "milliseconds"); for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) { @@ -41,6 +43,7 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time)); columns[column_idx++]->insert(event_time); + columns[column_idx++]->insert(event_time_microseconds); columns[column_idx++]->insert(milliseconds); for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) @@ -80,6 +83,10 @@ inline UInt64 time_in_milliseconds(std::chrono::time_point(timepoint.time_since_epoch()).count(); } +inline UInt64 time_in_microseconds(std::chrono::time_point timepoint) +{ + return std::chrono::duration_cast(timepoint.time_since_epoch()).count(); +} inline UInt64 time_in_seconds(std::chrono::time_point timepoint) { @@ -102,6 +109,7 @@ void MetricLog::metricThreadFunction() MetricLogElement elem; elem.event_time = std::chrono::system_clock::to_time_t(current_time); + elem.event_time_microseconds = time_in_microseconds(current_time); elem.milliseconds = 
time_in_milliseconds(current_time) - time_in_seconds(current_time) * 1000; elem.profile_events.resize(ProfileEvents::end()); diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index 7774a45d7e1..f52d078bdc9 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -18,6 +18,7 @@ namespace DB struct MetricLogElement { time_t event_time{}; + UInt64 event_time_microseconds{}; UInt64 milliseconds{}; std::vector profile_events; From ec0d0243cc994f91fcc8a6a0fd36bde22f853af5 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Sep 2020 16:21:51 -0700 Subject: [PATCH 073/298] MetricLog - add tests for event_time_microseconds field --- .../0_stateless/01473_event_time_microseconds.reference | 6 ++++-- tests/queries/0_stateless/01473_event_time_microseconds.sql | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01473_event_time_microseconds.reference b/tests/queries/0_stateless/01473_event_time_microseconds.reference index 6c3b6ec5c6c..cac87f32a29 100644 --- a/tests/queries/0_stateless/01473_event_time_microseconds.reference +++ b/tests/queries/0_stateless/01473_event_time_microseconds.reference @@ -1,2 +1,4 @@ -'01473_asynchronous_metric_log_event_start_time_milliseconds_test' -ok \ No newline at end of file +01473_asynchronous_metric_log_event_start_time_milliseconds_test +ok +01473_metric_log_event_start_time_milliseconds_test +ok diff --git a/tests/queries/0_stateless/01473_event_time_microseconds.sql b/tests/queries/0_stateless/01473_event_time_microseconds.sql index af38cf4ca70..6a13d6e1543 100644 --- a/tests/queries/0_stateless/01473_event_time_microseconds.sql +++ b/tests/queries/0_stateless/01473_event_time_microseconds.sql @@ -3,3 +3,7 @@ set log_queries = 1; select '01473_asynchronous_metric_log_event_start_time_milliseconds_test'; system flush logs; SELECT If((select count(event_time_microseconds) from system.asynchronous_metric_log) > 0, 'ok', 'fail'); -- success + +select '01473_metric_log_event_start_time_milliseconds_test'; +system flush logs; +SELECT If((select count(event_time_microseconds) from system.metric_log) > 0, 'ok', 'fail'); -- success From 9de49d130f733c0fcc00d8f7bd85c4fb2eecbfc2 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Sep 2020 16:36:38 -0700 Subject: [PATCH 074/298] AsynchronousMetricLog & MetricLog - update docs with examples --- .../system-tables/asynchronous_metric_log.md | 25 ++++++------ .../en/operations/system-tables/metric_log.md | 40 +++++++++---------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 6b1d71e1ca6..75607cc30b0 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -6,6 +6,7 @@ Columns: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. - `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. 
@@ -16,18 +17,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬────value─┐ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pmuzzy │ 0 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pdirty │ 4214 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.retained │ 17657856 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.mapped │ 71471104 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.resident │ 61538304 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.metadata │ 6199264 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.allocated │ 38074336 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.epoch │ 2 │ -└────────────┴─────────────────────┴──────────────────────────────────────────┴──────────┘ +┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ +└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ ``` **See Also** diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index 9ccf61291d2..063fe81923b 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -23,28 +23,28 @@ SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical; ``` text Row 1: ────── -event_date: 2020-02-18 -event_time: 2020-02-18 07:15:33 -milliseconds: 554 -ProfileEvent_Query: 0 -ProfileEvent_SelectQuery: 0 -ProfileEvent_InsertQuery: 0 -ProfileEvent_FileOpen: 0 -ProfileEvent_Seek: 0 -ProfileEvent_ReadBufferFromFileDescriptorRead: 1 -ProfileEvent_ReadBufferFromFileDescriptorReadFailed: 0 -ProfileEvent_ReadBufferFromFileDescriptorReadBytes: 0 -ProfileEvent_WriteBufferFromFileDescriptorWrite: 1 -ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0 -ProfileEvent_WriteBufferFromFileDescriptorWriteBytes: 56 +event_date: 2020-09-05 +event_time: 2020-09-05 16:22:33 +event_time_microseconds: 2020-09-05 16:22:33.196807 +milliseconds: 196 +ProfileEvent_Query: 0 +ProfileEvent_SelectQuery: 0 +ProfileEvent_InsertQuery: 0 +ProfileEvent_FailedQuery: 0 +ProfileEvent_FailedSelectQuery: 0 ... 
-CurrentMetric_Query: 0 -CurrentMetric_Merge: 0 -CurrentMetric_PartMutation: 0 -CurrentMetric_ReplicatedFetch: 0 -CurrentMetric_ReplicatedSend: 0 -CurrentMetric_ReplicatedChecks: 0 ... +CurrentMetric_Revision: 54439 +CurrentMetric_VersionInteger: 20009001 +CurrentMetric_RWLockWaitingReaders: 0 +CurrentMetric_RWLockWaitingWriters: 0 +CurrentMetric_RWLockActiveReaders: 0 +CurrentMetric_RWLockActiveWriters: 0 +CurrentMetric_GlobalThread: 74 +CurrentMetric_GlobalThreadActive: 26 +CurrentMetric_LocalThread: 0 +CurrentMetric_LocalThreadActive: 0 +CurrentMetric_DistributedFilesToInsert: 0 ``` **See also** From db58fa15aaf202318e043549440589797b51aa0a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Sep 2020 02:24:31 +0300 Subject: [PATCH 075/298] Some tweaks --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index f3653bb282f..9e1ef14fcbf 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -435,6 +435,7 @@ struct Options std::optional skip_commits_with_messages; std::unordered_set skip_commits; std::optional diff_size_limit; + std::string stop_after_commit; Options(const po::variables_map & options) { @@ -458,6 +459,10 @@ struct Options { diff_size_limit = options["diff-size-limit"].as(); } + if (options.count("stop-after-commit")) + { + stop_after_commit = options["stop-after-commit"].as(); + } } }; @@ -828,7 +833,7 @@ void processCommit( } } - if (options.diff_size_limit && commit.lines_added + commit.lines_deleted > *options.diff_size_limit) + if (options.diff_size_limit && commit_num != 0 && commit.lines_added + commit.lines_deleted > *options.diff_size_limit) return; /// Calculate hash of diff and skip duplicates @@ -1015,6 +1020,10 @@ void processLog(const Options & options) for (size_t i = 0; i < num_commits; ++i) { processCommit(show_commands[i % num_threads], options, i, num_commits, hashes[i], snapshot, diff_hashes, result); + + if (!options.stop_after_commit.empty() && hashes[i] == options.stop_after_commit) + break; + if (i + num_threads < num_commits) show_commands[i % num_threads] = gitShow(hashes[i + num_threads]); } @@ -1043,10 +1052,12 @@ try "Skip paths that matches regular expression (re2 syntax).") ("skip-commits-with-messages", po::value(), "Skip commits whose messages matches regular expression (re2 syntax).") - ("diff-size-limit", po::value(), - "Skip commits whose diff size (number of added + removed lines) is larger than specified threshold") + ("diff-size-limit", po::value()->default_value(100000), + "Skip commits whose diff size (number of added + removed lines) is larger than specified threshold. 
Does not apply for initial commit.") + ("stop-after-commit", po::value(), + "Stop processing after specified commit hash.") ("threads", po::value()->default_value(std::thread::hardware_concurrency()), - "Number of threads to interact with git") + "Number of concurrent git subprocesses to spawn") ; po::variables_map options; @@ -1058,7 +1069,7 @@ try << "Usage: " << argv[0] << '\n' << desc << '\n' << "\nExample:\n" - << "\n./git-to-clickhouse --diff-size-limit 100000 --skip-paths 'generated\\.cpp|^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/' --skip-commits-with-messages '^Merge branch '\n"; + << "\n./git-to-clickhouse --skip-paths 'generated\\.cpp|^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/' --skip-commits-with-messages '^Merge branch '\n"; return 1; } From 684a910395cc37203453d1faa09ab839d3a4f32a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Sep 2020 03:17:26 +0300 Subject: [PATCH 076/298] Polish --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 803 ++++++++++-------- 1 file changed, 451 insertions(+), 352 deletions(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index 9e1ef14fcbf..6e43853d6ba 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -277,10 +277,14 @@ struct LineChange std::string line; /// Line content without leading whitespaces uint8_t indent{}; /// The number of leading whitespaces or tabs * 4 LineType line_type{}; + /// Information from the history (blame). std::string prev_commit_hash; std::string prev_author; LocalDateTime prev_time{}; + /** Classify line to empty / code / comment / single punctuation char. + * Very rough and mostly suitable for our C++ style. + */ void setLineInfo(std::string full_line) { indent = 0; @@ -306,8 +310,9 @@ struct LineChange line_type = LineType::Empty; } else if (pos + 1 < end - && ((pos[0] == '/' && pos[1] == '/') - || (pos[0] == '*' && pos[1] == ' '))) /// This is not precise. + && ((pos[0] == '/' && (pos[1] == '/' || pos[1] == '*')) + || (pos[0] == '*' && pos[1] == ' ') /// This is not precise. + || (pos[0] == '#' && pos[1] == ' '))) { line_type = LineType::Comment; } @@ -363,6 +368,18 @@ struct LineChange using LineChanges = std::vector; +struct FileDiff +{ + FileDiff(FileChange file_change_) : file_change(file_change_) {} + + FileChange file_change; + LineChanges line_changes; +}; + +using CommitDiff = std::map; + + +/** Parsing helpers */ void skipUntilWhitespace(ReadBuffer & buf) { @@ -418,14 +435,57 @@ void readStringUntilNextLine(std::string & s, ReadBuffer & buf) } -struct Result +/** Writes the resulting tables to files that can be imported to ClickHouse. 
+ */ +struct ResultWriter { WriteBufferFromFile commits{"commits.tsv"}; WriteBufferFromFile file_changes{"file_changes.tsv"}; WriteBufferFromFile line_changes{"line_changes.tsv"}; + + void appendCommit(const Commit & commit, const CommitDiff & files) + { + /// commits table + { + auto & out = commits; + + commit.writeTextWithoutNewline(out); + writeChar('\n', out); + } + + for (const auto & elem : files) + { + const FileChange & file_change = elem.second.file_change; + + /// file_changes table + { + auto & out = file_changes; + + file_change.writeTextWithoutNewline(out); + writeChar('\t', out); + commit.writeTextWithoutNewline(out); + writeChar('\n', out); + } + + /// line_changes table + for (const auto & line_change : elem.second.line_changes) + { + auto & out = line_changes; + + line_change.writeTextWithoutNewline(out); + writeChar('\t', out); + file_change.writeTextWithoutNewline(out); + writeChar('\t', out); + commit.writeTextWithoutNewline(out); + writeChar('\n', out); + } + } + } }; +/** See description in "main". + */ struct Options { bool skip_commits_without_parents = true; @@ -467,11 +527,23 @@ struct Options }; -/// Rough snapshot of repository calculated by application of diffs. It's used to calculate blame info. +/** Rough snapshot of repository calculated by application of diffs. It's used to calculate blame info. + * Represented by a list of lines. For every line it contains information about commit that modified this line the last time. + * + * Note that there are many cases when this info may become incorrect. + * The first reason is that git history is non-linear but we form this snapshot by application of commit diffs in some order + * that cannot give us correct results even theoretically. + * The second reason is that we don't process merge commits. But merge commits may contain differences for conflict resolution. + * + * We expect that the information will be mostly correct for the purpose of analytics. + * So, it can provide the expected "blame" info for the most of the lines. + */ struct FileBlame { using Lines = std::list; Lines lines; + + /// We walk through this list adding or removing lines. Lines::iterator it; size_t current_idx = 1; @@ -480,6 +552,7 @@ struct FileBlame it = lines.begin(); } + /// This is important when file was copied or renamed. FileBlame & operator=(const FileBlame & rhs) { lines = rhs.lines; @@ -493,6 +566,7 @@ struct FileBlame *this = rhs; } + /// Move iterator to requested line or stop at the end. void walk(uint32_t num) { while (current_idx < num && it != lines.end()) @@ -522,6 +596,7 @@ struct FileBlame { walk(num); + /// If the inserted line is over the end of file, we insert empty lines before it. while (it == lines.end() && current_idx < num) { lines.emplace_back(); @@ -542,334 +617,24 @@ struct FileBlame } }; +/// All files with their blame info. When file is renamed, we also rename it in snapshot. using Snapshot = std::map; -struct FileChangeAndLineChanges + +/** Enrich the line changes data with the history info from the snapshot + * - the author, time and commit of the previous change to every found line (blame). + * And update the snapshot. 
+ */ +void updateSnapshot(Snapshot & snapshot, const Commit & commit, CommitDiff & file_changes) { - FileChangeAndLineChanges(FileChange file_change_) : file_change(file_change_) {} - - FileChange file_change; - LineChanges line_changes; -}; - -using DiffHashes = std::unordered_set; - - -void processCommit( - std::unique_ptr & commit_info, - const Options & options, - size_t commit_num, - size_t total_commits, - std::string hash, - Snapshot & snapshot, - DiffHashes & diff_hashes, - Result & result) -{ - auto & in = commit_info->out; - - Commit commit; - commit.hash = hash; - - time_t commit_time; - readText(commit_time, in); - commit.time = commit_time; - assertChar('\0', in); - readNullTerminated(commit.author, in); - std::string parent_hash; - readNullTerminated(parent_hash, in); - readNullTerminated(commit.message, in); - - if (options.skip_commits_with_messages && re2_st::RE2::PartialMatch(commit.message, *options.skip_commits_with_messages)) - return; - - std::string message_to_print = commit.message; - std::replace_if(message_to_print.begin(), message_to_print.end(), [](char c){ return std::iscntrl(c); }, ' '); - - std::cerr << fmt::format("{}% {} {} {}\n", - commit_num * 100 / total_commits, toString(commit.time), hash, message_to_print); - - if (options.skip_commits_without_parents && commit_num != 0 && parent_hash.empty()) + /// Renames and copies. + for (auto & elem : file_changes) { - std::cerr << "Warning: skipping commit without parents\n"; - return; + auto & file = elem.second.file_change; + if (file.path != file.old_path) + snapshot[file.path] = snapshot[file.old_path]; } - if (!in.eof()) - assertChar('\n', in); - - /// File changes in form - /// :100644 100644 b90fe6bb94 3ffe4c380f M src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp - /// :100644 100644 828dedf6b5 828dedf6b5 R100 dbms/src/Functions/GeoUtils.h dbms/src/Functions/PolygonUtils.h - - std::map file_changes; - - while (checkChar(':', in)) - { - FileChange file_change; - - for (size_t i = 0; i < 4; ++i) - { - skipUntilWhitespace(in); - skipWhitespaceIfAny(in); - } - - char change_type; - readChar(change_type, in); - - int confidence; - switch (change_type) - { - case 'A': - file_change.change_type = FileChangeType::Add; - ++commit.files_added; - break; - case 'D': - file_change.change_type = FileChangeType::Delete; - ++commit.files_deleted; - break; - case 'M': - file_change.change_type = FileChangeType::Modify; - ++commit.files_modified; - break; - case 'R': - file_change.change_type = FileChangeType::Rename; - ++commit.files_renamed; - readText(confidence, in); - break; - case 'C': - file_change.change_type = FileChangeType::Copy; - readText(confidence, in); - break; - case 'T': - file_change.change_type = FileChangeType::Type; - break; - default: - throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected file change type: {}", change_type); - } - - skipWhitespaceIfAny(in); - - if (change_type == 'R' || change_type == 'C') - { - readText(file_change.old_path, in); - skipWhitespaceIfAny(in); - readText(file_change.path, in); - -// std::cerr << "Move from " << file_change.old_path << " to " << file_change.path << "\n"; - - if (file_change.path != file_change.old_path) - snapshot[file_change.path] = snapshot[file_change.old_path]; - } - else - { - readText(file_change.path, in); - } - - file_change.file_extension = std::filesystem::path(file_change.path).extension(); - - assertChar('\n', in); - - if (!(options.skip_paths && re2_st::RE2::PartialMatch(file_change.path, *options.skip_paths))) - { - 
file_changes.emplace( - file_change.path, - FileChangeAndLineChanges(file_change)); - } - } - - if (!in.eof()) - { - assertChar('\n', in); - - /// Diffs for every file in form of - /// --- a/src/Storages/StorageReplicatedMergeTree.cpp - /// +++ b/src/Storages/StorageReplicatedMergeTree.cpp - /// @@ -1387,2 +1387 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) - /// - table_lock, entry.create_time, reserved_space, entry.deduplicate, - /// - entry.force_ttl); - /// + table_lock, entry.create_time, reserved_space, entry.deduplicate); - - std::string old_file_path; - std::string new_file_path; - FileChangeAndLineChanges * file_change_and_line_changes = nullptr; - LineChange line_change; - - while (!in.eof()) - { - if (checkString("@@ ", in)) - { - if (!file_change_and_line_changes) - { - auto file_name = new_file_path.empty() ? old_file_path : new_file_path; - auto it = file_changes.find(file_name); - if (file_changes.end() != it) - file_change_and_line_changes = &it->second; - } - - if (file_change_and_line_changes) - { - uint32_t old_lines = 1; - uint32_t new_lines = 1; - - assertChar('-', in); - readText(line_change.hunk_start_line_number_old, in); - if (checkChar(',', in)) - readText(old_lines, in); - - assertString(" +", in); - readText(line_change.hunk_start_line_number_new, in); - if (checkChar(',', in)) - readText(new_lines, in); - - if (line_change.hunk_start_line_number_new == 0) - line_change.hunk_start_line_number_new = 1; - - assertString(" @@", in); - if (checkChar(' ', in)) - readStringUntilNextLine(line_change.hunk_context, in); - else - assertChar('\n', in); - - line_change.hunk_lines_added = new_lines; - line_change.hunk_lines_deleted = old_lines; - - ++line_change.hunk_num; - line_change.line_number_old = line_change.hunk_start_line_number_old; - line_change.line_number_new = line_change.hunk_start_line_number_new; - - if (old_lines && new_lines) - { - ++commit.hunks_changed; - ++file_change_and_line_changes->file_change.hunks_changed; - } - else if (old_lines) - { - ++commit.hunks_removed; - ++file_change_and_line_changes->file_change.hunks_removed; - } - else if (new_lines) - { - ++commit.hunks_added; - ++file_change_and_line_changes->file_change.hunks_added; - } - } - } - else if (checkChar('-', in)) - { - if (checkString("-- ", in)) - { - if (checkString("a/", in)) - { - readStringUntilNextLine(old_file_path, in); - line_change = LineChange{}; - file_change_and_line_changes = nullptr; - } - else if (checkString("/dev/null", in)) - { - old_file_path.clear(); - assertChar('\n', in); - line_change = LineChange{}; - file_change_and_line_changes = nullptr; - } - else - skipUntilNextLine(in); /// Actually it can be the line in diff. Skip it for simplicity. 
- } - else - { - if (file_change_and_line_changes) - { - ++commit.lines_deleted; - ++file_change_and_line_changes->file_change.lines_deleted; - - line_change.sign = -1; - readStringUntilNextLine(line_change.line, in); - line_change.setLineInfo(line_change.line); - - file_change_and_line_changes->line_changes.push_back(line_change); - ++line_change.line_number_old; - } - } - } - else if (checkChar('+', in)) - { - if (checkString("++ ", in)) - { - if (checkString("b/", in)) - { - readStringUntilNextLine(new_file_path, in); - line_change = LineChange{}; - file_change_and_line_changes = nullptr; - } - else if (checkString("/dev/null", in)) - { - new_file_path.clear(); - assertChar('\n', in); - line_change = LineChange{}; - file_change_and_line_changes = nullptr; - } - else - skipUntilNextLine(in); /// Actually it can be the line in diff. Skip it for simplicity. - } - else - { - if (file_change_and_line_changes) - { - ++commit.lines_added; - ++file_change_and_line_changes->file_change.lines_added; - - line_change.sign = 1; - readStringUntilNextLine(line_change.line, in); - line_change.setLineInfo(line_change.line); - - file_change_and_line_changes->line_changes.push_back(line_change); - ++line_change.line_number_new; - } - } - } - else - { - skipUntilNextLine(in); - } - } - } - - if (options.diff_size_limit && commit_num != 0 && commit.lines_added + commit.lines_deleted > *options.diff_size_limit) - return; - - /// Calculate hash of diff and skip duplicates - if (options.skip_commits_with_duplicate_diffs) - { - SipHash hasher; - - for (auto & elem : file_changes) - { - hasher.update(elem.second.file_change.change_type); - hasher.update(elem.second.file_change.old_path.size()); - hasher.update(elem.second.file_change.old_path); - hasher.update(elem.second.file_change.path.size()); - hasher.update(elem.second.file_change.path); - - hasher.update(elem.second.line_changes.size()); - for (auto & line_change : elem.second.line_changes) - { - hasher.update(line_change.sign); - hasher.update(line_change.line_number_old); - hasher.update(line_change.line_number_new); - hasher.update(line_change.indent); - hasher.update(line_change.line.size()); - hasher.update(line_change.line); - } - } - - UInt128 hash_of_diff; - hasher.get128(hash_of_diff.low, hash_of_diff.high); - - if (!diff_hashes.insert(hash_of_diff).second) - return; - } - - /// Update snapshot and blame info - for (auto & elem : file_changes) { // std::cerr << elem.first << "\n"; @@ -928,47 +693,379 @@ void processCommit( } } } +} - /// Write the result - /// commits table +/** Deduplication of commits with identical diffs. 
+ */ +using DiffHashes = std::unordered_set; + +UInt128 diffHash(const CommitDiff & file_changes) +{ + SipHash hasher; + + for (auto & elem : file_changes) { - auto & out = result.commits; + hasher.update(elem.second.file_change.change_type); + hasher.update(elem.second.file_change.old_path.size()); + hasher.update(elem.second.file_change.old_path); + hasher.update(elem.second.file_change.path.size()); + hasher.update(elem.second.file_change.path); - commit.writeTextWithoutNewline(out); - writeChar('\n', out); + hasher.update(elem.second.line_changes.size()); + for (auto & line_change : elem.second.line_changes) + { + hasher.update(line_change.sign); + hasher.update(line_change.line_number_old); + hasher.update(line_change.line_number_new); + hasher.update(line_change.indent); + hasher.update(line_change.line.size()); + hasher.update(line_change.line); + } } - for (const auto & elem : file_changes) + UInt128 hash_of_diff; + hasher.get128(hash_of_diff.low, hash_of_diff.high); + + return hash_of_diff; +} + + +/** File changes in form + * :100644 100644 b90fe6bb94 3ffe4c380f M src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp + * :100644 100644 828dedf6b5 828dedf6b5 R100 dbms/src/Functions/GeoUtils.h dbms/src/Functions/PolygonUtils.h + * according to the output of 'git show --raw' + */ +void processFileChanges( + ReadBuffer & in, + const Options & options, + Commit & commit, + CommitDiff & file_changes) +{ + while (checkChar(':', in)) { - const FileChange & file_change = elem.second.file_change; + FileChange file_change; - /// file_changes table + /// We don't care about file mode and content hashes. + for (size_t i = 0; i < 4; ++i) { - auto & out = result.file_changes; - - file_change.writeTextWithoutNewline(out); - writeChar('\t', out); - commit.writeTextWithoutNewline(out); - writeChar('\n', out); + skipUntilWhitespace(in); + skipWhitespaceIfAny(in); } - /// line_changes table - for (const auto & line_change : elem.second.line_changes) - { - auto & out = result.line_changes; + char change_type; + readChar(change_type, in); - line_change.writeTextWithoutNewline(out); - writeChar('\t', out); - file_change.writeTextWithoutNewline(out); - writeChar('\t', out); - commit.writeTextWithoutNewline(out); - writeChar('\n', out); + /// For rename and copy there is a number called "score". We ignore it. + int score; + + switch (change_type) + { + case 'A': + file_change.change_type = FileChangeType::Add; + ++commit.files_added; + break; + case 'D': + file_change.change_type = FileChangeType::Delete; + ++commit.files_deleted; + break; + case 'M': + file_change.change_type = FileChangeType::Modify; + ++commit.files_modified; + break; + case 'R': + file_change.change_type = FileChangeType::Rename; + ++commit.files_renamed; + readText(score, in); + break; + case 'C': + file_change.change_type = FileChangeType::Copy; + readText(score, in); + break; + case 'T': + file_change.change_type = FileChangeType::Type; + break; + default: + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected file change type: {}", change_type); + } + + skipWhitespaceIfAny(in); + + if (change_type == 'R' || change_type == 'C') + { + readText(file_change.old_path, in); + skipWhitespaceIfAny(in); + readText(file_change.path, in); + } + else + { + readText(file_change.path, in); + } + + file_change.file_extension = std::filesystem::path(file_change.path).extension(); + /// It gives us extension in form of '.cpp'. There is a reason for it but we remove initial dot for simplicity. 
+ if (!file_change.file_extension.empty() && file_change.file_extension.front() == '.') + file_change.file_extension = file_change.file_extension.substr(1, std::string::npos); + + assertChar('\n', in); + + if (!(options.skip_paths && re2_st::RE2::PartialMatch(file_change.path, *options.skip_paths))) + { + file_changes.emplace( + file_change.path, + FileDiff(file_change)); } } } +/** Process the list of diffs for every file from the result of "git show". + * Caveats: + * - changes in binary files can be ignored; + * - if a line content begins with '+' or '-' it will be skipped + * it means that if you store diffs in repository and "git show" will display diff-of-diff for you, + * it won't be processed correctly; + * - we expect some specific format of the diff; but it may actually depend on git config; + * - non-ASCII file names are not processed correctly (they will not be found and will be ignored). + */ +void processDiffs( + ReadBuffer & in, + std::optional size_limit, + Commit & commit, + CommitDiff & file_changes) +{ + std::string old_file_path; + std::string new_file_path; + FileDiff * file_change_and_line_changes = nullptr; + LineChange line_change; + + /// Diffs for every file in form of + /// --- a/src/Storages/StorageReplicatedMergeTree.cpp + /// +++ b/src/Storages/StorageReplicatedMergeTree.cpp + /// @@ -1387,2 +1387 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) + /// - table_lock, entry.create_time, reserved_space, entry.deduplicate, + /// - entry.force_ttl); + /// + table_lock, entry.create_time, reserved_space, entry.deduplicate); + + size_t diff_size = 0; + while (!in.eof()) + { + if (checkString("@@ ", in)) + { + if (!file_change_and_line_changes) + { + auto file_name = new_file_path.empty() ? old_file_path : new_file_path; + auto it = file_changes.find(file_name); + if (file_changes.end() != it) + file_change_and_line_changes = &it->second; + } + + if (file_change_and_line_changes) + { + uint32_t old_lines = 1; + uint32_t new_lines = 1; + + assertChar('-', in); + readText(line_change.hunk_start_line_number_old, in); + if (checkChar(',', in)) + readText(old_lines, in); + + assertString(" +", in); + readText(line_change.hunk_start_line_number_new, in); + if (checkChar(',', in)) + readText(new_lines, in); + + /// This is needed to simplify the logic of updating snapshot: + /// When all lines are removed we can treat it as repeated removal of line with number 1. 
+ if (line_change.hunk_start_line_number_new == 0) + line_change.hunk_start_line_number_new = 1; + + assertString(" @@", in); + if (checkChar(' ', in)) + readStringUntilNextLine(line_change.hunk_context, in); + else + assertChar('\n', in); + + line_change.hunk_lines_added = new_lines; + line_change.hunk_lines_deleted = old_lines; + + ++line_change.hunk_num; + line_change.line_number_old = line_change.hunk_start_line_number_old; + line_change.line_number_new = line_change.hunk_start_line_number_new; + + if (old_lines && new_lines) + { + ++commit.hunks_changed; + ++file_change_and_line_changes->file_change.hunks_changed; + } + else if (old_lines) + { + ++commit.hunks_removed; + ++file_change_and_line_changes->file_change.hunks_removed; + } + else if (new_lines) + { + ++commit.hunks_added; + ++file_change_and_line_changes->file_change.hunks_added; + } + } + } + else if (checkChar('-', in)) + { + if (checkString("-- ", in)) + { + if (checkString("a/", in)) + { + readStringUntilNextLine(old_file_path, in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else if (checkString("/dev/null", in)) + { + old_file_path.clear(); + assertChar('\n', in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else + skipUntilNextLine(in); /// Actually it can be the line in diff. Skip it for simplicity. + } + else + { + ++diff_size; + if (file_change_and_line_changes) + { + ++commit.lines_deleted; + ++file_change_and_line_changes->file_change.lines_deleted; + + line_change.sign = -1; + readStringUntilNextLine(line_change.line, in); + line_change.setLineInfo(line_change.line); + + file_change_and_line_changes->line_changes.push_back(line_change); + ++line_change.line_number_old; + } + } + } + else if (checkChar('+', in)) + { + if (checkString("++ ", in)) + { + if (checkString("b/", in)) + { + readStringUntilNextLine(new_file_path, in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else if (checkString("/dev/null", in)) + { + new_file_path.clear(); + assertChar('\n', in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else + skipUntilNextLine(in); /// Actually it can be the line in diff. Skip it for simplicity. + } + else + { + ++diff_size; + if (file_change_and_line_changes) + { + ++commit.lines_added; + ++file_change_and_line_changes->file_change.lines_added; + + line_change.sign = 1; + readStringUntilNextLine(line_change.line, in); + line_change.setLineInfo(line_change.line); + + file_change_and_line_changes->line_changes.push_back(line_change); + ++line_change.line_number_new; + } + } + } + else + { + /// Unknown lines are ignored. + skipUntilNextLine(in); + } + + if (size_limit && diff_size > *size_limit) + return; + } +} + + +/** Process the "git show" result for a single commit. Append the result to tables. 
+ */ +void processCommit( + ReadBuffer & in, + const Options & options, + size_t commit_num, + size_t total_commits, + std::string hash, + Snapshot & snapshot, + DiffHashes & diff_hashes, + ResultWriter & result) +{ + Commit commit; + commit.hash = hash; + + time_t commit_time; + readText(commit_time, in); + commit.time = commit_time; + assertChar('\0', in); + readNullTerminated(commit.author, in); + std::string parent_hash; + readNullTerminated(parent_hash, in); + readNullTerminated(commit.message, in); + + if (options.skip_commits_with_messages && re2_st::RE2::PartialMatch(commit.message, *options.skip_commits_with_messages)) + return; + + std::string message_to_print = commit.message; + std::replace_if(message_to_print.begin(), message_to_print.end(), [](char c){ return std::iscntrl(c); }, ' '); + + std::cerr << fmt::format("{}% {} {} {}\n", + commit_num * 100 / total_commits, toString(commit.time), hash, message_to_print); + + if (options.skip_commits_without_parents && commit_num != 0 && parent_hash.empty()) + { + std::cerr << "Warning: skipping commit without parents\n"; + return; + } + + if (!in.eof()) + assertChar('\n', in); + + CommitDiff file_changes; + processFileChanges(in, options, commit, file_changes); + + if (!in.eof()) + { + assertChar('\n', in); + processDiffs(in, commit_num != 0 ? options.diff_size_limit : std::nullopt, commit, file_changes); + } + + /// Skip commits with too large diffs. + if (options.diff_size_limit && commit_num != 0 && commit.lines_added + commit.lines_deleted > *options.diff_size_limit) + return; + + /// Calculate hash of diff and skip duplicates + if (options.skip_commits_with_duplicate_diffs && !diff_hashes.insert(diffHash(file_changes)).second) + return; + + /// Update snapshot and blame info + updateSnapshot(snapshot, commit, file_changes); + + /// Write the result + result.appendCommit(commit, file_changes); +} + + +/** Runs child process and allows to read the result. + * Multiple processes can be run for parallel processing. + */ auto gitShow(const std::string & hash) { std::string command = fmt::format( @@ -979,9 +1076,11 @@ auto gitShow(const std::string & hash) } +/** Obtain the list of commits and process them. + */ void processLog(const Options & options) { - Result result; + ResultWriter result; std::string command = "git log --reverse --no-merges --pretty=%H"; fmt::print("{}\n", command); @@ -1019,7 +1118,7 @@ void processLog(const Options & options) for (size_t i = 0; i < num_commits; ++i) { - processCommit(show_commands[i % num_threads], options, i, num_commits, hashes[i], snapshot, diff_hashes, result); + processCommit(show_commands[i % num_threads]->out, options, i, num_commits, hashes[i], snapshot, diff_hashes, result); if (!options.stop_after_commit.empty() && hashes[i] == options.stop_after_commit) break; From 94d49e4197b443a6bced0ac0d137ad646c1c1946 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Sep 2020 03:18:15 +0300 Subject: [PATCH 077/298] Minor modifications --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index 6e43853d6ba..2add6813008 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -120,7 +120,9 @@ CREATE TABLE git.line_changes commit_hunks_changed UInt32 ) ENGINE = MergeTree ORDER BY time; -Insert the data with the following commands: +Run the tool. 
+
+Then insert the data with the following commands:
 
 clickhouse-client --query "INSERT INTO git.commits FORMAT TSV" < commits.tsv
 clickhouse-client --query "INSERT INTO git.file_changes FORMAT TSV" < file_changes.tsv
 clickhouse-client --query "INSERT INTO git.line_changes FORMAT TSV" < line_changes.tsv
From 47ca6211604c6fcb7b2c4e137d739ebff88da975 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 7 Sep 2020 03:25:06 +0300
Subject: [PATCH 078/298] Minor modifications

---
 utils/git-to-clickhouse/git-to-clickhouse.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp
index 2add6813008..875da3ba0ac 100644
--- a/utils/git-to-clickhouse/git-to-clickhouse.cpp
+++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp
@@ -993,7 +993,12 @@ void processDiffs(
 }
 
 if (size_limit && diff_size > *size_limit)
+ {
+ /// Drain to avoid "broken pipe" error in child process.
+ while (!in.eof())
+ in.ignore(in.available());
 return;
+ }
 }
 }
From 6e0afbecf4fd0ccd04e9dbb82bff6a507545e8d1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 7 Sep 2020 04:02:35 +0300
Subject: [PATCH 079/298] Minor modifications

---
 utils/git-to-clickhouse/git-to-clickhouse.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp
index 875da3ba0ac..b5488b0d69a 100644
--- a/utils/git-to-clickhouse/git-to-clickhouse.cpp
+++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp
@@ -138,6 +138,7 @@ namespace DB
 namespace ErrorCodes
 {
 extern const int INCORRECT_DATA;
+ extern const int CHILD_WAS_NOT_EXITED_NORMALLY;
 }
@@ -994,9 +995,6 @@
 if (size_limit && diff_size > *size_limit)
 {
- /// Drain to avoid "broken pipe" error in child process.
- while (!in.eof())
- in.ignore(in.available());
 return;
 }
 }
@@ -1127,6 +1125,19 @@ void processLog(const Options & options)
 {
 processCommit(show_commands[i % num_threads]->out, options, i, num_commits, hashes[i], snapshot, diff_hashes, result);
+ try
+ {
+ show_commands[i % num_threads]->wait();
+ }
+ catch (const Exception & e)
+ {
+ /// For broken pipe when we stopped reading prematurely.
+ if (e.code() == ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY)
+ std::cerr << getCurrentExceptionMessage(false) << "\n";
+ else
+ throw;
+ }
+
 if (!options.stop_after_commit.empty() && hashes[i] == options.stop_after_commit)
 break;
From 69ce9e1f7020df985d7ea6ee450bf0d4b3438a0d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 7 Sep 2020 05:36:54 +0300
Subject: [PATCH 080/298] More documentation

---
 utils/git-to-clickhouse/git-to-clickhouse.cpp | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp
index b5488b0d69a..d3b6f77d3d7 100644
--- a/utils/git-to-clickhouse/git-to-clickhouse.cpp
+++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp
@@ -27,6 +27,51 @@ static constexpr auto documentation = R"(
+A tool to extract information from a Git repository for analytics.
+
+It dumps the data for the following tables:
+- commits - commits with statistics;
+- file_changes - files changed in every commit, with the info about the change and statistics;
+- line_changes - every changed line in every changed file in every commit, with full info about the line and the information about the previous change of this line.
+
+The largest and the most important table is "line_changes".
+
+Allows to answer questions like:
+- list files with the maximum number of authors;
+- show the oldest lines of code in the repository;
+- show the files with the longest history;
+- list favorite files for an author;
+- list the largest files with the lowest number of authors;
+- on which weekday the code has the highest chance to stay in the repository;
+- the distribution of code age across the repository;
+- files sorted by average code age;
+- quickly show a file with blame info (rough);
+- the distribution of commits and lines of code by time; by weekday, by author; for specific subdirectories;
+- show the history of every subdirectory, file, or line of a file: the number of changes (lines and commits) over time; how the number of contributors has changed over time;
+- list files with the most modifications;
+- list files that were rewritten the most times or by the most authors;
+- the percentage of code removed by other authors, across authors;
+- the matrix of authors that shows which authors tend to rewrite the code of other authors;
+- the worst time to write code, in the sense that the code has the highest chance to be rewritten;
+- the average time before code is rewritten and the median (the half-life of code decay);
+- the change of the comments/code percentage over time / by author / by location;
+- who tends to write more tests / C++ code / comments.
+
+The data is intended for analytical purposes. It can be imprecise for many reasons, but it should be good enough for its purpose.
+
+The data is not intended to provide any conclusions for managers, and it is especially contraindicated for any kind of "performance review". Instead, you can spend multiple days looking at various interesting statistics.
+
+Run this tool inside your git repository. It will create .tsv files that can be loaded into ClickHouse (or into another DBMS if you dare).
+
+The tool can process large enough repositories in a reasonable time.
+It has been tested on:
+- ClickHouse: 31 seconds; 3 million rows;
+- LLVM: 8 minutes; 62 million rows;
+- Linux: 12 minutes; 85 million rows;
+- Chromium: 67 minutes; 343 million rows;
+(the numbers as of Sep 2020)
+
+
 Prepare the database by executing the following queries:
 
 DROP DATABASE IF EXISTS git;
From 1dc48f66710c5a93e5376320ea7cf3c4a18046d5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 7 Sep 2020 05:39:08 +0300
Subject: [PATCH 081/298] Better help

---
 utils/git-to-clickhouse/git-to-clickhouse.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp
index d3b6f77d3d7..6ef82ac3b6b 100644
--- a/utils/git-to-clickhouse/git-to-clickhouse.cpp
+++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp
@@ -13,6 +13,7 @@
 #include
+#include <Common/TerminalSize.h>
 #include
 #include
 #include
@@ -1199,7 +1200,7 @@ try
 {
 using namespace DB;
- po::options_description desc("Allowed options");
+ po::options_description desc("Allowed options", getTerminalWidth());
 desc.add_options()
 ("help,h", "produce help message")
 ("skip-commits-without-parents", po::value()->default_value(true),
From 1400bdbf83c9ebf6e63eeda73966b7e7c0210d80 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 7 Sep 2020 06:11:35 +0300
Subject: [PATCH 082/298] Fix unit tests

---
 src/Common/ShellCommand.cpp | 23 +++++++++++++++----
 utils/git-to-clickhouse/git-to-clickhouse.cpp | 13 -----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp
index 127f95fef06..bbb8801f190 100644
--- a/src/Common/ShellCommand.cpp
+++ b/src/Common/ShellCommand.cpp
@@ -57,7 +57,16 @@ ShellCommand::~ShellCommand()
 LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString(retcode));
 }
 else if (!wait_called)
- tryWait();
+ {
+ try
+ {
+ tryWait();
+ }
+ catch (...)
+ {
+ tryLogCurrentException(getLogger());
+ }
+ }
 }
 
 void ShellCommand::logCommand(const char * filename, char * const argv[])
@@ -74,7 +83,8 @@ void ShellCommand::logCommand(const char * filename, char * const argv[])
 LOG_TRACE(ShellCommand::getLogger(), "Will start shell command '{}' with arguments {}", filename, args.str());
 }
 
-std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor)
+std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
+ const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor)
 {
 logCommand(filename, argv);
 
@@ -130,7 +140,8 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, c
 _exit(int(ReturnCodes::CANNOT_EXEC));
 }
 
- std::unique_ptr<ShellCommand> res(new ShellCommand(pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor));
+ std::unique_ptr<ShellCommand> res(new ShellCommand(
+ pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor));
 
 LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid);
 
@@ -143,7 +154,8 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, c
 }
 
 
-std::unique_ptr<ShellCommand> ShellCommand::execute(const std::string & command, bool pipe_stdin_only, bool terminate_in_destructor)
+std::unique_ptr<ShellCommand> ShellCommand::execute(
+ const std::string & command, bool pipe_stdin_only, bool terminate_in_destructor)
 {
 /// Arguments in non-constant chunks of memory (as required for `execv`).
 /// Moreover, their copying must be done before calling `vfork`, so after `vfork` do a minimum of things.
@@ -157,7 +169,8 @@ std::unique_ptr<ShellCommand> ShellCommand::execute(const std::string & command,
 }
 
 
-std::unique_ptr<ShellCommand> ShellCommand::executeDirect(const std::string & path, const std::vector<std::string> & arguments, bool terminate_in_destructor)
+std::unique_ptr<ShellCommand> ShellCommand::executeDirect(
+ const std::string & path, const std::vector<std::string> & arguments, bool terminate_in_destructor)
 {
 size_t argv_sum_size = path.size() + 1;
 for (const auto & arg : arguments)
diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp
index 6ef82ac3b6b..a081efa3f47 100644
--- a/utils/git-to-clickhouse/git-to-clickhouse.cpp
+++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp
@@ -1171,19 +1171,6 @@ void processLog(const Options & options)
 {
 processCommit(show_commands[i % num_threads]->out, options, i, num_commits, hashes[i], snapshot, diff_hashes, result);
 
- try
- {
- show_commands[i % num_threads]->wait();
- }
- catch (const Exception & e)
- {
- /// For broken pipe when we stopped reading prematurely.
- if (e.code() == ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY)
- std::cerr << getCurrentExceptionMessage(false) << "\n";
- else
- throw;
- }
-
 if (!options.stop_after_commit.empty() && hashes[i] == options.stop_after_commit)
 break;
 
From d18e7adbc03e4e7d7ee268e8f90a14e73be7b021 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 7 Sep 2020 06:22:47 +0300
Subject: [PATCH 083/298] Add git-import as a tool

---
 programs/CMakeLists.txt | 18 ++++++++++++++----
 programs/config_tools.h.in | 1 +
 programs/git-import/CMakeLists.txt | 10 ++++++++++
 programs/git-import/clickhouse-git-import.cpp | 2 ++
 .../git-import/git-import.cpp | 4 ++--
 programs/install/Install.cpp | 1 +
 programs/main.cpp | 6 ++++++
 utils/CMakeLists.txt | 1 -
 utils/git-to-clickhouse/CMakeLists.txt | 2 --
 9 files changed, 36 insertions(+), 9 deletions(-)
 create mode 100644 programs/git-import/CMakeLists.txt
 create mode 100644 programs/git-import/clickhouse-git-import.cpp
 rename utils/git-to-clickhouse/git-to-clickhouse.cpp => programs/git-import/git-import.cpp (99%)
 delete mode 100644 utils/git-to-clickhouse/CMakeLists.txt

diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 89220251cda..ae4a72ef62a 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -16,6 +16,7 @@ option (ENABLE_CLICKHOUSE_COMPRESSOR "Enable clickhouse-compressor" ${ENABLE_CLI
 option (ENABLE_CLICKHOUSE_COPIER "Enable clickhouse-copier" ${ENABLE_CLICKHOUSE_ALL})
 option (ENABLE_CLICKHOUSE_FORMAT "Enable clickhouse-format" ${ENABLE_CLICKHOUSE_ALL})
 option (ENABLE_CLICKHOUSE_OBFUSCATOR "Enable clickhouse-obfuscator" ${ENABLE_CLICKHOUSE_ALL})
+option (ENABLE_CLICKHOUSE_GIT_IMPORT "Enable clickhouse-git-import" ${ENABLE_CLICKHOUSE_ALL})
 option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "Enable clickhouse-odbc-bridge" ${ENABLE_CLICKHOUSE_ALL})
 
 if (CLICKHOUSE_SPLIT_BINARY)
@@ -91,21 +92,22 @@ add_subdirectory (copier)
 add_subdirectory (format)
 add_subdirectory (obfuscator)
 add_subdirectory (install)
+add_subdirectory (git-import)
 
 if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
 add_subdirectory (odbc-bridge)
 endif ()
 
 if (CLICKHOUSE_ONE_SHARED)
- add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES}
${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) - target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) - target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) + add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) + target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) + target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endif() if (CLICKHOUSE_SPLIT_BINARY) - set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-copier) + set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-git-import clickhouse-copier) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) @@ -149,6 +151,9 @@ else () if (ENABLE_CLICKHOUSE_OBFUSCATOR) clickhouse_target_link_split_lib(clickhouse obfuscator) endif () + if (ENABLE_CLICKHOUSE_GIT_IMPORT) + clickhouse_target_link_split_lib(clickhouse git-import) + endif () if (ENABLE_CLICKHOUSE_INSTALL) clickhouse_target_link_split_lib(clickhouse install) endif () @@ -199,6 +204,11 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) endif () + if (ENABLE_CLICKHOUSE_GIT_IMPORT) + add_custom_target (clickhouse-git-import ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-git-import DEPENDS clickhouse) + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + 
list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import) + endif () if(ENABLE_CLICKHOUSE_ODBC_BRIDGE) list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) endif() diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index 11386aca60e..7cb5a6d883a 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -12,5 +12,6 @@ #cmakedefine01 ENABLE_CLICKHOUSE_COMPRESSOR #cmakedefine01 ENABLE_CLICKHOUSE_FORMAT #cmakedefine01 ENABLE_CLICKHOUSE_OBFUSCATOR +#cmakedefine01 ENABLE_CLICKHOUSE_GIT_IMPORT #cmakedefine01 ENABLE_CLICKHOUSE_INSTALL #cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE diff --git a/programs/git-import/CMakeLists.txt b/programs/git-import/CMakeLists.txt new file mode 100644 index 00000000000..279bb35a272 --- /dev/null +++ b/programs/git-import/CMakeLists.txt @@ -0,0 +1,10 @@ +set (CLICKHOUSE_GIT_IMPORT_SOURCES git-import.cpp) + +set (CLICKHOUSE_GIT_IMPORT_LINK + PRIVATE + boost::program_options + dbms +) + +clickhouse_program_add(git-import) + diff --git a/programs/git-import/clickhouse-git-import.cpp b/programs/git-import/clickhouse-git-import.cpp new file mode 100644 index 00000000000..cfa06306604 --- /dev/null +++ b/programs/git-import/clickhouse-git-import.cpp @@ -0,0 +1,2 @@ +int mainEntryClickHouseGitImport(int argc, char ** argv); +int main(int argc_, char ** argv_) { return mainEntryClickHouseGitImport(argc_, argv_); } diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/programs/git-import/git-import.cpp similarity index 99% rename from utils/git-to-clickhouse/git-to-clickhouse.cpp rename to programs/git-import/git-import.cpp index a081efa3f47..f1ed4d28c6e 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/programs/git-import/git-import.cpp @@ -1182,7 +1182,7 @@ void processLog(const Options & options) } -int main(int argc, char ** argv) +int mainEntryClickHouseGitImport(int argc, char ** argv) try { using namespace DB; @@ -1219,7 +1219,7 @@ try << "Usage: " << argv[0] << '\n' << desc << '\n' << "\nExample:\n" - << "\n./git-to-clickhouse --skip-paths 'generated\\.cpp|^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/' --skip-commits-with-messages '^Merge branch '\n"; + << "\nclickhouse git-import --skip-paths 'generated\\.cpp|^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/' --skip-commits-with-messages '^Merge branch '\n"; return 1; } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 7b7ab149447..bd60fbb63ba 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -205,6 +205,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) "clickhouse-benchmark", "clickhouse-copier", "clickhouse-obfuscator", + "clickhouse-git-import", "clickhouse-compressor", "clickhouse-format", "clickhouse-extract-from-config" diff --git a/programs/main.cpp b/programs/main.cpp index 3df5f9f683b..b91bd732f21 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -46,6 +46,9 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv); #if ENABLE_CLICKHOUSE_OBFUSCATOR int mainEntryClickHouseObfuscator(int argc, char ** argv); #endif +#if ENABLE_CLICKHOUSE_GIT_IMPORT +int mainEntryClickHouseGitImport(int argc, char ** argv); +#endif #if ENABLE_CLICKHOUSE_INSTALL int mainEntryClickHouseInstall(int argc, char ** argv); int mainEntryClickHouseStart(int argc, char ** 
argv); @@ -91,6 +94,9 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_OBFUSCATOR {"obfuscator", mainEntryClickHouseObfuscator}, #endif +#if ENABLE_CLICKHOUSE_GIT_IMPORT + {"git-import", mainEntryClickHouseGitImport}, +#endif #if ENABLE_CLICKHOUSE_INSTALL {"install", mainEntryClickHouseInstall}, {"start", mainEntryClickHouseStart}, diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 93490fba565..b4408a298c3 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -30,7 +30,6 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (checksum-for-compressed-block) add_subdirectory (db-generator) add_subdirectory (wal-dump) - add_subdirectory (git-to-clickhouse) endif () if (ENABLE_CODE_QUALITY) diff --git a/utils/git-to-clickhouse/CMakeLists.txt b/utils/git-to-clickhouse/CMakeLists.txt deleted file mode 100644 index 0e46b68d471..00000000000 --- a/utils/git-to-clickhouse/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_executable (git-to-clickhouse git-to-clickhouse.cpp) -target_link_libraries(git-to-clickhouse PRIVATE dbms boost::program_options) From ee54971c3d26ca1219da4909bd30f44bee77fd97 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Sep 2020 07:11:03 +0300 Subject: [PATCH 084/298] Fix build --- programs/git-import/git-import.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index f1ed4d28c6e..d314969a1a8 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -184,7 +184,6 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; - extern const int CHILD_WAS_NOT_EXITED_NORMALLY; } @@ -419,7 +418,7 @@ using LineChanges = std::vector; struct FileDiff { - FileDiff(FileChange file_change_) : file_change(file_change_) {} + explicit FileDiff(FileChange file_change_) : file_change(file_change_) {} FileChange file_change; LineChanges line_changes; @@ -546,7 +545,7 @@ struct Options std::optional diff_size_limit; std::string stop_after_commit; - Options(const po::variables_map & options) + explicit Options(const po::variables_map & options) { skip_commits_without_parents = options["skip-commits-without-parents"].as(); skip_commits_with_duplicate_diffs = options["skip-commits-with-duplicate-diffs"].as(); @@ -753,7 +752,7 @@ UInt128 diffHash(const CommitDiff & file_changes) { SipHash hasher; - for (auto & elem : file_changes) + for (const auto & elem : file_changes) { hasher.update(elem.second.file_change.change_type); hasher.update(elem.second.file_change.old_path.size()); @@ -762,7 +761,7 @@ UInt128 diffHash(const CommitDiff & file_changes) hasher.update(elem.second.file_change.path); hasher.update(elem.second.line_changes.size()); - for (auto & line_change : elem.second.line_changes) + for (const auto & line_change : elem.second.line_changes) { hasher.update(line_change.sign); hasher.update(line_change.line_number_old); @@ -1159,6 +1158,8 @@ void processLog(const Options & options) /// Will run multiple processes in parallel size_t num_threads = options.threads; + if (num_threads == 0) + throw Exception("num-threads cannot be zero", ErrorCodes::INCORRECT_DATA); std::vector> show_commands(num_threads); for (size_t i = 0; i < num_commits && i < num_threads; ++i) @@ -1223,7 +1224,7 @@ try return 1; } - processLog(options); + processLog(Options(options)); return 0; } catch (...) 
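To illustrate what the tool enables once the three .tsv files are loaded into the git database, here is a minimal sketch of a query for the first question from the documentation above ("list files with the maximum number of authors"). It assumes only that git.file_changes has `path` and `author` columns, which follows from ResultWriter::appendCommit writing the FileChange fields followed by the Commit fields for every row:

```sql
-- A minimal sketch, assuming git.file_changes is populated from file_changes.tsv
-- and contains `path` and `author` columns as described above.
SELECT
    path,
    uniq(author) AS authors
FROM git.file_changes
GROUP BY path
ORDER BY authors DESC
LIMIT 10
```

Here uniq is ClickHouse's approximate distinct-count aggregate; uniqExact can be substituted when exact counts are needed.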
From 04a69650068c3ff5967f3639c55082dbd34017cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Sep 2020 09:40:01 +0300 Subject: [PATCH 085/298] Fix error with executable dictionary source --- docker/test/fasttest/run.sh | 3 +- docker/test/stateless/run.sh | 1 + docker/test/stateless_unbundled/run.sh | 1 + docker/test/stateless_with_coverage/run.sh | 1 + src/Common/tests/CMakeLists.txt | 3 + src/Common/tests/shell_command_inout.cpp | 47 +++++++ .../ExecutableDictionarySource.cpp | 119 ++++++++++-------- src/Dictionaries/ExecutableDictionarySource.h | 1 + tests/config/executable_dictionary.xml | 108 ++++++++++++++++ .../01474_executable_dictionary.reference | 3 + .../01474_executable_dictionary.sql | 3 + 11 files changed, 240 insertions(+), 50 deletions(-) create mode 100644 src/Common/tests/shell_command_inout.cpp create mode 100644 tests/config/executable_dictionary.xml create mode 100644 tests/queries/0_stateless/01474_executable_dictionary.reference create mode 100644 tests/queries/0_stateless/01474_executable_dictionary.sql diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 1f8d612a125..9f5a9b05219 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -11,7 +11,7 @@ stage=${stage:-} # A variable to pass additional flags to CMake. # Here we explicitly default it to nothing so that bash doesn't complain about -# it being undefined. Also read it as array so that we can pass an empty list +# it being undefined. Also read it as array so that we can pass an empty list # of additional variable to cmake properly, and it doesn't generate an extra # empty parameter. read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" @@ -128,6 +128,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/ +ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/ #ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 2ff15ca9c6a..4a9ad891883 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -24,6 +24,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/ +ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/ diff --git a/docker/test/stateless_unbundled/run.sh b/docker/test/stateless_unbundled/run.sh index 2ff15ca9c6a..4a9ad891883 100755 --- a/docker/test/stateless_unbundled/run.sh +++ b/docker/test/stateless_unbundled/run.sh @@ -24,6 +24,7 @@ ln -s 
/usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/ +ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/ diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 64317ee62fd..c3ccb18659b 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -57,6 +57,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/ +ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/ diff --git a/src/Common/tests/CMakeLists.txt b/src/Common/tests/CMakeLists.txt index f6c232cdd22..8de9424e044 100644 --- a/src/Common/tests/CMakeLists.txt +++ b/src/Common/tests/CMakeLists.txt @@ -84,3 +84,6 @@ target_link_libraries (procfs_metrics_provider_perf PRIVATE clickhouse_common_io add_executable (average average.cpp) target_link_libraries (average PRIVATE clickhouse_common_io) + +add_executable (shell_command_inout shell_command_inout.cpp) +target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) diff --git a/src/Common/tests/shell_command_inout.cpp b/src/Common/tests/shell_command_inout.cpp new file mode 100644 index 00000000000..615700cd042 --- /dev/null +++ b/src/Common/tests/shell_command_inout.cpp @@ -0,0 +1,47 @@ +#include + +#include +#include + +#include +#include +#include + +/** This example shows how we can proxy stdin to ShellCommand and obtain stdout in streaming fashion. */ + +int main(int argc, char ** argv) +try +{ + using namespace DB; + + if (argc < 2) + { + std::cerr << "Usage: shell_command_inout 'command...' < in > out\n"; + return 1; + } + + auto command = ShellCommand::execute(argv[1]); + + ReadBufferFromFileDescriptor in(STDIN_FILENO); + WriteBufferFromFileDescriptor out(STDOUT_FILENO); + WriteBufferFromFileDescriptor err(STDERR_FILENO); + + /// Background thread sends data and foreground thread receives result. + + std::thread thread([&] + { + copyData(in, command->in); + command->in.close(); + }); + + copyData(command->out, out); + copyData(command->err, err); + + thread.join(); + return 0; +} +catch (...) 
+{ + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; + throw; +} diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 918cf0732ab..74aab610e0d 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -1,12 +1,13 @@ #include "ExecutableDictionarySource.h" -#include -#include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -16,6 +17,7 @@ #include "DictionaryStructure.h" #include "registerDictionaries.h" + namespace DB { static const UInt64 max_block_size = 8192; @@ -31,15 +33,23 @@ namespace /// Owns ShellCommand and calls wait for it. class ShellCommandOwningBlockInputStream : public OwningBlockInputStream { + private: + Poco::Logger * log; public: - ShellCommandOwningBlockInputStream(const BlockInputStreamPtr & impl, std::unique_ptr own_) - : OwningBlockInputStream(std::move(impl), std::move(own_)) + ShellCommandOwningBlockInputStream(Poco::Logger * log_, const BlockInputStreamPtr & impl, std::unique_ptr command_) + : OwningBlockInputStream(std::move(impl), std::move(command_)), log(log_) { } void readSuffix() override { OwningBlockInputStream::readSuffix(); + + std::string err; + readStringUntilEOF(err, own->err); + if (!err.empty()) + LOG_ERROR(log, "Having stderr: {}", err); + own->wait(); } }; @@ -80,7 +90,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadAll() LOG_TRACE(log, "loadAll {}", toString()); auto process = ShellCommand::execute(command); auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); - return std::make_shared(input_stream, std::move(process)); + return std::make_shared(log, input_stream, std::move(process)); } BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll() @@ -95,67 +105,77 @@ BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll() LOG_TRACE(log, "loadUpdatedAll {}", command_with_update_field); auto process = ShellCommand::execute(command_with_update_field); auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); - return std::make_shared(input_stream, std::move(process)); + return std::make_shared(log, input_stream, std::move(process)); } namespace { - /** A stream, that also runs and waits for background thread - * (that will feed data into pipe to be read from the other side of the pipe). + /** A stream, that runs child process and sends data to its stdin in background thread, + * and receives data from its stdout. 
*/ class BlockInputStreamWithBackgroundThread final : public IBlockInputStream { public: BlockInputStreamWithBackgroundThread( - const BlockInputStreamPtr & stream_, std::unique_ptr && command_, std::packaged_task && task_) - : stream{stream_}, command{std::move(command_)}, task(std::move(task_)), thread([this] { - task(); - command->in.close(); - }) + const Context & context, + const std::string & format, + const Block & sample_block, + const std::string & command_str, + Poco::Logger * log_, + std::function && send_data_) + : log(log_), + command(ShellCommand::execute(command_str)), + send_data(std::move(send_data_)), + thread([this] { send_data(command->in); }) { - children.push_back(stream); + //WriteBufferFromFileDescriptor err(STDERR_FILENO); + //copyData(command->out, err); + //err.next(); + //thread.join(); + stream = context.getInputFormat(format, command->out, sample_block, max_block_size); } ~BlockInputStreamWithBackgroundThread() override { if (thread.joinable()) - { - try - { - readSuffix(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } + thread.join(); } - Block getHeader() const override { return stream->getHeader(); } + Block getHeader() const override + { + return stream->getHeader(); + } private: - Block readImpl() override { return stream->read(); } + Block readImpl() override + { + return stream->read(); + } + + void readPrefix() override + { + stream->readPrefix(); + } void readSuffix() override { - IBlockInputStream::readSuffix(); - if (!wait_called) - { - wait_called = true; - command->wait(); - } - thread.join(); - /// To rethrow an exception, if any. - task.get_future().get(); + stream->readSuffix(); + + std::string err; + readStringUntilEOF(err, command->err); + if (!err.empty()) + LOG_ERROR(log, "Having stderr: {}", err); + + command->wait(); } String getName() const override { return "WithBackgroundThread"; } + Poco::Logger * log; BlockInputStreamPtr stream; std::unique_ptr command; - std::packaged_task task; - ThreadFromGlobalPool thread; - bool wait_called = false; + std::function send_data; + mutable ThreadFromGlobalPool thread; }; } @@ -164,28 +184,29 @@ namespace BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector & ids) { LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size()); - auto process = ShellCommand::execute(command); - - auto output_stream = context.getOutputFormat(format, process->in, sample_block); - auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); return std::make_shared( - input_stream, std::move(process), std::packaged_task([output_stream, &ids]() mutable { formatIDs(output_stream, ids); })); + context, format, sample_block, command, log, + [&ids, this](WriteBufferFromFile & out) mutable + { + auto output_stream = context.getOutputFormat(format, out, sample_block); + formatIDs(output_stream, ids); + out.close(); + }); } BlockInputStreamPtr ExecutableDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) { LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size()); - auto process = ShellCommand::execute(command); - - auto output_stream = context.getOutputFormat(format, process->in, sample_block); - auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); return std::make_shared( - input_stream, std::move(process), std::packaged_task([output_stream, key_columns, &requested_rows, this]() mutable + context, format, sample_block, command, log, + 
[key_columns, &requested_rows, this](WriteBufferFromFile & out) mutable { + auto output_stream = context.getOutputFormat(format, out, sample_block); formatKeys(dict_struct, output_stream, key_columns, requested_rows); - })); + out.close(); + }); } bool ExecutableDictionarySource::isModified() const diff --git a/src/Dictionaries/ExecutableDictionarySource.h b/src/Dictionaries/ExecutableDictionarySource.h index f28d71ca5e3..b2aabf26323 100644 --- a/src/Dictionaries/ExecutableDictionarySource.h +++ b/src/Dictionaries/ExecutableDictionarySource.h @@ -14,6 +14,7 @@ namespace DB /// Allows loading dictionaries from executable class ExecutableDictionarySource final : public IDictionarySource { + friend class BlockInputStreamWithBackgroundThread; public: ExecutableDictionarySource( const DictionaryStructure & dict_struct_, diff --git a/tests/config/executable_dictionary.xml b/tests/config/executable_dictionary.xml new file mode 100644 index 00000000000..50df32e2ec6 --- /dev/null +++ b/tests/config/executable_dictionary.xml @@ -0,0 +1,108 @@ + + + + executable_complex + + + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'x UInt64, y UInt64' --query "SELECT x, y, x + y AS a, x * y AS b FROM table" + + + 0 + + + 1000 + + + + + + x + UInt64 + + + y + UInt64 + + + + a + UInt64 + 0 + + + b + UInt64 + 0 + + + + + + executable_simple + + + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'x UInt64' --query "SELECT x, x + x AS a, x * x AS b FROM table" + + + 0 + + + 1000 + + + + + x + + + a + UInt64 + 0 + + + b + UInt64 + 0 + + + + + + executable_complex_direct + + + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'x UInt64, y UInt64' --query "SELECT x, y, x + y AS a, x * y AS b FROM table" + + + 0 + + + + + + + x + UInt64 + + + y + UInt64 + + + + a + UInt64 + 0 + + + b + UInt64 + 0 + + + + + diff --git a/tests/queries/0_stateless/01474_executable_dictionary.reference b/tests/queries/0_stateless/01474_executable_dictionary.reference new file mode 100644 index 00000000000..4d0994b08c3 --- /dev/null +++ b/tests/queries/0_stateless/01474_executable_dictionary.reference @@ -0,0 +1,3 @@ +999999 1999998 999998000001 +999999 1999998 999998000001 +999999 1999998 999998000001 diff --git a/tests/queries/0_stateless/01474_executable_dictionary.sql b/tests/queries/0_stateless/01474_executable_dictionary.sql new file mode 100644 index 00000000000..727cf47f79f --- /dev/null +++ b/tests/queries/0_stateless/01474_executable_dictionary.sql @@ -0,0 +1,3 @@ +SELECT number, dictGet('executable_complex', 'a', (number, number)) AS a, dictGet('executable_complex', 'b', (number, number)) AS b FROM numbers(1000000) WHERE number = 999999; +SELECT number, dictGet('executable_complex_direct', 'a', (number, number)) AS a, dictGet('executable_complex_direct', 'b', (number, number)) AS b FROM numbers(1000000) WHERE number = 999999; +SELECT number, dictGet('executable_simple', 'a', number) AS a, dictGet('executable_simple', 'b', number) AS b FROM numbers(1000000) WHERE number = 999999; From 8dd98f74a5a5cd5c5cba804f96b3349c5f9a2e25 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Sep 2020 09:43:28 +0300 Subject: [PATCH 086/298] Remove debug output --- src/Dictionaries/ExecutableDictionarySource.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 
74aab610e0d..0709be2420a 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -128,10 +128,6 @@ namespace send_data(std::move(send_data_)), thread([this] { send_data(command->in); }) { - //WriteBufferFromFileDescriptor err(STDERR_FILENO); - //copyData(command->out, err); - //err.next(); - //thread.join(); stream = context.getInputFormat(format, command->out, sample_block, max_block_size); } From 1f0d2be17adbc292fef91d4b7703d654871cb815 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 7 Sep 2020 09:44:32 +0300 Subject: [PATCH 087/298] Update ExecutableDictionarySource.h --- src/Dictionaries/ExecutableDictionarySource.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Dictionaries/ExecutableDictionarySource.h b/src/Dictionaries/ExecutableDictionarySource.h index b2aabf26323..f28d71ca5e3 100644 --- a/src/Dictionaries/ExecutableDictionarySource.h +++ b/src/Dictionaries/ExecutableDictionarySource.h @@ -14,7 +14,6 @@ namespace DB /// Allows loading dictionaries from executable class ExecutableDictionarySource final : public IDictionarySource { - friend class BlockInputStreamWithBackgroundThread; public: ExecutableDictionarySource( const DictionaryStructure & dict_struct_, From 3903794386c32d1894fa266d760eed07419a1d54 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 7 Sep 2020 09:45:01 +0300 Subject: [PATCH 088/298] Update ExecutableDictionarySource.cpp --- src/Dictionaries/ExecutableDictionarySource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 0709be2420a..cc250727261 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -171,7 +171,7 @@ namespace BlockInputStreamPtr stream; std::unique_ptr command; std::function send_data; - mutable ThreadFromGlobalPool thread; + ThreadFromGlobalPool thread; }; } From d666d4c4497e90901e47b1c09c3f730f90f4c7c4 Mon Sep 17 00:00:00 2001 From: zhangshengyu Date: Mon, 7 Sep 2020 15:00:47 +0800 Subject: [PATCH 089/298] fix zh translate --- docs/zh/guides/apply-catboost-model.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/zh/guides/apply-catboost-model.md b/docs/zh/guides/apply-catboost-model.md index be21c372307..3657a947ad2 100644 --- a/docs/zh/guides/apply-catboost-model.md +++ b/docs/zh/guides/apply-catboost-model.md @@ -15,7 +15,7 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B" 1. [创建表](#create-table). 2. [将数据插入到表中](#insert-data-to-table). -3. [碌莽禄into拢Integrate010-68520682\](#integrate-catboost-into-clickhouse) (可选步骤)。 +3. [将CatBoost集成到ClickHouse中](#integrate-catboost-into-clickhouse) (可选步骤)。 4. [从SQL运行模型推理](#run-model-inference). 有关训练CatBoost模型的详细信息,请参阅 [培训和应用模型](https://catboost.ai/docs/features/training.html#training). @@ -119,12 +119,12 @@ FROM amazon_train +-------+ ``` -## 3. 碌莽禄into拢Integrate010-68520682\ {#integrate-catboost-into-clickhouse} +## 3. 将CatBoost集成到ClickHouse中 {#integrate-catboost-into-clickhouse} !!! 
note "注" **可选步骤。** Docker映像包含运行CatBoost和ClickHouse所需的所有内容。 -碌莽禄to拢integrate010-68520682\: +CatBoost集成到ClickHouse步骤: **1.** 构建评估库。 From 3942cc615f03ecb8e5b9e7437fdc5c57613c245d Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 7 Sep 2020 10:09:42 +0300 Subject: [PATCH 090/298] Update git-import.cpp --- programs/git-import/git-import.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index d314969a1a8..45bc47348e7 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include @@ -539,8 +539,8 @@ struct Options bool skip_commits_without_parents = true; bool skip_commits_with_duplicate_diffs = true; size_t threads = 1; - std::optional skip_paths; - std::optional skip_commits_with_messages; + std::optional skip_paths; + std::optional skip_commits_with_messages; std::unordered_set skip_commits; std::optional diff_size_limit; std::string stop_after_commit; @@ -857,7 +857,7 @@ void processFileChanges( assertChar('\n', in); - if (!(options.skip_paths && re2_st::RE2::PartialMatch(file_change.path, *options.skip_paths))) + if (!(options.skip_paths && re2::RE2::PartialMatch(file_change.path, *options.skip_paths))) { file_changes.emplace( file_change.path, @@ -1070,7 +1070,7 @@ void processCommit( readNullTerminated(parent_hash, in); readNullTerminated(commit.message, in); - if (options.skip_commits_with_messages && re2_st::RE2::PartialMatch(commit.message, *options.skip_commits_with_messages)) + if (options.skip_commits_with_messages && re2::RE2::PartialMatch(commit.message, *options.skip_commits_with_messages)) return; std::string message_to_print = commit.message; From ba70de63f83431dcb116f0be24be5c5ef0822d23 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Sep 2020 10:28:04 +0300 Subject: [PATCH 091/298] Fix diff --- .../MergeTree/MergeTreeDataWriter.cpp | 5 +-- .../01465_ttl_recompression.reference | 27 +++++++++----- .../0_stateless/01465_ttl_recompression.sql | 37 ++++++++++++++++++- 3 files changed, 55 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 607535225a2..1d7bf545009 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -235,9 +235,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & ttl_entry : move_ttl_entries) updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); - time_t current_time = time(nullptr); NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); - ReservationPtr reservation = data.reserveSpacePreferringTTLRules(expected_size, move_ttl_infos, current_time); + ReservationPtr reservation = data.reserveSpacePreferringTTLRules(expected_size, move_ttl_infos, time(nullptr)); VolumePtr volume = data.getStoragePolicy()->getVolume(0); auto new_data_part = data.createPart( @@ -308,7 +307,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. 
- auto compression_codec = data.getCompressionCodecForPart(0, new_data_part->ttl_infos, current_time); + auto compression_codec = data.global_context.chooseCompressionCodec(0, 0); const auto & index_factory = MergeTreeIndexFactory::instance(); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); diff --git a/tests/queries/0_stateless/01465_ttl_recompression.reference b/tests/queries/0_stateless/01465_ttl_recompression.reference index c03c003d5b8..40d7ed8896b 100644 --- a/tests/queries/0_stateless/01465_ttl_recompression.reference +++ b/tests/queries/0_stateless/01465_ttl_recompression.reference @@ -1,12 +1,21 @@ -CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(17)), dt + toIntervalYear(1) RECOMPRESS CODEC(LZ4HC(10))\nSETTINGS index_granularity = 8192 +CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(17)), dt + toIntervalYear(1) RECOMPRESS CODEC(LZ4HC(10))\nSETTINGS min_rows_for_wide_part = 0, index_granularity = 8192 3000 1_1_1_0 LZ4 -2_2_2_0 ZSTD(17) -3_3_3_0 LZ4HC(10) -CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(12))\nSETTINGS index_granularity = 8192 -1_1_1_0_4 LZ4 -2_2_2_0_4 ZSTD(17) -3_3_3_0_4 LZ4HC(10) +2_2_2_0 LZ4 +3_3_3_0 LZ4 +1_1_1_1 LZ4 +2_2_2_1 ZSTD(17) +3_3_3_1 LZ4HC(10) +CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalDay(1) RECOMPRESS CODEC(ZSTD(12))\nSETTINGS min_rows_for_wide_part = 0, index_granularity = 8192 1_1_1_1_4 LZ4 -2_2_2_1_4 ZSTD(12) -3_3_3_1_4 ZSTD(12) +2_2_2_1_4 ZSTD(17) +3_3_3_1_4 LZ4HC(10) +1_1_1_2_4 LZ4 +2_2_2_2_4 ZSTD(12) +3_3_3_2_4 ZSTD(12) +1_1_1_0 LZ4 +2_2_2_0 LZ4 +3_3_3_0 LZ4 +1_1_1_0_4 LZ4 +2_2_2_0_4 ZSTD(12) +3_3_3_0_4 ZSTD(12) diff --git a/tests/queries/0_stateless/01465_ttl_recompression.sql b/tests/queries/0_stateless/01465_ttl_recompression.sql index 92233f2d5cb..0683f971d5f 100644 --- a/tests/queries/0_stateless/01465_ttl_recompression.sql +++ b/tests/queries/0_stateless/01465_ttl_recompression.sql @@ -9,7 +9,8 @@ CREATE TABLE recompression_table ) ENGINE MergeTree() ORDER BY tuple() PARTITION BY key -TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), dt + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10)); +TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), dt + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10)) +SETTINGS min_rows_for_wide_part = 0; SHOW CREATE TABLE recompression_table; @@ -25,7 +26,11 @@ SELECT COUNT() FROM recompression_table; SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table' and active = 1 and database = currentDatabase() ORDER BY name; -ALTER TABLE recompression_table MODIFY TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(12)) SETTINGS mutations_sync = 2; +OPTIMIZE TABLE recompression_table FINAL; + +SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table' and active = 1 and database = currentDatabase() ORDER BY name; + +ALTER TABLE recompression_table MODIFY 
TTL dt + INTERVAL 1 DAY RECOMPRESS CODEC(ZSTD(12)) SETTINGS mutations_sync = 2;
 
 SHOW CREATE TABLE recompression_table;
 
@@ -38,3 +43,31 @@ OPTIMIZE TABLE recompression_table FINAL;
 SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table' and active = 1 and database = currentDatabase() ORDER BY name;
 
 DROP TABLE IF EXISTS recompression_table;
+
+CREATE TABLE recompression_table_compact
+(
+    dt DateTime,
+    key UInt64,
+    value String
+
+) ENGINE MergeTree()
+ORDER BY tuple()
+PARTITION BY key
+TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), dt + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10))
+SETTINGS min_rows_for_wide_part = 10000;
+
+SYSTEM STOP TTL MERGES recompression_table_compact;
+
+INSERT INTO recompression_table_compact SELECT now(), 1, toString(number) from numbers(1000);
+
+INSERT INTO recompression_table_compact SELECT now() - INTERVAL 2 MONTH, 2, toString(number) from numbers(1000, 1000);
+
+INSERT INTO recompression_table_compact SELECT now() - INTERVAL 2 YEAR, 3, toString(number) from numbers(2000, 1000);
+
+SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table_compact' and active = 1 and database = currentDatabase() ORDER BY name;
+
+ALTER TABLE recompression_table_compact MODIFY TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(12)) SETTINGS mutations_sync = 2; -- mutation affects all columns, so codec changes
+
+SELECT name, default_compression_codec FROM system.parts WHERE table = 'recompression_table_compact' and active = 1 and database = currentDatabase() ORDER BY name;
+
+DROP TABLE recompression_table_compact;

From 4c3c1cdaf3e4064a5d65a40dea5383e522e8f2ee Mon Sep 17 00:00:00 2001
From: Amos Bird
Date: Mon, 7 Sep 2020 15:24:27 +0800
Subject: [PATCH 092/298] Fix Nullable String to Enum conversion.
--- src/Functions/FunctionsConversion.h | 4 +--- .../01490_nullable_string_to_enum.reference | 1 + .../0_stateless/01490_nullable_string_to_enum.sql | 12 ++++++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01490_nullable_string_to_enum.reference create mode 100644 tests/queries/0_stateless/01490_nullable_string_to_enum.sql diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index a18139fd4c8..ffe7677afe7 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2260,9 +2260,7 @@ private: size_t nullable_pos = block.columns() - 1; nullable_col = typeid_cast(block.getByPosition(nullable_pos).column.get()); - if (!nullable_col) - throw Exception("Last column should be ColumnNullable", ErrorCodes::LOGICAL_ERROR); - if (col && nullable_col->size() != col->size()) + if (col && nullable_col && nullable_col->size() != col->size()) throw Exception("ColumnNullable is not compatible with original", ErrorCodes::LOGICAL_ERROR); } diff --git a/tests/queries/0_stateless/01490_nullable_string_to_enum.reference b/tests/queries/0_stateless/01490_nullable_string_to_enum.reference new file mode 100644 index 00000000000..ce013625030 --- /dev/null +++ b/tests/queries/0_stateless/01490_nullable_string_to_enum.reference @@ -0,0 +1 @@ +hello diff --git a/tests/queries/0_stateless/01490_nullable_string_to_enum.sql b/tests/queries/0_stateless/01490_nullable_string_to_enum.sql new file mode 100644 index 00000000000..e0624af4a7a --- /dev/null +++ b/tests/queries/0_stateless/01490_nullable_string_to_enum.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t_enum; +DROP TABLE IF EXISTS t_source; + +CREATE TABLE t_enum(x Enum8('hello' = 1, 'world' = 2)) ENGINE = TinyLog; +CREATE TABLE t_source(x Nullable(String)) ENGINE = TinyLog; + +INSERT INTO t_source (x) VALUES ('hello'); +INSERT INTO t_enum(x) SELECT x from t_source WHERE x in ('hello', 'world'); +SELECT * FROM t_enum; + +DROP TABLE IF EXISTS t_enum; +DROP TABLE IF EXISTS t_source; From f274ffc9d15f446048e197d33d482ff0869684e7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Sep 2020 10:59:14 +0300 Subject: [PATCH 093/298] Add comments --- src/DataStreams/TTLBlockInputStream.h | 3 +++ src/Storages/MergeTree/MergeSelector.h | 3 +++ src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeData.h | 2 ++ .../MergeTree/MergeTreeDataMergerMutator.cpp | 8 +++++--- .../MergeTree/MergeTreeDataMergerMutator.h | 4 ++-- .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h | 6 +++++- src/Storages/MergeTree/MergeTreePartsMover.cpp | 2 +- src/Storages/MergeTree/TTLMergeSelector.cpp | 2 +- src/Storages/MergeTree/TTLMergeSelector.h | 15 +++++++++++++++ 11 files changed, 40 insertions(+), 11 deletions(-) diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h index 774b413ed1b..1d3b69f61c5 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/DataStreams/TTLBlockInputStream.h @@ -76,11 +76,14 @@ private: /// Finalize agg_result into result_columns void finalizeAggregates(MutableColumns & result_columns); + /// Execute description expressions on block and update ttl's in + /// ttl_info_map with expression results. void updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map); /// Updates TTL for moves void updateMovesTTL(Block & block); + /// Update values for recompression TTL using data from block. 
void updateRecompressionTTL(Block & block); UInt32 getTimestampByIndex(const IColumn * column, size_t ind); diff --git a/src/Storages/MergeTree/MergeSelector.h b/src/Storages/MergeTree/MergeSelector.h index bc2dc81c486..9c043005312 100644 --- a/src/Storages/MergeTree/MergeSelector.h +++ b/src/Storages/MergeTree/MergeSelector.h @@ -42,8 +42,11 @@ public: /// Opaque pointer to avoid dependencies (it is not possible to do forward declaration of typedef). const void * data; + /// Information about different TTLs for part. Can be used by + /// TTLSelector to assign merges with TTL. MergeTreeDataPartTTLInfos ttl_infos; + /// Part compression codec definition. ASTPtr compression_codec_desc; }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8ba00f29d9d..9f00fee070e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2978,7 +2978,7 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_ auto metadata_snapshot = getInMemoryMetadataPtr(); ReservationPtr reservation; - auto ttl_entry = selectTTLEntryForTTLInfos(metadata_snapshot->getMoveTTLs(), ttl_infos.moves_ttl, time_of_move, true); + auto ttl_entry = selectTTLDescriptionForTTLInfos(metadata_snapshot->getMoveTTLs(), ttl_infos.moves_ttl, time_of_move, true); if (ttl_entry) { @@ -3039,7 +3039,7 @@ CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_c auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); - auto best_ttl_entry = selectTTLEntryForTTLInfos(recompression_ttl_entries, ttl_infos.recompression_ttl, current_time, false); + auto best_ttl_entry = selectTTLDescriptionForTTLInfos(recompression_ttl_entries, ttl_infos.recompression_ttl, current_time, false); if (best_ttl_entry) return CompressionCodecFactory::instance().get(best_ttl_entry->recompression_codec, {}); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c6c734f315a..82f118a4c0f 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -672,6 +672,8 @@ public: ExpressionActionsPtr getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; ExpressionActionsPtr getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; + /// Get compression codec for part according to TTL rules and + /// section from config.xml. 
CompressionCodecPtr getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const;
 
     /// Limiting parallel sends per one table, used in DataPartsExchange
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index b88d80dc41f..a8f7e265f68 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -295,6 +295,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
 
     if (metadata_snapshot->hasAnyTTL() && merge_with_ttl_allowed && !ttl_merges_blocker.isCancelled())
     {
+        /// TTL delete is preferred to recompression
         TTLDeleteMergeSelector delete_ttl_selector(
             next_delete_ttl_merge_times_by_partition,
             current_time,
@@ -303,7 +304,9 @@
         parts_to_merge = delete_ttl_selector.select(parts_ranges, max_total_size_to_merge);
         if (!parts_to_merge.empty())
+        {
             future_part.merge_type = MergeType::TTL_DELETE;
+        }
         else if (metadata_snapshot->hasAnyRecompressionTTL())
         {
             TTLRecompressMergeSelector recompress_ttl_selector(
@@ -625,6 +628,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
     if (merges_blocker.isCancelled())
         throw Exception("Cancelled merging parts", ErrorCodes::ABORTED);
 
+    /// We don't want to perform a merge assigned with TTL as a normal merge, so
+    /// throw an exception
     if (isTTLMergeType(future_part.merge_type) && ttl_merges_blocker.isCancelled())
         throw Exception("Cancelled merging parts with TTL", ErrorCodes::ABORTED);
 
@@ -669,9 +674,6 @@
     new_data_part->partition.assign(future_part.getPartition());
     new_data_part->is_temp = true;
 
-    if (isTTLMergeType(future_part.merge_type) && ttl_merges_blocker.isCancelled())
-        throw Exception("Cancelled merging parts with expired TTL", ErrorCodes::ABORTED);
-
     bool need_remove_expired_values = false;
     bool force_ttl = false;
     for (const auto & part : parts)
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
index 5f6b9246d68..96ab14ba57b 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
@@ -250,10 +250,10 @@ private:
     /// When the last time you wrote to the log that the disk space was running out (not to write about this too often).
time_t disk_space_warning_time = 0;
 
-    /// Stores the next TTL merge due time for each partition (used only by TTLMergeSelector)
+    /// Stores the next TTL delete merge due time for each partition (used only by TTLDeleteMergeSelector)
     ITTLMergeSelector::PartitionIdToTTLs next_delete_ttl_merge_times_by_partition;
 
-    /// Stores the next TTL merge due time for each partition (used only by TTLMergeSelector)
+    /// Stores the next TTL recompress merge due time for each partition (used only by TTLRecompressMergeSelector)
     ITTLMergeSelector::PartitionIdToTTLs next_recompress_ttl_merge_times_by_partition;
 
     /// Performing TTL merges independently for each partition guarantees that
     /// there is only a limited number of TTL merges and no partition stores data, that is too stale
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
index 1cecb2672fb..33ed60c225a 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
@@ -186,7 +186,7 @@ time_t MergeTreeDataPartTTLInfos::getMaxRecompressionTTL() const
 }
 
-std::optional selectTTLEntryForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max)
+std::optional selectTTLDescriptionForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max)
 {
     time_t best_ttl_time = 0;
     TTLDescriptions::const_iterator best_entry_it;
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
index 2ab571cf3ba..d9a10785738 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
@@ -49,7 +49,10 @@ struct MergeTreeDataPartTTLInfos
 
     TTLInfoMap recompression_ttl;
 
+    /// Return min recompression TTL value if any, otherwise return zero.
     time_t getMinRecompressionTTL() const;
+
+    /// Return max recompression TTL value if any, otherwise return zero.
     time_t getMaxRecompressionTTL() const;
 
     void read(ReadBuffer & in);
@@ -71,6 +74,7 @@ struct MergeTreeDataPartTTLInfos
     }
 };
 
-std::optional selectTTLEntryForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max);
+/// Selects the most appropriate TTLDescription using TTL info and current time.
+std::optional selectTTLDescriptionForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max);
 
 }
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp
index 92ea745c5df..586c4393dfb 100644
--- a/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp
@@ -130,7 +130,7 @@ bool MergeTreePartsMover::selectPartsForMove(
         if (!can_move(part, &reason))
             continue;
 
-        auto ttl_entry = selectTTLEntryForTTLInfos(metadata_snapshot->getMoveTTLs(), part->ttl_infos.moves_ttl, time_of_move, true);
+        auto ttl_entry = selectTTLDescriptionForTTLInfos(metadata_snapshot->getMoveTTLs(), part->ttl_infos.moves_ttl, time_of_move, true);
         auto to_insert = need_to_move.find(part->volume->getDisk());
         ReservationPtr reservation;
diff --git a/src/Storages/MergeTree/TTLMergeSelector.cpp b/src/Storages/MergeTree/TTLMergeSelector.cpp
index bb7c001eae1..d46eb19815a 100644
--- a/src/Storages/MergeTree/TTLMergeSelector.cpp
+++ b/src/Storages/MergeTree/TTLMergeSelector.cpp
@@ -107,7 +107,7 @@ bool TTLRecompressMergeSelector::isTTLAlreadySatisfied(const IMergeSelector::Par
     if (recompression_ttls.empty())
         return false;
 
-    auto ttl_description = selectTTLEntryForTTLInfos(recompression_ttls, part.ttl_infos.recompression_ttl, current_time, false);
+    auto ttl_description = selectTTLDescriptionForTTLInfos(recompression_ttls, part.ttl_infos.recompression_ttl, current_time, false);
 
     if (!ttl_description)
         return true;
diff --git a/src/Storages/MergeTree/TTLMergeSelector.h b/src/Storages/MergeTree/TTLMergeSelector.h
index eab4cdcd295..1d41b65f9fb 100644
--- a/src/Storages/MergeTree/TTLMergeSelector.h
+++ b/src/Storages/MergeTree/TTLMergeSelector.h
@@ -32,7 +32,12 @@ public:
         const PartsRanges & parts_ranges,
         const size_t max_total_size_to_merge) override;
 
+    /// Get the TTL value for a part; it may depend on the child type and on
+    /// settings passed to the constructor.
     virtual time_t getTTLForPart(const IMergeSelector::Part & part) const = 0;
+
+    /// Sometimes we can check that the TTL is already satisfied using
+    /// information stored in the part, and skip assigning a merge for it.
     virtual bool isTTLAlreadySatisfied(const IMergeSelector::Part & part) const = 0;
 
 protected:
@@ -44,6 +49,8 @@ private:
 };
 
+/// Select parts to merge using information about delete TTL. Depending on the
+/// only_drop_parts flag, it can use the max or min TTL value.
 class TTLDeleteMergeSelector : public ITTLMergeSelector
 {
 public:
@@ -55,6 +62,8 @@ public:
 
     time_t getTTLForPart(const IMergeSelector::Part & part) const override;
 
+    /// Delete TTL should be checked only by TTL time; there are no other ways
+    /// to satisfy it.
     bool isTTLAlreadySatisfied(const IMergeSelector::Part &) const override
     {
         return false;
@@ -64,6 +73,8 @@ private:
     bool only_drop_parts;
 };
 
+/// Select parts to merge using information about recompression TTL and
+/// compression codec of existing parts.
 class TTLRecompressMergeSelector : public ITTLMergeSelector
 {
 public:
@@ -72,8 +83,12 @@ public:
         , recompression_ttls(recompression_ttls_)
     {}
 
+    /// Return the part's min recompression TTL.
     time_t getTTLForPart(const IMergeSelector::Part & part) const override;
 
+    /// Checks that the part's codec is not already equal to the required codec
+    /// according to recompression TTL. It doesn't make sense to assign such
+    /// a merge.
bool isTTLAlreadySatisfied(const IMergeSelector::Part & part) const override; private: TTLDescriptions recompression_ttls; From 99ebab706cf70a286a5a6b5b2ac6070085f1ebf0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Sep 2020 12:02:22 +0300 Subject: [PATCH 094/298] Fix "Arcadia" --- tests/queries/0_stateless/arcadia_skip_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 707f91b0c93..16450efb26e 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -139,3 +139,4 @@ 01455_time_zones 01456_ast_optimizations_over_distributed 01460_DistributedFilesToInsert +01474_executable_dictionary From 9493532fdb1ecc20d0ed084a61f58a9b10e869f6 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 7 Sep 2020 04:05:30 +0300 Subject: [PATCH 095/298] rework test for redis dictionary --- .../runner/compose/docker_compose_redis.yml | 2 +- .../dictionary.py | 0 .../external_sources.py | 14 +- .../fake_cert.pem | 0 .../http_server.py | 0 .../test.py | 126 +------------ .../test_dictionaries_redis/__init__.py | 0 .../test_dictionaries_redis/test.py | 176 ++++++++++++++++++ 8 files changed, 184 insertions(+), 134 deletions(-) rename tests/integration/{test_dictionaries_all_layouts_and_sources => helpers}/dictionary.py (100%) rename tests/integration/{test_dictionaries_all_layouts_and_sources => helpers}/external_sources.py (97%) rename tests/integration/{test_dictionaries_all_layouts_and_sources => helpers}/fake_cert.pem (100%) rename tests/integration/{test_dictionaries_all_layouts_and_sources => helpers}/http_server.py (100%) create mode 100644 tests/integration/test_dictionaries_redis/__init__.py create mode 100644 tests/integration/test_dictionaries_redis/test.py diff --git a/docker/test/integration/runner/compose/docker_compose_redis.yml b/docker/test/integration/runner/compose/docker_compose_redis.yml index 2c9ace96d0c..72df99ec59b 100644 --- a/docker/test/integration/runner/compose/docker_compose_redis.yml +++ b/docker/test/integration/runner/compose/docker_compose_redis.yml @@ -5,4 +5,4 @@ services: restart: always ports: - 6380:6379 - command: redis-server --requirepass "clickhouse" + command: redis-server --requirepass "clickhouse" --databases 32 diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/dictionary.py b/tests/integration/helpers/dictionary.py similarity index 100% rename from tests/integration/test_dictionaries_all_layouts_and_sources/dictionary.py rename to tests/integration/helpers/dictionary.py diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/helpers/external_sources.py similarity index 97% rename from tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py rename to tests/integration/helpers/external_sources.py index fac7dcdea1e..5e8d420ff94 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/helpers/external_sources.py @@ -477,13 +477,13 @@ class SourceCassandra(ExternalSource): class SourceRedis(ExternalSource): def __init__( - self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password, storage_type + self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password, db_index, storage_type ): super(SourceRedis, self).__init__( name, internal_hostname, internal_port, 
docker_hostname, docker_port, user, password ) self.storage_type = storage_type - self.db_index = 1 + self.db_index = db_index def get_source_str(self, table_name): return ''' @@ -513,21 +513,13 @@ class SourceRedis(ExternalSource): values = [] for name in self.ordered_names: values.append(str(row.data[name])) - print 'values: ', values if len(values) == 2: self.client.set(*values) - print 'kek: ', self.client.get(values[0]) else: self.client.hset(*values) def compatible_with_layout(self, layout): - if ( - layout.is_simple and self.storage_type == "simple" or - layout.is_complex and self.storage_type == "simple" and layout.name == "complex_key_hashed_one_key" or - layout.is_complex and self.storage_type == "hash_map" and layout.name == "complex_key_hashed_two_keys" - ): - return True - return False + return layout.is_simple and self.storage_type == "simple" or layout.is_complex and self.storage_type == "hash_map" class SourceAerospike(ExternalSource): def __init__(self, name, internal_hostname, internal_port, diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/fake_cert.pem b/tests/integration/helpers/fake_cert.pem similarity index 100% rename from tests/integration/test_dictionaries_all_layouts_and_sources/fake_cert.pem rename to tests/integration/helpers/fake_cert.pem diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/http_server.py b/tests/integration/helpers/http_server.py similarity index 100% rename from tests/integration/test_dictionaries_all_layouts_and_sources/http_server.py rename to tests/integration/helpers/http_server.py diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 3e11a544229..5a46498ce08 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -2,9 +2,9 @@ import pytest import os from helpers.cluster import ClickHouseCluster -from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout -from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed -from external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra +from helpers.dictionary import Field, Row, Dictionary, DictionaryStructure, Layout +from helpers.external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed +from helpers.external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra import math SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -132,14 +132,6 @@ SOURCES = [ DICTIONARIES = [] -# Key-value dictionaries with only one possible field for key -SOURCES_KV = [ - SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "clickhouse", storage_type="simple"), - SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "clickhouse", storage_type="hash_map"), -] - -DICTIONARIES_KV = [] - cluster = None node = None @@ -170,17 +162,6 @@ def setup_module(module): else: print "Source", source.name, "incompatible with layout", layout.name - for layout in LAYOUTS: - field_keys = list(filter(lambda x: x.is_key, FIELDS[layout.layout_type])) - for source in SOURCES_KV: - if not source.compatible_with_layout(layout): - print "Source", source.name, "incompatible with layout", layout.name - continue - - for field in 
FIELDS[layout.layout_type]: - if not (field.is_key or field.is_range or field.is_range_key): - DICTIONARIES_KV.append(get_dict(source, layout, field_keys + [field], field.name)) - cluster = ClickHouseCluster(__file__) main_configs = [] @@ -199,7 +180,7 @@ def setup_module(module): def started_cluster(): try: cluster.start() - for dictionary in DICTIONARIES + DICTIONARIES_KV: + for dictionary in DICTIONARIES: print "Preparing", dictionary.name dictionary.prepare_source(cluster) print "Prepared" @@ -363,102 +344,3 @@ def test_ranged_dictionaries(started_cluster, fold): for query, answer in queries_with_answers: print query assert node.query(query) == str(answer) + '\n' - - -@pytest.mark.parametrize("fold", list(range(10))) -def test_key_value_simple_dictionaries(started_cluster, fold): - fields = FIELDS["simple"] - values = VALUES["simple"] - data = [Row(fields, vals) for vals in values] - - all_simple_dicts = [d for d in DICTIONARIES_KV if d.structure.layout.layout_type == "simple"] - simple_dicts = get_dictionaries(fold, 10, all_simple_dicts) - - for dct in simple_dicts: - queries_with_answers = [] - local_data = [] - for row in data: - local_fields = dct.get_fields() - local_values = [row.get_value_by_name(field.name) for field in local_fields if row.has_field(field.name)] - local_data.append(Row(local_fields, local_values)) - - dct.load_data(local_data) - - node.query("system reload dictionary {}".format(dct.name)) - - print 'name: ', dct.name - - for row in local_data: - print dct.get_fields() - for field in dct.get_fields(): - print field.name, field.is_key - if not field.is_key: - for query in dct.get_select_get_queries(field, row): - queries_with_answers.append((query, row.get_value_by_name(field.name))) - - for query in dct.get_select_has_queries(field, row): - queries_with_answers.append((query, 1)) - - for query in dct.get_select_get_or_default_queries(field, row): - queries_with_answers.append((query, field.default_value_for_get)) - - if dct.structure.has_hierarchy: - for query in dct.get_hierarchical_queries(data[0]): - queries_with_answers.append((query, [1])) - - for query in dct.get_hierarchical_queries(data[1]): - queries_with_answers.append((query, [2, 1])) - - for query in dct.get_is_in_queries(data[0], data[1]): - queries_with_answers.append((query, 0)) - - for query in dct.get_is_in_queries(data[1], data[0]): - queries_with_answers.append((query, 1)) - - for query, answer in queries_with_answers: - print query - if isinstance(answer, list): - answer = str(answer).replace(' ', '') - assert node.query(query) == str(answer) + '\n' - - -@pytest.mark.parametrize("fold", list(range(10))) -def test_key_value_complex_dictionaries(started_cluster, fold): - fields = FIELDS["complex"] - values = VALUES["complex"] - data = [Row(fields, vals) for vals in values] - - all_complex_dicts = [d for d in DICTIONARIES_KV if d.structure.layout.layout_type == "complex"] - complex_dicts = get_dictionaries(fold, 10, all_complex_dicts) - for dct in complex_dicts: - dct.load_data(data) - - node.query("system reload dictionaries") - - for dct in complex_dicts: - queries_with_answers = [] - local_data = [] - for row in data: - local_fields = dct.get_fields() - local_values = [row.get_value_by_name(field.name) for field in local_fields if row.has_field(field.name)] - local_data.append(Row(local_fields, local_values)) - - dct.load_data(local_data) - - node.query("system reload dictionary {}".format(dct.name)) - - for row in local_data: - for field in dct.get_fields(): - if not field.is_key: - for 
query in dct.get_select_get_queries(field, row): - queries_with_answers.append((query, row.get_value_by_name(field.name))) - - for query in dct.get_select_has_queries(field, row): - queries_with_answers.append((query, 1)) - - for query in dct.get_select_get_or_default_queries(field, row): - queries_with_answers.append((query, field.default_value_for_get)) - - for query, answer in queries_with_answers: - print query - assert node.query(query) == str(answer) + '\n' diff --git a/tests/integration/test_dictionaries_redis/__init__.py b/tests/integration/test_dictionaries_redis/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_dictionaries_redis/test.py b/tests/integration/test_dictionaries_redis/test.py new file mode 100644 index 00000000000..1588efa3426 --- /dev/null +++ b/tests/integration/test_dictionaries_redis/test.py @@ -0,0 +1,176 @@ +import os +import pytest +import redis + +from helpers.cluster import ClickHouseCluster +from helpers.dictionary import Field, Row, Dictionary, DictionaryStructure, Layout +from helpers.external_sources import SourceRedis + +cluster = None +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') +node = None + +KEY_FIELDS = { + "simple": [ + Field("KeyField", 'UInt64', is_key=True, default_value_for_get=9999999) + ], + "complex": [ + Field("KeyField1", 'UInt64', is_key=True, default_value_for_get=9999999), + Field("KeyField2", 'String', is_key=True, default_value_for_get='xxxxxxxxx'), + ] +} + +KEY_VALUES = { + "simple" : [ + [1], [2] + ], + "complex" : [ + [1, 'world'], [2, 'qwerty2'] + ] +} + +FIELDS = [ + Field("UInt8_", 'UInt8', default_value_for_get=55), + Field("UInt16_", 'UInt16', default_value_for_get=66), + Field("UInt32_", 'UInt32', default_value_for_get=77), + Field("UInt64_", 'UInt64', default_value_for_get=88), + Field("Int8_", 'Int8', default_value_for_get=-55), + Field("Int16_", 'Int16', default_value_for_get=-66), + Field("Int32_", 'Int32', default_value_for_get=-77), + Field("Int64_", 'Int64', default_value_for_get=-88), + Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'), + Field("Date_", 'Date', default_value_for_get='2018-12-30'), + Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'), + Field("String_", 'String', default_value_for_get='hi'), + Field("Float32_", 'Float32', default_value_for_get=555.11), + Field("Float64_", 'Float64', default_value_for_get=777.11), +] + +VALUES = [ + [22, 3], + [333, 4], + [4444, 5], + [55555, 6], + [-6, -7], + [-77, -8], + [-888, -9], + [-999, -10], + ['550e8400-e29b-41d4-a716-446655440003', '550e8400-e29b-41d4-a716-446655440002'], + ['1973-06-28', '1978-06-28'], + ['1985-02-28 23:43:25', '1986-02-28 23:42:25'], + ['hello', 'hello'], + [22.543, 21.543], + [3332154213.4, 3222154213.4], +] + +LAYOUTS = [ + Layout("flat"), + Layout("hashed"), + Layout("cache"), + Layout("complex_key_hashed"), + # Layout("complex_key_cache"), # Currently not supported + Layout("direct"), + # Layout("complex_key_direct") # Currently not supported +] + +DICTIONARIES = [] + +def get_dict(source, layout, fields, suffix_name=''): + global dict_configs_path + + structure = DictionaryStructure(layout, fields) + dict_name = source.name + "_" + layout.name + '_' + suffix_name + dict_path = os.path.join(dict_configs_path, dict_name + '.xml') + dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name, fields) + 
dictionary.generate_config()
+    return dictionary
+
+
+def setup_module(module):
+    global DICTIONARIES
+    global cluster
+    global node
+    global dict_configs_path
+
+    for f in os.listdir(dict_configs_path):
+        os.remove(os.path.join(dict_configs_path, f))
+
+    for i, field in enumerate(FIELDS):
+        DICTIONARIES.append([])
+        sources = []
+        sources.append(SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "clickhouse", i * 2, storage_type="simple"))
+        sources.append(SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "clickhouse", i * 2 + 1, storage_type="hash_map"))
+        for source in sources:
+            for layout in LAYOUTS:
+                if not source.compatible_with_layout(layout):
+                    print "Source", source.name, "incompatible with layout", layout.name
+                    continue
+
+                fields = KEY_FIELDS[layout.layout_type] + [field]
+                DICTIONARIES[i].append(get_dict(source, layout, fields, field.name))
+
+    main_configs = []
+    dictionaries = []
+    for fname in os.listdir(dict_configs_path):
+        dictionaries.append(os.path.join(dict_configs_path, fname))
+
+    cluster = ClickHouseCluster(__file__)
+    node = cluster.add_instance('node', main_configs=main_configs, dictionaries=dictionaries, with_redis=True)
+
+@pytest.fixture(scope="module", autouse=True)
+def started_cluster():
+    try:
+        cluster.start()
+        assert len(FIELDS) == len(VALUES)
+        for dicts in DICTIONARIES:
+            for dictionary in dicts:
+                print "Preparing", dictionary.name
+                dictionary.prepare_source(cluster)
+                print "Prepared"
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+@pytest.mark.parametrize("id", range(len(FIELDS)))
+def test_redis_dictionaries(started_cluster, id):
+    print 'id:', id
+
+    dicts = DICTIONARIES[id]
+    values = VALUES[id]
+    field = FIELDS[id]
+
+    node.query("system reload dictionaries")
+
+    for dct in dicts:
+        data = []
+        dict_type = dct.structure.layout.layout_type
+        key_fields = KEY_FIELDS[dict_type]
+        key_values = KEY_VALUES[dict_type]
+
+        for key_value, value in zip(key_values, values):
+            data.append(Row(key_fields + [field], key_value + [value]))
+
+        dct.load_data(data)
+
+        queries_with_answers = []
+        for row in data:
+            for query in dct.get_select_get_queries(field, row):
+                queries_with_answers.append((query, row.get_value_by_name(field.name)))
+
+            for query in dct.get_select_has_queries(field, row):
+                queries_with_answers.append((query, 1))
+
+            for query in dct.get_select_get_or_default_queries(field, row):
+                queries_with_answers.append((query, field.default_value_for_get))
+
+        node.query("system reload dictionary {}".format(dct.name))
+
+        for query, answer in queries_with_answers:
+            print query
+            assert node.query(query) == str(answer) + '\n'
+
+    # Check that dictionaries can be reloaded.
+ node.query("system reload dictionaries") From acbe21304a56cf840b0a54d2f1918d5a459bbcbb Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Sep 2020 14:46:32 +0300 Subject: [PATCH 096/298] Fix test for compact parts --- tests/queries/0_stateless/01465_ttl_recompression.reference | 4 ++-- tests/queries/0_stateless/01465_ttl_recompression.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01465_ttl_recompression.reference b/tests/queries/0_stateless/01465_ttl_recompression.reference index 40d7ed8896b..2f8815c62eb 100644 --- a/tests/queries/0_stateless/01465_ttl_recompression.reference +++ b/tests/queries/0_stateless/01465_ttl_recompression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(17)), dt + toIntervalYear(1) RECOMPRESS CODEC(LZ4HC(10))\nSETTINGS min_rows_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(17)), dt + toIntervalYear(1) RECOMPRESS CODEC(LZ4HC(10))\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 3000 1_1_1_0 LZ4 2_2_2_0 LZ4 @@ -6,7 +6,7 @@ CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt6 1_1_1_1 LZ4 2_2_2_1 ZSTD(17) 3_3_3_1 LZ4HC(10) -CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalDay(1) RECOMPRESS CODEC(ZSTD(12))\nSETTINGS min_rows_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.recompression_table\n(\n `dt` DateTime,\n `key` UInt64,\n `value` String\n)\nENGINE = MergeTree()\nPARTITION BY key\nORDER BY tuple()\nTTL dt + toIntervalDay(1) RECOMPRESS CODEC(ZSTD(12))\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 1_1_1_1_4 LZ4 2_2_2_1_4 ZSTD(17) 3_3_3_1_4 LZ4HC(10) diff --git a/tests/queries/0_stateless/01465_ttl_recompression.sql b/tests/queries/0_stateless/01465_ttl_recompression.sql index 0683f971d5f..92f20ddd495 100644 --- a/tests/queries/0_stateless/01465_ttl_recompression.sql +++ b/tests/queries/0_stateless/01465_ttl_recompression.sql @@ -10,7 +10,7 @@ CREATE TABLE recompression_table ORDER BY tuple() PARTITION BY key TTL dt + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), dt + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10)) -SETTINGS min_rows_for_wide_part = 0; +SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; SHOW CREATE TABLE recompression_table; From f8932a7a6bbf10c80ceb05ea25f9a7502e1cb961 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Sep 2020 16:40:03 +0300 Subject: [PATCH 097/298] Build and tests config --- tests/ci/build_config.json | 692 +++++++++++++++++++++++++++++-------- 1 file changed, 540 insertions(+), 152 deletions(-) diff --git a/tests/ci/build_config.json b/tests/ci/build_config.json index e4b9c1d6b75..02c96b085da 100644 --- a/tests/ci/build_config.json +++ b/tests/ci/build_config.json @@ -1,153 +1,541 @@ -[ - { - "compiler": "gcc-9", - "build-type": "", - "sanitizer": "", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "alien_pkgs": true, - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "gcc-9", - 
"build-type": "", - "sanitizer": "", - "package-type": "performance", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "gcc-9", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10", - "build-type": "", - "sanitizer": "address", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10", - "build-type": "", - "sanitizer": "undefined", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10", - "build-type": "", - "sanitizer": "thread", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10", - "build-type": "", - "sanitizer": "memory", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10", - "build-type": "", - "sanitizer": "", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10", - "build-type": "debug", - "sanitizer": "", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "gcc-9", - "build-type": "", - "sanitizer": "", - "package-type": "deb", - "bundled": "unbundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "splitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10-darwin", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10-aarch64", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-10-freebsd", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false +{ + "build_config": [ + { + "compiler": "gcc-9", + "build-type": "", + "sanitizer": "", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "alien_pkgs": true, + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "gcc-9", + "build-type": "", + "sanitizer": "", + "package-type": "performance", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "gcc-9", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "address", + "package-type": "deb", + "bundled": 
"bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "undefined", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "thread", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "memory", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "debug", + "sanitizer": "", + "package-type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "gcc-9", + "build-type": "", + "sanitizer": "", + "package-type": "deb", + "bundled": "unbundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "splitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10-darwin", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10-aarch64", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-10-freebsd", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + } + ], + "tests_config": { + "Functional stateful tests (address)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (thread)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (memory)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (ubsan)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + 
"Functional stateful tests (debug)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (release)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateful tests (release, DatabaseAtomic)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (address)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (thread)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (memory)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (ubsan)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (debug)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (release)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (unbundled)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "unbundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (release, polymorphic parts enabled)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Functional stateless tests (release, DatabaseAtomic)": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": 
"unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Stress test (address)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Stress test (thread)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Stress test (undefined)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Stress test (memory)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Integration tests (asan)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Integration tests (thread)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Integration tests (release)": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Compatibility check": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Split build smoke test": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "splitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Testflows check": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Unit tests release gcc": { + "required_build_properties": { + "compiler": "gcc-9", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Unit tests release clang": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Unit tests ASAN": { + 
"required_build_properties": { + "compiler": "clang-10", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Unit tests MSAN": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Unit tests TSAN": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, + "Unit tests UBSAN": { + "required_build_properties": { + "compiler": "clang-10", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + } } -] +} From df8dde109c698a40f5f281831cfa738eae2533ec Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Sep 2020 16:40:16 +0300 Subject: [PATCH 098/298] Remove tests config --- tests/ci/tests_config.json | 242 ------------------------------------- 1 file changed, 242 deletions(-) delete mode 100644 tests/ci/tests_config.json diff --git a/tests/ci/tests_config.json b/tests/ci/tests_config.json deleted file mode 100644 index 481de51d08b..00000000000 --- a/tests/ci/tests_config.json +++ /dev/null @@ -1,242 +0,0 @@ -{ - "Functional stateful tests (address)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (thread)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (memory)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (ubsan)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (debug)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (release)": { - "required_build_properties": { - "compiler": "gcc-9", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (release, DatabaseAtomic)": { - "required_build_properties": { - "compiler": "gcc-9", - "package_type": 
"deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (address)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (thread)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (memory)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (ubsan)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (debug)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (release)": { - "required_build_properties": { - "compiler": "gcc-9", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (unbundled)": { - "required_build_properties": { - "compiler": "gcc-9", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "unbundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (release, polymorphic parts enabled)": { - "required_build_properties": { - "compiler": "gcc-9", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (release, DatabaseAtomic)": { - "required_build_properties": { - "compiler": "gcc-9", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (address)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (thread)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (undefined)": { - "required_build_properties": { - 
"compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (memory)": { - "required_build_properties": { - "compiler": "clang-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - } -} From 54e6257070b4560f3ab69813514b7c98d7a7a917 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Sep 2020 17:23:20 +0300 Subject: [PATCH 099/298] Better name --- tests/ci/{build_config.json => ci_config.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/ci/{build_config.json => ci_config.json} (100%) diff --git a/tests/ci/build_config.json b/tests/ci/ci_config.json similarity index 100% rename from tests/ci/build_config.json rename to tests/ci/ci_config.json From 7b8f8acd381480b584a1b32cc295830d5b6d09a4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Sep 2020 17:47:14 +0300 Subject: [PATCH 100/298] Bump all versions to gcc-10 --- tests/ci/ci_config.json | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index 02c96b085da..dbb7be0e438 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -1,7 +1,7 @@ { "build_config": [ { - "compiler": "gcc-9", + "compiler": "gcc-10", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -12,7 +12,7 @@ "with_coverage": false }, { - "compiler": "gcc-9", + "compiler": "gcc-10", "build-type": "", "sanitizer": "", "package-type": "performance", @@ -22,7 +22,7 @@ "with_coverage": false }, { - "compiler": "gcc-9", + "compiler": "gcc-10", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -92,7 +92,7 @@ "with_coverage": false }, { - "compiler": "gcc-9", + "compiler": "gcc-10", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -215,7 +215,7 @@ }, "Functional stateful tests (release)": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -227,7 +227,7 @@ }, "Functional stateful tests (release, DatabaseAtomic)": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -299,7 +299,7 @@ }, "Functional stateless tests (release)": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -311,7 +311,7 @@ }, "Functional stateless tests (unbundled)": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -323,7 +323,7 @@ }, "Functional stateless tests (release, polymorphic parts enabled)": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -335,7 +335,7 @@ }, "Functional stateless tests (release, DatabaseAtomic)": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -431,7 +431,7 @@ }, "Compatibility check": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": 
"gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -455,7 +455,7 @@ }, "Testflows check": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -467,7 +467,7 @@ }, "Unit tests release gcc": { "required_build_properties": { - "compiler": "gcc-9", + "compiler": "gcc-10", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", From 661d9bdb4c1489d6a9c5c8f0ae6d06bb5480a2b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Sep 2020 19:03:36 +0300 Subject: [PATCH 101/298] Skip test --- tests/queries/skip_list.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index adfc5f0e582..0aa98499d42 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -18,7 +18,8 @@ "00152_insert_different_granularity", "00151_replace_partition_with_different_granularity", "00157_cache_dictionary", - "01193_metadata_loading" + "01193_metadata_loading", + "01474_executable_dictionary" /// informational stderr from sanitizer at start ], "address-sanitizer": [ "00281", From 2c04b0a8e67d1aeefdbb523ac4f8cd321b83a347 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 7 Sep 2020 19:07:34 +0300 Subject: [PATCH 102/298] comment added --- .../Formats/Impl/JSONCompactEachRowRowInputFormat.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 593f297108c..6845b2974ab 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -12,7 +12,12 @@ namespace DB class ReadBuffer; -/** A stream for reading data in JSONCompactEachRow- formats +/** A stream for reading data in a bunch of formats: + * - JSONCompactEachRow + * - JSONCompactEachRowWithNamesAndTypes + * - JSONCompactStringsEachRow + * - JSONCompactStringsEachRowWithNamesAndTypes + * */ class JSONCompactEachRowRowInputFormat : public IRowInputFormat { @@ -54,7 +59,9 @@ private: /// This is for the correct exceptions in skipping unknown fields. std::vector names_of_columns; + /// For *WithNamesAndTypes formats. bool with_names; + /// For JSONCompactString* formats. 
bool yield_strings; }; From 105f704efe163e702c5d23bd9b164c8a28df7657 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Sep 2020 19:20:00 +0300 Subject: [PATCH 103/298] Back to gcc-9 --- tests/ci/ci_config.json | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index dbb7be0e438..02c96b085da 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -1,7 +1,7 @@ { "build_config": [ { - "compiler": "gcc-10", + "compiler": "gcc-9", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -12,7 +12,7 @@ "with_coverage": false }, { - "compiler": "gcc-10", + "compiler": "gcc-9", "build-type": "", "sanitizer": "", "package-type": "performance", @@ -22,7 +22,7 @@ "with_coverage": false }, { - "compiler": "gcc-10", + "compiler": "gcc-9", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -92,7 +92,7 @@ "with_coverage": false }, { - "compiler": "gcc-10", + "compiler": "gcc-9", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -215,7 +215,7 @@ }, "Functional stateful tests (release)": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -227,7 +227,7 @@ }, "Functional stateful tests (release, DatabaseAtomic)": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -299,7 +299,7 @@ }, "Functional stateless tests (release)": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -311,7 +311,7 @@ }, "Functional stateless tests (unbundled)": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -323,7 +323,7 @@ }, "Functional stateless tests (release, polymorphic parts enabled)": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -335,7 +335,7 @@ }, "Functional stateless tests (release, DatabaseAtomic)": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -431,7 +431,7 @@ }, "Compatibility check": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -455,7 +455,7 @@ }, "Testflows check": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -467,7 +467,7 @@ }, "Unit tests release gcc": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-9", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", From a56d42de67496404fb507d05c2d399012fd479ce Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 7 Sep 2020 20:55:06 +0300 Subject: [PATCH 104/298] fix arcadia --- src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h | 2 -- src/Processors/ya.make | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 6845b2974ab..4077eb6e008 100644 --- 
a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -1,7 +1,5 @@ #pragma once -#pragma once - #include #include #include diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 27893674859..30de38fedbd 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -31,9 +31,6 @@ SRCS( Formats/Impl/JSONEachRowRowOutputFormat.cpp Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp Formats/Impl/JSONRowOutputFormat.cpp - Formats/Impl/JSONStringsEachRowRowInputFormat.cpp - Formats/Impl/JSONStringsEachRowRowOutputFormat.cpp - Formats/Impl/JSONStringsRowOutputFormat.cpp Formats/Impl/MarkdownRowOutputFormat.cpp Formats/Impl/MsgPackRowInputFormat.cpp Formats/Impl/MsgPackRowOutputFormat.cpp From b3eafc1106819099afc980f8033c9e430a564cec Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Mon, 7 Sep 2020 21:00:37 +0300 Subject: [PATCH 105/298] hide symbols in nameless namespace --- .../AggregateFunctionArray.cpp | 5 + .../AggregateFunctionDistinct.cpp | 6 +- .../AggregateFunctionForEach.cpp | 5 + .../AggregateFunctionMerge.cpp | 5 + .../AggregateFunctionNull.cpp | 5 + .../AggregateFunctionOrFill.cpp | 4 + .../AggregateFunctionResample.cpp | 5 + .../AggregateFunctionState.cpp | 5 + src/Functions/CRC.cpp | 5 + src/Functions/abs.cpp | 4 + src/Functions/acos.cpp | 4 + src/Functions/addressToLine.cpp | 5 + src/Functions/addressToSymbol.cpp | 5 + src/Functions/appendTrailingCharIfAbsent.cpp | 4 + src/Functions/asin.cpp | 4 + src/Functions/assumeNotNull.cpp | 3 + src/Functions/atan.cpp | 4 + src/Functions/bar.cpp | 4 + src/Functions/bitAnd.cpp | 5 + src/Functions/bitBoolMaskAnd.cpp | 72 ++++++------ src/Functions/bitBoolMaskOr.cpp | 72 ++++++------ src/Functions/bitCount.cpp | 5 + src/Functions/bitNot.cpp | 5 + src/Functions/bitOr.cpp | 5 + src/Functions/bitRotateLeft.cpp | 5 + src/Functions/bitRotateRight.cpp | 5 + src/Functions/bitShiftLeft.cpp | 5 + src/Functions/bitShiftRight.cpp | 5 + src/Functions/bitSwapLastTwo.cpp | 108 +++++++++--------- src/Functions/bitTest.cpp | 5 + src/Functions/bitTestAll.cpp | 4 + src/Functions/bitTestAny.cpp | 4 + src/Functions/bitWrapperFunc.cpp | 80 +++++++------ src/Functions/bitXor.cpp | 5 + src/Functions/blockNumber.cpp | 3 + src/Functions/blockSerializedSize.cpp | 3 + src/Functions/blockSize.cpp | 3 + src/Functions/buildId.cpp | 3 + src/Functions/caseWithExpression.cpp | 5 + src/Functions/cbrt.cpp | 4 + src/Functions/coalesce.cpp | 3 + src/Functions/concat.cpp | 3 + src/Functions/convertCharset.cpp | 3 + src/Functions/cos.cpp | 4 + src/Functions/countDigits.cpp | 4 + src/Functions/currentDatabase.cpp | 3 + src/Functions/currentUser.cpp | 3 + src/Functions/dateDiff.cpp | 5 + src/Functions/date_trunc.cpp | 3 + src/Functions/defaultValueOfArgumentType.cpp | 3 + src/Functions/defaultValueOfTypeName.cpp | 4 + src/Functions/demange.cpp | 5 + src/Functions/divide.cpp | 5 + src/Functions/dumpColumnStructure.cpp | 3 + src/Functions/e.cpp | 4 + src/Functions/empty.cpp | 4 + src/Functions/erf.cpp | 4 + src/Functions/erfc.cpp | 4 + src/Functions/evalMLMethod.cpp | 17 +-- src/Functions/exp.cpp | 4 + src/Functions/exp10.cpp | 5 +- src/Functions/exp2.cpp | 4 + src/Functions/extract.cpp | 4 + src/Functions/extractGroups.cpp | 5 +- src/Functions/filesystem.cpp | 3 + src/Functions/finalizeAggregation.cpp | 4 +- src/Functions/formatDateTime.cpp | 7 +- src/Functions/formatRow.cpp | 4 + src/Functions/formatString.cpp | 5 + src/Functions/fuzzBits.cpp | 3 +- src/Functions/gcd.cpp | 6 +- 
src/Functions/geoToH3.cpp | 4 + src/Functions/geohashDecode.cpp | 4 +- src/Functions/geohashEncode.cpp | 4 + src/Functions/geohashesInBox.cpp | 5 + src/Functions/getMacro.cpp | 5 +- src/Functions/getScalar.cpp | 5 +- src/Functions/getSetting.cpp | 5 +- src/Functions/getSizeOfEnumType.cpp | 4 +- src/Functions/globalVariable.cpp | 4 +- src/Functions/greatest.cpp | 4 + src/Functions/h3EdgeAngle.cpp | 4 + src/Functions/h3EdgeLengthM.cpp | 5 +- src/Functions/h3GetBaseCell.cpp | 5 + src/Functions/h3GetResolution.cpp | 5 + src/Functions/h3HexAreaM2.cpp | 5 +- src/Functions/h3IndexesAreNeighbors.cpp | 5 + src/Functions/h3IsValid.cpp | 5 + src/Functions/h3ToChildren.cpp | 5 +- src/Functions/h3ToParent.cpp | 5 +- src/Functions/h3ToString.cpp | 5 + src/Functions/h3kRing.cpp | 4 + src/Functions/hasColumnInTable.cpp | 4 +- src/Functions/hasThreadFuzzer.cpp | 3 + src/Functions/hasToken.cpp | 4 + src/Functions/hasTokenCaseInsensitive.cpp | 4 + src/Functions/hostName.cpp | 3 + src/Functions/identity.cpp | 3 + src/Functions/if.cpp | 5 +- src/Functions/ifNotFinite.cpp | 3 + src/Functions/ifNull.cpp | 3 + src/Functions/ignore.cpp | 3 + src/Functions/ilike.cpp | 11 +- src/Functions/in.cpp | 8 +- src/Functions/initializeAggregation.cpp | 4 +- src/Functions/intDiv.cpp | 5 + src/Functions/intDivOrZero.cpp | 4 + src/Functions/intExp10.cpp | 6 +- src/Functions/intExp2.cpp | 5 + src/Functions/isConstant.cpp | 3 + src/Functions/isDecimalOverflow.cpp | 5 +- src/Functions/isFinite.cpp | 3 + src/Functions/isInfinite.cpp | 3 + src/Functions/isNaN.cpp | 3 + src/Functions/isNotNull.cpp | 4 + src/Functions/isNull.cpp | 3 + src/Functions/isZeroOrNull.cpp | 5 +- src/Functions/jumpConsistentHash.cpp | 5 +- src/Functions/lcm.cpp | 6 +- src/Functions/least.cpp | 4 + src/Functions/lengthUTF8.cpp | 5 +- src/Functions/lgamma.cpp | 4 + src/Functions/like.cpp | 11 +- src/Functions/log.cpp | 8 +- src/Functions/log10.cpp | 4 + src/Functions/log2.cpp | 4 + src/Functions/lowCardinalityIndices.cpp | 5 +- src/Functions/lowCardinalityKeys.cpp | 4 +- src/Functions/lower.cpp | 4 + src/Functions/lowerUTF8.cpp | 4 + src/Functions/match.cpp | 4 + src/Functions/materialize.cpp | 3 + src/Functions/minus.cpp | 4 + src/Functions/modulo.cpp | 6 +- src/Functions/moduloOrZero.cpp | 4 + src/Functions/multiFuzzyMatchAllIndices.cpp | 4 + src/Functions/multiFuzzyMatchAny.cpp | 4 + src/Functions/multiFuzzyMatchAnyIndex.cpp | 4 + src/Functions/multiIf.cpp | 6 +- src/Functions/multiMatchAllIndices.cpp | 4 + src/Functions/multiMatchAny.cpp | 4 + src/Functions/multiMatchAnyIndex.cpp | 4 + src/Functions/multiSearchAllPositions.cpp | 4 + ...multiSearchAllPositionsCaseInsensitive.cpp | 4 + ...iSearchAllPositionsCaseInsensitiveUTF8.cpp | 4 + src/Functions/multiSearchAllPositionsUTF8.cpp | 4 + src/Functions/multiSearchAny.cpp | 4 + .../multiSearchAnyCaseInsensitive.cpp | 4 + .../multiSearchAnyCaseInsensitiveUTF8.cpp | 4 + src/Functions/multiSearchAnyUTF8.cpp | 4 + src/Functions/multiSearchFirstIndex.cpp | 4 + .../multiSearchFirstIndexCaseInsensitive.cpp | 4 + ...ltiSearchFirstIndexCaseInsensitiveUTF8.cpp | 4 + src/Functions/multiSearchFirstIndexUTF8.cpp | 4 + src/Functions/multiSearchFirstPosition.cpp | 4 + ...ultiSearchFirstPositionCaseInsensitive.cpp | 4 + ...SearchFirstPositionCaseInsensitiveUTF8.cpp | 4 + .../multiSearchFirstPositionUTF8.cpp | 4 + src/Functions/multiply.cpp | 4 + src/Functions/negate.cpp | 4 + src/Functions/neighbor.cpp | 5 + src/Functions/normalizeQuery.cpp | 1 - src/Functions/normalizedQueryHash.cpp | 1 - src/Functions/notEmpty.cpp | 4 + 
src/Functions/notILike.cpp | 10 +- src/Functions/notLike.cpp | 5 +- src/Functions/now.cpp | 7 +- src/Functions/now64.cpp | 6 +- src/Functions/nullIf.cpp | 3 + src/Functions/pi.cpp | 4 + src/Functions/plus.cpp | 4 + src/Functions/pointInEllipses.cpp | 5 +- src/Functions/pointInPolygon.cpp | 4 +- src/Functions/position.cpp | 4 + src/Functions/positionCaseInsensitive.cpp | 4 + src/Functions/positionCaseInsensitiveUTF8.cpp | 4 + src/Functions/positionUTF8.cpp | 4 + src/Functions/pow.cpp | 4 + src/Functions/rand.cpp | 4 + src/Functions/rand64.cpp | 4 + src/Functions/randConstant.cpp | 6 +- src/Functions/randomFixedString.cpp | 4 + src/Functions/randomPrintableASCII.cpp | 4 + src/Functions/randomString.cpp | 4 + src/Functions/randomStringUTF8.cpp | 4 + src/Functions/regexpQuoteMeta.cpp | 8 +- src/Functions/reinterpretAsFixedString.cpp | 5 +- src/Functions/reinterpretAsString.cpp | 7 +- src/Functions/reinterpretStringAs.cpp | 7 +- src/Functions/repeat.cpp | 4 + src/Functions/replaceAll.cpp | 4 + src/Functions/replaceOne.cpp | 4 + src/Functions/replaceRegexpAll.cpp | 4 + src/Functions/replaceRegexpOne.cpp | 4 + src/Functions/replicate.cpp | 5 +- src/Functions/reverse.cpp | 4 +- src/Functions/reverseUTF8.cpp | 5 +- src/Functions/roundAge.cpp | 4 + src/Functions/roundDuration.cpp | 4 + src/Functions/roundToExp2.cpp | 6 +- src/Functions/rowNumberInAllBlocks.cpp | 3 + src/Functions/rowNumberInBlock.cpp | 4 + src/Functions/runningAccumulate.cpp | 4 +- src/Functions/sigmoid.cpp | 4 + src/Functions/sin.cpp | 4 + src/Functions/sqrt.cpp | 4 + src/Functions/stringToH3.cpp | 4 + src/Functions/substring.cpp | 4 + src/Functions/sumburConsistentHash.cpp | 4 + src/Functions/tan.cpp | 4 + src/Functions/tanh.cpp | 9 +- src/Functions/tgamma.cpp | 4 + src/Functions/throwIf.cpp | 4 +- src/Functions/timeSlots.cpp | 6 +- src/Functions/timezone.cpp | 4 +- src/Functions/toColumnTypeName.cpp | 3 + src/Functions/toLowCardinality.cpp | 3 + src/Functions/toNullable.cpp | 3 + src/Functions/toStartOfInterval.cpp | 2 +- src/Functions/toTimeZone.cpp | 6 +- src/Functions/toTypeName.cpp | 3 + src/Functions/toUnixTimestamp64Micro.cpp | 4 + src/Functions/toUnixTimestamp64Milli.cpp | 4 + src/Functions/toUnixTimestamp64Nano.cpp | 4 + src/Functions/toValidUTF8.cpp | 5 + src/Functions/today.cpp | 4 + src/Functions/transform.cpp | 5 +- src/Functions/trim.cpp | 6 +- src/Functions/tuple.cpp | 5 +- src/Functions/tupleElement.cpp | 4 +- src/Functions/upper.cpp | 4 + src/Functions/upperUTF8.cpp | 4 + 232 files changed, 1095 insertions(+), 250 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionArray.cpp b/src/AggregateFunctions/AggregateFunctionArray.cpp index 7fe4f1f448b..d0f17da5aa4 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionArray.cpp @@ -12,6 +12,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + class AggregateFunctionCombinatorArray final : public IAggregateFunctionCombinator { public: @@ -45,6 +48,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp index 4d89e8fb199..8ad37f49797 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -6,12 +6,14 @@ namespace DB { - namespace ErrorCodes { extern const int 
NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator { public: @@ -56,6 +58,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionForEach.cpp b/src/AggregateFunctions/AggregateFunctionForEach.cpp index 693bc6839fa..6e0365fc04b 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.cpp +++ b/src/AggregateFunctions/AggregateFunctionForEach.cpp @@ -12,6 +12,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + class AggregateFunctionCombinatorForEach final : public IAggregateFunctionCombinator { public: @@ -42,6 +45,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorForEach(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionMerge.cpp b/src/AggregateFunctions/AggregateFunctionMerge.cpp index 2ce3f0e11f6..17157d21bd1 100644 --- a/src/AggregateFunctions/AggregateFunctionMerge.cpp +++ b/src/AggregateFunctions/AggregateFunctionMerge.cpp @@ -13,6 +13,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class AggregateFunctionCombinatorMerge final : public IAggregateFunctionCombinator { public: @@ -55,6 +58,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorMerge(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index c88d1e7f24c..f584ae1f34c 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -15,6 +15,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + class AggregateFunctionCombinatorNull final : public IAggregateFunctionCombinator { public: @@ -119,6 +122,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.cpp b/src/AggregateFunctions/AggregateFunctionOrFill.cpp index ce8fc8d9ca5..af107e26ca9 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.cpp +++ b/src/AggregateFunctions/AggregateFunctionOrFill.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ template class AggregateFunctionCombinatorOrFill final : public IAggregateFunctionCombinator @@ -32,6 +34,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared>()); diff --git a/src/AggregateFunctions/AggregateFunctionResample.cpp b/src/AggregateFunctions/AggregateFunctionResample.cpp index 389c9048918..b81fb442f27 100644 --- a/src/AggregateFunctions/AggregateFunctionResample.cpp +++ b/src/AggregateFunctions/AggregateFunctionResample.cpp @@ -13,6 +13,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class AggregateFunctionCombinatorResample final : public IAggregateFunctionCombinator { public: @@ -93,6 +96,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git 
a/src/AggregateFunctions/AggregateFunctionState.cpp b/src/AggregateFunctions/AggregateFunctionState.cpp index 9d1c677c0ff..348d8ba44dd 100644 --- a/src/AggregateFunctions/AggregateFunctionState.cpp +++ b/src/AggregateFunctions/AggregateFunctionState.cpp @@ -13,6 +13,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + class AggregateFunctionCombinatorState final : public IAggregateFunctionCombinator { public: @@ -33,6 +36,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorState(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index 96edf9a0d8e..6083e5ef16f 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -72,6 +72,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + template struct CRCFunctionWrapper { @@ -127,6 +130,8 @@ using FunctionCRC32IEEE = FunctionCRC; // Uses CRC-64-ECMA polynomial using FunctionCRC64ECMA = FunctionCRC; +} + template void registerFunctionCRCImpl(FunctionFactory & factory) { diff --git a/src/Functions/abs.cpp b/src/Functions/abs.cpp index f0c530e0e8f..deb69d40035 100644 --- a/src/Functions/abs.cpp +++ b/src/Functions/abs.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ template struct AbsImpl @@ -34,6 +36,8 @@ struct AbsImpl struct NameAbs { static constexpr auto name = "abs"; }; using FunctionAbs = FunctionUnaryArithmetic; +} + template <> struct FunctionUnaryArithmeticMonotonicity { static bool has() { return true; } diff --git a/src/Functions/acos.cpp b/src/Functions/acos.cpp index 61e213acabf..62e68b5c17b 100644 --- a/src/Functions/acos.cpp +++ b/src/Functions/acos.cpp @@ -4,10 +4,14 @@ namespace DB { +namespace +{ struct AcosName { static constexpr auto name = "acos"; }; using FunctionAcos = FunctionMathUnary>; +} + void registerFunctionAcos(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp index b5a6fcfb30e..432761e8d28 100644 --- a/src/Functions/addressToLine.cpp +++ b/src/Functions/addressToLine.cpp @@ -29,6 +29,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class FunctionAddressToLine : public IFunction { public: @@ -144,6 +147,8 @@ private: } }; +} + void registerFunctionAddressToLine(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/addressToSymbol.cpp b/src/Functions/addressToSymbol.cpp index 077b4f9a80b..d2df064bf35 100644 --- a/src/Functions/addressToSymbol.cpp +++ b/src/Functions/addressToSymbol.cpp @@ -21,6 +21,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class FunctionAddressToSymbol : public IFunction { public: @@ -86,6 +89,8 @@ public: } }; +} + void registerFunctionAddressToSymbol(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/appendTrailingCharIfAbsent.cpp b/src/Functions/appendTrailingCharIfAbsent.cpp index b69edb718a4..67a3cbabe6d 100644 --- a/src/Functions/appendTrailingCharIfAbsent.cpp +++ b/src/Functions/appendTrailingCharIfAbsent.cpp @@ -17,6 +17,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ class FunctionAppendTrailingCharIfAbsent : public IFunction { @@ -109,6 +111,8 @@ private: } }; +} + void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/asin.cpp 
b/src/Functions/asin.cpp index cccd3fc05d4..92391fdef70 100644 --- a/src/Functions/asin.cpp +++ b/src/Functions/asin.cpp @@ -4,10 +4,14 @@ namespace DB { +namespace +{ struct AsinName { static constexpr auto name = "asin"; }; using FunctionAsin = FunctionMathUnary>; +} + void registerFunctionAsin(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/assumeNotNull.cpp b/src/Functions/assumeNotNull.cpp index e2b543d1be8..331e6a62341 100644 --- a/src/Functions/assumeNotNull.cpp +++ b/src/Functions/assumeNotNull.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ /// Implements the function assumeNotNull which takes 1 argument and works as follows: /// - if the argument is a nullable column, return its embedded column; @@ -49,6 +51,7 @@ public: } }; +} void registerFunctionAssumeNotNull(FunctionFactory & factory) { diff --git a/src/Functions/atan.cpp b/src/Functions/atan.cpp index 00e871b9a84..be0af8a9108 100644 --- a/src/Functions/atan.cpp +++ b/src/Functions/atan.cpp @@ -4,10 +4,14 @@ namespace DB { +namespace +{ struct AtanName { static constexpr auto name = "atan"; }; using FunctionAtan = FunctionMathUnary>; +} + void registerFunctionAtan(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/bar.cpp b/src/Functions/bar.cpp index a80cddfb5e3..748c33025a4 100644 --- a/src/Functions/bar.cpp +++ b/src/Functions/bar.cpp @@ -19,6 +19,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + /** bar(x, min, max, width) - draws a strip from the number of characters proportional to (x - min) and equal to width for x == max. * Returns a string with nice Unicode-art bar with resolution of 1/8 part of symbol. */ @@ -160,6 +163,7 @@ private: } }; +} void registerFunctionBar(FunctionFactory & factory) { diff --git a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index c6e2b0a6c88..89c2758bc6a 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -9,6 +9,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct BitAndImpl { @@ -36,6 +39,8 @@ struct BitAndImpl struct NameBitAnd { static constexpr auto name = "bitAnd"; }; using FunctionBitAnd = FunctionBinaryArithmetic; +} + void registerFunctionBitAnd(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 44dadad64c0..dd46fa8b1b1 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -5,44 +5,50 @@ namespace DB { - namespace ErrorCodes +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). +/// This function provides "AND" operation for BoolMasks. +/// Returns: "can be true" = A."can be true" AND B."can be true" +/// "can be false" = A."can be false" OR B."can be false" +template +struct BitBoolMaskAndImpl +{ + using ResultType = UInt8; + static const constexpr bool allow_fixed_string = false; + + template + static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) { - extern const int BAD_ARGUMENTS; + // Should be a logical error, but this function is callable from SQL. + // Need to investigate this. + if constexpr (!std::is_same_v || !std::is_same_v) + throw DB::Exception("It's a bug! 
Only UInt8 type is supported by __bitBoolMaskAnd.", ErrorCodes::BAD_ARGUMENTS); + + auto left_bits = littleBits(left); + auto right_bits = littleBits(right); + return static_cast((left_bits & right_bits & 1) | ((((left_bits >> 1) | (right_bits >> 1)) & 1) << 1)); } - /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). - /// This function provides "AND" operation for BoolMasks. - /// Returns: "can be true" = A."can be true" AND B."can be true" - /// "can be false" = A."can be false" OR B."can be false" - template - struct BitBoolMaskAndImpl - { - using ResultType = UInt8; - static const constexpr bool allow_fixed_string = false; - - template - static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) - { - // Should be a logical error, but this function is callable from SQL. - // Need to investigate this. - if constexpr (!std::is_same_v || !std::is_same_v) - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskAnd.", ErrorCodes::BAD_ARGUMENTS); - - auto left_bits = littleBits(left); - auto right_bits = littleBits(right); - return static_cast((left_bits & right_bits & 1) | ((((left_bits >> 1) | (right_bits >> 1)) & 1) << 1)); - } - #if USE_EMBEDDED_COMPILER - static constexpr bool compilable = false; + static constexpr bool compilable = false; #endif - }; +}; - struct NameBitBoolMaskAnd { static constexpr auto name = "__bitBoolMaskAnd"; }; - using FunctionBitBoolMaskAnd = FunctionBinaryArithmetic; +struct NameBitBoolMaskAnd { static constexpr auto name = "__bitBoolMaskAnd"; }; +using FunctionBitBoolMaskAnd = FunctionBinaryArithmetic; + +} + +void registerFunctionBitBoolMaskAnd(FunctionFactory & factory) +{ + factory.registerFunction(); +} - void registerFunctionBitBoolMaskAnd(FunctionFactory & factory) - { - factory.registerFunction(); - } } diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index cfce7f27829..e86c7dcda8e 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -5,44 +5,50 @@ namespace DB { - namespace ErrorCodes +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). +/// This function provides "OR" operation for BoolMasks. +/// Returns: "can be true" = A."can be true" OR B."can be true" +/// "can be false" = A."can be false" AND B."can be false" +template +struct BitBoolMaskOrImpl +{ + using ResultType = UInt8; + static const constexpr bool allow_fixed_string = false; + + template + static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) { - extern const int BAD_ARGUMENTS; + if constexpr (!std::is_same_v || !std::is_same_v) + // Should be a logical error, but this function is callable from SQL. + // Need to investigate this. + throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskOr.", ErrorCodes::BAD_ARGUMENTS); + + auto left_bits = littleBits(left); + auto right_bits = littleBits(right); + return static_cast(((left_bits | right_bits) & 1) | ((((left_bits >> 1) & (right_bits >> 1)) & 1) << 1)); } - /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). - /// This function provides "OR" operation for BoolMasks. 
- /// Returns: "can be true" = A."can be true" OR B."can be true" - /// "can be false" = A."can be false" AND B."can be false" - template - struct BitBoolMaskOrImpl - { - using ResultType = UInt8; - static const constexpr bool allow_fixed_string = false; - - template - static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) - { - if constexpr (!std::is_same_v || !std::is_same_v) - // Should be a logical error, but this function is callable from SQL. - // Need to investigate this. - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskOr.", ErrorCodes::BAD_ARGUMENTS); - - auto left_bits = littleBits(left); - auto right_bits = littleBits(right); - return static_cast(((left_bits | right_bits) & 1) | ((((left_bits >> 1) & (right_bits >> 1)) & 1) << 1)); - } - #if USE_EMBEDDED_COMPILER - static constexpr bool compilable = false; + static constexpr bool compilable = false; #endif - }; +}; - struct NameBitBoolMaskOr { static constexpr auto name = "__bitBoolMaskOr"; }; - using FunctionBitBoolMaskOr = FunctionBinaryArithmetic; +struct NameBitBoolMaskOr { static constexpr auto name = "__bitBoolMaskOr"; }; +using FunctionBitBoolMaskOr = FunctionBinaryArithmetic; + +} + +void registerFunctionBitBoolMaskOr(FunctionFactory & factory) +{ + factory.registerFunction(); +} - void registerFunctionBitBoolMaskOr(FunctionFactory & factory) - { - factory.registerFunction(); - } } diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index 73df2c680da..0b7af6eca7f 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -6,6 +6,9 @@ namespace DB { +namespace +{ + template struct BitCountImpl { @@ -37,6 +40,8 @@ struct BitCountImpl struct NameBitCount { static constexpr auto name = "bitCount"; }; using FunctionBitCount = FunctionUnaryArithmetic; +} + /// The function has no ranges of monotonicity. 
template <> struct FunctionUnaryArithmeticMonotonicity { diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index 0a08b3aaaeb..050d8dc3e3d 100644 --- a/src/Functions/bitNot.cpp +++ b/src/Functions/bitNot.cpp @@ -10,6 +10,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct BitNotImpl { @@ -36,6 +39,8 @@ struct BitNotImpl struct NameBitNot { static constexpr auto name = "bitNot"; }; using FunctionBitNot = FunctionUnaryArithmetic; +} + template <> struct FunctionUnaryArithmeticMonotonicity { static bool has() { return false; } diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index b8e77128216..0f339b328d8 100644 --- a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -8,6 +8,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct BitOrImpl { @@ -35,6 +38,8 @@ struct BitOrImpl struct NameBitOr { static constexpr auto name = "bitOr"; }; using FunctionBitOr = FunctionBinaryArithmetic; +} + void registerFunctionBitOr(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp index a6975468c1e..5d52494eb7d 100644 --- a/src/Functions/bitRotateLeft.cpp +++ b/src/Functions/bitRotateLeft.cpp @@ -9,6 +9,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct BitRotateLeftImpl { @@ -42,6 +45,8 @@ struct BitRotateLeftImpl struct NameBitRotateLeft { static constexpr auto name = "bitRotateLeft"; }; using FunctionBitRotateLeft = FunctionBinaryArithmetic; +} + void registerFunctionBitRotateLeft(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp index 71d7385bbdf..7cda0b4890b 100644 --- a/src/Functions/bitRotateRight.cpp +++ b/src/Functions/bitRotateRight.cpp @@ -9,6 +9,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct BitRotateRightImpl { @@ -41,6 +44,8 @@ struct BitRotateRightImpl struct NameBitRotateRight { static constexpr auto name = "bitRotateRight"; }; using FunctionBitRotateRight = FunctionBinaryArithmetic; +} + void registerFunctionBitRotateRight(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index d42082d7778..59d236ac6af 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -9,6 +9,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct BitShiftLeftImpl { @@ -41,6 +44,8 @@ struct BitShiftLeftImpl struct NameBitShiftLeft { static constexpr auto name = "bitShiftLeft"; }; using FunctionBitShiftLeft = FunctionBinaryArithmetic; +} + void registerFunctionBitShiftLeft(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 249a86d6961..fe7def0b56b 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -9,6 +9,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct BitShiftRightImpl { @@ -41,6 +44,8 @@ struct BitShiftRightImpl struct NameBitShiftRight { static constexpr auto name = "bitShiftRight"; }; using FunctionBitShiftRight = FunctionBinaryArithmetic; +} + void registerFunctionBitShiftRight(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index 
07c4db4ebe7..4103511085d 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -4,60 +4,66 @@ namespace DB { - namespace ErrorCodes +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). +/// This function provides "NOT" operation for BoolMasks by swapping last two bits ("can be true" <-> "can be false"). +template +struct BitSwapLastTwoImpl +{ + using ResultType = UInt8; + static constexpr const bool allow_fixed_string = false; + + static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) { - extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; + if constexpr (!std::is_same_v) + // Should be a logical error, but this function is callable from SQL. + // Need to investigate this. + throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitSwapLastTwo.", ErrorCodes::BAD_ARGUMENTS); + + auto little_bits = littleBits(a); + return static_cast(((little_bits & 1) << 1) | ((little_bits >> 1) & 1)); } - /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). - /// This function provides "NOT" operation for BoolMasks by swapping last two bits ("can be true" <-> "can be false"). - template - struct BitSwapLastTwoImpl - { - using ResultType = UInt8; - static constexpr const bool allow_fixed_string = false; - - static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) - { - if constexpr (!std::is_same_v) - // Should be a logical error, but this function is callable from SQL. - // Need to investigate this. - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitSwapLastTwo.", ErrorCodes::BAD_ARGUMENTS); - - auto little_bits = littleBits(a); - return static_cast(((little_bits & 1) << 1) | ((little_bits >> 1) & 1)); - } - #if USE_EMBEDDED_COMPILER - static constexpr bool compilable = true; +static constexpr bool compilable = true; + +static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) +{ + if (!arg->getType()->isIntegerTy()) + throw Exception("__bitSwapLastTwo expected an integral type", ErrorCodes::LOGICAL_ERROR); + return b.CreateOr( + b.CreateShl(b.CreateAnd(arg, 1), 1), + b.CreateAnd(b.CreateLShr(arg, 1), 1) + ); +} +#endif +}; + +struct NameBitSwapLastTwo { static constexpr auto name = "__bitSwapLastTwo"; }; +using FunctionBitSwapLastTwo = FunctionUnaryArithmetic; + +} + +template <> struct FunctionUnaryArithmeticMonotonicity +{ + static bool has() { return false; } + static IFunction::Monotonicity get(const Field &, const Field &) + { + return {}; + } +}; + +void registerFunctionBitSwapLastTwo(FunctionFactory & factory) +{ + factory.registerFunction(); +} - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) - { - if (!arg->getType()->isIntegerTy()) - throw Exception("__bitSwapLastTwo expected an integral type", ErrorCodes::LOGICAL_ERROR); - return b.CreateOr( - b.CreateShl(b.CreateAnd(arg, 1), 1), - b.CreateAnd(b.CreateLShr(arg, 1), 1) - ); - } -#endif - }; - - struct NameBitSwapLastTwo { static constexpr auto name = "__bitSwapLastTwo"; }; - using FunctionBitSwapLastTwo = FunctionUnaryArithmetic; - - template <> struct FunctionUnaryArithmeticMonotonicity - { - static bool has() { return false; } - static IFunction::Monotonicity get(const Field &, const Field &) - { - return {}; - } - }; - - void 
registerFunctionBitSwapLastTwo(FunctionFactory & factory) - { - factory.registerFunction(); - } } diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index f34e300d675..9c9f16d87c4 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -10,6 +10,9 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +namespace +{ + template struct BitTestImpl { @@ -33,6 +36,8 @@ struct BitTestImpl struct NameBitTest { static constexpr auto name = "bitTest"; }; using FunctionBitTest = FunctionBinaryArithmetic; +} + void registerFunctionBitTest(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitTestAll.cpp b/src/Functions/bitTestAll.cpp index a359ea0b699..901fd600106 100644 --- a/src/Functions/bitTestAll.cpp +++ b/src/Functions/bitTestAll.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct BitTestAllImpl { @@ -13,6 +15,8 @@ struct BitTestAllImpl struct NameBitTestAll { static constexpr auto name = "bitTestAll"; }; using FunctionBitTestAll = FunctionBitTestMany; +} + void registerFunctionBitTestAll(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitTestAny.cpp b/src/Functions/bitTestAny.cpp index b6601dc55fd..6874e74e2be 100644 --- a/src/Functions/bitTestAny.cpp +++ b/src/Functions/bitTestAny.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct BitTestAnyImpl { @@ -13,6 +15,8 @@ struct BitTestAnyImpl struct NameBitTestAny { static constexpr auto name = "bitTestAny"; }; using FunctionBitTestAny = FunctionBitTestMany; +} + void registerFunctionBitTestAny(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index 140163d0926..8395865d554 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -4,49 +4,53 @@ namespace DB { - namespace ErrorCodes +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). +/// This function wraps bool atomic functions +/// and transforms their boolean return value to the BoolMask ("can be false" and "can be true" bits). +template +struct BitWrapperFuncImpl +{ + using ResultType = UInt8; + static constexpr const bool allow_fixed_string = false; + + static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) { - extern const int BAD_ARGUMENTS; + // Should be a logical error, but this function is callable from SQL. + // Need to investigate this. + if constexpr (!is_integer_v) + throw DB::Exception("It's a bug! Only integer types are supported by __bitWrapperFunc.", ErrorCodes::BAD_ARGUMENTS); + return a == 0 ? static_cast(0b10) : static_cast(0b1); } - /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). - /// This function wraps bool atomic functions - /// and transforms their boolean return value to the BoolMask ("can be false" and "can be true" bits). - template - struct BitWrapperFuncImpl - { - using ResultType = UInt8; - static constexpr const bool allow_fixed_string = false; - - static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) - { - // Should be a logical error, but this function is callable from SQL. - // Need to investigate this. - if constexpr (!is_integer_v) - throw DB::Exception("It's a bug! Only integer types are supported by __bitWrapperFunc.", ErrorCodes::BAD_ARGUMENTS); - return a == 0 ? 
static_cast(0b10) : static_cast(0b1); - } - #if USE_EMBEDDED_COMPILER - static constexpr bool compilable = false; + static constexpr bool compilable = false; #endif - }; +}; - struct NameBitWrapperFunc { static constexpr auto name = "__bitWrapperFunc"; }; - using FunctionBitWrapperFunc = FunctionUnaryArithmetic; - - template <> struct FunctionUnaryArithmeticMonotonicity - { - static bool has() { return false; } - static IFunction::Monotonicity get(const Field &, const Field &) - { - return {}; - } - }; - - void registerFunctionBitWrapperFunc(FunctionFactory & factory) - { - factory.registerFunction(); - } +struct NameBitWrapperFunc { static constexpr auto name = "__bitWrapperFunc"; }; +using FunctionBitWrapperFunc = FunctionUnaryArithmetic; } + +template <> struct FunctionUnaryArithmeticMonotonicity +{ + static bool has() { return false; } + static IFunction::Monotonicity get(const Field &, const Field &) + { + return {}; + } +}; + +void registerFunctionBitWrapperFunc(FunctionFactory & factory) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp index 188cad84f81..3d323fde8bb 100644 --- a/src/Functions/bitXor.cpp +++ b/src/Functions/bitXor.cpp @@ -8,6 +8,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct BitXorImpl { @@ -35,6 +38,8 @@ struct BitXorImpl struct NameBitXor { static constexpr auto name = "bitXor"; }; using FunctionBitXor = FunctionBinaryArithmetic; +} + void registerFunctionBitXor(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/blockNumber.cpp b/src/Functions/blockNumber.cpp index e83850803f6..74d33b71fcf 100644 --- a/src/Functions/blockNumber.cpp +++ b/src/Functions/blockNumber.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ /** Incremental block number among calls of this function. */ class FunctionBlockNumber : public IFunction @@ -56,6 +58,7 @@ public: } }; +} void registerFunctionBlockNumber(FunctionFactory & factory) { diff --git a/src/Functions/blockSerializedSize.cpp b/src/Functions/blockSerializedSize.cpp index 47a221dc391..b14accf19da 100644 --- a/src/Functions/blockSerializedSize.cpp +++ b/src/Functions/blockSerializedSize.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ /// Returns size on disk for *block* (without taking into account compression). class FunctionBlockSerializedSize : public IFunction @@ -60,6 +62,7 @@ public: } }; +} void registerFunctionBlockSerializedSize(FunctionFactory & factory) { diff --git a/src/Functions/blockSize.cpp b/src/Functions/blockSize.cpp index 8fe304a17f9..65977bfded6 100644 --- a/src/Functions/blockSize.cpp +++ b/src/Functions/blockSize.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ /** blockSize() - get the block size in number of rows. */ @@ -47,6 +49,7 @@ public: } }; +} void registerFunctionBlockSize(FunctionFactory & factory) { diff --git a/src/Functions/buildId.cpp b/src/Functions/buildId.cpp index bf3640bf68f..117e1346f7b 100644 --- a/src/Functions/buildId.cpp +++ b/src/Functions/buildId.cpp @@ -9,6 +9,8 @@ namespace DB { +namespace +{ /** buildId() - returns the compiler build id of the running binary. 
*/ @@ -42,6 +44,7 @@ public: } }; +} void registerFunctionBuildId(FunctionFactory & factory) { diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index eae1d7271c2..699db5cb090 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -12,6 +12,9 @@ namespace ErrorCodes extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } +namespace +{ + /// Implements the CASE construction when it is /// provided an expression. Users should not call this function. class FunctionCaseWithExpression : public IFunction @@ -112,6 +115,8 @@ private: const Context & context; }; +} + void registerFunctionCaseWithExpression(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/cbrt.cpp b/src/Functions/cbrt.cpp index f12ae0a6504..94c8627c320 100644 --- a/src/Functions/cbrt.cpp +++ b/src/Functions/cbrt.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct CbrtName { static constexpr auto name = "cbrt"; }; using FunctionCbrt = FunctionMathUnary>; +} + void registerFunctionCbrt(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/coalesce.cpp b/src/Functions/coalesce.cpp index 2f755b8fd8d..05f70f7783b 100644 --- a/src/Functions/coalesce.cpp +++ b/src/Functions/coalesce.cpp @@ -12,6 +12,8 @@ namespace DB { +namespace +{ /// Implements the function coalesce which takes a set of arguments and /// returns the value of the leftmost non-null argument. If no such value is @@ -175,6 +177,7 @@ private: const Context & context; }; +} void registerFunctionCoalesce(FunctionFactory & factory) { diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index 12ab2d208a7..3c5c7d29e3f 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -25,6 +25,8 @@ namespace ErrorCodes using namespace GatherUtils; +namespace +{ template class ConcatImpl : public IFunction @@ -225,6 +227,7 @@ private: const Context & context; }; +} void registerFunctionsConcat(FunctionFactory & factory) { diff --git a/src/Functions/convertCharset.cpp b/src/Functions/convertCharset.cpp index 0bc2594bf37..9006dfd206a 100644 --- a/src/Functions/convertCharset.cpp +++ b/src/Functions/convertCharset.cpp @@ -30,6 +30,8 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ /** convertCharset(s, from, to) * @@ -210,6 +212,7 @@ public: } }; +} void registerFunctionConvertCharset(FunctionFactory & factory) { diff --git a/src/Functions/cos.cpp b/src/Functions/cos.cpp index a047ea2c252..e18524dd56b 100644 --- a/src/Functions/cos.cpp +++ b/src/Functions/cos.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct CosName { static constexpr auto name = "cos"; }; using FunctionCos = FunctionMathUnary>; +} + void registerFunctionCos(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/countDigits.cpp b/src/Functions/countDigits.cpp index 9f60bdf9689..5130f0c463e 100644 --- a/src/Functions/countDigits.cpp +++ b/src/Functions/countDigits.cpp @@ -16,6 +16,9 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ + /// Returns number of decimal digits you need to represent the value. /// For Decimal values takes in account their scales: calculates result over underlying int type which is (value * scale). /// countDigits(42) = 2, countDigits(42.000) = 5, countDigits(0.04200) = 4. 
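The examples in the countDigits comment are reproducible with a plain digit count over the underlying integer, since a Decimal with scale s stores value * 10^s as one integer. A minimal sketch of the counting loop (illustrative only, not the actual implementation):

#include <cassert>
#include <cstdint>

// Digits needed to print |x| in base 10; the Decimal cases reduce to this
// because Decimal(precision, scale) stores value * 10^scale as a plain integer.
int countDigitsSketch(int64_t x)
{
    uint64_t v = x < 0 ? 0 - static_cast<uint64_t>(x) : static_cast<uint64_t>(x);
    int digits = 0;
    for (; v != 0; v /= 10)
        ++digits;
    return digits;
}

int main()
{
    assert(countDigitsSketch(42) == 2);
    assert(countDigitsSketch(42000) == 5); // 42.000 with scale 3 is stored as 42000
    assert(countDigitsSketch(4200) == 4);  // 0.04200 with scale 5 is stored as 4200
}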
@@ -136,6 +139,7 @@ private: } }; +} void registerFunctionCountDigits(FunctionFactory & factory) { diff --git a/src/Functions/currentDatabase.cpp b/src/Functions/currentDatabase.cpp index b6933051ac0..5be25c63ae6 100644 --- a/src/Functions/currentDatabase.cpp +++ b/src/Functions/currentDatabase.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ class FunctionCurrentDatabase : public IFunction { @@ -45,6 +47,7 @@ public: } }; +} void registerFunctionCurrentDatabase(FunctionFactory & factory) { diff --git a/src/Functions/currentUser.cpp b/src/Functions/currentUser.cpp index f51b92e68b8..ce4cd4a3f83 100644 --- a/src/Functions/currentUser.cpp +++ b/src/Functions/currentUser.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ class FunctionCurrentUser : public IFunction { @@ -45,6 +47,7 @@ public: } }; +} void registerFunctionCurrentUser(FunctionFactory & factory) { diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index a0263d80a5c..fea3b2c46ac 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -28,6 +28,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + /** dateDiff('unit', t1, t2, [timezone]) * t1 and t2 can be Date or DateTime * @@ -212,6 +215,8 @@ private: } }; +} + void registerFunctionDateDiff(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/date_trunc.cpp b/src/Functions/date_trunc.cpp index af92b59f941..f14241e1b86 100644 --- a/src/Functions/date_trunc.cpp +++ b/src/Functions/date_trunc.cpp @@ -17,6 +17,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ class FunctionDateTrunc : public IFunction { @@ -159,6 +161,7 @@ private: mutable IntervalKind::Kind datepart_kind = IntervalKind::Kind::Second; }; +} void registerFunctionDateTrunc(FunctionFactory & factory) { diff --git a/src/Functions/defaultValueOfArgumentType.cpp b/src/Functions/defaultValueOfArgumentType.cpp index f9c906361f9..7bcb92b67f4 100644 --- a/src/Functions/defaultValueOfArgumentType.cpp +++ b/src/Functions/defaultValueOfArgumentType.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ /// Returns global default value for type of passed argument (example: 0 for numeric types, '' for String). class FunctionDefaultValueOfArgumentType : public IFunction @@ -47,6 +49,7 @@ public: } }; +} void registerFunctionDefaultValueOfArgumentType(FunctionFactory & factory) { diff --git a/src/Functions/defaultValueOfTypeName.cpp b/src/Functions/defaultValueOfTypeName.cpp index 40527b21935..8baea9b4501 100644 --- a/src/Functions/defaultValueOfTypeName.cpp +++ b/src/Functions/defaultValueOfTypeName.cpp @@ -13,6 +13,9 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ + /// Returns global default value for type name (example: 0 for numeric types, '' for String). 
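dateDiff counts unit boundaries crossed rather than fully elapsed intervals, which matches truncating both arguments to the unit and subtracting. A rough C++20 <chrono> sketch of the 'day' and 'hour' cases, ignoring the optional time-zone argument that the real function must honour:

#include <chrono>
#include <cstdint>
#include <iostream>

// dateDiff-style difference: truncate both timestamps to the unit, then subtract,
// so the result counts unit boundaries crossed. Time zones are ignored here.
template <typename Unit>
int64_t dateDiffSketch(std::chrono::sys_seconds t1, std::chrono::sys_seconds t2)
{
    return (std::chrono::floor<Unit>(t2) - std::chrono::floor<Unit>(t1)) / Unit(1);
}

int main()
{
    using namespace std::chrono;
    sys_seconds t1 = sys_days{2020y / 6 / 25} + 23h + 59min;
    sys_seconds t2 = sys_days{2020y / 6 / 26} + 0h + 1min;
    std::cout << dateDiffSketch<days>(t1, t2) << '\n';  // 1: one midnight was crossed
    std::cout << dateDiffSketch<hours>(t1, t2) << '\n'; // 1: two minutes elapsed, but an hour boundary was crossed
}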
class FunctionDefaultValueOfTypeName : public IFunction { @@ -53,6 +56,7 @@ public: } }; +} void registerFunctionDefaultValueOfTypeName(FunctionFactory & factory) { diff --git a/src/Functions/demange.cpp b/src/Functions/demange.cpp index db525c4d0d1..24d792b6f4d 100644 --- a/src/Functions/demange.cpp +++ b/src/Functions/demange.cpp @@ -18,6 +18,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class FunctionDemangle : public IFunction { public: @@ -86,6 +89,8 @@ public: } }; +} + void registerFunctionDemangle(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/divide.cpp b/src/Functions/divide.cpp index cfc535320ed..178dd627b01 100644 --- a/src/Functions/divide.cpp +++ b/src/Functions/divide.cpp @@ -9,6 +9,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template struct DivideFloatingImpl { @@ -40,6 +43,8 @@ struct DivideFloatingImpl struct NameDivide { static constexpr auto name = "divide"; }; using FunctionDivide = FunctionBinaryArithmetic; +} + void registerFunctionDivide(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/dumpColumnStructure.cpp b/src/Functions/dumpColumnStructure.cpp index 7f3d476ba4e..c7f54a72190 100644 --- a/src/Functions/dumpColumnStructure.cpp +++ b/src/Functions/dumpColumnStructure.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ /// Dump the structure of type and column. class FunctionDumpColumnStructure : public IFunction @@ -46,6 +48,7 @@ public: } }; +} void registerFunctionDumpColumnStructure(FunctionFactory & factory) { diff --git a/src/Functions/e.cpp b/src/Functions/e.cpp index cb4cd66b44c..c43bb7d572a 100644 --- a/src/Functions/e.cpp +++ b/src/Functions/e.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct EImpl { @@ -12,6 +14,8 @@ struct EImpl using FunctionE = FunctionMathConstFloat64; +} + void registerFunctionE(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/empty.cpp b/src/Functions/empty.cpp index dfc13253b48..552fce85de0 100644 --- a/src/Functions/empty.cpp +++ b/src/Functions/empty.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameEmpty { @@ -13,6 +15,8 @@ struct NameEmpty }; using FunctionEmpty = FunctionStringOrArrayToT, NameEmpty, UInt8>; +} + void registerFunctionEmpty(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/erf.cpp b/src/Functions/erf.cpp index 8ce6732213f..9c7ca637736 100644 --- a/src/Functions/erf.cpp +++ b/src/Functions/erf.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct ErfName { static constexpr auto name = "erf"; }; using FunctionErf = FunctionMathUnary>; +} + void registerFunctionErf(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/erfc.cpp b/src/Functions/erfc.cpp index cd7a36b6865..8e0b462ff8e 100644 --- a/src/Functions/erfc.cpp +++ b/src/Functions/erfc.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct ErfcName { static constexpr auto name = "erfc"; }; using FunctionErfc = FunctionMathUnary>; +} + void registerFunctionErfc(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/evalMLMethod.cpp b/src/Functions/evalMLMethod.cpp index f4c8ecf1c2b..b0912b8aac6 100644 --- a/src/Functions/evalMLMethod.cpp +++ b/src/Functions/evalMLMethod.cpp @@ -11,14 +11,15 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} - namespace 
ErrorCodes - { - extern const int BAD_ARGUMENTS; - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - } - +namespace +{ /** finalizeAggregation(agg_state) - get the result from the aggregation state. * Takes state of aggregate function. Returns result of aggregation (finalized state). @@ -83,6 +84,8 @@ public: const Context & context; }; +} + void registerFunctionEvalMLMethod(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/exp.cpp b/src/Functions/exp.cpp index 550b3b9d3ab..c6eb3335f25 100644 --- a/src/Functions/exp.cpp +++ b/src/Functions/exp.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct ExpName { static constexpr auto name = "exp"; }; @@ -30,6 +32,8 @@ using FunctionExp = FunctionMathUnary; using FunctionExp = FunctionMathUnary>; #endif +} + void registerFunctionExp(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/exp10.cpp b/src/Functions/exp10.cpp index 230e2282ca6..4be7e15562e 100644 --- a/src/Functions/exp10.cpp +++ b/src/Functions/exp10.cpp @@ -4,11 +4,14 @@ namespace DB { +namespace +{ struct Exp10Name { static constexpr auto name = "exp10"; }; - using FunctionExp10 = FunctionMathUnary>; +} + void registerFunctionExp10(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/exp2.cpp b/src/Functions/exp2.cpp index e33d07b489f..8825b849346 100644 --- a/src/Functions/exp2.cpp +++ b/src/Functions/exp2.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct Exp2Name { static constexpr auto name = "exp2"; }; using FunctionExp2 = FunctionMathUnary>; +} + void registerFunctionExp2(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/extract.cpp b/src/Functions/extract.cpp index 695ba037a02..0296602d205 100644 --- a/src/Functions/extract.cpp +++ b/src/Functions/extract.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct ExtractImpl { @@ -61,6 +63,8 @@ struct NameExtract using FunctionExtract = FunctionsStringSearchToString; +} + void registerFunctionExtract(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index 2146f8d72b9..61055b1652d 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -13,12 +13,13 @@ namespace DB { - namespace ErrorCodes { extern const int BAD_ARGUMENTS; } +namespace +{ /** Match all groups of given input string with given re, return array of arrays of matches. * @@ -103,6 +104,8 @@ public: } }; +} + void registerFunctionExtractGroups(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 25f4ada78fc..2f95f9b6e6d 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ struct FilesystemAvailable { @@ -57,6 +59,7 @@ private: std::filesystem::space_info spaceinfo; }; +} void registerFunctionFilesystem(FunctionFactory & factory) { diff --git a/src/Functions/finalizeAggregation.cpp b/src/Functions/finalizeAggregation.cpp index 425b4e2079b..51afb4729dc 100644 --- a/src/Functions/finalizeAggregation.cpp +++ b/src/Functions/finalizeAggregation.cpp @@ -8,13 +8,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ /** finalizeAggregation(agg_state) - get the result from the aggregation state. * Takes state of aggregate function. 
Returns result of aggregation (finalized state). @@ -73,6 +74,7 @@ public: } }; +} void registerFunctionFinalizeAggregation(FunctionFactory & factory) { diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index c1140a21a9f..4a7d0b18fba 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -23,7 +23,6 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -35,7 +34,7 @@ namespace ErrorCodes namespace { -// in private namespace to avoid GCC 9 error: "explicit specialization in non-namespace scope" + template struct ActionValueTypeMap {}; template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; @@ -50,7 +49,7 @@ template <> struct ActionValueTypeMap { using ActionValueTyp // TODO(vnemkov): once there is support for Int64 in LUT, make that Int64. // TODO(vnemkov): to add sub-second format instruction, make that DateTime64 and do some math in Action. template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -} + /** formatDateTime(time, 'pattern') * Performs formatting of time, according to provided pattern. @@ -714,6 +713,8 @@ struct NameFromUnixTime using FunctionFormatDateTime = FunctionFormatDateTimeImpl; using FunctionFROM_UNIXTIME = FunctionFormatDateTimeImpl; +} + void registerFunctionFormatDateTime(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index 9bd60f2fdc0..a8fce0144e2 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -21,6 +21,8 @@ namespace ErrorCodes extern const int UNKNOWN_FORMAT; } +namespace +{ /** formatRow(, x, y, ...) is a function that allows you to use RowOutputFormat over * several columns to generate a string per row, such as CSV, TSV, JSONEachRow, etc. 
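The formatDateTime hunk drops the old note about the GCC 9 error ("explicit specialization in non-namespace scope") because it became moot: once the whole file body sits in one anonymous namespace, the specializations are at namespace scope anyway, and every helper gets internal linkage as a bonus. The constraint in miniature, with illustrative names:

#include <cstdint>

namespace
{

// Primary template: maps an input type to the integer type the formatting action consumes.
template <typename DataType> struct ActionValueTypeMapSketch {};

// Explicit specializations are legal here: an anonymous namespace is still namespace scope,
// so no compiler objects, and the names stay private to this translation unit.
template <> struct ActionValueTypeMapSketch<int16_t>  { using ActionValueType = uint32_t; };
template <> struct ActionValueTypeMapSketch<uint32_t> { using ActionValueType = uint32_t; };

}

int main()
{
    static_assert(sizeof(ActionValueTypeMapSketch<int16_t>::ActionValueType) == 4);
}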
@@ -112,6 +114,8 @@ private: const Context & context; }; +} + void registerFunctionFormatRow(FunctionFactory & factory) { factory.registerFunction>(); diff --git a/src/Functions/formatString.cpp b/src/Functions/formatString.cpp index 4c0ca01d207..cd727adcabc 100644 --- a/src/Functions/formatString.cpp +++ b/src/Functions/formatString.cpp @@ -22,6 +22,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + template class FormatFunction : public IFunction { @@ -130,6 +133,8 @@ struct NameFormat }; using FunctionFormat = FormatFunction; +} + void registerFunctionFormat(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/fuzzBits.cpp b/src/Functions/fuzzBits.cpp index c5ef448e7f0..b379db3e8db 100644 --- a/src/Functions/fuzzBits.cpp +++ b/src/Functions/fuzzBits.cpp @@ -46,7 +46,6 @@ namespace ptr_out[i] = ptr_in[i] ^ mask; } } -} class FunctionFuzzBits : public IFunction @@ -143,6 +142,8 @@ public: } }; +} + void registerFunctionFuzzBits(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/gcd.cpp b/src/Functions/gcd.cpp index b5d1ed6e92c..244b25b194d 100644 --- a/src/Functions/gcd.cpp +++ b/src/Functions/gcd.cpp @@ -5,12 +5,14 @@ namespace DB { - namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } +namespace +{ + template struct GCDImpl { @@ -40,6 +42,8 @@ struct GCDImpl struct NameGCD { static constexpr auto name = "gcd"; }; using FunctionGCD = FunctionBinaryArithmetic; +} + void registerFunctionGCD(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 924cdf68cb9..8bcec7b73c5 100644 --- a/src/Functions/geoToH3.cpp +++ b/src/Functions/geoToH3.cpp @@ -17,6 +17,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + /// Implements the function geoToH3 which takes 3 arguments (latitude, longitude and h3 resolution) /// and returns h3 index of this point class FunctionGeoToH3 : public IFunction @@ -83,6 +86,7 @@ public: } }; +} void registerFunctionGeoToH3(FunctionFactory & factory) { diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index f8e57d0feb1..bdfab59b91e 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -14,12 +14,13 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; } +namespace +{ // geohashDecode(string) => (lon float64, lat float64) class FunctionGeohashDecode : public IFunction @@ -89,6 +90,7 @@ public: } }; +} void registerFunctionGeohashDecode(FunctionFactory & factory) { diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 99e7dd05e37..30448a5430a 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -20,6 +20,9 @@ namespace ErrorCodes extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } +namespace +{ + // geohashEncode(lon float32/64, lat float32/64, length UInt8) => string class FunctionGeohashEncode : public IFunction { @@ -127,6 +130,7 @@ public: } }; +} void registerFunctionGeohashEncode(FunctionFactory & factory) { diff --git a/src/Functions/geohashesInBox.cpp b/src/Functions/geohashesInBox.cpp index 7fc0b00fb57..b70f0cf02b3 100644 --- a/src/Functions/geohashesInBox.cpp +++ b/src/Functions/geohashesInBox.cpp @@ -22,6 +22,9 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int TOO_LARGE_ARRAY_SIZE; } +namespace +{ + class FunctionGeohashesInBox : public IFunction { public: @@ -172,6 +175,8 @@ public: } 
}; +} + void registerFunctionGeohashesInBox(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/getMacro.cpp b/src/Functions/getMacro.cpp index 06de09ea17c..e900678ae20 100644 --- a/src/Functions/getMacro.cpp +++ b/src/Functions/getMacro.cpp @@ -10,13 +10,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; } +namespace +{ + /** Get the value of macro from configuration file. * For example, it may be used as a sophisticated replacement for the function 'hostName' if servers have complicated hostnames * but you still need to distinguish them by some convenient names. @@ -75,6 +77,7 @@ public: } }; +} void registerFunctionGetMacro(FunctionFactory & factory) { diff --git a/src/Functions/getScalar.cpp b/src/Functions/getScalar.cpp index a9cf538000d..a989daf83fb 100644 --- a/src/Functions/getScalar.cpp +++ b/src/Functions/getScalar.cpp @@ -10,12 +10,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + /** Get scalar value of sub queries from query context via IAST::Hash. */ class FunctionGetScalar : public IFunction @@ -59,6 +61,7 @@ private: const Context & context; }; +} void registerFunctionGetScalar(FunctionFactory & factory) { diff --git a/src/Functions/getSetting.cpp b/src/Functions/getSetting.cpp index 7421aca81f3..c883931fbe0 100644 --- a/src/Functions/getSetting.cpp +++ b/src/Functions/getSetting.cpp @@ -9,13 +9,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; } +namespace +{ + /// Get the value of a setting. class FunctionGetSetting : public IFunction { @@ -58,6 +60,7 @@ private: const Context & context; }; +} void registerFunctionGetSetting(FunctionFactory & factory) { diff --git a/src/Functions/getSizeOfEnumType.cpp b/src/Functions/getSizeOfEnumType.cpp index 6db9a6c6837..54f8b12e819 100644 --- a/src/Functions/getSizeOfEnumType.cpp +++ b/src/Functions/getSizeOfEnumType.cpp @@ -7,12 +7,13 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ /// Returns number of fields in Enum data type of passed value. class FunctionGetSizeOfEnumType : public IFunction @@ -64,6 +65,7 @@ public: } }; +} void registerFunctionGetSizeOfEnumType(FunctionFactory & factory) { diff --git a/src/Functions/globalVariable.cpp b/src/Functions/globalVariable.cpp index 381651c30b9..4652ec39d23 100644 --- a/src/Functions/globalVariable.cpp +++ b/src/Functions/globalVariable.cpp @@ -13,12 +13,13 @@ namespace DB { - namespace ErrorCodes { extern const int BAD_ARGUMENTS; } +namespace +{ /** globalVariable('name') - takes constant string argument and returns the value of global variable with that name. * It is intended for compatibility with MySQL. 
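globalVariable is nothing more than a constant lookup: the next hunk shows the table with the `max_allowed_packet` (67108864) and `version` ("5.7.30") entries. A stripped-down sketch of that shape; the real entry pairs a data type with the value, the types here are simplified stand-ins:

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <variant>

// Hypothetical mini version: map a MySQL-compatible variable name to a constant value.
using VariableValue = std::variant<int64_t, std::string>;

const std::unordered_map<std::string, VariableValue> global_variables =
{
    {"max_allowed_packet", int64_t{67108864}},
    {"version", std::string{"5.7.30"}},
};

int main()
{
    if (auto it = global_variables.find("version"); it != global_variables.end())
        std::cout << std::get<std::string>(it->second) << '\n'; // 5.7.30
}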
@@ -81,6 +82,7 @@ private: {"max_allowed_packet", {std::make_shared(), 67108864}}, {"version", {std::make_shared(), "5.7.30"}}}; }; +} void registerFunctionGlobalVariable(FunctionFactory & factory) { diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 39021935111..da1a372b0b2 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ template struct GreatestBaseImpl @@ -58,6 +60,8 @@ using GreatestImpl = std::conditional_t; +} + void registerFunctionGreatest(FunctionFactory & factory) { factory.registerFunction>(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/h3EdgeAngle.cpp b/src/Functions/h3EdgeAngle.cpp index 4472c698d21..f1fbd1b9d72 100644 --- a/src/Functions/h3EdgeAngle.cpp +++ b/src/Functions/h3EdgeAngle.cpp @@ -18,6 +18,9 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; } +namespace +{ + class FunctionH3EdgeAngle : public IFunction { public: @@ -66,6 +69,7 @@ public: } }; +} void registerFunctionH3EdgeAngle(FunctionFactory & factory) { diff --git a/src/Functions/h3EdgeLengthM.cpp b/src/Functions/h3EdgeLengthM.cpp index cba0ed831c7..d4c9916e4b5 100644 --- a/src/Functions/h3EdgeLengthM.cpp +++ b/src/Functions/h3EdgeLengthM.cpp @@ -12,13 +12,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; } +namespace +{ + // Average metric edge length of H3 hexagon. The edge length `e` for given resolution `res` can // be used for converting metric search radius `radius` to hexagon search ring size `k` that is // used by `H3kRing` function. For small enough search area simple flat approximation can be used, @@ -71,6 +73,7 @@ public: } }; +} void registerFunctionH3EdgeLengthM(FunctionFactory & factory) { diff --git a/src/Functions/h3GetBaseCell.cpp b/src/Functions/h3GetBaseCell.cpp index 15cd5d21c50..d8d3a85504b 100644 --- a/src/Functions/h3GetBaseCell.cpp +++ b/src/Functions/h3GetBaseCell.cpp @@ -14,6 +14,10 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } + +namespace +{ + class FunctionH3GetBaseCell : public IFunction { public: @@ -58,6 +62,7 @@ public: } }; +} void registerFunctionH3GetBaseCell(FunctionFactory & factory) { diff --git a/src/Functions/h3GetResolution.cpp b/src/Functions/h3GetResolution.cpp index 52d2d987b5e..7692b3d0a98 100644 --- a/src/Functions/h3GetResolution.cpp +++ b/src/Functions/h3GetResolution.cpp @@ -14,6 +14,10 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } + +namespace +{ + class FunctionH3GetResolution : public IFunction { public: @@ -58,6 +62,7 @@ public: } }; +} void registerFunctionH3GetResolution(FunctionFactory & factory) { diff --git a/src/Functions/h3HexAreaM2.cpp b/src/Functions/h3HexAreaM2.cpp index a2b76560a1d..7b12cc0201b 100644 --- a/src/Functions/h3HexAreaM2.cpp +++ b/src/Functions/h3HexAreaM2.cpp @@ -12,13 +12,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; } +namespace +{ + class FunctionH3HexAreaM2 : public IFunction { public: @@ -66,6 +68,7 @@ public: } }; +} void registerFunctionH3HexAreaM2(FunctionFactory & factory) { diff --git a/src/Functions/h3IndexesAreNeighbors.cpp b/src/Functions/h3IndexesAreNeighbors.cpp index 2022ebd63d3..e1f69e30ecf 100644 --- a/src/Functions/h3IndexesAreNeighbors.cpp +++ b/src/Functions/h3IndexesAreNeighbors.cpp @@ -14,6 +14,10 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } + +namespace +{ 
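The h3EdgeLengthM comment above describes turning a metric search radius into a kRing size via the average edge length. One deliberately conservative flat-plane choice (an assumption for illustration, not a formula taken from this patch): adjacent hexagon centers sit at least one edge length apart (about sqrt(3) times the edge, in fact), so ceil(radius / e) rings always cover the radius, at the cost of overshooting:

#include <cmath>
#include <cstdint>
#include <iostream>

// Conservative flat-plane approximation (assumption, see note above):
// adjacent hexagon centers are at least one edge length apart, so this k always covers radius_m.
int64_t radiusToKRing(double radius_m, double edge_length_m)
{
    return static_cast<int64_t>(std::ceil(radius_m / edge_length_m));
}

int main()
{
    // Resolution 8 H3 cells have an average edge length of roughly 461 m.
    std::cout << radiusToKRing(2000.0, 461.0) << '\n'; // 5
}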
+ class FunctionH3IndexesAreNeighbors : public IFunction { public: @@ -66,6 +70,7 @@ public: } }; +} void registerFunctionH3IndexesAreNeighbors(FunctionFactory & factory) { diff --git a/src/Functions/h3IsValid.cpp b/src/Functions/h3IsValid.cpp index a338ae0f131..60719718d75 100644 --- a/src/Functions/h3IsValid.cpp +++ b/src/Functions/h3IsValid.cpp @@ -14,6 +14,10 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } + +namespace +{ + class FunctionH3IsValid : public IFunction { public: @@ -58,6 +62,7 @@ public: } }; +} void registerFunctionH3IsValid(FunctionFactory & factory) { diff --git a/src/Functions/h3ToChildren.cpp b/src/Functions/h3ToChildren.cpp index d9e402231f7..70add2c0f4b 100644 --- a/src/Functions/h3ToChildren.cpp +++ b/src/Functions/h3ToChildren.cpp @@ -17,7 +17,6 @@ static constexpr size_t MAX_ARRAY_SIZE = 1 << 30; namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -25,6 +24,9 @@ namespace ErrorCodes extern const int TOO_LARGE_ARRAY_SIZE; } +namespace +{ + class FunctionH3ToChildren : public IFunction { public: @@ -101,6 +103,7 @@ public: } }; +} void registerFunctionH3ToChildren(FunctionFactory & factory) { diff --git a/src/Functions/h3ToParent.cpp b/src/Functions/h3ToParent.cpp index 2f6a9f3264d..f5b4e106cba 100644 --- a/src/Functions/h3ToParent.cpp +++ b/src/Functions/h3ToParent.cpp @@ -12,13 +12,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; } +namespace +{ + class FunctionH3ToParent : public IFunction { public: @@ -75,6 +77,7 @@ public: } }; +} void registerFunctionH3ToParent(FunctionFactory & factory) { diff --git a/src/Functions/h3ToString.cpp b/src/Functions/h3ToString.cpp index 31adbe56eca..5355adaff2a 100644 --- a/src/Functions/h3ToString.cpp +++ b/src/Functions/h3ToString.cpp @@ -14,6 +14,10 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } + +namespace +{ + class FunctionH3ToString : public IFunction { public: @@ -74,6 +78,7 @@ public: } }; +} void registerFunctionH3ToString(FunctionFactory & factory) { diff --git a/src/Functions/h3kRing.cpp b/src/Functions/h3kRing.cpp index 9702edf7079..a1ea1e217f8 100644 --- a/src/Functions/h3kRing.cpp +++ b/src/Functions/h3kRing.cpp @@ -21,6 +21,9 @@ namespace ErrorCodes extern const int PARAMETER_OUT_OF_BOUND; } +namespace +{ + class FunctionH3KRing : public IFunction { public: @@ -97,6 +100,7 @@ public: } }; +} void registerFunctionH3KRing(FunctionFactory & factory) { diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp index 258cbbac006..c69d1025740 100644 --- a/src/Functions/hasColumnInTable.cpp +++ b/src/Functions/hasColumnInTable.cpp @@ -12,7 +12,6 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -20,6 +19,8 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; } +namespace +{ /** Usage: * hasColumnInTable(['hostname'[, 'username'[, 'password']],] 'database', 'table', 'column') @@ -140,6 +141,7 @@ void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, Field(has_column)); } +} void registerFunctionHasColumnInTable(FunctionFactory & factory) { diff --git a/src/Functions/hasThreadFuzzer.cpp b/src/Functions/hasThreadFuzzer.cpp index 0b0db50a37c..f16a4f34de3 100644 --- a/src/Functions/hasThreadFuzzer.cpp +++ b/src/Functions/hasThreadFuzzer.cpp @@ -6,6 +6,8 @@ namespace DB { 
+namespace +{ /** Returns whether Thread Fuzzer is effective. * It can be used in tests to prevent too long runs. @@ -40,6 +42,7 @@ public: } }; +} void registerFunctionHasThreadFuzzer(FunctionFactory & factory) { diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index ee04484ad54..f20edffbdd8 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameHasToken { @@ -14,6 +16,8 @@ struct NameHasToken using FunctionHasToken = FunctionsStringSearch, NameHasToken>; +} + void registerFunctionHasToken(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index c58df05d239..28f5b9e80c1 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameHasTokenCaseInsensitive { @@ -15,6 +17,8 @@ struct NameHasTokenCaseInsensitive using FunctionHasTokenCaseInsensitive = FunctionsStringSearch, NameHasTokenCaseInsensitive>; +} + void registerFunctionHasTokenCaseInsensitive(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/hostName.cpp b/src/Functions/hostName.cpp index 3b463b0bab5..faa1c8a944f 100644 --- a/src/Functions/hostName.cpp +++ b/src/Functions/hostName.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ /// Get the host name. Is is constant on single server, but is not constant in distributed queries. class FunctionHostName : public IFunction @@ -50,6 +52,7 @@ public: } }; +} void registerFunctionHostName(FunctionFactory & factory) { diff --git a/src/Functions/identity.cpp b/src/Functions/identity.cpp index 5308c3d944f..86c56c4911f 100644 --- a/src/Functions/identity.cpp +++ b/src/Functions/identity.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ class FunctionIdentity : public IFunction { @@ -35,6 +37,7 @@ public: } }; +} void registerFunctionIdentity(FunctionFactory & factory) { diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 8bb04abe834..20848bede32 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -28,7 +28,6 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; @@ -36,6 +35,8 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +namespace +{ using namespace GatherUtils; @@ -1050,6 +1051,8 @@ public: } }; +} + void registerFunctionIf(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/ifNotFinite.cpp b/src/Functions/ifNotFinite.cpp index d806a974926..b9e04d733ae 100644 --- a/src/Functions/ifNotFinite.cpp +++ b/src/Functions/ifNotFinite.cpp @@ -8,6 +8,8 @@ namespace DB { +namespace +{ /// ifNotFinite(x, y) is equivalent to isFinite(x) ? x : y. class FunctionIfNotFinite : public IFunction @@ -63,6 +65,7 @@ private: const Context & context; }; +} void registerFunctionIfNotFinite(FunctionFactory & factory) { diff --git a/src/Functions/ifNull.cpp b/src/Functions/ifNull.cpp index e76378ef4e5..3d2b5b7210a 100644 --- a/src/Functions/ifNull.cpp +++ b/src/Functions/ifNull.cpp @@ -10,6 +10,8 @@ namespace DB { +namespace +{ /// Implements the function ifNull which takes 2 arguments and returns /// the value of the 1st argument if it is not null. 
Otherwise it returns @@ -92,6 +94,7 @@ private: const Context & context; }; +} void registerFunctionIfNull(FunctionFactory & factory) { diff --git a/src/Functions/ignore.cpp b/src/Functions/ignore.cpp index fe41d860291..61e42fc4eb7 100644 --- a/src/Functions/ignore.cpp +++ b/src/Functions/ignore.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ /** ignore(...) is a function that takes any arguments, and always returns 0. */ @@ -49,6 +51,7 @@ public: } }; +} void registerFunctionIgnore(FunctionFactory & factory) { diff --git a/src/Functions/ilike.cpp b/src/Functions/ilike.cpp index a39a907eff2..fc3e38daeba 100644 --- a/src/Functions/ilike.cpp +++ b/src/Functions/ilike.cpp @@ -4,21 +4,22 @@ namespace DB { +namespace +{ struct NameILike { static constexpr auto name = "ilike"; }; -namespace -{ - using ILikeImpl = MatchImpl; -} - +using ILikeImpl = MatchImpl; using FunctionILike = FunctionsStringSearch; +} + void registerFunctionILike(FunctionFactory & factory) { factory.registerFunction(); } + } diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index e4d503a2a93..a5eb2241f1b 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -12,12 +12,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; } +namespace +{ + /** in(x, set) - function for evaluating the IN * notIn(x, set) - and NOT IN. */ @@ -128,7 +130,7 @@ public: }; template -static void registerFunctionsInImpl(FunctionFactory & factory) +void registerFunctionsInImpl(FunctionFactory & factory) { factory.registerFunction>(); factory.registerFunction>(); @@ -140,6 +142,8 @@ static void registerFunctionsInImpl(FunctionFactory & factory) factory.registerFunction>(); } +} + void registerFunctionsIn(FunctionFactory & factory) { registerFunctionsInImpl(factory); diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index 85ee3e07969..8709b02d8e0 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -14,7 +14,6 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -23,6 +22,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ class FunctionInitializeAggregation : public IFunction { @@ -152,6 +153,7 @@ void FunctionInitializeAggregation::executeImpl(Block & block, const ColumnNumbe block.getByPosition(result).column = std::move(result_holder); } +} void registerFunctionInitializeAggregation(FunctionFactory & factory) { diff --git a/src/Functions/intDiv.cpp b/src/Functions/intDiv.cpp index 7e34f106147..a08525813b1 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -15,6 +15,9 @@ namespace ErrorCodes extern const int ILLEGAL_DIVISION; } +namespace +{ + /// Optimizations for integer division by a constant. template @@ -83,6 +86,8 @@ struct DivideIntegralByConstantImpl * Can be expanded to all possible combinations, but more code is needed. 
*/ +} + template <> struct BinaryOperationImpl> : DivideIntegralByConstantImpl {}; template <> struct BinaryOperationImpl> : DivideIntegralByConstantImpl {}; template <> struct BinaryOperationImpl> : DivideIntegralByConstantImpl {}; diff --git a/src/Functions/intDivOrZero.cpp b/src/Functions/intDivOrZero.cpp index 64b6994d438..cae901518c0 100644 --- a/src/Functions/intDivOrZero.cpp +++ b/src/Functions/intDivOrZero.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ template struct DivideIntegralOrZeroImpl @@ -28,6 +30,8 @@ struct DivideIntegralOrZeroImpl struct NameIntDivOrZero { static constexpr auto name = "intDivOrZero"; }; using FunctionIntDivOrZero = FunctionBinaryArithmetic; +} + void registerFunctionIntDivOrZero(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 863a0822765..b1964701ad7 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -5,12 +5,14 @@ namespace DB { - namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } +namespace +{ + template struct IntExp10Impl { @@ -34,6 +36,8 @@ struct NameIntExp10 { static constexpr auto name = "intExp10"; }; /// Assumed to be injective for the purpose of query optimization, but in fact it is not injective because of possible overflow. using FunctionIntExp10 = FunctionUnaryArithmetic; +} + template <> struct FunctionUnaryArithmeticMonotonicity { static bool has() { return true; } diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index 453c5798f10..c87a6e31852 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -11,6 +11,9 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +namespace +{ + template struct IntExp2Impl { @@ -41,6 +44,8 @@ struct IntExp2Impl struct NameIntExp2 { static constexpr auto name = "intExp2"; }; using FunctionIntExp2 = FunctionUnaryArithmetic; +} + template <> struct FunctionUnaryArithmeticMonotonicity { static bool has() { return true; } diff --git a/src/Functions/isConstant.cpp b/src/Functions/isConstant.cpp index 88dd983f47b..fc3f78cd058 100644 --- a/src/Functions/isConstant.cpp +++ b/src/Functions/isConstant.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ /// Returns 1 if and only if the argument is constant expression. /// This function exists for development, debugging and demonstration purposes. @@ -42,6 +44,7 @@ public: } }; +} void registerFunctionIsConstant(FunctionFactory & factory) { diff --git a/src/Functions/isDecimalOverflow.cpp b/src/Functions/isDecimalOverflow.cpp index dbd57101a64..dc1b0d6300a 100644 --- a/src/Functions/isDecimalOverflow.cpp +++ b/src/Functions/isDecimalOverflow.cpp @@ -11,7 +11,6 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -19,6 +18,9 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ + /// Returns 1 if and Decimal value has more digits then it's Precision allow, 0 otherwise. /// Precision could be set as second argument or omitted. If ommited function uses Decimal presicion of the first argument. 
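Under that comment's reading, the overflow check reduces to counting the decimal digits of the underlying integer (which already has the scale folded in) and comparing against the declared precision. A sketch of that reduction, illustrative rather than the actual implementation:

#include <cassert>
#include <cstdint>

int countDigitsSketch(int64_t x)
{
    uint64_t v = x < 0 ? 0 - static_cast<uint64_t>(x) : static_cast<uint64_t>(x);
    int digits = 0;
    for (; v != 0; v /= 10)
        ++digits;
    return digits;
}

// Decimal(precision, scale) stores value * 10^scale in an integer;
// it overflows its declared precision when that integer needs more digits.
bool isDecimalOverflowSketch(int64_t underlying, uint32_t precision)
{
    return countDigitsSketch(underlying) > static_cast<int>(precision);
}

int main()
{
    assert(!isDecimalOverflowSketch(999999999, 9)); // largest value that fits precision 9
    assert(isDecimalOverflowSketch(1000000000, 9));
}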
class FunctionIsDecimalOverflow : public IFunction @@ -142,6 +144,7 @@ private: } }; +} void registerFunctionIsDecimalOverflow(FunctionFactory & factory) { diff --git a/src/Functions/isFinite.cpp b/src/Functions/isFinite.cpp index e898ddc8304..72cdc4d3a4f 100644 --- a/src/Functions/isFinite.cpp +++ b/src/Functions/isFinite.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct IsFiniteImpl { @@ -35,6 +37,7 @@ struct IsFiniteImpl using FunctionIsFinite = FunctionNumericPredicate; +} void registerFunctionIsFinite(FunctionFactory & factory) { diff --git a/src/Functions/isInfinite.cpp b/src/Functions/isInfinite.cpp index 17208373cb3..12abb8eab6a 100644 --- a/src/Functions/isInfinite.cpp +++ b/src/Functions/isInfinite.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct IsInfiniteImpl { @@ -31,6 +33,7 @@ struct IsInfiniteImpl using FunctionIsInfinite = FunctionNumericPredicate; +} void registerFunctionIsInfinite(FunctionFactory & factory) { diff --git a/src/Functions/isNaN.cpp b/src/Functions/isNaN.cpp index 5146f7cfc0d..2e35e8cfbb5 100644 --- a/src/Functions/isNaN.cpp +++ b/src/Functions/isNaN.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ struct IsNaNImpl { @@ -18,6 +20,7 @@ struct IsNaNImpl using FunctionIsNaN = FunctionNumericPredicate; +} void registerFunctionIsNaN(FunctionFactory & factory) { diff --git a/src/Functions/isNotNull.cpp b/src/Functions/isNotNull.cpp index b5d856f31ff..12543e2a751 100644 --- a/src/Functions/isNotNull.cpp +++ b/src/Functions/isNotNull.cpp @@ -9,6 +9,8 @@ namespace DB { +namespace +{ /// Implements the function isNotNull which returns true if a value /// is not null, false otherwise. @@ -60,6 +62,8 @@ public: } }; +} + void registerFunctionIsNotNull(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/isNull.cpp b/src/Functions/isNull.cpp index 1451f8d8f4f..91d98f1fcd7 100644 --- a/src/Functions/isNull.cpp +++ b/src/Functions/isNull.cpp @@ -8,6 +8,8 @@ namespace DB { +namespace +{ /// Implements the function isNull which returns true if a value /// is null, false otherwise. @@ -53,6 +55,7 @@ public: } }; +} void registerFunctionIsNull(FunctionFactory & factory) { diff --git a/src/Functions/isZeroOrNull.cpp b/src/Functions/isZeroOrNull.cpp index 02d97181016..00b84c5713d 100644 --- a/src/Functions/isZeroOrNull.cpp +++ b/src/Functions/isZeroOrNull.cpp @@ -9,13 +9,15 @@ namespace DB { - namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; } +namespace +{ + /// Returns 1 if argument is zero or NULL. /// It can be used to negate filter in WHERE condition. /// "WHERE isZeroOrNull(expr)" will return exactly the same rows that "WHERE expr" will filter out. 
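The equivalence claimed in the isZeroOrNull comment follows from how WHERE treats NULL: a NULL predicate drops the row exactly as 0 does. With std::optional standing in for a Nullable(UInt8) column value, the two predicates partition the rows:

#include <cassert>
#include <optional>

using NullableUInt8 = std::optional<unsigned char>;

// WHERE keeps a row only when the predicate is non-NULL and non-zero.
bool keptByWhere(NullableUInt8 expr) { return expr.has_value() && *expr != 0; }

bool isZeroOrNullSketch(NullableUInt8 expr) { return !expr.has_value() || *expr == 0; }

int main()
{
    for (NullableUInt8 v : {NullableUInt8{}, NullableUInt8{0}, NullableUInt8{1}})
        assert(isZeroOrNullSketch(v) == !keptByWhere(v)); // exact complement for NULL, 0 and 1
}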
@@ -107,6 +109,7 @@ private: } }; +} void registerFunctionIsZeroOrNull(FunctionFactory & factory) { diff --git a/src/Functions/jumpConsistentHash.cpp b/src/Functions/jumpConsistentHash.cpp index b1a3109c066..32535cfcd6b 100644 --- a/src/Functions/jumpConsistentHash.cpp +++ b/src/Functions/jumpConsistentHash.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ /// Code from https://arxiv.org/pdf/1406.2294.pdf static inline int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets) @@ -35,10 +37,11 @@ struct JumpConsistentHashImpl using FunctionJumpConsistentHash = FunctionConsistentHashImpl; +} + void registerFunctionJumpConsistentHash(FunctionFactory & factory) { factory.registerFunction(); } } - diff --git a/src/Functions/lcm.cpp b/src/Functions/lcm.cpp index ceca495ddce..06e8d7d89f4 100644 --- a/src/Functions/lcm.cpp +++ b/src/Functions/lcm.cpp @@ -27,12 +27,14 @@ constexpr T abs(T value) noexcept namespace DB { - namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } +namespace +{ + template struct LCMImpl { @@ -78,6 +80,8 @@ struct LCMImpl struct NameLCM { static constexpr auto name = "lcm"; }; using FunctionLCM = FunctionBinaryArithmetic; +} + void registerFunctionLCM(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index e84d9bd9e24..75e3e7b2a14 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ template struct LeastBaseImpl @@ -57,6 +59,8 @@ using LeastImpl = std::conditional_t; +} + void registerFunctionLeast(FunctionFactory & factory) { factory.registerFunction>(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/lengthUTF8.cpp b/src/Functions/lengthUTF8.cpp index 7c71533acd9..c067fd4db3c 100644 --- a/src/Functions/lengthUTF8.cpp +++ b/src/Functions/lengthUTF8.cpp @@ -6,12 +6,13 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ /** If the string is UTF-8 encoded text, it returns the length of the text in code points. * (not in characters: the length of the text "ё" can be either 1 or 2, depending on the normalization) @@ -60,6 +61,8 @@ struct NameLengthUTF8 }; using FunctionLengthUTF8 = FunctionStringOrArrayToT; +} + void registerFunctionLengthUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/lgamma.cpp b/src/Functions/lgamma.cpp index 05f62431ac9..c631758b2cd 100644 --- a/src/Functions/lgamma.cpp +++ b/src/Functions/lgamma.cpp @@ -11,6 +11,8 @@ extern "C" namespace DB { +namespace +{ /// Use wrapper and use lgamma_r version because std::lgamma is not threadsafe. 
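The jumpConsistentHash hunk above cites arXiv:1406.2294; the Lamping and Veach algorithm is short enough to reproduce as a standalone sketch, which also makes the property that motivates it visible: growing num_buckets only ever moves keys into the newly added bucket.

#include <cassert>
#include <cstdint>

// Reference algorithm from https://arxiv.org/pdf/1406.2294.pdf
int32_t jumpConsistentHashSketch(uint64_t key, int32_t num_buckets)
{
    int64_t b = -1;
    int64_t j = 0;
    while (j < num_buckets)
    {
        b = j;
        key = key * 2862933555777941757ULL + 1;
        j = static_cast<int64_t>((b + 1) * (double(1LL << 31) / double((key >> 33) + 1)));
    }
    return static_cast<int32_t>(b);
}

int main()
{
    for (uint64_t key = 0; key < 100000; ++key)
    {
        int32_t before = jumpConsistentHashSketch(key, 7);
        int32_t after = jumpConsistentHashSketch(key, 8);
        assert(after == before || after == 7); // keys either stay put or move to the new bucket
    }
}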
static Float64 lgamma_wrapper(Float64 arg) @@ -22,6 +24,8 @@ static Float64 lgamma_wrapper(Float64 arg) struct LGammaName { static constexpr auto name = "lgamma"; }; using FunctionLGamma = FunctionMathUnary>; +} + void registerFunctionLGamma(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/like.cpp b/src/Functions/like.cpp index f334cef7917..37d9f006187 100644 --- a/src/Functions/like.cpp +++ b/src/Functions/like.cpp @@ -5,21 +5,22 @@ namespace DB { +namespace +{ struct NameLike { static constexpr auto name = "like"; }; -namespace -{ - using LikeImpl = MatchImpl; -} - +using LikeImpl = MatchImpl; using FunctionLike = FunctionsStringSearch; +} + void registerFunctionLike(FunctionFactory & factory) { factory.registerFunction(); } + } diff --git a/src/Functions/log.cpp b/src/Functions/log.cpp index c12300d7be4..791c73d13aa 100644 --- a/src/Functions/log.cpp +++ b/src/Functions/log.cpp @@ -4,12 +4,13 @@ namespace DB { +namespace +{ + struct LogName { static constexpr auto name = "log"; }; #if USE_FASTOPS -namespace -{ struct Impl { static constexpr auto name = LogName::name; @@ -22,7 +23,6 @@ namespace NFastOps::Log(src, size, dst); } }; -} using FunctionLog = FunctionMathUnary; @@ -30,6 +30,8 @@ using FunctionLog = FunctionMathUnary; using FunctionLog = FunctionMathUnary>; #endif +} + void registerFunctionLog(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/log10.cpp b/src/Functions/log10.cpp index 53301a313df..2e0bd484ed3 100644 --- a/src/Functions/log10.cpp +++ b/src/Functions/log10.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct Log10Name { static constexpr auto name = "log10"; }; using FunctionLog10 = FunctionMathUnary>; +} + void registerFunctionLog10(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/log2.cpp b/src/Functions/log2.cpp index 903c9176622..6ca770eafee 100644 --- a/src/Functions/log2.cpp +++ b/src/Functions/log2.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct Log2Name { static constexpr auto name = "log2"; }; using FunctionLog2 = FunctionMathUnary>; +} + void registerFunctionLog2(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/lowCardinalityIndices.cpp b/src/Functions/lowCardinalityIndices.cpp index 702d90f261c..e79397158a4 100644 --- a/src/Functions/lowCardinalityIndices.cpp +++ b/src/Functions/lowCardinalityIndices.cpp @@ -9,12 +9,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + class FunctionLowCardinalityIndices: public IFunction { public: @@ -54,6 +56,7 @@ public: } }; +} void registerFunctionLowCardinalityIndices(FunctionFactory & factory) { diff --git a/src/Functions/lowCardinalityKeys.cpp b/src/Functions/lowCardinalityKeys.cpp index 34c66a59340..9f9b1348f22 100644 --- a/src/Functions/lowCardinalityKeys.cpp +++ b/src/Functions/lowCardinalityKeys.cpp @@ -7,12 +7,13 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ class FunctionLowCardinalityKeys: public IFunction { @@ -48,6 +49,7 @@ public: } }; +} void registerFunctionLowCardinalityKeys(FunctionFactory & factory) { diff --git a/src/Functions/lower.cpp b/src/Functions/lower.cpp index b3c939968cb..0b19ae03f86 100644 --- a/src/Functions/lower.cpp +++ b/src/Functions/lower.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameLower { @@ -13,6 +15,8 @@ 
struct NameLower }; using FunctionLower = FunctionStringToString, NameLower>; +} + void registerFunctionLower(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/lowerUTF8.cpp b/src/Functions/lowerUTF8.cpp index fed031b524c..c8ff9636209 100644 --- a/src/Functions/lowerUTF8.cpp +++ b/src/Functions/lowerUTF8.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ struct NameLowerUTF8 { @@ -15,6 +17,8 @@ struct NameLowerUTF8 using FunctionLowerUTF8 = FunctionStringToString>, NameLowerUTF8>; +} + void registerFunctionLowerUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/match.cpp b/src/Functions/match.cpp index c1d69712220..3460d54c6b6 100644 --- a/src/Functions/match.cpp +++ b/src/Functions/match.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameMatch { @@ -13,6 +15,8 @@ struct NameMatch using FunctionMatch = FunctionsStringSearch, NameMatch>; +} + void registerFunctionMatch(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/materialize.cpp b/src/Functions/materialize.cpp index 847e74c9a2a..56de111abda 100644 --- a/src/Functions/materialize.cpp +++ b/src/Functions/materialize.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ /** materialize(x) - materialize the constant */ @@ -43,6 +45,7 @@ public: } }; +} void registerFunctionMaterialize(FunctionFactory & factory) { diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index cacde3936d9..fa0a0d4da30 100644 --- a/src/Functions/minus.cpp +++ b/src/Functions/minus.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ template struct MinusImpl @@ -46,6 +48,8 @@ struct MinusImpl struct NameMinus { static constexpr auto name = "minus"; }; using FunctionMinus = FunctionBinaryArithmetic; +} + void registerFunctionMinus(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index 68cdceed6ea..a8ad15c3971 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -10,12 +10,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_DIVISION; } +namespace +{ + /// Optimizations for integer modulo by a constant. template @@ -71,6 +73,8 @@ struct ModuloByConstantImpl } }; +} + /** Specializations are specified for dividing numbers of the type UInt64 and UInt32 by the numbers of the same sign. * Can be expanded to all possible combinations, but more code is needed. 
*/ diff --git a/src/Functions/moduloOrZero.cpp b/src/Functions/moduloOrZero.cpp index 457c67c9e93..1392b0294bb 100644 --- a/src/Functions/moduloOrZero.cpp +++ b/src/Functions/moduloOrZero.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ template struct ModuloOrZeroImpl @@ -36,6 +38,8 @@ struct ModuloOrZeroImpl struct NameModuloOrZero { static constexpr auto name = "moduloOrZero"; }; using FunctionModuloOrZero = FunctionBinaryArithmetic; +} + void registerFunctionModuloOrZero(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiFuzzyMatchAllIndices.cpp b/src/Functions/multiFuzzyMatchAllIndices.cpp index 5a0206d5713..8b104e9ed2d 100644 --- a/src/Functions/multiFuzzyMatchAllIndices.cpp +++ b/src/Functions/multiFuzzyMatchAllIndices.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameMultiFuzzyMatchAllIndices { @@ -16,6 +18,8 @@ using FunctionMultiFuzzyMatchAllIndices = FunctionsMultiStringFuzzySearch< NameMultiFuzzyMatchAllIndices, std::numeric_limits::max()>; +} + void registerFunctionMultiFuzzyMatchAllIndices(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiFuzzyMatchAny.cpp b/src/Functions/multiFuzzyMatchAny.cpp index e4b439358b7..4d0c3470d91 100644 --- a/src/Functions/multiFuzzyMatchAny.cpp +++ b/src/Functions/multiFuzzyMatchAny.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameMultiFuzzyMatchAny { @@ -16,6 +18,8 @@ using FunctionMultiFuzzyMatchAny = FunctionsMultiStringFuzzySearch< NameMultiFuzzyMatchAny, std::numeric_limits::max()>; +} + void registerFunctionMultiFuzzyMatchAny(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiFuzzyMatchAnyIndex.cpp b/src/Functions/multiFuzzyMatchAnyIndex.cpp index 13a78ea058a..1680f413154 100644 --- a/src/Functions/multiFuzzyMatchAnyIndex.cpp +++ b/src/Functions/multiFuzzyMatchAnyIndex.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameMultiFuzzyMatchAnyIndex { @@ -16,6 +18,8 @@ using FunctionMultiFuzzyMatchAnyIndex = FunctionsMultiStringFuzzySearch< NameMultiFuzzyMatchAnyIndex, std::numeric_limits::max()>; +} + void registerFunctionMultiFuzzyMatchAnyIndex(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index bdb0e01d7b2..977d4bde1a2 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -12,13 +12,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + /// Function multiIf, which generalizes the function if. 
/// /// Syntax: multiIf(cond_1, then_1, ..., cond_N, then_N, else) @@ -225,6 +227,8 @@ public: } }; +} + void registerFunctionMultiIf(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiMatchAllIndices.cpp b/src/Functions/multiMatchAllIndices.cpp index e3f3b995b5e..171fa6baf74 100644 --- a/src/Functions/multiMatchAllIndices.cpp +++ b/src/Functions/multiMatchAllIndices.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameMultiMatchAllIndices { @@ -16,6 +18,8 @@ using FunctionMultiMatchAllIndices = FunctionsMultiStringSearch< NameMultiMatchAllIndices, std::numeric_limits::max()>; +} + void registerFunctionMultiMatchAllIndices(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiMatchAny.cpp b/src/Functions/multiMatchAny.cpp index ba89e9f9fcd..146c27e250c 100644 --- a/src/Functions/multiMatchAny.cpp +++ b/src/Functions/multiMatchAny.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameMultiMatchAny { @@ -16,6 +18,8 @@ using FunctionMultiMatchAny = FunctionsMultiStringSearch< NameMultiMatchAny, std::numeric_limits::max()>; +} + void registerFunctionMultiMatchAny(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiMatchAnyIndex.cpp b/src/Functions/multiMatchAnyIndex.cpp index 667149ef8fd..c43cd061187 100644 --- a/src/Functions/multiMatchAnyIndex.cpp +++ b/src/Functions/multiMatchAnyIndex.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameMultiMatchAnyIndex { @@ -16,6 +18,8 @@ using FunctionMultiMatchAnyIndex = FunctionsMultiStringSearch< NameMultiMatchAnyIndex, std::numeric_limits::max()>; +} + void registerFunctionMultiMatchAnyIndex(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchAllPositions.cpp b/src/Functions/multiSearchAllPositions.cpp index c7aeb4d6245..5d9b3f5e2fd 100644 --- a/src/Functions/multiSearchAllPositions.cpp +++ b/src/Functions/multiSearchAllPositions.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchAllPositions { @@ -15,6 +17,8 @@ struct NameMultiSearchAllPositions using FunctionMultiSearchAllPositions = FunctionsMultiStringPosition, NameMultiSearchAllPositions>; +} + void registerFunctionMultiSearchAllPositions(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchAllPositionsCaseInsensitive.cpp b/src/Functions/multiSearchAllPositionsCaseInsensitive.cpp index 4abcf7c8405..9f93284a769 100644 --- a/src/Functions/multiSearchAllPositionsCaseInsensitive.cpp +++ b/src/Functions/multiSearchAllPositionsCaseInsensitive.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchAllPositionsCaseInsensitive { @@ -15,6 +17,8 @@ struct NameMultiSearchAllPositionsCaseInsensitive using FunctionMultiSearchAllPositionsCaseInsensitive = FunctionsMultiStringPosition, NameMultiSearchAllPositionsCaseInsensitive>; +} + void registerFunctionMultiSearchAllPositionsCaseInsensitive(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchAllPositionsCaseInsensitiveUTF8.cpp b/src/Functions/multiSearchAllPositionsCaseInsensitiveUTF8.cpp index d9dbc1a7c8c..8864a00a8d3 100644 --- a/src/Functions/multiSearchAllPositionsCaseInsensitiveUTF8.cpp +++ b/src/Functions/multiSearchAllPositionsCaseInsensitiveUTF8.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchAllPositionsCaseInsensitiveUTF8 { @@ -16,6 +18,8 @@ using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringP 
MultiSearchAllPositionsImpl, NameMultiSearchAllPositionsCaseInsensitiveUTF8>; +} + void registerFunctionMultiSearchAllPositionsCaseInsensitiveUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchAllPositionsUTF8.cpp b/src/Functions/multiSearchAllPositionsUTF8.cpp index 8f39c0eade9..3922a859c3a 100644 --- a/src/Functions/multiSearchAllPositionsUTF8.cpp +++ b/src/Functions/multiSearchAllPositionsUTF8.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchAllPositionsUTF8 { @@ -15,6 +17,8 @@ struct NameMultiSearchAllPositionsUTF8 using FunctionMultiSearchAllPositionsUTF8 = FunctionsMultiStringPosition, NameMultiSearchAllPositionsUTF8>; +} + void registerFunctionMultiSearchAllPositionsUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchAny.cpp b/src/Functions/multiSearchAny.cpp index 144dbdbfdc4..5cd688ac65d 100644 --- a/src/Functions/multiSearchAny.cpp +++ b/src/Functions/multiSearchAny.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchAny { @@ -14,6 +16,8 @@ struct NameMultiSearchAny using FunctionMultiSearch = FunctionsMultiStringSearch, NameMultiSearchAny>; +} + void registerFunctionMultiSearchAny(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchAnyCaseInsensitive.cpp b/src/Functions/multiSearchAnyCaseInsensitive.cpp index 8b33a61013b..2358ce64bf8 100644 --- a/src/Functions/multiSearchAnyCaseInsensitive.cpp +++ b/src/Functions/multiSearchAnyCaseInsensitive.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchAnyCaseInsensitive { @@ -14,6 +16,8 @@ struct NameMultiSearchAnyCaseInsensitive using FunctionMultiSearchCaseInsensitive = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitive>; +} + void registerFunctionMultiSearchAnyCaseInsensitive(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchAnyCaseInsensitiveUTF8.cpp b/src/Functions/multiSearchAnyCaseInsensitiveUTF8.cpp index 49a8b95a0e0..f84762d2bb4 100644 --- a/src/Functions/multiSearchAnyCaseInsensitiveUTF8.cpp +++ b/src/Functions/multiSearchAnyCaseInsensitiveUTF8.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchAnyCaseInsensitiveUTF8 { @@ -15,6 +17,8 @@ struct NameMultiSearchAnyCaseInsensitiveUTF8 using FunctionMultiSearchCaseInsensitiveUTF8 = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitiveUTF8>; +} + void registerFunctionMultiSearchAnyCaseInsensitiveUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchAnyUTF8.cpp b/src/Functions/multiSearchAnyUTF8.cpp index 55f2e449833..b2c8342ba7f 100644 --- a/src/Functions/multiSearchAnyUTF8.cpp +++ b/src/Functions/multiSearchAnyUTF8.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchAnyUTF8 { @@ -13,6 +15,8 @@ struct NameMultiSearchAnyUTF8 }; using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch, NameMultiSearchAnyUTF8>; +} + void registerFunctionMultiSearchAnyUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchFirstIndex.cpp b/src/Functions/multiSearchFirstIndex.cpp index 65a25004964..fcbeb552ae1 100644 --- a/src/Functions/multiSearchFirstIndex.cpp +++ b/src/Functions/multiSearchFirstIndex.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchFirstIndex { @@ -15,6 +17,8 @@ struct NameMultiSearchFirstIndex using FunctionMultiSearchFirstIndex = FunctionsMultiStringSearch, 
NameMultiSearchFirstIndex>; +} + void registerFunctionMultiSearchFirstIndex(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchFirstIndexCaseInsensitive.cpp b/src/Functions/multiSearchFirstIndexCaseInsensitive.cpp index bf643f0cf29..87483734cf9 100644 --- a/src/Functions/multiSearchFirstIndexCaseInsensitive.cpp +++ b/src/Functions/multiSearchFirstIndexCaseInsensitive.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchFirstIndexCaseInsensitive { @@ -15,6 +17,8 @@ struct NameMultiSearchFirstIndexCaseInsensitive using FunctionMultiSearchFirstIndexCaseInsensitive = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitive>; +} + void registerFunctionMultiSearchFirstIndexCaseInsensitive(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchFirstIndexCaseInsensitiveUTF8.cpp b/src/Functions/multiSearchFirstIndexCaseInsensitiveUTF8.cpp index 005152388ab..69e14adb32a 100644 --- a/src/Functions/multiSearchFirstIndexCaseInsensitiveUTF8.cpp +++ b/src/Functions/multiSearchFirstIndexCaseInsensitiveUTF8.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchFirstIndexCaseInsensitiveUTF8 { @@ -15,6 +17,8 @@ struct NameMultiSearchFirstIndexCaseInsensitiveUTF8 using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitiveUTF8>; +} + void registerFunctionMultiSearchFirstIndexCaseInsensitiveUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchFirstIndexUTF8.cpp b/src/Functions/multiSearchFirstIndexUTF8.cpp index 1158fda2a63..699281dad4b 100644 --- a/src/Functions/multiSearchFirstIndexUTF8.cpp +++ b/src/Functions/multiSearchFirstIndexUTF8.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchFirstIndexUTF8 { @@ -15,6 +17,8 @@ struct NameMultiSearchFirstIndexUTF8 using FunctionMultiSearchFirstIndexUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstIndexUTF8>; +} + void registerFunctionMultiSearchFirstIndexUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchFirstPosition.cpp b/src/Functions/multiSearchFirstPosition.cpp index 06ac396250e..003345afde6 100644 --- a/src/Functions/multiSearchFirstPosition.cpp +++ b/src/Functions/multiSearchFirstPosition.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchFirstPosition { @@ -15,6 +17,8 @@ struct NameMultiSearchFirstPosition using FunctionMultiSearchFirstPosition = FunctionsMultiStringSearch, NameMultiSearchFirstPosition>; +} + void registerFunctionMultiSearchFirstPosition(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchFirstPositionCaseInsensitive.cpp b/src/Functions/multiSearchFirstPositionCaseInsensitive.cpp index 1d028ad4513..7aa1ef991f3 100644 --- a/src/Functions/multiSearchFirstPositionCaseInsensitive.cpp +++ b/src/Functions/multiSearchFirstPositionCaseInsensitive.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchFirstPositionCaseInsensitive { @@ -15,6 +17,8 @@ struct NameMultiSearchFirstPositionCaseInsensitive using FunctionMultiSearchFirstPositionCaseInsensitive = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitive>; +} + void registerFunctionMultiSearchFirstPositionCaseInsensitive(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchFirstPositionCaseInsensitiveUTF8.cpp 
b/src/Functions/multiSearchFirstPositionCaseInsensitiveUTF8.cpp index 0b355ddc446..d20ce6c2de3 100644 --- a/src/Functions/multiSearchFirstPositionCaseInsensitiveUTF8.cpp +++ b/src/Functions/multiSearchFirstPositionCaseInsensitiveUTF8.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchFirstPositionCaseInsensitiveUTF8 { @@ -16,6 +18,8 @@ using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiString MultiSearchFirstPositionImpl, NameMultiSearchFirstPositionCaseInsensitiveUTF8>; +} + void registerFunctionMultiSearchFirstPositionCaseInsensitiveUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiSearchFirstPositionUTF8.cpp b/src/Functions/multiSearchFirstPositionUTF8.cpp index 26e16d17e8d..c0739808f7b 100644 --- a/src/Functions/multiSearchFirstPositionUTF8.cpp +++ b/src/Functions/multiSearchFirstPositionUTF8.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct NameMultiSearchFirstPositionUTF8 { @@ -15,6 +17,8 @@ struct NameMultiSearchFirstPositionUTF8 using FunctionMultiSearchFirstPositionUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstPositionUTF8>; +} + void registerFunctionMultiSearchFirstPositionUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/multiply.cpp b/src/Functions/multiply.cpp index 7018da843b6..dbb0ee97cde 100644 --- a/src/Functions/multiply.cpp +++ b/src/Functions/multiply.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ template struct MultiplyImpl @@ -46,6 +48,8 @@ struct MultiplyImpl struct NameMultiply { static constexpr auto name = "multiply"; }; using FunctionMultiply = FunctionBinaryArithmetic; +} + void registerFunctionMultiply(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/negate.cpp b/src/Functions/negate.cpp index 39ca434ea89..fff7c7355f4 100644 --- a/src/Functions/negate.cpp +++ b/src/Functions/negate.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ template struct NegateImpl @@ -29,6 +31,8 @@ struct NegateImpl struct NameNegate { static constexpr auto name = "negate"; }; using FunctionNegate = FunctionUnaryArithmetic; +} + template <> struct FunctionUnaryArithmeticMonotonicity { static bool has() { return true; } diff --git a/src/Functions/neighbor.cpp b/src/Functions/neighbor.cpp index 1f0f945b828..144d20a0c66 100644 --- a/src/Functions/neighbor.cpp +++ b/src/Functions/neighbor.cpp @@ -15,6 +15,9 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; } +namespace +{ + // Implements function, giving value for column within range of given // Example: // | c1 | @@ -194,6 +197,8 @@ public: } }; +} + void registerFunctionNeighbor(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/normalizeQuery.cpp b/src/Functions/normalizeQuery.cpp index 2cbefef2cba..320c2fde643 100644 --- a/src/Functions/normalizeQuery.cpp +++ b/src/Functions/normalizeQuery.cpp @@ -9,7 +9,6 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; diff --git a/src/Functions/normalizedQueryHash.cpp b/src/Functions/normalizedQueryHash.cpp index 21309c2960e..4c222db0f2c 100644 --- a/src/Functions/normalizedQueryHash.cpp +++ b/src/Functions/normalizedQueryHash.cpp @@ -15,7 +15,6 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; diff --git a/src/Functions/notEmpty.cpp b/src/Functions/notEmpty.cpp index d8057501848..6285e59652c 100644 --- a/src/Functions/notEmpty.cpp +++ b/src/Functions/notEmpty.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct 
NameNotEmpty { @@ -13,6 +15,8 @@ struct NameNotEmpty }; using FunctionNotEmpty = FunctionStringOrArrayToT, NameNotEmpty, UInt8>; +} + void registerFunctionNotEmpty(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/notILike.cpp b/src/Functions/notILike.cpp index 0f95564b09e..1fc0ab65ea8 100644 --- a/src/Functions/notILike.cpp +++ b/src/Functions/notILike.cpp @@ -4,19 +4,19 @@ namespace DB { +namespace +{ struct NameNotILike { static constexpr auto name = "notILike"; }; -namespace -{ - using NotILikeImpl = MatchImpl; -} - +using NotILikeImpl = MatchImpl; using FunctionNotILike = FunctionsStringSearch; +} + void registerFunctionNotILike(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/notLike.cpp b/src/Functions/notLike.cpp index 775fc4f3a66..54e2c7481f0 100644 --- a/src/Functions/notLike.cpp +++ b/src/Functions/notLike.cpp @@ -2,9 +2,10 @@ #include "FunctionFactory.h" #include "MatchImpl.h" - namespace DB { +namespace +{ struct NameNotLike { @@ -13,6 +14,8 @@ struct NameNotLike using FunctionNotLike = FunctionsStringSearch, NameNotLike>; +} + void registerFunctionNotLike(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/now.cpp b/src/Functions/now.cpp index f38fc4006bc..e71c3a77f11 100644 --- a/src/Functions/now.cpp +++ b/src/Functions/now.cpp @@ -9,10 +9,11 @@ namespace DB - { -/// Get the current time. (It is a constant, it is evaluated once for the entire query.) +namespace +{ +/// Get the current time. (It is a constant, it is evaluated once for the entire query.) class ExecutableFunctionNow : public IExecutableFunctionImpl { public: @@ -82,6 +83,8 @@ public: } }; +} + void registerFunctionNow(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index 57d1ff8c19e..cba66ffb531 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -11,13 +11,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int CANNOT_CLOCK_GETTIME; } +namespace +{ + static Field nowSubsecond(UInt32 scale) { static constexpr Int32 fractional_scale = 9; @@ -89,6 +91,8 @@ public: } }; +} + void registerFunctionNow64(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/nullIf.cpp b/src/Functions/nullIf.cpp index cde8a2ed343..57e1265793e 100644 --- a/src/Functions/nullIf.cpp +++ b/src/Functions/nullIf.cpp @@ -9,6 +9,8 @@ namespace DB { +namespace +{ /// Implements the function nullIf which takes 2 arguments and returns /// NULL if both arguments have the same value. 
Otherwise it returns the @@ -74,6 +76,7 @@ public: } }; +} void registerFunctionNullIf(FunctionFactory & factory) { diff --git a/src/Functions/pi.cpp b/src/Functions/pi.cpp index 4621ed8ecc0..efa536c7314 100644 --- a/src/Functions/pi.cpp +++ b/src/Functions/pi.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct PiImpl { @@ -12,6 +14,8 @@ struct PiImpl using FunctionPi = FunctionMathConstFloat64; +} + void registerFunctionPi(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index 1421bfcd4c6..8aee3141759 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -4,6 +4,8 @@ namespace DB { +namespace +{ template struct PlusImpl @@ -47,6 +49,8 @@ struct PlusImpl struct NamePlus { static constexpr auto name = "plus"; }; using FunctionPlus = FunctionBinaryArithmetic; +} + void registerFunctionPlus(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/pointInEllipses.cpp b/src/Functions/pointInEllipses.cpp index d3f57e5fecd..79ea43cf04d 100644 --- a/src/Functions/pointInEllipses.cpp +++ b/src/Functions/pointInEllipses.cpp @@ -11,7 +11,6 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -20,6 +19,9 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ + /** * The function checks if a point is in one of ellipses in set. * The number of arguments must be 2 + 4*N where N is the number of ellipses. @@ -188,6 +190,7 @@ private: } }; +} void registerFunctionPointInEllipses(FunctionFactory & factory) { diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index ccff56b9a89..5a9ce284ba9 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -34,7 +34,6 @@ namespace ProfileEvents namespace DB { - namespace ErrorCodes { extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; @@ -43,6 +42,8 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ using CoordinateType = Float64; using Point = boost::geometry::model::d2::point_xy; @@ -561,6 +562,7 @@ private: } }; +} void registerFunctionPointInPolygon(FunctionFactory & factory) { diff --git a/src/Functions/position.cpp b/src/Functions/position.cpp index df1405ae94a..5b8af16fef1 100644 --- a/src/Functions/position.cpp +++ b/src/Functions/position.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NamePosition { @@ -13,6 +15,8 @@ struct NamePosition using FunctionPosition = FunctionsStringSearch, NamePosition>; +} + void registerFunctionPosition(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/positionCaseInsensitive.cpp b/src/Functions/positionCaseInsensitive.cpp index 00721dda212..f72766a1875 100644 --- a/src/Functions/positionCaseInsensitive.cpp +++ b/src/Functions/positionCaseInsensitive.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NamePositionCaseInsensitive { @@ -13,6 +15,8 @@ struct NamePositionCaseInsensitive using FunctionPositionCaseInsensitive = FunctionsStringSearch, NamePositionCaseInsensitive>; +} + void registerFunctionPositionCaseInsensitive(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/positionCaseInsensitiveUTF8.cpp b/src/Functions/positionCaseInsensitiveUTF8.cpp index 196a5d67cd2..0af545cc6a3 100644 --- a/src/Functions/positionCaseInsensitiveUTF8.cpp +++ b/src/Functions/positionCaseInsensitiveUTF8.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct 
NamePositionCaseInsensitiveUTF8 { @@ -14,6 +16,8 @@ struct NamePositionCaseInsensitiveUTF8 using FunctionPositionCaseInsensitiveUTF8 = FunctionsStringSearch, NamePositionCaseInsensitiveUTF8>; +} + void registerFunctionPositionCaseInsensitiveUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/positionUTF8.cpp b/src/Functions/positionUTF8.cpp index 944b55005f3..68b2f5c274e 100644 --- a/src/Functions/positionUTF8.cpp +++ b/src/Functions/positionUTF8.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NamePositionUTF8 { @@ -13,6 +15,8 @@ struct NamePositionUTF8 using FunctionPositionUTF8 = FunctionsStringSearch, NamePositionUTF8>; +} + void registerFunctionPositionUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/pow.cpp b/src/Functions/pow.cpp index db16bc993c7..7e60e0e878e 100644 --- a/src/Functions/pow.cpp +++ b/src/Functions/pow.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct PowName { static constexpr auto name = "pow"; }; using FunctionPow = FunctionMathBinaryFloat64>; +} + void registerFunctionPow(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/rand.cpp b/src/Functions/rand.cpp index e704b7824b5..660119a64cb 100644 --- a/src/Functions/rand.cpp +++ b/src/Functions/rand.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct NameRand { static constexpr auto name = "rand"; }; using FunctionRand = FunctionRandom; +} + void registerFunctionRand(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/rand64.cpp b/src/Functions/rand64.cpp index cc942aee28c..9377d3d40d0 100644 --- a/src/Functions/rand64.cpp +++ b/src/Functions/rand64.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct NameRand64 { static constexpr auto name = "rand64"; }; using FunctionRand64 = FunctionRandom; +} + void registerFunctionRand64(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/randConstant.cpp b/src/Functions/randConstant.cpp index ac410fc35c4..b2880833e8e 100644 --- a/src/Functions/randConstant.cpp +++ b/src/Functions/randConstant.cpp @@ -9,6 +9,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + template class ExecutableFunctionRandomConstant : public IExecutableFunctionImpl { @@ -107,10 +110,11 @@ public: } }; - struct NameRandConstant { static constexpr auto name = "randConstant"; }; using FunctionBuilderRandConstant = RandomConstantOverloadResolver; +} + void registerFunctionRandConstant(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/randomFixedString.cpp b/src/Functions/randomFixedString.cpp index 2df0d86aae1..1627716d9bf 100644 --- a/src/Functions/randomFixedString.cpp +++ b/src/Functions/randomFixedString.cpp @@ -21,6 +21,8 @@ namespace ErrorCodes extern const int DECIMAL_OVERFLOW; } +namespace +{ /* Generate random fixed string with fully random bytes (including zero). 
*/ template @@ -103,6 +105,8 @@ private: ImplementationSelector selector; }; +} + void registerFunctionRandomFixedString(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/randomPrintableASCII.cpp b/src/Functions/randomPrintableASCII.cpp index 3f426aaa35d..775c50a3026 100644 --- a/src/Functions/randomPrintableASCII.cpp +++ b/src/Functions/randomPrintableASCII.cpp @@ -17,6 +17,8 @@ namespace ErrorCodes extern const int TOO_LARGE_STRING_SIZE; } +namespace +{ /** Generate random string of specified length with printable ASCII characters, almost uniformly distributed. * First argument is length, other optional arguments are ignored and used to prevent common subexpression elimination to get different values. @@ -108,6 +110,8 @@ public: } }; +} + void registerFunctionRandomPrintableASCII(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 58b53e47e02..66f77602d96 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -19,6 +19,8 @@ namespace ErrorCodes extern const int TOO_LARGE_STRING_SIZE; } +namespace +{ /* Generate random string of specified length with fully random bytes (including zero). */ template @@ -123,6 +125,8 @@ private: ImplementationSelector selector; }; +} + void registerFunctionRandomString(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp index 433442d6a77..5c7478e446e 100644 --- a/src/Functions/randomStringUTF8.cpp +++ b/src/Functions/randomStringUTF8.cpp @@ -17,6 +17,8 @@ namespace ErrorCodes extern const int TOO_LARGE_STRING_SIZE; } +namespace +{ /* Generate string with a UTF-8 encoded text. * Take a single argument - length of result string in Unicode code points. 
@@ -141,6 +143,8 @@ public: } }; +} + void registerFunctionRandomStringUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/regexpQuoteMeta.cpp b/src/Functions/regexpQuoteMeta.cpp index e99ef5a9bd0..c508daed75b 100644 --- a/src/Functions/regexpQuoteMeta.cpp +++ b/src/Functions/regexpQuoteMeta.cpp @@ -7,13 +7,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + class FunctionRegexpQuoteMeta : public IFunction { public: @@ -104,11 +106,13 @@ public: block.getByPosition(result).column = std::move(dst_column); } - }; +} + void registerFunctionRegexpQuoteMeta(FunctionFactory & factory) { factory.registerFunction(); } + } diff --git a/src/Functions/reinterpretAsFixedString.cpp b/src/Functions/reinterpretAsFixedString.cpp index 73e1387308d..bec5fb4522e 100644 --- a/src/Functions/reinterpretAsFixedString.cpp +++ b/src/Functions/reinterpretAsFixedString.cpp @@ -8,13 +8,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; } +namespace +{ class FunctionReinterpretAsFixedString : public IFunction { @@ -84,6 +85,8 @@ public: } }; +} + void registerFunctionReinterpretAsFixedString(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/reinterpretAsString.cpp b/src/Functions/reinterpretAsString.cpp index c9c0c7eaaf6..2a368392f3f 100644 --- a/src/Functions/reinterpretAsString.cpp +++ b/src/Functions/reinterpretAsString.cpp @@ -8,13 +8,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + /** Function for transforming numbers and dates to strings that contain the same set of bytes in the machine representation. */ class FunctionReinterpretAsString : public IFunction { @@ -80,6 +82,7 @@ public: } }; +} void registerFunctionReinterpretAsString(FunctionFactory & factory) { @@ -87,5 +90,3 @@ void registerFunctionReinterpretAsString(FunctionFactory & factory) } } - - diff --git a/src/Functions/reinterpretStringAs.cpp b/src/Functions/reinterpretStringAs.cpp index bb290b33b6d..71528b7cb61 100644 --- a/src/Functions/reinterpretStringAs.cpp +++ b/src/Functions/reinterpretStringAs.cpp @@ -16,13 +16,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + template class FunctionReinterpretStringAs : public IFunction { @@ -132,6 +134,7 @@ using FunctionReinterpretAsFloat64 = FunctionReinterpretStringAs; using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs; +} void registerFunctionsReinterpretStringAs(FunctionFactory & factory) { @@ -150,5 +153,3 @@ void registerFunctionsReinterpretStringAs(FunctionFactory & factory) } } - - diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 18350dd6207..fd0cf238f50 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -17,6 +17,9 @@ namespace ErrorCodes extern const int TOO_LARGE_STRING_SIZE; } +namespace +{ + struct RepeatImpl { /// Safety threshold against DoS. 
@@ -218,6 +221,7 @@ public: } }; +} void registerFunctionRepeat(FunctionFactory & factory) { diff --git a/src/Functions/replaceAll.cpp b/src/Functions/replaceAll.cpp index 56779952d8e..cc29e57ea69 100644 --- a/src/Functions/replaceAll.cpp +++ b/src/Functions/replaceAll.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameReplaceAll { @@ -13,6 +15,8 @@ struct NameReplaceAll using FunctionReplaceAll = FunctionStringReplace, NameReplaceAll>; +} + void registerFunctionReplaceAll(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/replaceOne.cpp b/src/Functions/replaceOne.cpp index 78f2236b873..d9bcbef0e2d 100644 --- a/src/Functions/replaceOne.cpp +++ b/src/Functions/replaceOne.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameReplaceOne { @@ -13,6 +15,8 @@ struct NameReplaceOne using FunctionReplaceOne = FunctionStringReplace, NameReplaceOne>; +} + void registerFunctionReplaceOne(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/replaceRegexpAll.cpp b/src/Functions/replaceRegexpAll.cpp index 9bbc28b1cec..ad67efa82f4 100644 --- a/src/Functions/replaceRegexpAll.cpp +++ b/src/Functions/replaceRegexpAll.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameReplaceRegexpAll { @@ -13,6 +15,8 @@ struct NameReplaceRegexpAll using FunctionReplaceRegexpAll = FunctionStringReplace, NameReplaceRegexpAll>; +} + void registerFunctionReplaceRegexpAll(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/replaceRegexpOne.cpp b/src/Functions/replaceRegexpOne.cpp index a3fcf34517b..9844f77ee26 100644 --- a/src/Functions/replaceRegexpOne.cpp +++ b/src/Functions/replaceRegexpOne.cpp @@ -5,6 +5,8 @@ namespace DB { +namespace +{ struct NameReplaceRegexpOne { @@ -13,6 +15,8 @@ struct NameReplaceRegexpOne using FunctionReplaceRegexpOne = FunctionStringReplace, NameReplaceRegexpOne>; +} + void registerFunctionReplaceRegexpOne(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/replicate.cpp b/src/Functions/replicate.cpp index bdbc957c805..73246651003 100644 --- a/src/Functions/replicate.cpp +++ b/src/Functions/replicate.cpp @@ -7,13 +7,15 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; } +namespace +{ + /** Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument). */ class FunctionReplicate : public IFunction @@ -64,6 +66,7 @@ public: } }; +} void registerFunctionReplicate(FunctionFactory & factory) { diff --git a/src/Functions/reverse.cpp b/src/Functions/reverse.cpp index 60d20d2aee8..ad12b6f7651 100644 --- a/src/Functions/reverse.cpp +++ b/src/Functions/reverse.cpp @@ -9,13 +9,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ /** Reverse the string as a sequence of bytes. */ @@ -143,6 +144,7 @@ private: const Context & context; }; +} void registerFunctionReverse(FunctionFactory & factory) { diff --git a/src/Functions/reverseUTF8.cpp b/src/Functions/reverseUTF8.cpp index 40f3214f880..b19808c091e 100644 --- a/src/Functions/reverseUTF8.cpp +++ b/src/Functions/reverseUTF8.cpp @@ -6,12 +6,13 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; } +namespace +{ /** Reverse the sequence of code points in a UTF-8 encoded string. 
* The result may not match the expected result, because modifying code points (for example, diacritics) may be applied to another symbols. @@ -73,6 +74,8 @@ struct NameReverseUTF8 }; using FunctionReverseUTF8 = FunctionStringToString; +} + void registerFunctionReverseUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp index 67705983385..08ac941c2df 100644 --- a/src/Functions/roundAge.cpp +++ b/src/Functions/roundAge.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ template struct RoundAgeImpl @@ -29,6 +31,8 @@ struct RoundAgeImpl struct NameRoundAge { static constexpr auto name = "roundAge"; }; using FunctionRoundAge = FunctionUnaryArithmetic; +} + template <> struct FunctionUnaryArithmeticMonotonicity : PositiveMonotonicity {}; void registerFunctionRoundAge(FunctionFactory & factory) diff --git a/src/Functions/roundDuration.cpp b/src/Functions/roundDuration.cpp index 3b617822d9d..dec3427a7e1 100644 --- a/src/Functions/roundDuration.cpp +++ b/src/Functions/roundDuration.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ template struct RoundDurationImpl @@ -38,6 +40,8 @@ struct RoundDurationImpl struct NameRoundDuration { static constexpr auto name = "roundDuration"; }; using FunctionRoundDuration = FunctionUnaryArithmetic; +} + template <> struct FunctionUnaryArithmeticMonotonicity : PositiveMonotonicity {}; void registerFunctionRoundDuration(FunctionFactory & factory) diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index c6b6f672c66..50a548e095e 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -5,12 +5,14 @@ namespace DB { - namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } +namespace +{ + template inline std::enable_if_t && (sizeof(T) <= sizeof(UInt32)), T> roundDownToPowerOfTwo(T x) @@ -88,6 +90,8 @@ struct RoundToExp2Impl struct NameRoundToExp2 { static constexpr auto name = "roundToExp2"; }; using FunctionRoundToExp2 = FunctionUnaryArithmetic; +} + template <> struct FunctionUnaryArithmeticMonotonicity : PositiveMonotonicity {}; void registerFunctionRoundToExp2(FunctionFactory & factory) diff --git a/src/Functions/rowNumberInAllBlocks.cpp b/src/Functions/rowNumberInAllBlocks.cpp index 4334a6f341a..9fd78857bf7 100644 --- a/src/Functions/rowNumberInAllBlocks.cpp +++ b/src/Functions/rowNumberInAllBlocks.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ /** Incremental number of row within all blocks passed to this function. 
*/ class FunctionRowNumberInAllBlocks : public IFunction @@ -69,6 +71,7 @@ public: } }; +} void registerFunctionRowNumberInAllBlocks(FunctionFactory & factory) { diff --git a/src/Functions/rowNumberInBlock.cpp b/src/Functions/rowNumberInBlock.cpp index a86711afb56..c7df817fd28 100644 --- a/src/Functions/rowNumberInBlock.cpp +++ b/src/Functions/rowNumberInBlock.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ class FunctionRowNumberInBlock : public IFunction { @@ -56,6 +58,8 @@ public: } }; +} + void registerFunctionRowNumberInBlock(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index 655663f6e9e..c442ab80348 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -10,7 +10,6 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; @@ -18,6 +17,8 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ /** runningAccumulate(agg_state) - takes the states of the aggregate function and returns a column with values, * are the result of the accumulation of these states for a set of block lines, from the first to the current line. @@ -133,6 +134,7 @@ public: } }; +} void registerFunctionRunningAccumulate(FunctionFactory & factory) { diff --git a/src/Functions/sigmoid.cpp b/src/Functions/sigmoid.cpp index e878a48046d..c4e00db6d5c 100644 --- a/src/Functions/sigmoid.cpp +++ b/src/Functions/sigmoid.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct SigmoidName { static constexpr auto name = "sigmoid"; }; @@ -37,6 +39,8 @@ using FunctionSigmoid = FunctionMathUnary(); diff --git a/src/Functions/sin.cpp b/src/Functions/sin.cpp index cd66b19c553..6fd5d189767 100644 --- a/src/Functions/sin.cpp +++ b/src/Functions/sin.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct SinName { static constexpr auto name = "sin"; }; using FunctionSin = FunctionMathUnary>; +} + void registerFunctionSin(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/sqrt.cpp b/src/Functions/sqrt.cpp index db861cde32e..725da874a51 100644 --- a/src/Functions/sqrt.cpp +++ b/src/Functions/sqrt.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct SqrtName { static constexpr auto name = "sqrt"; }; using FunctionSqrt = FunctionMathUnary>; +} + void registerFunctionSqrt(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/stringToH3.cpp b/src/Functions/stringToH3.cpp index d0d22f3927b..d6b17ce3f5d 100644 --- a/src/Functions/stringToH3.cpp +++ b/src/Functions/stringToH3.cpp @@ -18,6 +18,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + using namespace GatherUtils; class FunctionStringToH3 : public IFunction @@ -90,6 +93,7 @@ private: } }; +} void registerFunctionStringToH3(FunctionFactory & factory) { diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index bf8ba1d6b73..f043e08b139 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -26,6 +26,8 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ /// If 'is_utf8' - measure offset and length in code points instead of bytes. /// UTF8 variant is not available for FixedString arguments. 
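The comment above draws the bytes-versus-code-points distinction that both lengthUTF8 and the UTF8 substring variant depend on. A standalone sketch of the counting rule (an illustration with an invented helper name, not the ClickHouse implementation): in UTF-8, every code point is one lead byte plus zero or more continuation bytes of the form 10xxxxxx, so counting non-continuation bytes counts code points.

#include <cstddef>
#include <string>

std::size_t utf8CodePoints(const std::string & s)
{
    std::size_t n = 0;
    for (unsigned char c : s)
        n += (c & 0xC0) != 0x80; /// count every byte that is not a continuation byte
    return n;
}

/// For "ё" (two bytes in UTF-8): s.size() == 2, but utf8CodePoints(s) == 1.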
@@ -176,6 +178,8 @@ public: } }; +} + void registerFunctionSubstring(FunctionFactory & factory) { factory.registerFunction>(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/sumburConsistentHash.cpp b/src/Functions/sumburConsistentHash.cpp index 1fc26502355..88de93f65d9 100644 --- a/src/Functions/sumburConsistentHash.cpp +++ b/src/Functions/sumburConsistentHash.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ struct SumburConsistentHashImpl { @@ -24,6 +26,8 @@ struct SumburConsistentHashImpl using FunctionSumburConsistentHash = FunctionConsistentHashImpl; +} + void registerFunctionSumburConsistentHash(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/tan.cpp b/src/Functions/tan.cpp index e18d81f1e01..db63ab13984 100644 --- a/src/Functions/tan.cpp +++ b/src/Functions/tan.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct TanName { static constexpr auto name = "tan"; }; using FunctionTan = FunctionMathUnary>; +} + void registerFunctionTan(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/tanh.cpp b/src/Functions/tanh.cpp index 4fe3d616d25..6fc1d2f79e5 100644 --- a/src/Functions/tanh.cpp +++ b/src/Functions/tanh.cpp @@ -3,13 +3,13 @@ namespace DB { +namespace +{ struct TanhName { static constexpr auto name = "tanh"; }; #if USE_FASTOPS -namespace -{ struct Impl { static constexpr auto name = TanhName::name; @@ -22,13 +22,12 @@ namespace NFastOps::Tanh<>(src, size, dst); } }; -} using FunctionTanh = FunctionMathUnary; #else -static double tanh(double x) +double tanh(double x) { return 2 / (1.0 + exp(-2 * x)) - 1; } @@ -36,6 +35,8 @@ static double tanh(double x) using FunctionTanh = FunctionMathUnary>; #endif +} + void registerFunctionTanh(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); diff --git a/src/Functions/tgamma.cpp b/src/Functions/tgamma.cpp index e5e68963856..8ad00bc79ff 100644 --- a/src/Functions/tgamma.cpp +++ b/src/Functions/tgamma.cpp @@ -3,10 +3,14 @@ namespace DB { +namespace +{ struct TGammaName { static constexpr auto name = "tgamma"; }; using FunctionTGamma = FunctionMathUnary>; +} + void registerFunctionTGamma(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index 8faac6425c8..46a384ebc51 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -10,7 +10,6 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_COLUMN; @@ -19,6 +18,8 @@ namespace ErrorCodes extern const int FUNCTION_THROW_IF_VALUE_IS_NON_ZERO; } +namespace +{ /// Throw an exception if the argument is non zero. class FunctionThrowIf : public IFunction @@ -109,6 +110,7 @@ public: } }; +} void registerFunctionThrowIf(FunctionFactory & factory) { diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 89eb56d8e61..11804ac72d7 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -13,7 +13,6 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -21,6 +20,9 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ + /** timeSlots(StartTime, Duration) * - for the time interval beginning at `StartTime` and continuing `Duration` seconds, * returns an array of time points, consisting of rounding down to half an hour (default; or another value) of points from this interval. 
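The interval arithmetic just described is simple enough to state as code. A sketch of the rounding rule with a hypothetical helper (semantics inferred from the comment above, not copied from the implementation): every slot boundary touched by [StartTime, StartTime + Duration] is emitted, rounded down to the slot size, 1800 seconds by default.

#include <cstdint>
#include <vector>

std::vector<uint32_t> timeSlotsScalar(uint32_t start, uint32_t duration, uint32_t slot = 1800)
{
    std::vector<uint32_t> result;
    for (uint32_t t = start / slot; t <= (start + duration) / slot; ++t)
        result.push_back(t * slot); /// each touched slot, rounded down to its boundary
    return result;
}

/// e.g. timeSlotsScalar(1600000000, 3600) yields {1599998400, 1600000200, 1600002000}.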
@@ -190,6 +192,8 @@ public: } }; +} + void registerFunctionTimeSlots(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/timezone.cpp b/src/Functions/timezone.cpp index 9c15e5ae4b8..f697144c0d8 100644 --- a/src/Functions/timezone.cpp +++ b/src/Functions/timezone.cpp @@ -7,7 +7,8 @@ namespace DB { - +namespace +{ /** Returns the server time zone. */ @@ -42,6 +43,7 @@ public: } }; +} void registerFunctionTimeZone(FunctionFactory & factory) { diff --git a/src/Functions/toColumnTypeName.cpp b/src/Functions/toColumnTypeName.cpp index 32092d8a471..498f5095461 100644 --- a/src/Functions/toColumnTypeName.cpp +++ b/src/Functions/toColumnTypeName.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ /// Returns name of IColumn instance. class FunctionToColumnTypeName : public IFunction @@ -46,6 +48,7 @@ public: } }; +} void registerFunctionToColumnTypeName(FunctionFactory & factory) { diff --git a/src/Functions/toLowCardinality.cpp b/src/Functions/toLowCardinality.cpp index 8f0dfd2e932..a9873205114 100644 --- a/src/Functions/toLowCardinality.cpp +++ b/src/Functions/toLowCardinality.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ class FunctionToLowCardinality: public IFunction { @@ -47,6 +49,7 @@ public: } }; +} void registerFunctionToLowCardinality(FunctionFactory & factory) { diff --git a/src/Functions/toNullable.cpp b/src/Functions/toNullable.cpp index 8be5f3ad243..8d0eb927d55 100644 --- a/src/Functions/toNullable.cpp +++ b/src/Functions/toNullable.cpp @@ -7,6 +7,8 @@ namespace DB { +namespace +{ /// If value is not Nullable or NULL, wraps it to Nullable. class FunctionToNullable : public IFunction @@ -39,6 +41,7 @@ public: } }; +} void registerFunctionToNullable(FunctionFactory & factory) { diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 6536a8d88a6..83fcec30338 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -143,7 +143,6 @@ namespace return time_zone.toStartOfSecondInterval(t, seconds); } }; -} class FunctionToStartOfInterval : public IFunction @@ -340,6 +339,7 @@ private: } }; +} void registerFunctionToStartOfInterval(FunctionFactory & factory) { diff --git a/src/Functions/toTimeZone.cpp b/src/Functions/toTimeZone.cpp index d75b202ed41..46d4c4698f7 100644 --- a/src/Functions/toTimeZone.cpp +++ b/src/Functions/toTimeZone.cpp @@ -11,13 +11,15 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + /// Just changes time zone information for data type. The calculation is free. class FunctionToTimeZone : public IFunction { @@ -58,6 +60,8 @@ public: } }; +} + void registerFunctionToTimeZone(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/toTypeName.cpp b/src/Functions/toTypeName.cpp index a47307acbe9..99f39523114 100644 --- a/src/Functions/toTypeName.cpp +++ b/src/Functions/toTypeName.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ /** toTypeName(x) - get the type name * Returns name of IDataType instance (name of data type). 
@@ -85,6 +87,7 @@ public: ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; } }; +} void registerFunctionToTypeName(FunctionFactory & factory) { diff --git a/src/Functions/toUnixTimestamp64Micro.cpp b/src/Functions/toUnixTimestamp64Micro.cpp index 845647fc0dd..e1a9382e69a 100644 --- a/src/Functions/toUnixTimestamp64Micro.cpp +++ b/src/Functions/toUnixTimestamp64Micro.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct TransformToMicro { @@ -12,6 +14,8 @@ struct TransformToMicro using ResultDataType = DataTypeInt64; }; +} + void registerToUnixTimestamp64Micro(FunctionFactory & factory) { factory.registerFunction>(); diff --git a/src/Functions/toUnixTimestamp64Milli.cpp b/src/Functions/toUnixTimestamp64Milli.cpp index 88baf5c0b61..b49b1c9042b 100644 --- a/src/Functions/toUnixTimestamp64Milli.cpp +++ b/src/Functions/toUnixTimestamp64Milli.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct TransformToMilli { @@ -12,6 +14,8 @@ struct TransformToMilli using ResultDataType = DataTypeInt64; }; +} + void registerToUnixTimestamp64Milli(FunctionFactory & factory) { factory.registerFunction>(); diff --git a/src/Functions/toUnixTimestamp64Nano.cpp b/src/Functions/toUnixTimestamp64Nano.cpp index 5e90a9504cf..832d845434c 100644 --- a/src/Functions/toUnixTimestamp64Nano.cpp +++ b/src/Functions/toUnixTimestamp64Nano.cpp @@ -3,6 +3,8 @@ namespace DB { +namespace +{ struct TransformToNano { @@ -12,6 +14,8 @@ struct TransformToNano using ResultDataType = DataTypeInt64; }; +} + void registerToUnixTimestamp64Nano(FunctionFactory & factory) { factory.registerFunction>(); diff --git a/src/Functions/toValidUTF8.cpp b/src/Functions/toValidUTF8.cpp index c18ef8e7622..189556f48ea 100644 --- a/src/Functions/toValidUTF8.cpp +++ b/src/Functions/toValidUTF8.cpp @@ -20,6 +20,9 @@ namespace ErrorCodes extern const UInt8 length_of_utf8_sequence[256]; +namespace +{ + struct ToValidUTF8Impl { static void toValidUTF8One(const char * begin, const char * end, WriteBuffer & write_buffer) @@ -135,6 +138,8 @@ struct NameToValidUTF8 }; using FunctionToValidUTF8 = FunctionStringToString; +} + void registerFunctionToValidUTF8(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/today.cpp b/src/Functions/today.cpp index 9c3395b6ab1..5522246b0a6 100644 --- a/src/Functions/today.cpp +++ b/src/Functions/today.cpp @@ -10,6 +10,8 @@ namespace DB { +namespace +{ class ExecutableFunctionToday : public IExecutableFunctionImpl { @@ -79,6 +81,8 @@ public: } }; +} + void registerFunctionToday(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 884b972c2b4..8eab33e7633 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -17,7 +17,6 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -26,6 +25,8 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ /** transform(x, from_array, to_array[, default]) - convert x according to an explicitly passed match. 
*/ @@ -846,6 +847,8 @@ private: } }; +} + void registerFunctionTransform(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/trim.cpp b/src/Functions/trim.cpp index eba49739074..fdaf25b815e 100644 --- a/src/Functions/trim.cpp +++ b/src/Functions/trim.cpp @@ -6,12 +6,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + struct TrimModeLeft { static constexpr auto name = "trimLeft"; @@ -103,6 +105,8 @@ using FunctionTrimLeft = FunctionStringToString, using FunctionTrimRight = FunctionStringToString, TrimModeRight>; using FunctionTrimBoth = FunctionStringToString, TrimModeBoth>; +} + void registerFunctionTrim(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index 9e55ba656a2..2546ec68193 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -7,12 +7,13 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ /** tuple(x, y, ...) is a function that allows you to group several columns * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. @@ -75,6 +76,8 @@ public: } }; +} + void registerFunctionTuple(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 7264fb67bfb..896d9ed5c13 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -14,13 +14,14 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_INDEX; } +namespace +{ /** Extract element of tuple by constant index or name. The operation is essentially free. * Also the function looks through Arrays: you can get Array of tuple elements from Array of Tuples. 
@@ -139,6 +140,7 @@ private:
     }
 };

+}

 void registerFunctionTupleElement(FunctionFactory & factory)
 {
diff --git a/src/Functions/upper.cpp b/src/Functions/upper.cpp
index e96a5a312fb..515574e2a09 100644
--- a/src/Functions/upper.cpp
+++ b/src/Functions/upper.cpp
@@ -5,6 +5,8 @@

 namespace DB
 {
+namespace
+{

 struct NameUpper
 {
@@ -12,6 +14,8 @@ struct NameUpper
 };
 using FunctionUpper = FunctionStringToString<LowerUpperImpl<'a', 'z'>, NameUpper>;

+}
+
 void registerFunctionUpper(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionUpper>(FunctionFactory::CaseInsensitive);
diff --git a/src/Functions/upperUTF8.cpp b/src/Functions/upperUTF8.cpp
index a6c7a4d41cd..1a85b133757 100644
--- a/src/Functions/upperUTF8.cpp
+++ b/src/Functions/upperUTF8.cpp
@@ -6,6 +6,8 @@

 namespace DB
 {
+namespace
+{

 struct NameUpperUTF8
 {
@@ -14,6 +16,8 @@ struct NameUpperUTF8

 using FunctionUpperUTF8 = FunctionStringToString<LowerUpperUTF8Impl<'a', 'z', Poco::Unicode::toUpper, UTF8CyrillicToCase<false>>, NameUpperUTF8>;

+}
+
 void registerFunctionUpperUTF8(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionUpperUTF8>();
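All of the hunks above apply one mechanical refactoring: each function's implementation classes move into an unnamed namespace, so only the registerFunctionX entry point keeps external linkage. A minimal standalone sketch of the pattern, assuming a trimmed-down factory (FunctionHello, registerFunctionHello and this FunctionFactory are illustrative, not ClickHouse's real API):

    #include <functional>
    #include <memory>
    #include <string>
    #include <unordered_map>

    namespace DB
    {

    class IFunction { public: virtual ~IFunction() = default; };

    class FunctionFactory
    {
    public:
        template <typename Function>
        void registerFunction() { creators[Function::name] = [] { return std::make_unique<Function>(); }; }
    private:
        std::unordered_map<std::string, std::function<std::unique_ptr<IFunction>()>> creators;
    };

    namespace
    {

    /// Internal linkage: visible only inside this translation unit, so it
    /// cannot collide with a class of the same name in another .cpp file.
    class FunctionHello : public IFunction
    {
    public:
        static constexpr auto name = "hello";
    };

    }

    /// The only symbol this file needs to export.
    void registerFunctionHello(FunctionFactory & factory)
    {
        factory.registerFunction<FunctionHello>();
    }

    }

With the classes in an unnamed namespace, two .cpp files can each define a local FunctionHello without violating the one-definition rule, and the compiler is free to fold or inline them.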
Too large arguments", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + + total = (sum - 1) / step; // total = (end - begin + step - 1) / step + } if (total > MAX_ELEMENTS) throw Exception("The range given in function " diff --git a/tests/queries/0_stateless/01463_resample_overflow.reference b/tests/queries/0_stateless/01463_resample_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01463_resample_overflow.sql b/tests/queries/0_stateless/01463_resample_overflow.sql new file mode 100644 index 00000000000..298f852ed14 --- /dev/null +++ b/tests/queries/0_stateless/01463_resample_overflow.sql @@ -0,0 +1 @@ +select groupArrayResample(-9223372036854775808, 9223372036854775807, 9223372036854775807)(number, toInt64(number)) FROM numbers(7); -- { serverError 69 } From 7373aaf465134a9e9875f346171da0627bf26d6f Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Tue, 8 Sep 2020 03:59:13 +0300 Subject: [PATCH 107/298] add IsOperation helper --- src/Functions/FunctionBinaryArithmetic.h | 69 ++++++++---------------- src/Functions/FunctionUnaryArithmetic.h | 6 +-- src/Functions/FunctionsComparison.h | 10 ++-- src/Functions/IsOperation.h | 62 +++++++++++++++++++++ src/Functions/abs.cpp | 4 -- src/Functions/divide.cpp | 5 -- src/Functions/greatest.cpp | 4 -- src/Functions/intDivOrZero.cpp | 4 -- src/Functions/jumpConsistentHash.cpp | 2 +- src/Functions/least.cpp | 4 -- src/Functions/minus.cpp | 4 -- src/Functions/multiply.cpp | 4 -- src/Functions/negate.cpp | 4 -- src/Functions/plus.cpp | 4 -- 14 files changed, 93 insertions(+), 93 deletions(-) create mode 100644 src/Functions/IsOperation.h diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 2a467451684..d899a95ddc6 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -22,6 +22,7 @@ #include #include "IFunctionImpl.h" #include "FunctionHelpers.h" +#include "IsOperation.h" #include "DivisionUtils.h" #include "castTypeToEither.h" #include "FunctionFactory.h" @@ -167,17 +168,6 @@ struct BinaryOperationImpl : BinaryOperationImplBase }; -template struct PlusImpl; -template struct MinusImpl; -template struct MultiplyImpl; -template struct DivideFloatingImpl; -template struct DivideIntegralImpl; -template struct DivideIntegralOrZeroImpl; -template struct LeastBaseImpl; -template struct GreatestBaseImpl; -template struct ModuloImpl; - - /// Binary operations for Decimals need scale args /// +|- scale one of args (which scale factor is not 1). ScaleR = oneof(Scale1, Scale2); /// * no agrs scale. 
From 7373aaf465134a9e9875f346171da0627bf26d6f Mon Sep 17 00:00:00 2001
From: Artem Zuikov
Date: Tue, 8 Sep 2020 03:59:13 +0300
Subject: [PATCH 107/298] add IsOperation helper

---
 src/Functions/FunctionBinaryArithmetic.h | 69 ++++++++----------------
 src/Functions/FunctionUnaryArithmetic.h | 6 +--
 src/Functions/FunctionsComparison.h | 10 ++--
 src/Functions/IsOperation.h | 62 +++++++++++++++++++++
 src/Functions/abs.cpp | 4 --
 src/Functions/divide.cpp | 5 --
 src/Functions/greatest.cpp | 4 --
 src/Functions/intDivOrZero.cpp | 4 --
 src/Functions/jumpConsistentHash.cpp | 2 +-
 src/Functions/least.cpp | 4 --
 src/Functions/minus.cpp | 4 --
 src/Functions/multiply.cpp | 4 --
 src/Functions/negate.cpp | 4 --
 src/Functions/plus.cpp | 4 --
 14 files changed, 93 insertions(+), 93 deletions(-)
 create mode 100644 src/Functions/IsOperation.h

diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h
index 2a467451684..d899a95ddc6 100644
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@@ -22,6 +22,7 @@
 #include
 #include "IFunctionImpl.h"
 #include "FunctionHelpers.h"
+#include "IsOperation.h"
 #include "DivisionUtils.h"
 #include "castTypeToEither.h"
 #include "FunctionFactory.h"
@@ -167,17 +168,6 @@ struct BinaryOperationImpl : BinaryOperationImplBase
 };


-template <typename, typename> struct PlusImpl;
-template <typename, typename> struct MinusImpl;
-template <typename, typename> struct MultiplyImpl;
-template <typename, typename> struct DivideFloatingImpl;
-template <typename, typename> struct DivideIntegralImpl;
-template <typename, typename> struct DivideIntegralOrZeroImpl;
-template <typename, typename> struct LeastBaseImpl;
-template <typename, typename> struct GreatestBaseImpl;
-template <typename, typename> struct ModuloImpl;
-
-
 /// Binary operations for Decimals need scale args
 /// +|- scale one of args (which scale factor is not 1). ScaleR = oneof(Scale1, Scale2);
 /// * no args scale. ScaleR = Scale1 + Scale2;
@@ -185,15 +175,15 @@ template <template <typename, typename> typename Operation, typename ResultType_, bool _check_overflow = true>
 struct DecimalBinaryOperation
 {
-    static constexpr bool is_plus_minus = std::is_same_v, PlusImpl> ||
-        std::is_same_v, MinusImpl>;
-    static constexpr bool is_multiply = std::is_same_v, MultiplyImpl>;
-    static constexpr bool is_float_division = std::is_same_v, DivideFloatingImpl>;
-    static constexpr bool is_int_division = std::is_same_v, DivideIntegralImpl> ||
-        std::is_same_v, DivideIntegralOrZeroImpl>;
+    static constexpr bool is_plus_minus = IsOperation<Operation>::plus ||
+        IsOperation<Operation>::minus;
+    static constexpr bool is_multiply = IsOperation<Operation>::multiply;
+    static constexpr bool is_float_division = IsOperation<Operation>::div_floating;
+    static constexpr bool is_int_division = IsOperation<Operation>::div_int ||
+        IsOperation<Operation>::div_int_or_zero;
     static constexpr bool is_division = is_float_division || is_int_division;
-    static constexpr bool is_compare = std::is_same_v, LeastBaseImpl> ||
-        std::is_same_v, GreatestBaseImpl>;
+    static constexpr bool is_compare = IsOperation<Operation>::least ||
+        IsOperation<Operation>::greatest;
     static constexpr bool is_plus_minus_compare = is_plus_minus || is_compare;
     static constexpr bool can_overflow = is_plus_minus || is_multiply;

@@ -529,15 +519,7 @@ private:
    /// it's not correct for Decimal
    using Op = Operation<T0, T1>;
 public:
-    static constexpr bool allow_decimal =
-        std::is_same_v, PlusImpl> ||
-        std::is_same_v, MinusImpl> ||
-        std::is_same_v, MultiplyImpl> ||
-        std::is_same_v, DivideFloatingImpl> ||
-        std::is_same_v, DivideIntegralImpl> ||
-        std::is_same_v, DivideIntegralOrZeroImpl> ||
-        std::is_same_v, LeastBaseImpl> ||
-        std::is_same_v, GreatestBaseImpl>;
+    static constexpr bool allow_decimal = IsOperation<Operation>::allow_decimal;

     /// Appropriate result type for binary operator on numeric types. "Date" can also mean
     /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid).
@@ -556,21 +538,21 @@
             DataTypeFromFieldType<typename Op::ResultType>>,
         /// Date + Integral -> Date
         /// Integral + Date -> Date
-        Case>, Switch<
+        Case<IsOperation<Operation>::plus, Switch<
             Case<IsDateOrDateTime<LeftDataType>, LeftDataType>,
             Case<IsDateOrDateTime<RightDataType>, RightDataType>>>,
         /// Date - Date -> Int32
         /// Date - Integral -> Date
-        Case>, Switch<
+        Case<IsOperation<Operation>::minus, Switch<
             Case<IsDateOrDateTime<RightDataType>, DataTypeInt32>,
             Case<IsDateOrDateTime<LeftDataType> && IsIntegral<RightDataType>, LeftDataType>>>,
         /// least(Date, Date) -> Date
         /// greatest(Date, Date) -> Date
-        Case && (std::is_same_v> || std::is_same_v>),
+        Case<IsDateOrDateTime<LeftDataType> && (IsOperation<Operation>::least || IsOperation<Operation>::greatest),
             LeftDataType>,
         /// Date % Int32 -> Int32
         /// Date % Float -> Float64
-        Case>, Switch<
+        Case<IsOperation<Operation>::modulo, Switch<
             Case<IsDateOrDateTime<LeftDataType> && IsIntegral<RightDataType>, RightDataType>,
             Case<IsDateOrDateTime<LeftDataType> && IsFloatingPoint<RightDataType>, DataTypeFloat64>>>>;
 };
@@ -627,10 +609,9 @@ class FunctionBinaryArithmetic : public IFunction
     /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval.
     /// We construct another function (example: addMonths) and call it.

-    static constexpr bool function_is_plus = std::is_same_v, PlusImpl>;
-    static constexpr bool function_is_minus = std::is_same_v, MinusImpl>;
-
-    if (!function_is_plus && !function_is_minus)
+    static constexpr bool function_is_plus = IsOperation<Op>::plus;
+    static constexpr bool function_is_minus = IsOperation<Op>::minus;
+    if constexpr (!function_is_plus && !function_is_minus)
         return {};

     const DataTypePtr & type_time = first_is_date_or_datetime ? type0 : type1;
@@ -669,7 +650,7 @@ class FunctionBinaryArithmetic : public IFunction
     bool isAggregateMultiply(const DataTypePtr & type0, const DataTypePtr & type1) const
     {
-        if constexpr (!std::is_same_v, MultiplyImpl>)
+        if constexpr (!IsOperation<Op>::multiply)
             return false;

         WhichDataType which0(type0);
@@ -681,7 +662,7 @@ class FunctionBinaryArithmetic : public IFunction
     bool isAggregateAddition(const DataTypePtr & type0, const DataTypePtr & type1) const
     {
-        if constexpr (!std::is_same_v, PlusImpl>)
+        if constexpr (!IsOperation<Op>::plus)
             return false;

         WhichDataType which0(type0);
@@ -891,10 +872,8 @@ public:
     {
         if constexpr (IsDataTypeDecimal<LeftDataType> && IsDataTypeDecimal<RightDataType>)
         {
-            constexpr bool is_multiply = std::is_same_v, MultiplyImpl>;
-            constexpr bool is_division = std::is_same_v, DivideFloatingImpl> ||
-                std::is_same_v, DivideIntegralImpl> ||
-                std::is_same_v, DivideIntegralOrZeroImpl>;
+            constexpr bool is_multiply = IsOperation<Op>::multiply;
+            constexpr bool is_division = IsOperation<Op>::division;

             ResultDataType result_type = decimalResultType(left, right, is_multiply, is_division);
             type_res = std::make_shared<ResultDataType>(result_type.getPrecision(), result_type.getScale());
@@ -1016,10 +995,8 @@ public:
         if constexpr (!std::is_same_v<ResultDataType, InvalidType>)
         {
             constexpr bool result_is_decimal = IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>;
-            constexpr bool is_multiply = std::is_same_v, MultiplyImpl>;
-            constexpr bool is_division = std::is_same_v, DivideFloatingImpl> ||
-                std::is_same_v, DivideIntegralImpl> ||
-                std::is_same_v, DivideIntegralOrZeroImpl>;
+            constexpr bool is_multiply = IsOperation<Op>::multiply;
+            constexpr bool is_division = IsOperation<Op>::division;

             using T0 = typename LeftDataType::FieldType;
             using T1 = typename RightDataType::FieldType;
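The new src/Functions/IsOperation.h is created by this commit but its body does not appear in this excerpt. A minimal approximation of what such a helper has to do (an illustrative sketch, not the file's actual contents): instantiate the template-template parameter with one fixed pair of types and compare the result against each known operation template, so the verbose std::is_same_v checks live in a single place.

    #include <cstdint>
    #include <type_traits>

    // Forward declarations are enough: std::is_same_v only compares type
    // identity and never requires complete types.
    template <typename, typename> struct PlusImpl;
    template <typename, typename> struct MultiplyImpl;
    template <typename, typename> struct DivideFloatingImpl;

    template <template <typename, typename> typename Op1,
              template <typename, typename> typename Op2>
    inline constexpr bool is_same_op = std::is_same_v<Op1<std::uint8_t, std::uint8_t>,
                                                      Op2<std::uint8_t, std::uint8_t>>;

    template <template <typename, typename> typename Op>
    struct IsOperation
    {
        static constexpr bool plus = is_same_op<Op, PlusImpl>;
        static constexpr bool multiply = is_same_op<Op, MultiplyImpl>;
        static constexpr bool div_floating = is_same_op<Op, DivideFloatingImpl>;

        // Derived predicates can then be composed from the basic ones.
        static constexpr bool allow_decimal = plus || multiply || div_floating;
    };

    static_assert(IsOperation<PlusImpl>::plus);
    static_assert(!IsOperation<PlusImpl>::multiply);

The payoff is visible throughout the diff: every call site shrinks from a multi-line disjunction of std::is_same_v checks to a single named predicate.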
diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h
index 5b072e5848a..94f3fd291de 100644
--- a/src/Functions/FunctionUnaryArithmetic.h
+++ b/src/Functions/FunctionUnaryArithmetic.h
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <Functions/IsOperation.h>
 #include

 #if !defined(ARCADIA_BUILD)
@@ -71,9 +72,6 @@ struct FixedStringUnaryOperationImpl
 template <typename>
 struct FunctionUnaryArithmeticMonotonicity;

-template <typename> struct AbsImpl;
-template <typename> struct NegateImpl;
-
 /// Used to indicate undefined operation
 struct InvalidType;

@@ -81,7 +79,7 @@ struct InvalidType;
 template