1. GraphiteMergeTree is added.

2. Merge remote-tracking branch 'upstream/master'.
This commit is contained in:
BayoNet 2017-05-16 19:54:45 +03:00
commit 2d8df96f7e
329 changed files with 7551 additions and 3432 deletions

View File

@ -4,7 +4,7 @@ macro(add_glob cur_list)
endmacro()
macro(add_headers_and_sources prefix common_path)
add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h ${common_path}/*.inl)
add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.h)
endmacro()

View File

@ -31,6 +31,7 @@ endif ()
add_subdirectory (libcityhash)
add_subdirectory (libfarmhash)
add_subdirectory (libmetrohash)
add_subdirectory (libbtrie)
if (USE_INTERNAL_ZLIB_LIBRARY)
add_subdirectory (libzlib-ng)

View File

@ -0,0 +1,6 @@
# Build the bundled libbtrie (bit-trie for IP longest-prefix match) as a
# static library; the header is listed so IDEs index it.
include_directories (BEFORE include)
add_library (btrie
src/btrie.c
include/btrie.h
)

23
contrib/libbtrie/LICENSE Normal file
View File

@ -0,0 +1,23 @@
Copyright (c) 2013, CobbLiu
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,155 @@
#pragma once

#if defined (__cplusplus)
extern "C" {
#endif

#include <stdlib.h>
#include <stdint.h>

/**
 * In btrie, each node represents one bit of an IP prefix tree.
 * Left child means bit 0, right child means bit 1.
 */

/* Sentinel stored in nodes that carry no value.
 * Parenthesized so the macro expands safely inside larger expressions. */
#define BTRIE_NULL ((uintptr_t) -1)

/* Maximum number of memory-pool pages a trie may allocate.
 * Parenthesized: the original bare `1024 * 16` would mis-expand in
 * expressions such as `x % MAX_PAGES`. */
#define MAX_PAGES (1024 * 16)

typedef struct btrie_node_s btrie_node_t;

struct btrie_node_s {
    btrie_node_t *right;   /* child for bit 1 */
    btrie_node_t *left;    /* child for bit 0 */
    btrie_node_t *parent;
    uintptr_t     value;   /* BTRIE_NULL when no prefix ends here */
};

typedef struct btrie_s {
    btrie_node_t *root;
    btrie_node_t *free;    /* free list of recycled nodes */
    char         *start;   /* bump pointer into the current pool page */
    size_t        size;    /* bytes remaining in the current pool page */

    /*
     * Memory pool.
     * Nodes are carved out of whole pages, so destruction only needs
     * to free the pages, never individual nodes.
     */
    char  *pools[MAX_PAGES];
    size_t len;            /* number of pages allocated so far */
} btrie_t;

/**
 * Create an empty btrie.
 *
 * @Return:
 *   The created ip radix tree, or NULL if creation failed.
 */
btrie_t *btrie_create(void);

/**
 * Destroy the ip radix tree and release all of its memory.
 *
 * @Return:
 *   OK (0) if deletion succeeded.
 */
int btrie_destroy(btrie_t *tree);

/**
 * Count the nodes in the radix tree.
 */
size_t btrie_count(btrie_t *tree);

/**
 * Return the number of bytes allocated for the tree's node pool.
 */
size_t btrie_allocated(btrie_t *tree);

/**
 * Add an ipv4 prefix into the btrie.
 *
 * @Args:
 *   key:   ip address (host byte order)
 *   mask:  key's netmask
 *   value: value of this IP; may be any uintptr_t except BTRIE_NULL.
 *
 * @Return:
 *   0 for success.
 *   -1 for failure (duplicate prefix or out of memory).
 */
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
    uintptr_t value);

/**
 * Delete an ipv4 prefix from the btrie.
 *
 * @Return:
 *   0 for success.
 *   -1 for failure (prefix not present).
 */
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);

/**
 * Longest-prefix lookup of an ipv4 address.
 *
 * @Return:
 *   The value of the longest matching prefix.
 *   BTRIE_NULL if no prefix matches.
 */
uintptr_t btrie_find(btrie_t *tree, uint32_t key);

/**
 * Add an ipv6 prefix into the btrie.
 *
 * @Args:
 *   key:   16-byte ip address
 *   mask:  16-byte netmask
 *   value: value of this IP; may be any uintptr_t except BTRIE_NULL.
 *
 * @Return:
 *   0 for success.
 *   -1 for failure (duplicate prefix or out of memory).
 */
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
    uintptr_t value);

/**
 * Delete an ipv6 prefix from the btrie.
 *
 * @Return:
 *   0 for success.
 *   -1 for failure (prefix not present).
 */
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);

/**
 * Longest-prefix lookup of a 16-byte ipv6 address.
 *
 * @Return:
 *   The value of the longest matching prefix.
 *   BTRIE_NULL if no prefix matches.
 */
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);

#if defined (__cplusplus)
}
#endif

View File

@ -0,0 +1,460 @@
#include <stdlib.h>
#include <string.h>
#include <btrie.h>
#define PAGE_SIZE 4096
/*
 * Allocate one trie node: pop the free list if possible, otherwise carve
 * a node out of the current memory-pool page, allocating a fresh page
 * when the current one is exhausted.
 *
 * Returns NULL on allocation failure or when the page table is full.
 */
static btrie_node_t *
btrie_alloc(btrie_t *tree)
{
    btrie_node_t *p;

    if (tree->free) {
        /* Reuse a node recycled by btrie_delete*(); the free list is
         * chained through the `right` pointers. */
        p = tree->free;
        tree->free = tree->free->right;
        return p;
    }

    if (tree->size < sizeof(btrie_node_t)) {
        /* Fix: the original indexed pools[tree->len] unchecked and
         * overflowed the page table after MAX_PAGES allocations. */
        if (tree->len >= MAX_PAGES) {
            return NULL;
        }
        /* calloc gives a zeroed page; also note the conventional
         * (nmemb, size) argument order. */
        tree->start = (char *) calloc(PAGE_SIZE, sizeof(char));
        if (tree->start == NULL) {
            return NULL;
        }
        tree->pools[tree->len++] = tree->start;
        tree->size = PAGE_SIZE;
    }

    p = (btrie_node_t *) tree->start;
    tree->start += sizeof(btrie_node_t);
    tree->size -= sizeof(btrie_node_t);

    return p;
}
/*
 * Create an empty btrie: allocate the handle, zero the pool table and
 * allocate the (valueless) root node.
 *
 * Returns NULL on allocation failure.
 */
btrie_t *
btrie_create()
{
    btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
    if (tree == NULL) {
        return NULL;
    }

    tree->free = NULL;
    tree->start = NULL;
    tree->size = 0;
    /* Fix: size the memset from the member itself; the original used
     * sizeof(btrie_t *) * MAX_PAGES, which only matched the array size
     * because all object pointers happen to be the same width. */
    memset(tree->pools, 0, sizeof(tree->pools));
    tree->len = 0;

    tree->root = btrie_alloc(tree);
    if (tree->root == NULL) {
        /* Fix: the original leaked `tree` on this path. */
        free(tree);
        return NULL;
    }

    tree->root->right = NULL;
    tree->root->left = NULL;
    tree->root->parent = NULL;
    tree->root->value = BTRIE_NULL;

    return tree;
}
/* Recursively count the nodes of the subtree rooted at `node`
 * (the node itself plus both child subtrees). */
static size_t
subtree_weight(btrie_node_t *node)
{
    size_t total = 1;  /* count this node */

    if (node->left != NULL) {
        total += subtree_weight(node->left);
    }
    if (node->right != NULL) {
        total += subtree_weight(node->right);
    }

    return total;
}
/* Total number of nodes in the trie (0 for a rootless tree). */
size_t
btrie_count(btrie_t *tree)
{
    return (tree->root == NULL) ? 0 : subtree_weight(tree->root);
}
/* Bytes held by the node pool: one full page per allocated pool slot. */
size_t
btrie_allocated(btrie_t *tree)
{
    return PAGE_SIZE * tree->len;
}
/*
 * Insert an IPv4 prefix (key/mask, host byte order) carrying `value`.
 *
 * Phase 1 walks the existing path bit by bit, most significant first;
 * phase 2 appends fresh nodes for whatever mask bits remain.
 *
 * Returns 0 on success, -1 if the prefix is already present or a node
 * allocation fails.
 */
int
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
    uintptr_t value)
{
    uint32_t bit;
    btrie_node_t *node, *next;

    bit = 0x80000000;
    node = tree->root;
    next = tree->root;

    /* Phase 1: follow the existing path while mask bits remain. */
    while (bit & mask) {
        if (key & bit) {
            next = node->right;
        } else {
            next = node->left;
        }
        if (next == NULL) {
            break;  /* path ends; remaining bits need new nodes */
        }
        bit >>= 1;
        node = next;
    }

    if (next) {
        /* The whole path already exists: `node` is the prefix node. */
        if (node->value != BTRIE_NULL) {
            return -1;  /* duplicate prefix */
        }
        node->value = value;
        return 0;
    }

    /* Phase 2: grow the path with fresh, valueless nodes. */
    while (bit & mask) {
        next = btrie_alloc(tree);
        if (next == NULL) {
            return -1;
        }
        next->right = NULL;
        next->left = NULL;
        next->parent = node;
        next->value = BTRIE_NULL;
        if (key & bit) {
            node->right = next;
        } else {
            node->left = next;
        }
        bit >>= 1;
        node = next;
    }

    /* `node` is now the node for the last mask bit. */
    node->value = value;
    return 0;
}
/*
 * Delete an IPv4 prefix (key/mask) from the trie.
 *
 * If the prefix node still has children it is only cleared (value reset
 * to BTRIE_NULL); otherwise the node and any ancestors made useless by
 * its removal are unlinked and pushed onto the tree's free list.
 *
 * Returns 0 on success, -1 if the prefix is not present.
 */
int
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
{
    uint32_t bit;
    btrie_node_t *node;

    bit = 0x80000000;
    node = tree->root;

    /* Walk down the path described by key/mask. */
    while (node && (bit & mask)) {
        if (key & bit) {
            node = node->right;
        } else {
            node = node->left;
        }
        bit >>= 1;
    }

    if (node == NULL) {
        return -1;  /* path does not exist */
    }

    if (node->right || node->left) {
        /* Interior node: keep the structure, just drop the value. */
        if (node->value != BTRIE_NULL) {
            node->value = BTRIE_NULL;
            return 0;
        }
        return -1;  /* no prefix stored here */
    }

    /* Leaf node: unlink it and prune ancestors that became useless.
     * NOTE(review): `node->parent` is dereferenced unchecked, so this
     * assumes node != tree->root; a mask of 0 addressing a childless
     * root would crash — confirm callers never pass mask == 0. */
    for ( ;; ) {
        if (node->parent->right == node) {
            node->parent->right = NULL;
        } else {
            node->parent->left = NULL;
        }

        /* Recycle the node on the free list (chained via `right`). */
        node->right = tree->free;
        tree->free = node;

        node = node->parent;

        if (node->right || node->left) {
            break;  /* parent still has another child */
        }
        if (node->value != BTRIE_NULL) {
            break;  /* parent stores a prefix itself */
        }
        if (node->parent == NULL) {
            break;  /* reached the root */
        }
    }

    return 0;
}
/*
 * Longest-prefix lookup of an IPv4 address: walk from the root, bit by
 * bit (MSB first), remembering the last value seen along the path.
 *
 * Returns the value of the longest matching prefix, or BTRIE_NULL.
 */
uintptr_t
btrie_find(btrie_t *tree, uint32_t key)
{
    uintptr_t best = BTRIE_NULL;
    uint32_t probe = 0x80000000;
    btrie_node_t *cur = tree->root;

    while (cur != NULL) {
        if (cur->value != BTRIE_NULL) {
            best = cur->value;  /* deeper match found so far */
        }
        cur = (key & probe) ? cur->right : cur->left;
        probe >>= 1;
    }

    return best;
}
/*
 * Insert an IPv6 prefix (16-byte key/mask) carrying `value`.
 *
 * Same two-phase walk as btrie_insert(), but the bit cursor advances
 * through the 16 mask bytes; depth is capped at 128 bits (i == 16).
 *
 * Returns 0 on success, -1 if the prefix is already present or a node
 * allocation fails.
 */
int
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
    uintptr_t value)
{
    uint8_t bit;
    unsigned int i;  /* fix: `uint` is a non-standard glibc typedef */
    btrie_node_t *node, *next;

    i = 0;
    bit = 0x80;
    node = tree->root;
    next = tree->root;

    /* Phase 1: follow the existing path while mask bits remain. */
    while (bit & mask[i]) {
        if (key[i] & bit) {
            next = node->right;
        } else {
            next = node->left;
        }
        if (next == NULL) {
            break;  /* path ends; remaining bits need new nodes */
        }

        bit >>= 1;
        node = next;

        if (bit == 0) {
            if (++i == 16) {
                break;  /* all 128 bits consumed */
            }
            bit = 0x80;
        }
    }

    if (next) {
        /* The whole path already exists: `node` is the prefix node. */
        if (node->value != BTRIE_NULL) {
            return -1;  /* duplicate prefix */
        }
        node->value = value;
        return 0;
    }

    /* Phase 2: grow the path with fresh, valueless nodes. */
    while (bit & mask[i]) {
        next = btrie_alloc(tree);
        if (next == NULL) {
            return -1;
        }

        next->right = NULL;
        next->left = NULL;
        next->parent = node;
        next->value = BTRIE_NULL;

        if (key[i] & bit) {
            node->right = next;
        } else {
            node->left = next;
        }

        bit >>= 1;
        node = next;

        if (bit == 0) {
            if (++i == 16) {
                break;  /* all 128 bits consumed */
            }
            bit = 0x80;
        }
    }

    node->value = value;
    return 0;
}
/*
 * Delete an IPv6 prefix (16-byte key/mask) from the trie.
 *
 * Mirrors btrie_delete(): an interior prefix node is only cleared,
 * while a leaf is unlinked and pruned together with any ancestors
 * made useless by its removal.
 *
 * Returns 0 on success, -1 if the prefix is not present.
 */
int
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
{
    uint8_t bit;
    unsigned int i;  /* fix: `uint` is a non-standard glibc typedef */
    btrie_node_t *node;

    i = 0;
    bit = 0x80;
    node = tree->root;

    /* Walk down the path described by key/mask (max 128 bits). */
    while (node && (bit & mask[i])) {
        if (key[i] & bit) {
            node = node->right;
        } else {
            node = node->left;
        }

        bit >>= 1;
        if (bit == 0) {
            if (++i == 16) {
                break;  /* all 128 bits consumed */
            }
            bit = 0x80;
        }
    }

    if (node == NULL) {
        return -1;  /* path does not exist */
    }

    if (node->right || node->left) {
        /* Interior node: keep the structure, just drop the value. */
        if (node->value != BTRIE_NULL) {
            node->value = BTRIE_NULL;
            return 0;
        }
        return -1;  /* no prefix stored here */
    }

    /* Leaf node: unlink it and prune ancestors that became useless.
     * NOTE(review): as in btrie_delete(), node->parent is dereferenced
     * unchecked — assumes the addressed node is never the root. */
    for ( ;; ) {
        if (node->parent->right == node) {
            node->parent->right = NULL;
        } else {
            node->parent->left = NULL;
        }

        /* Recycle the node on the free list (chained via `right`). */
        node->right = tree->free;
        tree->free = node;

        node = node->parent;

        if (node->right || node->left) {
            break;  /* parent still has another child */
        }
        if (node->value != BTRIE_NULL) {
            break;  /* parent stores a prefix itself */
        }
        if (node->parent == NULL) {
            break;  /* reached the root */
        }
    }

    return 0;
}
/*
 * Longest-prefix lookup of a 16-byte IPv6 address: walk from the root,
 * bit by bit (MSB first), remembering the last value seen on the path.
 *
 * Returns the value of the longest matching prefix, or BTRIE_NULL.
 */
uintptr_t
btrie_find_a6(btrie_t *tree, const uint8_t *key)
{
    uint8_t bit;
    uintptr_t value;
    unsigned int i;  /* fix: `uint` is a non-standard glibc typedef */
    btrie_node_t *node;

    i = 0;
    bit = 0x80;
    value = BTRIE_NULL;
    node = tree->root;

    while (node) {
        if (node->value != BTRIE_NULL) {
            value = node->value;  /* deeper match found so far */
        }

        /* Fix: stop once all 128 key bits are consumed. The original
         * kept going and read key[16] (one byte out of bounds) when a
         * /128 prefix was present in the trie. */
        if (i == 16) {
            break;
        }

        if (key[i] & bit) {
            node = node->right;
        } else {
            node = node->left;
        }

        bit >>= 1;
        if (bit == 0) {
            i++;
            bit = 0x80;
        }
    }

    return value;
}
/*
 * Destroy the trie: all nodes live inside the pool pages, so freeing
 * every page plus the handle releases everything.
 */
int
btrie_destroy(btrie_t *tree)
{
    size_t page;

    for (page = 0; page < tree->len; ++page) {
        free(tree->pools[page]);
    }
    free(tree);

    return 0;
}

View File

@ -0,0 +1,94 @@
#include <stdio.h>
#include <btrie.h>
/*
 * Smoke test for libbtrie: exercises IPv4/IPv6 insert, longest-prefix
 * find and delete. Prints per-case pass/error lines; returns 0 on a
 * completed run, 1 on setup failure.
 */
int main()
{
    btrie_t *it;
    int ret;
    uintptr_t found;  /* fix: find() returns uintptr_t; don't truncate to int */

    uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
    uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
    uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};

    it = btrie_create();
    if (it == NULL) {
        printf("create error!\n");
        return 0;
    }

    //add 101.45.69.50/16
    ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
    if (ret != 0) {
        printf("insert 1 error.\n");
        goto error;
    }

    //add 10.45.69.50/16
    ret = btrie_insert(it, 170738994, 0xffff0000, 1);
    if (ret != 0) {
        printf("insert 2 error.\n");
        goto error;
    }

    //add 10.45.79.50/16 (duplicate /16 prefix: expected to be rejected)
    ret = btrie_insert(it, 170741554, 0xffff0000, 1);
    if (ret == 0) {
        printf("insert 3 error.\n");
        goto error;
    }

    //add 102.45.79.50/24
    ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
    if (ret != 0) {
        printf("insert 4 error.\n");
        goto error;
    }

    found = btrie_find(it, 170741554);
    if (found == 1) {
        printf("test case 1 passed\n");
    } else {
        printf("test case 1 error\n");
    }

    found = btrie_find(it, 170786817);
    if (found != 1) {
        printf("test case 2 passed\n");
    } else {
        printf("test case 2 error\n");
    }

    ret = btrie_delete(it, 1714245426, 0xffffff00);
    if (ret != 0) {
        printf("delete 1 error\n");
        goto error;
    }
    found = btrie_find(it, 1714245426);
    if (found != 1) {
        printf("test case 3 passed\n");
    } else {
        printf("test case 3 error\n");
    }

    //add dead:beef::/32
    ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
    if (ret != 0) {
        printf("insert 5 error\n");
        goto error;
    }

    found = btrie_find_a6(it, ip_v6);
    if (found == 1) {
        printf("test case 4 passed\n");
    } else {
        printf("test case 4 error\n");
    }

    /* Fix: the original leaked the trie on the success path. */
    btrie_destroy(it);
    return 0;

error:
    btrie_destroy(it);
    printf("test failed\n");
    return 1;
}

View File

@ -5,4 +5,6 @@ add_library (lz4
src/lz4hc.c
include/lz4/lz4.h
include/lz4/lz4hc.h)
include/lz4/lz4hc.h
include/lz4/lz4opt.h)

View File

@ -1,7 +1,7 @@
/*
LZ4 - Fast LZ compression algorithm
Header File
Copyright (C) 2011-2015, Yann Collet.
* LZ4 - Fast LZ compression algorithm
* Header File
* Copyright (C) 2011-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -29,34 +29,79 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 source repository : https://github.com/Cyan4973/lz4
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
- LZ4 homepage : http://www.lz4.org
- LZ4 source repository : https://github.com/lz4/lz4
*/
#pragma once
#ifndef LZ4_H_2983827168210
#define LZ4_H_2983827168210
#if defined (__cplusplus)
extern "C" {
#endif
/*
* lz4.h provides block compression functions, and gives full buffer control to programmer.
* If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
* and can let the library handle its own memory, please use lz4frame.h instead.
/* --- Dependency --- */
#include <stddef.h> /* size_t */
/**
Introduction
LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core,
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
The LZ4 compression library provides in-memory compression and decompression functions.
Compression can be done in:
- a single step (described as Simple Functions)
- a single step, reusing a context (described in Advanced Functions)
- unbounded multiple steps (described as Streaming compression)
lz4.h provides block compression functions. It gives full buffer control to user.
Decompressing an lz4-compressed block also requires metadata (such as compressed size).
Each application is free to encode such metadata in whichever way it wants.
An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
take care of encoding standard metadata alongside LZ4-compressed blocks.
If your application requires interoperability, it's recommended to use it.
A library is provided to take care of it, see lz4frame.h.
*/
/**************************************
* Version
**************************************/
/*^***************************************************************
* Export parameters
*****************************************************************/
/*
* LZ4_DLL_EXPORT :
* Enable exporting of functions when building a Windows DLL
*/
#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
# define LZ4LIB_API __declspec(dllexport)
#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
# define LZ4LIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define LZ4LIB_API
#endif
/*========== Version =========== */
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
#define LZ4_VERSION_MINOR 7 /* for new (non-breaking) interface capabilities */
#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
int LZ4_versionNumber (void);
#define LZ4_VERSION_RELEASE 5 /* for tweaks, bug-fixes, or development */
/**************************************
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
#define LZ4_QUOTE(str) #str
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
LZ4LIB_API int LZ4_versionNumber (void);
LZ4LIB_API const char* LZ4_versionString (void);
/*-************************************
* Tuning parameter
**************************************/
/*
/*!
* LZ4_MEMORY_USAGE :
* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
* Increasing memory usage improves compression ratio
@ -66,15 +111,10 @@ int LZ4_versionNumber (void);
#define LZ4_MEMORY_USAGE 14
/**************************************
/*-************************************
* Simple Functions
**************************************/
int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
/*
LZ4_compress_default() :
/*! LZ4_compress_default() :
Compresses 'sourceSize' bytes from buffer 'source'
into already allocated 'dest' buffer of size 'maxDestSize'.
Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
@ -86,9 +126,10 @@ LZ4_compress_default() :
sourceSize : Max supported value is LZ4_MAX_INPUT_VALUE
maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
or 0 if compression fails
or 0 if compression fails */
LZ4LIB_API int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
LZ4_decompress_safe() :
/*! LZ4_decompress_safe() :
compressedSize : is the precise full size of the compressed block.
maxDecompressedSize : is the size of destination buffer, which must be already allocated.
return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
@ -97,15 +138,16 @@ LZ4_decompress_safe() :
This function is protected against buffer overflow exploits, including malicious data packets.
It never writes outside output buffer, nor reads outside input buffer.
*/
LZ4LIB_API int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
/**************************************
/*-************************************
* Advanced Functions
**************************************/
#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
/*
/*!
LZ4_compressBound() :
Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
This function is primarily useful for memory allocation purposes (destination buffer size).
@ -115,9 +157,9 @@ LZ4_compressBound() :
return : maximum output size in a "worst case" scenario
or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
*/
int LZ4_compressBound(int inputSize);
LZ4LIB_API int LZ4_compressBound(int inputSize);
/*
/*!
LZ4_compress_fast() :
Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
@ -125,21 +167,21 @@ LZ4_compress_fast() :
An acceleration value of "1" is the same as regular LZ4_compress_default()
Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
*/
int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
LZ4LIB_API int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
/*
/*!
LZ4_compress_fast_extState() :
Same compression function, just using an externally allocated memory space to store compression state.
Use LZ4_sizeofState() to know how much memory must be allocated,
and allocate it on 8-bytes boundaries (using malloc() typically).
Then, provide it as 'void* state' to compression function.
*/
int LZ4_sizeofState(void);
int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
LZ4LIB_API int LZ4_sizeofState(void);
LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
/*
/*!
LZ4_compress_destSize() :
Reverse the logic, by compressing as much data as possible from 'source' buffer
into already allocated buffer 'dest' of size 'targetDestSize'.
@ -150,10 +192,10 @@ LZ4_compress_destSize() :
return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
or 0 if compression fails
*/
int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
LZ4LIB_API int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
/*
/*!
LZ4_decompress_fast() :
originalSize : is the original and therefore uncompressed size
return : the number of bytes read from the source buffer (in other words, the compressed size)
@ -164,9 +206,9 @@ LZ4_decompress_fast() :
However, it does not provide any protection against intentionally modified data stream (malicious input).
Use this function in trusted environment only (data to decode comes from a trusted source).
*/
int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
LZ4LIB_API int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
/*
/*!
LZ4_decompress_safe_partial() :
This function decompress a compressed block of size 'compressedSize' at position 'source'
into destination buffer 'dest' of size 'maxDecompressedSize'.
@ -178,98 +220,73 @@ LZ4_decompress_safe_partial() :
If the source stream is detected malformed, the function will stop decoding and return a negative result.
This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
*/
int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
LZ4LIB_API int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
/***********************************************
/*-*********************************************
* Streaming Compression Functions
***********************************************/
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(long long))
/*
* LZ4_stream_t
* information structure to track an LZ4 stream.
* important : init this structure content before first use !
* note : only allocated directly the structure if you are statically linking LZ4
* If you are using liblz4 as a DLL, please use below construction methods instead.
typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
/*! LZ4_createStream() and LZ4_freeStream() :
* LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
* LZ4_freeStream() releases its memory.
*/
typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
/*
* LZ4_resetStream
* Use this function to init an allocated LZ4_stream_t structure
/*! LZ4_resetStream() :
* An LZ4_stream_t structure can be allocated once and re-used multiple times.
* Use this function to init an allocated `LZ4_stream_t` structure and start a new compression.
*/
void LZ4_resetStream (LZ4_stream_t* streamPtr);
LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
/*
* LZ4_createStream will allocate and initialize an LZ4_stream_t structure
* LZ4_freeStream releases its memory.
* In the context of a DLL (liblz4), please use these methods rather than the static struct.
* They are more future proof, in case of a change of LZ4_stream_t size.
/*! LZ4_loadDict() :
* Use this function to load a static dictionary into LZ4_stream.
* Any previous data will be forgotten, only 'dictionary' will remain in memory.
* Loading a size of 0 is allowed.
* Return : dictionary size, in bytes (necessarily <= 64 KB)
*/
LZ4_stream_t* LZ4_createStream(void);
int LZ4_freeStream (LZ4_stream_t* streamPtr);
LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
/*
* LZ4_loadDict
* Use this function to load a static dictionary into LZ4_stream.
* Any previous data will be forgotten, only 'dictionary' will remain in memory.
* Loading a size of 0 is allowed.
* Return : dictionary size, in bytes (necessarily <= 64 KB)
/*! LZ4_compress_fast_continue() :
* Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
* Important : Previous data blocks are assumed to still be present and unmodified !
* 'dst' buffer must be already allocated.
* If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
* If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
*/
int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
/*
* LZ4_compress_fast_continue
* Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
* Important : Previous data blocks are assumed to still be present and unmodified !
* 'dst' buffer must be already allocated.
* If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
* If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
/*! LZ4_saveDict() :
* If previously compressed data block is not guaranteed to remain available at its memory location,
* save it into a safer place (char* safeBuffer).
* Note : you don't need to call LZ4_loadDict() afterwards,
* dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue().
* Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
*/
int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
/*
* LZ4_saveDict
* If previously compressed data block is not guaranteed to remain available at its memory location
* save it into a safer place (char* safeBuffer)
* Note : you don't need to call LZ4_loadDict() afterwards,
* dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
* Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
*/
int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
/************************************************
/*-**********************************************
* Streaming Decompression Functions
* Bufferless synchronous API
************************************************/
typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* incomplete type (defined later) */
#define LZ4_STREAMDECODESIZE_U64 4
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
/*
* LZ4_streamDecode_t
* information structure to track an LZ4 stream.
* init this structure content using LZ4_setStreamDecode or memset() before first use !
*
* In the context of a DLL (liblz4) please prefer usage of construction methods below.
* They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
* LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
* LZ4_freeStreamDecode releases its memory.
/* creation / destruction of streaming decompression tracking structure */
LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
/*! LZ4_setStreamDecode() :
* Use this function to instruct where to find the dictionary.
* Setting a size of 0 is allowed (same effect as reset).
* @return : 1 if OK, 0 if error
*/
LZ4_streamDecode_t* LZ4_createStreamDecode(void);
int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
/*
* LZ4_setStreamDecode
* Use this function to instruct where to find the dictionary.
* Setting a size of 0 is allowed (same effect as reset).
* Return : 1 if OK, 0 if error
*/
int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
/*
*_continue() :
/*!
LZ4_decompress_*_continue() :
These decoding functions allow decompression of multiple blocks in "streaming" mode.
Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
In the case of a ring buffers, decoding buffer must be either :
@ -285,35 +302,120 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
and indicate where it is saved using LZ4_setStreamDecode()
*/
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
/*
Advanced decoding functions :
*_usingDict() :
These decoding functions work the same as
a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
*/
int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
/*! LZ4_decompress_*_usingDict() :
* These decoding functions work the same as
* a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
* They are stand-alone, and don't need an LZ4_streamDecode_t structure.
*/
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
/*^**********************************************
* !!!!!! STATIC LINKING ONLY !!!!!!
***********************************************/
/*-************************************
* Private definitions
**************************************
* Do not use these definitions.
* They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
* Using these definitions will expose code to API and/or ABI break in future versions of the library.
**************************************/
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
/**************************************
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#include <stdint.h>
typedef struct {
uint32_t hashTable[LZ4_HASH_SIZE_U32];
uint32_t currentOffset;
uint32_t initCheck;
const uint8_t* dictionary;
uint8_t* bufferStart; /* obsolete, used for slideInputBuffer */
uint32_t dictSize;
} LZ4_stream_t_internal;
typedef struct {
const uint8_t* externalDict;
size_t extDictSize;
const uint8_t* prefixEnd;
size_t prefixSize;
} LZ4_streamDecode_t_internal;
#else
typedef struct {
unsigned int hashTable[LZ4_HASH_SIZE_U32];
unsigned int currentOffset;
unsigned int initCheck;
const unsigned char* dictionary;
unsigned char* bufferStart; /* obsolete, used for slideInputBuffer */
unsigned int dictSize;
} LZ4_stream_t_internal;
typedef struct {
const unsigned char* externalDict;
size_t extDictSize;
const unsigned char* prefixEnd;
size_t prefixSize;
} LZ4_streamDecode_t_internal;
#endif
/*!
* LZ4_stream_t :
* information structure to track an LZ4 stream.
* init this structure before first use.
* note : only use in association with static linking !
* this definition is not API/ABI safe,
* and may change in a future version !
*/
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
/* Public opaque wrapper: the `table` member forces the union's size
 * (LZ4_STREAMSIZE bytes) and 8-byte alignment so users can allocate it
 * statically; `internal_donotuse` is the real state and is private. */
union LZ4_stream_u {
    unsigned long long table[LZ4_STREAMSIZE_U64];
    LZ4_stream_t_internal internal_donotuse;
} ;  /* previously typedef'd to LZ4_stream_t */
/*!
* LZ4_streamDecode_t :
* information structure to track an LZ4 stream during decompression.
* init this structure using LZ4_setStreamDecode (or memset()) before first use
* note : only use in association with static linking !
* this definition is not API/ABI safe,
* and may change in a future version !
*/
#define LZ4_STREAMDECODESIZE_U64 4
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
/* Public opaque wrapper for the decode stream, sized/aligned by `table`;
 * `internal_donotuse` is private implementation state. */
union LZ4_streamDecode_u {
    unsigned long long table[LZ4_STREAMDECODESIZE_U64];
    LZ4_streamDecode_t_internal internal_donotuse;
} ;   /* previously typedef'd to LZ4_streamDecode_t */
/*=************************************
* Obsolete Functions
**************************************/
/* Deprecation warnings */
/* Should these warnings be a problem,
it is generally possible to disable them,
with -Wno-deprecated-declarations for gcc
or _CRT_SECURE_NO_WARNINGS in Visual for example.
You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
# define LZ4_DEPRECATE_WARNING_DEFBLOCK
typically with -Wno-deprecated-declarations for gcc
or _CRT_SECURE_NO_WARNINGS in Visual.
Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
#else
# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define LZ4_DEPRECATED(message) [[deprecated(message)]]
# elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
# elif (LZ4_GCC_VERSION >= 301)
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
@ -323,20 +425,19 @@ int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalS
# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
# define LZ4_DEPRECATED(message)
# endif
#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
/* Obsolete compression functions */
/* These functions are planned to start generate warnings by r131 approximately */
int LZ4_compress (const char* source, char* dest, int sourceSize);
int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress (const char* source, char* dest, int sourceSize);
LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
/* Obsolete decompression functions */
/* These function names are completely deprecated and must no longer be used.
   They are only provided in lz4.c for compatibility with older programs.
- LZ4_uncompress is the same as LZ4_decompress_fast
- LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
These function prototypes are now disabled; uncomment them only if you really need them.
@ -358,3 +459,5 @@ LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress
#if defined (__cplusplus)
}
#endif
#endif /* LZ4_H_2983827168210 */

View File

@ -1,7 +1,7 @@
/*
LZ4 HC - High Compression Mode of LZ4
Header File
Copyright (C) 2011-2015, Yann Collet.
Copyright (C) 2011-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
@ -28,107 +28,92 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 source repository : https://github.com/Cyan4973/lz4
- LZ4 source repository : https://github.com/lz4/lz4
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#pragma once
#ifndef LZ4_HC_H_19834876238432
#define LZ4_HC_H_19834876238432
#if defined (__cplusplus)
extern "C" {
#endif
/*****************************
* Includes
*****************************/
#include <stddef.h> /* size_t */
/* --- Dependency --- */
/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
/**************************************
* Block Compression
**************************************/
int LZ4_compress_HC (const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
/*
LZ4_compress_HC :
Destination buffer 'dst' must be already allocated.
Compression completion is guaranteed if 'dst' buffer is sized to handle worst circumstances (data not compressible)
Worst size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
srcSize : Max supported value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
compressionLevel : Recommended values are between 4 and 9, although any value between 0 and 16 will work.
0 means "use default value" (see lz4hc.c).
Values >16 behave the same as 16.
return : the number of bytes written into buffer 'dst'
or 0 if compression fails.
*/
/* --- Useful constants --- */
#define LZ4HC_CLEVEL_MIN 3
#define LZ4HC_CLEVEL_DEFAULT 9
#define LZ4HC_CLEVEL_OPT_MIN 11
#define LZ4HC_CLEVEL_MAX 12
/*-************************************
* Block Compression
**************************************/
/*! LZ4_compress_HC() :
* Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
* `dst` must be already allocated.
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
* `compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_MAX_CLEVEL will work.
* Values >LZ4HC_MAX_CLEVEL behave the same as LZ4HC_MAX_CLEVEL.
* @return : the number of bytes written into 'dst'
* or 0 if compression fails.
*/
LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
/* Note :
Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
*/
* Decompression functions are provided within "lz4.h" (BSD license)
*/
int LZ4_sizeofStateHC(void);
int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
/*
LZ4_compress_HC_extStateHC() :
Use this function if you prefer to manually allocate memory for compression tables.
To know how much memory must be allocated for the compression tables, use :
int LZ4_sizeofStateHC();
Allocated memory must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
The allocated memory can then be provided to the compression functions using 'void* state' parameter.
LZ4_compress_HC_extStateHC() is equivalent to previously described function.
It just uses externally allocated memory for stateHC.
*/
/*! LZ4_compress_HC_extStateHC() :
* Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
* `state` size is provided by LZ4_sizeofStateHC().
* Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
*/
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
LZ4LIB_API int LZ4_sizeofStateHC(void);
/**************************************
* Streaming Compression
**************************************/
#define LZ4_STREAMHCSIZE 262192
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
typedef struct { size_t table[LZ4_STREAMHCSIZE_SIZET]; } LZ4_streamHC_t;
/*
LZ4_streamHC_t
This structure allows static allocation of LZ4 HC streaming state.
State must then be initialized using LZ4_resetStreamHC() before first use.
/*-************************************
* Streaming Compression
* Bufferless synchronous API
**************************************/
typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
Static allocation should only be used in combination with static linking.
If you want to use LZ4 as a DLL, please use construction functions below, which are future-proof.
*/
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
* These functions create and release memory for LZ4 HC streaming state.
* Newly created states are automatically initialized.
* Existing states can be re-used several times, using LZ4_resetStreamHC().
* These methods are API and ABI stable, they can be used in combination with a DLL.
*/
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
LZ4_streamHC_t* LZ4_createStreamHC(void);
int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
/*
These functions create and release memory for LZ4 HC streaming state.
Newly created states are already initialized.
Existing state space can be re-used anytime using LZ4_resetStreamHC().
If you use LZ4 as a DLL, use these functions instead of static structure allocation,
to avoid size mismatch between different versions.
*/
LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
/*
These functions compress data in successive blocks of any size, using previous blocks as dictionary.
One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
There is an exception for ring buffers, which can be smaller 64 KB.
Such case is automatically detected and correctly handled by LZ4_compress_HC_continue().
There is an exception for ring buffers, which can be smaller than 64 KB.
Ring buffers scenario is automatically detected and handled by LZ4_compress_HC_continue().
Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
Then, use LZ4_compress_HC_continue() to compress each successive block.
It works like LZ4_compress_HC(), but use previous memory blocks as dictionary to improve compression.
Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
As a reminder, size 'dst' buffer to handle worst cases, using LZ4_compressBound(), to ensure success of compression operation.
'dst' buffer should be sized to handle worst case scenarios, using LZ4_compressBound(), to ensure operation success.
If, for any reason, previous data blocks can't be preserved unmodified in memory during next compression block,
you must save it to a safer memory space, using LZ4_saveDictHC().
@ -136,50 +121,102 @@ int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSi
*/
/*-******************************************
* !!!!! STATIC LINKING ONLY !!!!!
*******************************************/
/**************************************
/*-*************************************
* PRIVATE DEFINITIONS :
* Do not use these definitions.
* They are exposed to allow static allocation of `LZ4_streamHC_t`.
* Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
**************************************/
#define LZ4HC_DICTIONARY_LOGSIZE 17
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
#define LZ4HC_HASH_LOG 15
#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#include <stdint.h>
/* Internal LZ4HC compression context (C99 fixed-width variant).
 * Exposed only so LZ4_streamHC_t can be statically allocated; layout is
 * not API/ABI stable (see PRIVATE DEFINITIONS note above). */
typedef struct
{
    uint32_t hashTable[LZ4HC_HASHTABLESIZE];
    uint16_t chainTable[LZ4HC_MAXD];
    const uint8_t* end;         /* next block here to continue on current prefix */
    const uint8_t* base;        /* All index relative to this position */
    const uint8_t* dictBase;    /* alternate base for extDict */
    uint8_t* inputBuffer;       /* deprecated */
    uint32_t dictLimit;         /* below that point, need extDict */
    uint32_t lowLimit;          /* below that point, no more dict */
    uint32_t nextToUpdate;      /* index from which to continue dictionary update */
    uint32_t searchNum;         /* only for optimal parser */
    uint32_t compressionLevel;  /* level the stream was reset with */
} LZ4HC_CCtx_internal;
#else
/* Pre-C99 fallback: must mirror the C99 LZ4HC_CCtx_internal above
 * field-for-field, using plain `unsigned` types. */
typedef struct
{
    unsigned int hashTable[LZ4HC_HASHTABLESIZE];
    unsigned short chainTable[LZ4HC_MAXD];
    const unsigned char* end;        /* next block here to continue on current prefix */
    const unsigned char* base;       /* All index relative to this position */
    const unsigned char* dictBase;   /* alternate base for extDict */
    unsigned char* inputBuffer;      /* deprecated */
    unsigned int dictLimit;          /* below that point, need extDict */
    unsigned int lowLimit;           /* below that point, no more dict */
    unsigned int nextToUpdate;       /* index from which to continue dictionary update */
    unsigned int searchNum;          /* only for optimal parser */
    unsigned int compressionLevel;
} LZ4HC_CCtx_internal;
#endif
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 393268 */
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
/* Public opaque wrapper for the HC stream: `table` fixes the size
 * (LZ4_STREAMHCSIZE) and alignment; `internal_donotuse` is private. */
union LZ4_streamHC_u {
    size_t table[LZ4_STREAMHCSIZE_SIZET];
    LZ4HC_CCtx_internal internal_donotuse;
};  /* previously typedef'd to LZ4_streamHC_t */
/*
LZ4_streamHC_t :
This structure allows static allocation of LZ4 HC streaming state.
State must be initialized using LZ4_resetStreamHC() before first use.
Static allocation shall only be used in combination with static linking.
When invoking LZ4 from a DLL, use create/free functions instead, which are API and ABI stable.
*/
/*-************************************
* Deprecated Functions
**************************************/
/* Deprecate Warnings */
/* Should these warnings messages be a problem,
it is generally possible to disable them,
with -Wno-deprecated-declarations for gcc
or _CRT_SECURE_NO_WARNINGS in Visual for example.
You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
# define LZ4_DEPRECATE_WARNING_DEFBLOCK
# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
# elif (LZ4_GCC_VERSION >= 301)
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
# elif defined(_MSC_VER)
# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
# else
# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
# define LZ4_DEPRECATED(message)
# endif
#endif // LZ4_DEPRECATE_WARNING_DEFBLOCK
/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
/* compression functions */
/* these functions are planned to trigger warning messages by r131 approximately */
int LZ4_compressHC (const char* source, char* dest, int inputSize);
int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
/* deprecated compression functions */
/* these functions will trigger warning messages in future releases */
LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC (const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
/* Deprecated streaming functions using the older model; should no longer be used */
LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
LZ4_DEPRECATED("use LZ4_saveDictHC() instead") char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") int LZ4_freeHC (void* LZ4HC_Data);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int LZ4_sizeofStreamStateHC(void);
LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
@ -187,3 +224,5 @@ LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStreamStateHC(
#if defined (__cplusplus)
}
#endif
#endif /* LZ4_HC_H_19834876238432 */

View File

@ -0,0 +1,361 @@
/*
lz4opt.h - Optimal Mode of LZ4
Copyright (C) 2015-2017, Przemyslaw Skibinski <inikep@gmail.com>
Note : this file is intended to be included within lz4hc.c
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 source repository : https://github.com/lz4/lz4
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#define LZ4_OPT_NUM (1<<12)
/* One candidate match found by the binary-tree search :
 * backward offset and length, both in bytes. */
typedef struct {
    int off;   /* distance back to the match (ip - matchPos) */
    int len;   /* match length */
} LZ4HC_match_t;
typedef struct {
int price;
int off;
int mlen;
int litlen;
} LZ4HC_optimal_t;
/* price in bytes */
FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
{
size_t price = litlen;
if (litlen >= (size_t)RUN_MASK)
price += 1 + (litlen-RUN_MASK)/255;
return price;
}
/* requires mlen >= MINMATCH */
FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
{
size_t price = 2 + 1; /* 16-bit offset + token */
price += LZ4HC_literalsPrice(litlen);
if (mlen >= (size_t)(ML_MASK+MINMATCH))
price+= 1 + (mlen-(ML_MASK+MINMATCH))/255;
return price;
}
/*-*************************************
* Binary Tree search
***************************************/
/*! LZ4HC_BinTree_InsertAndGetAllMatches() :
 *  Inserts position `ip` into the search structure (hash table giving the
 *  tree root, chainTable entries at index*2 / index*2+1 acting as the two
 *  descent links) and, while descending, records into `matches` every match
 *  strictly longer than `best_mlen` (when `matches` != NULL).
 *  Writes the number of recorded matches through `matchNum` when provided.
 *  @return : always 1 == number of positions consumed (caller advances by it). */
FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches (
    LZ4HC_CCtx_internal* ctx,
    const BYTE* const ip,
    const BYTE* const iHighLimit,
    size_t best_mlen,
    LZ4HC_match_t* matches,
    int* matchNum)
{
    U16* const chainTable = ctx->chainTable;
    U32* const HashTable = ctx->hashTable;
    const BYTE* const base = ctx->base;
    const U32 dictLimit = ctx->dictLimit;
    const U32 current = (U32)(ip - base);
    /* never look back further than MAX_DISTANCE nor below ctx->lowLimit */
    const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* match;
    int nbAttempts = ctx->searchNum;   /* bounds the tree descent */
    int mnum = 0;                      /* matches recorded so far */
    U16 *ptr0, *ptr1, delta0, delta1;
    U32 matchIndex;
    size_t matchLength = 0;
    U32* HashPos;

    if (ip + MINMATCH > iHighLimit) return 1;   /* too close to the end to form a match */

    /* HC4 match finder */
    HashPos = &HashTable[LZ4HC_hashPtr(ip)];
    matchIndex = *HashPos;    /* old tree root for this hash */
    *HashPos = current;       /* current position becomes the new root */

    /* ptr0/ptr1 : links of the new root, to be re-wired during descent */
    ptr0 = &DELTANEXTMAXD(current*2+1);
    ptr1 = &DELTANEXTMAXD(current*2);
    delta0 = delta1 = (U16)(current - matchIndex);

    while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
        nbAttempts--;
        if (matchIndex >= dictLimit) {
            /* candidate lies in the current prefix */
            match = base + matchIndex;
            matchLength = LZ4_count(ip, match, iHighLimit);
        } else {
            /* candidate lies in the external dictionary : count within it,
             * then continue counting across the dict/prefix boundary */
            const BYTE* vLimit = ip + (dictLimit - matchIndex);
            match = dictBase + matchIndex;
            if (vLimit > iHighLimit) vLimit = iHighLimit;
            matchLength = LZ4_count(ip, match, vLimit);
            if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
                matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
        }

        if (matchLength > best_mlen) {
            /* strictly longer than anything seen so far : record it */
            best_mlen = matchLength;
            if (matches) {
                if (matchIndex >= dictLimit)
                    matches[mnum].off = (int)(ip - match);
                else
                    matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
                matches[mnum].len = (int)matchLength;
                mnum++;
            }
            if (best_mlen > LZ4_OPT_NUM) break;
        }

        if (ip+matchLength >= iHighLimit)   /* equal : no way to know if inf or sup */
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */

        /* descend : compare the first differing byte to pick a branch,
         * re-wiring the pending link as we go */
        if (*(ip+matchLength) < *(match+matchLength)) {
            *ptr0 = delta0;
            ptr0 = &DELTANEXTMAXD(matchIndex*2);
            if (*ptr0 == (U16)-1) break;   /* (U16)-1 marks a leaf */
            delta0 = *ptr0;
            delta1 += delta0;
            matchIndex -= delta0;
        } else {
            *ptr1 = delta1;
            ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
            if (*ptr1 == (U16)-1) break;
            delta1 = *ptr1;
            delta0 += delta1;
            matchIndex -= delta1;
        }
    }

    /* terminate the two dangling links of the re-built subtree */
    *ptr0 = (U16)-1;
    *ptr1 = (U16)-1;
    if (matchNum) *matchNum = mnum;
    /* if (best_mlen > 8) return best_mlen-8; */
    if (!matchNum) return 1;
    return 1;
}
/*! LZ4HC_updateBinTree() :
 *  Inserts every not-yet-indexed position in [ctx->nextToUpdate, ip) into the
 *  binary tree, advancing by the step each insertion reports.
 *  Note : ctx->nextToUpdate itself is updated by the caller. */
FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
{
    const BYTE* const prefixStart = ctx->base;
    const U32 target = (U32)(ip - prefixStart);
    U32 pos;
    for (pos = ctx->nextToUpdate; pos < target; )
        pos += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, prefixStart+pos, iHighLimit, 8, NULL, NULL);
}
/** Tree updater, providing best match */
FORCE_INLINE int LZ4HC_BinTree_GetAllMatches (
LZ4HC_CCtx_internal* ctx,
const BYTE* const ip, const BYTE* const iHighLimit,
size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
{
int mnum = 0;
if (ip < ctx->base + ctx->nextToUpdate) return 0; /* skipped area */
if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit);
best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum);
ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen);
return mnum;
}
/* SET_PRICE(pos, ml, offset, ll, cost) :
 * records (ml, offset, ll, cost) into opt[pos], first padding any cells
 * between last_pos and pos with an "infinite" price (1<<30) so they are
 * never preferred. Mutates the caller's `opt` and `last_pos` — only usable
 * inside LZ4HC_compress_optimal(). */
#define SET_PRICE(pos, ml, offset, ll, cost) \
{ \
    while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \
    opt[pos].mlen = (int)ml; \
    opt[pos].off = (int)offset; \
    opt[pos].litlen = (int)ll; \
    opt[pos].price = (int)cost; \
}
/*! LZ4HC_compress_optimal() :
 *  Optimal-parsing compressor : at each input position it gathers all
 *  candidate matches, fills a dynamic-programming price table (opt[]) with
 *  the cheapest encoding cost of every reachable position, then walks the
 *  table backwards to emit the cheapest sequence chain.
 *  @param limit          : whether output is bounded by maxOutputSize
 *  @param sufficient_len : a match this long is taken immediately, skipping DP
 *  @param fullUpdate     : insert every position into the tree (slower, better)
 *  @return : compressed size, or 0 if the output limit was exceeded. */
static int LZ4HC_compress_optimal (
    LZ4HC_CCtx_internal* ctx,
    const char* const source,
    char* dest,
    int inputSize,
    int maxOutputSize,
    limitedOutput_directive limit,
    size_t sufficient_len,
    const int fullUpdate
    )
{
    LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1];   /* this uses a bit too much stack memory to my taste ... */
    LZ4HC_match_t matches[LZ4_OPT_NUM + 1];
    const BYTE* ip = (const BYTE*) source;
    const BYTE* anchor = ip;                /* start of pending (not-yet-emitted) literals */
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimit = iend - MFLIMIT;
    const BYTE* const matchlimit = (iend - LASTLITERALS);
    BYTE* op = (BYTE*) dest;
    BYTE* const oend = op + maxOutputSize;

    /* init */
    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
    ctx->end += inputSize;
    ip++;

    /* Main Loop */
    while (ip < mflimit) {
        size_t const llen = ip - anchor;
        size_t last_pos = 0;                /* furthest position priced in opt[] */
        size_t match_num, cur, best_mlen, best_off;
        memset(opt, 0, sizeof(LZ4HC_optimal_t));  /* memset only the first one */

        match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate);
        if (!match_num) { ip++; continue; }   /* no match here : emit as literal later */

        if ((size_t)matches[match_num-1].len > sufficient_len) {
            /* good enough solution : immediate encoding */
            best_mlen = matches[match_num-1].len;
            best_off = matches[match_num-1].off;
            cur = 0;
            last_pos = 1;
            goto encode;
        }

        /* set prices using matches at position = 0 */
        {   size_t matchNb;
            for (matchNb = 0; matchNb < match_num; matchNb++) {
                size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
                best_mlen = matches[matchNb].len;   /* necessarily < sufficient_len < LZ4_OPT_NUM */
                for ( ; mlen <= best_mlen ; mlen++) {
                    size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen);
                    SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost);   /* updates last_pos and opt[pos] */
        }   }   }

        if (last_pos < MINMATCH) { ip++; continue; }  /* note : on clang at least, this test improves performance */

        /* check further positions */
        opt[0].mlen = opt[1].mlen = 1;
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const curPtr = ip + cur;

            /* establish baseline price if cur is literal */
            {   size_t price, litlen;
                if (opt[cur-1].mlen == 1) {
                    /* no match at previous position : extend its literal run */
                    litlen = opt[cur-1].litlen + 1;
                    if (cur > litlen) {
                        price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen);
                    } else {
                        /* run reaches back before ip : price only the marginal part */
                        price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen);
                    }
                } else {
                    litlen = 1;
                    price = opt[cur - 1].price + LZ4HC_literalsPrice(1);
                }

                if (price < (size_t)opt[cur].price)
                    SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price);   /* note : increases last_pos */
            }

            if (cur == last_pos || curPtr >= mflimit) break;

            match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate);
            if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) {
                /* immediate encoding */
                best_mlen = matches[match_num-1].len;
                best_off = matches[match_num-1].off;
                last_pos = cur + 1;
                goto encode;
            }

            /* set prices using matches at position = cur */
            {   size_t matchNb;
                for (matchNb = 0; matchNb < match_num; matchNb++) {
                    size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
                    /* clamp so cur+ml never runs past the opt[] table */
                    best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ?
                                (size_t)matches[matchNb].len : LZ4_OPT_NUM - cur;

                    for ( ; ml <= best_mlen ; ml++) {
                        size_t ll, price;
                        if (opt[cur].mlen == 1) {
                            ll = opt[cur].litlen;
                            if (cur > ll)
                                price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml);
                            else
                                price = LZ4HC_sequencePrice(llen + ll, ml) - LZ4HC_literalsPrice(llen);
                        } else {
                            ll = 0;
                            price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
                        }

                        if (cur + ml > last_pos || price < (size_t)opt[cur + ml].price) {
                            SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price);
            }   }   }   }
        } /* for (cur = 1; cur <= last_pos; cur++) */

        best_mlen = opt[last_pos].mlen;
        best_off = opt[last_pos].off;
        cur = last_pos - best_mlen;

encode: /* cur, last_pos, best_mlen, best_off must be set */
        opt[0].mlen = 1;
        /* reverse the implicit chain : rewrite opt[] so it can be read forward */
        while (1) {  /* from end to beginning */
            size_t const ml = opt[cur].mlen;
            int const offset = opt[cur].off;
            opt[cur].mlen = (int)best_mlen;
            opt[cur].off = (int)best_off;
            best_mlen = ml;
            best_off = offset;
            if (ml > cur) break;  /* can this happen ? */
            cur -= ml;
        }

        /* encode all recorded sequences */
        cur = 0;
        while (cur < last_pos) {
            int const ml = opt[cur].mlen;
            int const offset = opt[cur].off;
            if (ml == 1) { ip++; cur++; continue; }   /* literal : leave for next sequence's run */
            cur += ml;
            if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0;
        }
    }  /* while (ip < mflimit) */

    /* Encode Last Literals */
    {   int lastRun = (int)(iend - anchor);
        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
        else *op++ = (BYTE)(lastRun<<ML_BITS);
        memcpy(op, anchor, iend - anchor);
        op += iend-anchor;
    }

    /* End */
    return (int) ((char*)op-dest);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/*
LZ4 HC - High Compression Mode of LZ4
Copyright (C) 2011-2015, Yann Collet.
Copyright (C) 2011-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -28,27 +28,36 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 source repository : https://github.com/Cyan4973/lz4
- LZ4 source repository : https://github.com/lz4/lz4
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
/**************************************
/* *************************************
* Tuning Parameter
**************************************/
static const int LZ4HC_compressionLevel_default = 9;
***************************************/
/*!
* HEAPMODE :
* Select how default compression function will allocate workplace memory,
* in stack (0:fastest), or in heap (1:requires malloc()).
* Since workplace is rather large, heap mode is recommended.
*/
#ifndef LZ4HC_HEAPMODE
# define LZ4HC_HEAPMODE 1
#endif
/**************************************
* Includes
**************************************/
#include <lz4/lz4hc.h>
/* *************************************
* Dependency
***************************************/
#include "lz4hc.h"
/**************************************
/* *************************************
* Local Compiler Options
**************************************/
***************************************/
#if defined(__GNUC__)
# pragma GCC diagnostic ignored "-Wunused-function"
#endif
@ -58,52 +67,24 @@ static const int LZ4HC_compressionLevel_default = 9;
#endif
/**************************************
/* *************************************
* Common LZ4 definition
**************************************/
***************************************/
#define LZ4_COMMONDEFS_ONLY
#include "lz4.c"
/**************************************
/* *************************************
* Local Constants
**************************************/
#define DICTIONARY_LOGSIZE 16
#define MAXD (1<<DICTIONARY_LOGSIZE)
#define MAXD_MASK (MAXD - 1)
#define HASH_LOG (DICTIONARY_LOGSIZE-1)
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)
***************************************/
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
static const int g_maxCompressionLevel = 16;
/**************************************
* Local Types
**************************************/
typedef struct
{
U32 hashTable[HASHTABLESIZE];
U16 chainTable[MAXD];
const BYTE* end; /* next block here to continue on current prefix */
const BYTE* base; /* All index relative to this position */
const BYTE* dictBase; /* alternate base for extDict */
BYTE* inputBuffer; /* deprecated */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more dict */
U32 nextToUpdate; /* index from which to continue dictionary update */
U32 compressionLevel;
} LZ4HC_Data_Structure;
/**************************************
* Local Macros
**************************************/
#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
//#define DELTANEXTU16(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */
#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */
#define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */
static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
@ -113,7 +94,7 @@ static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)
/**************************************
* HC Compression
**************************************/
static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start)
static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
{
MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
@ -127,21 +108,20 @@ static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start)
/* Update chains up to ip (excluded) */
FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
{
U16* chainTable = hc4->chainTable;
U32* HashTable = hc4->hashTable;
U16* const chainTable = hc4->chainTable;
U32* const hashTable = hc4->hashTable;
const BYTE* const base = hc4->base;
const U32 target = (U32)(ip - base);
U32 const target = (U32)(ip - base);
U32 idx = hc4->nextToUpdate;
while(idx < target)
{
U32 h = LZ4HC_hashPtr(base+idx);
size_t delta = idx - HashTable[h];
while (idx < target) {
U32 const h = LZ4HC_hashPtr(base+idx);
size_t delta = idx - hashTable[h];
if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
DELTANEXTU16(idx) = (U16)delta;
HashTable[h] = idx;
hashTable[h] = idx;
idx++;
}
@ -149,7 +129,7 @@ FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
}
FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* Index table will be updated */
FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* hc4, /* Index table will be updated */
const BYTE* ip, const BYTE* const iLimit,
const BYTE** matchpos,
const int maxNbAttempts)
@ -161,7 +141,6 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* I
const U32 dictLimit = hc4->dictLimit;
const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
U32 matchIndex;
const BYTE* match;
int nbAttempts=maxNbAttempts;
size_t ml=0;
@ -169,24 +148,19 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* I
LZ4HC_Insert(hc4, ip);
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
while ((matchIndex>=lowLimit) && (nbAttempts))
{
while ((matchIndex>=lowLimit) && (nbAttempts)) {
nbAttempts--;
if (matchIndex >= dictLimit)
{
match = base + matchIndex;
if (matchIndex >= dictLimit) {
const BYTE* const match = base + matchIndex;
if (*(match+ml) == *(ip+ml)
&& (LZ4_read32(match) == LZ4_read32(ip)))
{
size_t mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
if (mlt > ml) { ml = mlt; *matchpos = match; }
}
}
else
{
match = dictBase + matchIndex;
if (LZ4_read32(match) == LZ4_read32(ip))
{
} else {
const BYTE* const match = dictBase + matchIndex;
if (LZ4_read32(match) == LZ4_read32(ip)) {
size_t mlt;
const BYTE* vLimit = ip + (dictLimit - matchIndex);
if (vLimit > iLimit) vLimit = iLimit;
@ -204,7 +178,7 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* I
FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
LZ4HC_Data_Structure* hc4,
LZ4HC_CCtx_internal* hc4,
const BYTE* const ip,
const BYTE* const iLowLimit,
const BYTE* const iHighLimit,
@ -229,38 +203,32 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
LZ4HC_Insert(hc4, ip);
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
while ((matchIndex>=lowLimit) && (nbAttempts))
{
while ((matchIndex>=lowLimit) && (nbAttempts)) {
nbAttempts--;
if (matchIndex >= dictLimit)
{
if (matchIndex >= dictLimit) {
const BYTE* matchPtr = base + matchIndex;
if (*(iLowLimit + longest) == *(matchPtr - delta + longest))
if (LZ4_read32(matchPtr) == LZ4_read32(ip))
{
if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
int back = 0;
while ((ip+back>iLowLimit)
while ((ip+back > iLowLimit)
&& (matchPtr+back > lowPrefixPtr)
&& (ip[back-1] == matchPtr[back-1]))
back--;
mlt -= back;
if (mlt > longest)
{
if (mlt > longest) {
longest = (int)mlt;
*matchpos = matchPtr+back;
*startpos = ip+back;
}
}
}
else
{
const BYTE* matchPtr = dictBase + matchIndex;
if (LZ4_read32(matchPtr) == LZ4_read32(ip))
{
}
} else {
const BYTE* const matchPtr = dictBase + matchIndex;
if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
size_t mlt;
int back=0;
const BYTE* vLimit = ip + (dictLimit - matchIndex);
@ -320,8 +288,15 @@ FORCE_INLINE int LZ4HC_encodeSequence (
/* Encode MatchLength */
length = (int)(matchLength-MINMATCH);
if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
else *token += (BYTE)(length);
if (length>=(int)ML_MASK) {
*token += ML_MASK;
length -= ML_MASK;
for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; }
if (length > 254) { length-=255; *(*op)++ = 255; }
*(*op)++ = (BYTE)length;
} else {
*token += (BYTE)(length);
}
/* Prepare next loop */
*ip += matchLength;
@ -330,18 +305,18 @@ FORCE_INLINE int LZ4HC_encodeSequence (
return 0;
}
#include "lz4opt.h"
static int LZ4HC_compress_generic (
void* ctxvoid,
const char* source,
char* dest,
int inputSize,
int maxOutputSize,
int compressionLevel,
static int LZ4HC_compress_hashChain (
LZ4HC_CCtx_internal* const ctx,
const char* const source,
char* const dest,
int const inputSize,
int const maxOutputSize,
unsigned maxNbAttempts,
limitedOutput_directive limit
)
{
LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
const BYTE* ip = (const BYTE*) source;
const BYTE* anchor = ip;
const BYTE* const iend = ip + inputSize;
@ -351,28 +326,22 @@ static int LZ4HC_compress_generic (
BYTE* op = (BYTE*) dest;
BYTE* const oend = op + maxOutputSize;
unsigned maxNbAttempts;
int ml, ml2, ml3, ml0;
const BYTE* ref=NULL;
const BYTE* start2=NULL;
const BYTE* ref2=NULL;
const BYTE* start3=NULL;
const BYTE* ref3=NULL;
const BYTE* ref = NULL;
const BYTE* start2 = NULL;
const BYTE* ref2 = NULL;
const BYTE* start3 = NULL;
const BYTE* ref3 = NULL;
const BYTE* start0;
const BYTE* ref0;
/* init */
if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel;
if (compressionLevel < 1) compressionLevel = LZ4HC_compressionLevel_default;
maxNbAttempts = 1 << (compressionLevel-1);
ctx->end += inputSize;
ip++;
/* Main Loop */
while (ip < mflimit)
{
while (ip < mflimit) {
ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
if (!ml) { ip++; continue; }
@ -383,19 +352,16 @@ static int LZ4HC_compress_generic (
_Search2:
if (ip+ml < mflimit)
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
else ml2 = ml;
if (ml2 == ml) /* No better match */
{
if (ml2 == ml) { /* No better match */
if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
continue;
}
if (start0 < ip)
{
if (start2 < ip + ml0) /* empirical */
{
if (start0 < ip) {
if (start2 < ip + ml0) { /* empirical */
ip = start0;
ref = ref0;
ml = ml0;
@ -403,8 +369,7 @@ _Search2:
}
/* Here, start0==ip */
if ((start2 - ip) < 3) /* First Match too small : removed */
{
if ((start2 - ip) < 3) { /* First Match too small : removed */
ml = ml2;
ip = start2;
ref =ref2;
@ -417,15 +382,13 @@ _Search3:
* ml2 > ml1, and
* ip1+3 <= ip2 (usually < ip1+ml1)
*/
if ((start2 - ip) < OPTIMAL_ML)
{
if ((start2 - ip) < OPTIMAL_ML) {
int correction;
int new_ml = ml;
if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
correction = new_ml - (int)(start2 - ip);
if (correction > 0)
{
if (correction > 0) {
start2 += correction;
ref2 += correction;
ml2 -= correction;
@ -437,8 +400,7 @@ _Search3:
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
else ml3 = ml2;
if (ml3 == ml2) /* No better match : 2 sequences to encode */
{
if (ml3 == ml2) { /* No better match : 2 sequences to encode */
/* ip & ref are known; Now for ml */
if (start2 < ip+ml) ml = (int)(start2 - ip);
/* Now, encode 2 sequences */
@ -448,18 +410,14 @@ _Search3:
continue;
}
if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
{
if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
{
if (start2 < ip+ml)
{
if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */
if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
if (start2 < ip+ml) {
int correction = (int)(ip+ml - start2);
start2 += correction;
ref2 += correction;
ml2 -= correction;
if (ml2 < MINMATCH)
{
if (ml2 < MINMATCH) {
start2 = start3;
ref2 = ref3;
ml2 = ml3;
@ -487,23 +445,18 @@ _Search3:
* OK, now we have 3 ascending matches; let's write at least the first one
* ip & ref are known; Now for ml
*/
if (start2 < ip+ml)
{
if ((start2 - ip) < (int)ML_MASK)
{
if (start2 < ip+ml) {
if ((start2 - ip) < (int)ML_MASK) {
int correction;
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
correction = ml - (int)(start2 - ip);
if (correction > 0)
{
if (correction > 0) {
start2 += correction;
ref2 += correction;
ml2 -= correction;
}
}
else
{
} else {
ml = (int)(start2 - ip);
}
}
@ -521,8 +474,7 @@ _Search3:
}
/* Encode Last Literals */
{
int lastRun = (int)(iend - anchor);
{ int lastRun = (int)(iend - anchor);
if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
else *op++ = (BYTE)(lastRun<<ML_BITS);
@ -534,23 +486,64 @@ _Search3:
return (int) (((char*)op)-dest);
}
static int LZ4HC_getSearchNum(int compressionLevel)
{
switch (compressionLevel) {
default: return 0; /* unused */
case 11: return 128;
case 12: return 1<<10;
}
}
int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); }
static int LZ4HC_compress_generic (
LZ4HC_CCtx_internal* const ctx,
const char* const source,
char* const dest,
int const inputSize,
int const maxOutputSize,
int compressionLevel,
limitedOutput_directive limit
)
{
if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT;
if (compressionLevel > 9) {
switch (compressionLevel) {
case 10: return LZ4HC_compress_hashChain(ctx, source, dest, inputSize, maxOutputSize, 1 << (16-1), limit);
case 11: ctx->searchNum = LZ4HC_getSearchNum(compressionLevel); return LZ4HC_compress_optimal(ctx, source, dest, inputSize, maxOutputSize, limit, 128, 0);
default:
case 12: ctx->searchNum = LZ4HC_getSearchNum(compressionLevel); return LZ4HC_compress_optimal(ctx, source, dest, inputSize, maxOutputSize, limit, LZ4_OPT_NUM, 1);
}
}
return LZ4HC_compress_hashChain(ctx, source, dest, inputSize, maxOutputSize, 1 << (compressionLevel-1), limit);
}
int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }
int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
{
LZ4HC_CCtx_internal* ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */
LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)src);
LZ4HC_init (ctx, (const BYTE*)src);
if (maxDstSize < LZ4_compressBound(srcSize))
return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
else
return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, noLimit);
return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, noLimit);
}
int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
{
LZ4HC_Data_Structure state;
return LZ4_compress_HC_extStateHC(&state, src, dst, srcSize, maxDstSize, compressionLevel);
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t));
#else
LZ4_streamHC_t state;
LZ4_streamHC_t* const statePtr = &state;
#endif
int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel);
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
free(statePtr);
#endif
return cSize;
}
@ -566,32 +559,38 @@ int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_st
/* initialization */
void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
{
LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= sizeof(LZ4_streamHC_t)); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL;
((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel;
LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
LZ4_streamHCPtr->internal_donotuse.base = NULL;
LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned)compressionLevel;
LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel);
}
int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
{
LZ4HC_Data_Structure* ctxPtr = (LZ4HC_Data_Structure*) LZ4_streamHCPtr;
if (dictSize > 64 KB)
{
LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
if (dictSize > 64 KB) {
dictionary += dictSize - 64 KB;
dictSize = 64 KB;
}
LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3));
ctxPtr->end = (const BYTE*)dictionary + dictSize;
if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
else
if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
return dictSize;
}
/* compression */
static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock)
static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
{
if (ctxPtr->end >= ctxPtr->base + 4)
LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
else
if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
/* Only one memory segment for extDict, so any previous extDict is lost at this stage */
ctxPtr->lowLimit = ctxPtr->dictLimit;
ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
@ -601,34 +600,29 @@ static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newB
ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
}
static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr,
static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
const char* source, char* dest,
int inputSize, int maxOutputSize, limitedOutput_directive limit)
{
LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
/* auto-init if forgotten */
if (ctxPtr->base == NULL)
LZ4HC_init (ctxPtr, (const BYTE*) source);
if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) source);
/* Check overflow */
if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB)
{
if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
if (dictSize > 64 KB) dictSize = 64 KB;
LZ4_loadDictHC((LZ4_streamHC_t*)ctxPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
}
/* Check if blocks follow each other */
if ((const BYTE*)source != ctxPtr->end)
LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
if ((const BYTE*)source != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
/* Check overlapping input/dictionary space */
{
const BYTE* sourceEnd = (const BYTE*) source + inputSize;
const BYTE* dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
const BYTE* dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd))
{
{ const BYTE* sourceEnd = (const BYTE*) source + inputSize;
const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
const BYTE* const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd)) {
if (sourceEnd > dictEnd) sourceEnd = dictEnd;
ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
@ -641,9 +635,9 @@ static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr,
int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
{
if (maxOutputSize < LZ4_compressBound(inputSize))
return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
else
return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit);
return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit);
}
@ -651,14 +645,13 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* sourc
int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
{
LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*)LZ4_streamHCPtr;
int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
if (dictSize > 64 KB) dictSize = 64 KB;
if (dictSize < 4) dictSize = 0;
if (dictSize > prefixSize) dictSize = prefixSize;
memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
{
U32 endIndex = (U32)(streamPtr->end - streamPtr->base);
{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
streamPtr->end = (const BYTE*)safeBuffer + dictSize;
streamPtr->base = streamPtr->end - endIndex;
streamPtr->dictLimit = endIndex - dictSize;
@ -672,8 +665,8 @@ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictS
/***********************************
* Deprecated Functions
***********************************/
/* These functions currently generate deprecation warnings */
/* Deprecated compression functions */
/* These functions are planned to start generate warnings by r131 approximately */
int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
@ -687,45 +680,41 @@ int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src,
/* Deprecated streaming functions */
/* These functions currently generate deprecation warnings */
int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
{
LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */
LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
((LZ4HC_Data_Structure*)state)->inputBuffer = (BYTE*)inputBuffer;
LZ4HC_init(ctx, (const BYTE*)inputBuffer);
ctx->inputBuffer = (BYTE*)inputBuffer;
return 0;
}
void* LZ4_createHC (char* inputBuffer)
{
void* hc4 = ALLOCATOR(1, sizeof(LZ4HC_Data_Structure));
LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOCATOR(1, sizeof(LZ4_streamHC_t));
if (hc4 == NULL) return NULL; /* not enough memory */
LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
((LZ4HC_Data_Structure*)hc4)->inputBuffer = (BYTE*)inputBuffer;
LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
hc4->internal_donotuse.inputBuffer = (BYTE*)inputBuffer;
return hc4;
}
int LZ4_freeHC (void* LZ4HC_Data)
{
FREEMEM(LZ4HC_Data);
return (0);
}
int LZ4_freeHC (void* LZ4HC_Data) { FREEMEM(LZ4HC_Data); return 0; }
int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
{
return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit);
return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, source, dest, inputSize, 0, compressionLevel, noLimit);
}
int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
{
return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
}
char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
{
LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
int dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
LZ4HC_CCtx_internal* const hc4 = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
return (char*)(hc4->inputBuffer + dictSize);
}

View File

@ -96,7 +96,7 @@ public:
ENC_BASE64 = 0x01, /// Base64-encoded output
ENC_BINHEX = 0x02, /// BinHex-encoded output
ENC_BASE64_NO_LF = 0x81, /// Base64-encoded output, no linefeeds
ENC_BINHEX_NO_LF = 0x82, /// BinHex-encoded output, no linefeeds
ENC_BINHEX_NO_LF = 0x82 /// BinHex-encoded output, no linefeeds
};

View File

@ -22,7 +22,6 @@
#define Crypto_Crypto_INCLUDED
#pragma GCC diagnostic push
#if defined(__APPLE__)
// OS X 10.7 deprecates some OpenSSL functions
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@ -116,6 +115,5 @@ void Crypto_API uninitializeCrypto();
} } // namespace Poco::Crypto
#pragma GCC diagnostic pop
#endif // Crypto_Crypto_INCLUDED

View File

@ -61,7 +61,7 @@ protected:
private:
std::string _name;
EVP_MD_CTX* _ctx;
EVP_MD_CTX* _pContext;
Poco::DigestEngine::Digest _digest;
OpenSSLInitializer _openSSLInitializer;
};

View File

@ -130,6 +130,14 @@ public:
/// Returns true if verification against the issuer certificate
/// was successfull, false otherwise.
bool equals(const X509Certificate& otherCertificate) const;
/// Checks whether the certificate is equal to
/// the other certificate, by comparing the hashes
/// of both certificates.
///
/// Returns true if both certificates are identical,
/// otherwise false.
const X509* certificate() const;
/// Returns the underlying OpenSSL certificate.

View File

@ -30,7 +30,7 @@ namespace
{
unsigned long err;
std::string msg;
while ((err = ERR_get_error()))
{
if (!msg.empty())
@ -60,24 +60,28 @@ namespace
Direction dir);
~CryptoTransformImpl();
std::size_t blockSize() const;
int setPadding(int padding);
int setPadding(int padding);
std::streamsize transform(
const unsigned char* input,
std::streamsize inputLength,
unsigned char* output,
std::streamsize outputLength);
std::streamsize finalize(
unsigned char* output,
std::streamsize length);
private:
const EVP_CIPHER* _pCipher;
EVP_CIPHER_CTX _ctx;
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
EVP_CIPHER_CTX* _pContext;
#else
EVP_CIPHER_CTX _context;
#endif
ByteVec _key;
ByteVec _iv;
};
@ -92,32 +96,54 @@ namespace
_key(key),
_iv(iv)
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
_pContext = EVP_CIPHER_CTX_new();
EVP_CipherInit(
&_ctx,
_pContext,
_pCipher,
&_key[0],
_iv.empty() ? 0 : &_iv[0],
(dir == DIR_ENCRYPT) ? 1 : 0);
#else
EVP_CipherInit(
&_context,
_pCipher,
&_key[0],
_iv.empty() ? 0 : &_iv[0],
(dir == DIR_ENCRYPT) ? 1 : 0);
#endif
}
CryptoTransformImpl::~CryptoTransformImpl()
{
EVP_CIPHER_CTX_cleanup(&_ctx);
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
EVP_CIPHER_CTX_cleanup(_pContext);
#else
EVP_CIPHER_CTX_cleanup(&_context);
#endif
}
std::size_t CryptoTransformImpl::blockSize() const
{
return EVP_CIPHER_CTX_block_size(&_ctx);
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
return EVP_CIPHER_CTX_block_size(_pContext);
#else
return EVP_CIPHER_CTX_block_size(&_context);
#endif
}
int CryptoTransformImpl::setPadding(int padding)
{
return EVP_CIPHER_CTX_set_padding(&_ctx, padding);
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
return EVP_CIPHER_CTX_block_size(_pContext);
#else
return EVP_CIPHER_CTX_set_padding(&_context, padding);
#endif
}
std::streamsize CryptoTransformImpl::transform(
const unsigned char* input,
@ -125,16 +151,24 @@ namespace
unsigned char* output,
std::streamsize outputLength)
{
poco_assert (outputLength >= std::streamsize(inputLength + blockSize() - 1));
poco_assert (outputLength >= (inputLength + blockSize() - 1));
int outLen = static_cast<int>(outputLength);
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
int rc = EVP_CipherUpdate(
&_ctx,
_pContext,
output,
&outLen,
input,
static_cast<int>(inputLength));
#else
int rc = EVP_CipherUpdate(
&_context,
output,
&outLen,
input,
static_cast<int>(inputLength));
#endif
if (rc == 0)
throwError();
@ -146,18 +180,22 @@ namespace
unsigned char* output,
std::streamsize length)
{
poco_assert (length >= (std::streamsize)blockSize());
poco_assert (length >= blockSize());
int len = static_cast<int>(length);
// Use the '_ex' version that does not perform implicit cleanup since we
// will call EVP_CIPHER_CTX_cleanup() from the dtor as there is no
// guarantee that finalize() will be called if an error occurred.
int rc = EVP_CipherFinal_ex(&_ctx, output, &len);
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
int rc = EVP_CipherFinal_ex(_pContext, output, &len);
#else
int rc = EVP_CipherFinal_ex(&_context, output, &len);
#endif
if (rc == 0)
throwError();
return static_cast<std::streamsize>(len);
}
}

View File

@ -27,8 +27,8 @@ namespace Poco {
namespace Crypto {
CipherKeyImpl::CipherKeyImpl(const std::string& name,
const std::string& passphrase,
CipherKeyImpl::CipherKeyImpl(const std::string& name,
const std::string& passphrase,
const std::string& salt,
int iterationCount):
_pCipher(0),
@ -48,8 +48,8 @@ CipherKeyImpl::CipherKeyImpl(const std::string& name,
}
CipherKeyImpl::CipherKeyImpl(const std::string& name,
const ByteVec& key,
CipherKeyImpl::CipherKeyImpl(const std::string& name,
const ByteVec& key,
const ByteVec& iv):
_pCipher(0),
_name(name),
@ -64,7 +64,7 @@ CipherKeyImpl::CipherKeyImpl(const std::string& name,
throw Poco::NotFoundException("Cipher " + name + " was not found");
}
CipherKeyImpl::CipherKeyImpl(const std::string& name):
_pCipher(0),
_name(name),
@ -117,7 +117,7 @@ void CipherKeyImpl::generateKey()
getRandomBytes(vec, keySize());
setKey(vec);
getRandomBytes(vec, ivSize());
setIV(vec);
}
@ -126,11 +126,11 @@ void CipherKeyImpl::generateKey()
void CipherKeyImpl::getRandomBytes(ByteVec& vec, std::size_t count)
{
Poco::RandomInputStream random;
vec.clear();
vec.reserve(count);
for (std::size_t i = 0; i < count; ++i)
for (int i = 0; i < count; ++i)
vec.push_back(static_cast<unsigned char>(random.get()));
}

View File

@ -43,7 +43,7 @@ CryptoStreamBuf::CryptoStreamBuf(std::istream& istr, CryptoTransform* pTransform
_buffer(static_cast<std::size_t>(bufferSize))
{
poco_check_ptr (pTransform);
poco_assert ((size_t)bufferSize > 2 * pTransform->blockSize());
poco_assert (bufferSize > 2 * pTransform->blockSize());
}
@ -56,7 +56,7 @@ CryptoStreamBuf::CryptoStreamBuf(std::ostream& ostr, CryptoTransform* pTransform
_buffer(static_cast<std::size_t>(bufferSize))
{
poco_check_ptr (pTransform);
poco_assert ((size_t)bufferSize > 2 * pTransform->blockSize());
poco_assert (bufferSize > 2 * pTransform->blockSize());
}
@ -88,10 +88,10 @@ void CryptoStreamBuf::close()
// thrown.
std::ostream* pOstr = _pOstr;
_pOstr = 0;
// Finalize transformation.
std::streamsize n = _pTransform->finalize(_buffer.begin(), static_cast<std::streamsize>(_buffer.size()));
if (n > 0)
{
pOstr->write(reinterpret_cast<char*>(_buffer.begin()), n);
@ -159,7 +159,7 @@ int CryptoStreamBuf::writeToDevice(const char* buffer, std::streamsize length)
std::size_t maxChunkSize = _buffer.size()/2;
std::size_t count = 0;
while (count < (size_t)length)
while (count < length)
{
// Truncate chunk size so that the maximum output fits into _buffer.
std::size_t n = static_cast<std::size_t>(length) - count;

View File

@ -23,46 +23,51 @@ namespace Crypto {
DigestEngine::DigestEngine(const std::string& name):
_name(name)
_name(name),
_pContext(EVP_MD_CTX_create())
{
const EVP_MD* md = EVP_get_digestbyname(_name.c_str());
if (!md) throw Poco::NotFoundException(_name);
_ctx = EVP_MD_CTX_create();
EVP_DigestInit_ex(_ctx, md, NULL);
EVP_DigestInit_ex(_pContext, md, NULL);
}
DigestEngine::~DigestEngine()
{
EVP_MD_CTX_destroy(_ctx);
EVP_MD_CTX_destroy(_pContext);
}
int DigestEngine::nid() const
{
return EVP_MD_nid(_ctx->digest);
return EVP_MD_nid(EVP_MD_CTX_md(_pContext));
}
std::size_t DigestEngine::digestLength() const
{
return EVP_MD_CTX_size(_ctx);
return EVP_MD_CTX_size(_pContext);
}
void DigestEngine::reset()
{
EVP_MD_CTX_cleanup(_ctx);
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
EVP_MD_CTX_free(_pContext);
_pContext = EVP_MD_CTX_create();
#else
EVP_MD_CTX_cleanup(_pContext);
#endif
const EVP_MD* md = EVP_get_digestbyname(_name.c_str());
if (!md) throw Poco::NotFoundException(_name);
EVP_DigestInit_ex(_ctx, md, NULL);
EVP_DigestInit_ex(_pContext, md, NULL);
}
const Poco::DigestEngine::Digest& DigestEngine::digest()
{
_digest.clear();
unsigned len = EVP_MD_CTX_size(_ctx);
unsigned len = EVP_MD_CTX_size(_pContext);
_digest.resize(len);
EVP_DigestFinal_ex(_ctx, &_digest[0], &len);
EVP_DigestFinal_ex(_pContext, &_digest[0], &len);
reset();
return _digest;
}
@ -70,7 +75,7 @@ const Poco::DigestEngine::Digest& DigestEngine::digest()
void DigestEngine::updateImpl(const void* data, std::size_t length)
{
EVP_DigestUpdate(_ctx, data, length);
EVP_DigestUpdate(_pContext, data, length);
}

View File

@ -32,7 +32,7 @@ namespace
{
unsigned long err;
std::string msg;
while ((err = ERR_get_error()))
{
if (!msg.empty())
@ -68,7 +68,7 @@ namespace
public:
RSAEncryptImpl(const RSA* pRSA, RSAPaddingMode paddingMode);
~RSAEncryptImpl();
std::size_t blockSize() const;
std::size_t maxDataSize() const;
@ -77,7 +77,7 @@ namespace
std::streamsize inputLength,
unsigned char* output,
std::streamsize outputLength);
std::streamsize finalize(unsigned char* output, std::streamsize length);
private:
@ -156,7 +156,7 @@ namespace
output += n;
outputLength -= n;
_pos = 0;
}
else
{
@ -175,8 +175,8 @@ namespace
std::streamsize RSAEncryptImpl::finalize(unsigned char* output, std::streamsize length)
{
poco_assert ((size_t)length >= blockSize());
poco_assert ((size_t)_pos <= maxDataSize());
poco_assert (length >= blockSize());
poco_assert (_pos <= maxDataSize());
int rc = 0;
if (_pos > 0)
{
@ -192,7 +192,7 @@ namespace
public:
RSADecryptImpl(const RSA* pRSA, RSAPaddingMode paddingMode);
~RSADecryptImpl();
std::size_t blockSize() const;
std::streamsize transform(
@ -200,7 +200,7 @@ namespace
std::streamsize inputLength,
unsigned char* output,
std::streamsize outputLength);
std::streamsize finalize(
unsigned char* output,
std::streamsize length);
@ -241,7 +241,7 @@ namespace
unsigned char* output,
std::streamsize outputLength)
{
// always fill up the buffer before decrypting!
std::streamsize rsaSize = static_cast<std::streamsize>(blockSize());
poco_assert_dbg(_pos <= rsaSize);
@ -261,7 +261,7 @@ namespace
output += tmp;
outputLength -= tmp;
_pos = 0;
}
else
{
@ -280,7 +280,7 @@ namespace
std::streamsize RSADecryptImpl::finalize(unsigned char* output, std::streamsize length)
{
poco_assert ((size_t)length >= blockSize());
poco_assert (length >= blockSize());
int rc = 0;
if (_pos > 0)
{

View File

@ -207,19 +207,43 @@ int RSAKeyImpl::size() const
RSAKeyImpl::ByteVec RSAKeyImpl::modulus() const
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
const BIGNUM* n = 0;
const BIGNUM* e = 0;
const BIGNUM* d = 0;
RSA_get0_key(_pRSA, &n, &e, &d);
return convertToByteVec(n);
#else
return convertToByteVec(_pRSA->n);
#endif
}
RSAKeyImpl::ByteVec RSAKeyImpl::encryptionExponent() const
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
const BIGNUM* n = 0;
const BIGNUM* e = 0;
const BIGNUM* d = 0;
RSA_get0_key(_pRSA, &n, &e, &d);
return convertToByteVec(e);
#else
return convertToByteVec(_pRSA->e);
#endif
}
RSAKeyImpl::ByteVec RSAKeyImpl::decryptionExponent() const
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
const BIGNUM* n = 0;
const BIGNUM* e = 0;
const BIGNUM* d = 0;
RSA_get0_key(_pRSA, &n, &e, &d);
return convertToByteVec(d);
#else
return convertToByteVec(_pRSA->d);
#endif
}

View File

@ -59,7 +59,11 @@ X509Certificate::X509Certificate(X509* pCert, bool shared):
if (shared)
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
X509_up_ref(_pCert);
#else
_pCert->references++;
#endif
}
init();
@ -205,10 +209,10 @@ std::string X509Certificate::issuerName(NID nid) const
if (X509_NAME* issuer = X509_get_issuer_name(_pCert))
{
char buffer[NAME_BUFFER_SIZE];
X509_NAME_get_text_by_NID(issuer, nid, buffer, sizeof(buffer));
return std::string(buffer);
if (X509_NAME_get_text_by_NID(issuer, nid, buffer, sizeof(buffer)) >= 0)
return std::string(buffer);
}
else return std::string();
return std::string();
}
@ -217,10 +221,10 @@ std::string X509Certificate::subjectName(NID nid) const
if (X509_NAME* subj = X509_get_subject_name(_pCert))
{
char buffer[NAME_BUFFER_SIZE];
X509_NAME_get_text_by_NID(subj, nid, buffer, sizeof(buffer));
return std::string(buffer);
if (X509_NAME_get_text_by_NID(subj, nid, buffer, sizeof(buffer)) >= 0)
return std::string(buffer);
}
else return std::string();
return std::string();
}
@ -280,4 +284,12 @@ bool X509Certificate::issuedBy(const X509Certificate& issuerCertificate) const
}
bool X509Certificate::equals(const X509Certificate& otherCertificate) const
{
X509* pCert = const_cast<X509*>(_pCert);
X509* pOtherCert = const_cast<X509*>(otherCertificate.certificate());
return X509_cmp(pCert, pOtherCert) == 0;
}
} } // namespace Poco::Crypto

View File

@ -246,6 +246,11 @@ void CryptoTest::testCertificate()
// fails with recent OpenSSL versions:
// assert (cert.issuedBy(cert));
std::istringstream otherCertStream(APPINF_PEM);
X509Certificate otherCert(otherCertStream);
assert (cert.equals(otherCert));
}

View File

@ -21,6 +21,7 @@ include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libdivide)
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include)
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash)
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libbtrie/include)
include_directories (${ClickHouse_SOURCE_DIR}/libs/libdaemon/include)
include_directories (${ClickHouse_BINARY_DIR}/dbms/src)
@ -44,7 +45,6 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(dbms src/TableFunctions)
add_headers_and_sources(dbms src/Parsers)
add_headers_and_sources(dbms src/Analyzers)
add_headers_and_sources(dbms src/AggregateFunctions)
add_headers_and_sources(dbms src/Core)
add_headers_and_sources(dbms src/DataStreams)
add_headers_and_sources(dbms src/DataTypes)
@ -70,6 +70,33 @@ list (APPEND dbms_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp src/Functions/DataTypeTraits.cpp)
list (APPEND dbms_headers src/Functions/IFunction.h src/Functions/FunctionFactory.h src/Functions/DataTypeTraits.h)
list (APPEND dbms_sources
src/AggregateFunctions/AggregateFunctionFactory.cpp
src/AggregateFunctions/AggregateFunctionState.cpp
src/AggregateFunctions/AggregateFunctionFactory.cpp
src/AggregateFunctions/AggregateFunctionState.cpp
src/AggregateFunctions/AggregateFunctionArray.cpp
src/AggregateFunctions/AggregateFunctionNull.cpp
src/AggregateFunctions/AggregateFunctionForEach.cpp
src/AggregateFunctions/AggregateFunctionIf.cpp
src/AggregateFunctions/AggregateFunctionMerge.cpp
src/AggregateFunctions/AggregateFunctionCount.cpp
)
list (APPEND dbms_headers
src/AggregateFunctions/IAggregateFunction.h
src/AggregateFunctions/AggregateFunctionFactory.h
src/AggregateFunctions/AggregateFunctionState.h
src/AggregateFunctions/AggregateFunctionFactory.h
src/AggregateFunctions/AggregateFunctionState.h
src/AggregateFunctions/AggregateFunctionArray.h
src/AggregateFunctions/AggregateFunctionNull.h
src/AggregateFunctions/AggregateFunctionForEach.h
src/AggregateFunctions/AggregateFunctionIf.h
src/AggregateFunctions/AggregateFunctionMerge.h
src/AggregateFunctions/AggregateFunctionCount.h
)
list(REMOVE_ITEM dbms_sources
src/Client/Client.cpp
@ -127,6 +154,7 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
src/Dictionaries/FlatDictionary.cpp
src/Dictionaries/HashedDictionary.cpp
src/Dictionaries/CacheDictionary.cpp
src/Dictionaries/TrieDictionary.cpp
src/Dictionaries/RangeHashedDictionary.cpp
src/Dictionaries/ComplexKeyHashedDictionary.cpp
src/Dictionaries/ComplexKeyCacheDictionary.cpp
@ -159,6 +187,7 @@ target_link_libraries (dbms
${OPENSSL_CRYPTO_LIBRARY}
${Boost_SYSTEM_LIBRARY}
${Poco_Data_LIBRARY}
btrie
)
if (Poco_DataODBC_FOUND)

View File

@ -1,6 +1,6 @@
#This strings autochanged from release_lib.sh :
set(VERSION_DESCRIBE v1.1.54232-testing)
set(VERSION_REVISION 54232)
set(VERSION_DESCRIBE v1.1.54234-testing)
set(VERSION_REVISION 54234)
#===end of autochange
set (VERSION_MAJOR 1)

View File

@ -30,24 +30,6 @@ std::string trimRight(const std::string & in, const char * suffix)
}
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory);
void registerAggregateFunctionCount(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileTiming(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory & factory);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory & factory);
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory);
void registerAggregateFunctionsStatistics(AggregateFunctionFactory & factory);
void registerAggregateFunctionSum(AggregateFunctionFactory & factory);
void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory);
void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory & factory);
void registerAggregateFunctionDebug(AggregateFunctionFactory & factory);
AggregateFunctionPtr createAggregateFunctionArray(AggregateFunctionPtr & nested);
AggregateFunctionPtr createAggregateFunctionForEach(AggregateFunctionPtr & nested);
AggregateFunctionPtr createAggregateFunctionIf(AggregateFunctionPtr & nested);
@ -60,23 +42,6 @@ AggregateFunctionPtr createAggregateFunctionCountNotNull(const DataTypes & argum
AggregateFunctionFactory::AggregateFunctionFactory()
{
registerAggregateFunctionAvg(*this);
registerAggregateFunctionCount(*this);
registerAggregateFunctionGroupArray(*this);
registerAggregateFunctionGroupUniqArray(*this);
registerAggregateFunctionsQuantile(*this);
registerAggregateFunctionsQuantileExact(*this);
registerAggregateFunctionsQuantileExactWeighted(*this);
registerAggregateFunctionsQuantileDeterministic(*this);
registerAggregateFunctionsQuantileTiming(*this);
registerAggregateFunctionsQuantileTDigest(*this);
registerAggregateFunctionsSequenceMatch(*this);
registerAggregateFunctionsMinMaxAny(*this);
registerAggregateFunctionsStatistics(*this);
registerAggregateFunctionSum(*this);
registerAggregateFunctionsUniq(*this);
registerAggregateFunctionUniqUpTo(*this);
registerAggregateFunctionDebug(*this);
}

View File

@ -2,6 +2,7 @@
#include <unordered_map>
#include <AggregateFunctions/IAggregateFunction.h>
#include <common/singleton.h>
namespace DB
@ -14,7 +15,7 @@ using DataTypes = std::vector<DataTypePtr>;
/** Creates an aggregate function by name.
*/
class AggregateFunctionFactory final
class AggregateFunctionFactory final : public Singleton<AggregateFunctionFactory>
{
friend class StorageSystemFunctions;

View File

@ -11,7 +11,7 @@ namespace
AggregateFunctionPtr createAggregateFunctionGroupArray(const std::string & name, const DataTypes & argument_types)
{
if (argument_types.size() != 1)
throw Exception("Incorrect number of arguments for aggregate function " + name,
throw Exception("Incorrect number of arguments for aggregate function " + name + ", should be 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionGroupArrayNumeric>(*argument_types[0]));

View File

@ -100,7 +100,7 @@ public:
/// General case (ineffective). NOTE You can also implement a special case for strings.
/// General case (inefficient). NOTE You can also implement a special case for strings.
struct AggregateFunctionGroupArrayDataGeneric
{
Array value; /// TODO Add MemoryTracker
@ -109,7 +109,7 @@ struct AggregateFunctionGroupArrayDataGeneric
/// Puts all values to an array, general case. Implemented inefficiently.
class AggregateFunctionGroupArrayGeneric final
: public IUnaryAggregateFunction<AggregateFunctionGroupArrayDataGeneric, AggregateFunctionGroupArrayGeneric>
: public IUnaryAggregateFunction<AggregateFunctionGroupArrayDataGeneric, AggregateFunctionGroupArrayGeneric>
{
private:
DataTypePtr type;

View File

@ -0,0 +1,27 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h>
#include <AggregateFunctions/Helpers.h>

namespace DB
{

namespace
{

/// Validates the argument count and creates the generic (field-based) implementation.
/// The function always takes exactly two arguments: the value and the position.
AggregateFunctionPtr createAggregateFunctionGroupArrayInsertAt(const std::string & name, const DataTypes & argument_types)
{
    const size_t required_arguments = 2;

    if (required_arguments != argument_types.size())
        throw Exception("Incorrect number of arguments for aggregate function " + name + ", should be 2",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    return std::make_shared<AggregateFunctionGroupArrayInsertAtGeneric>();
}

}

/// Registers "groupArrayInsertAt" in the aggregate function factory.
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory & factory)
{
    factory.registerFunction("groupArrayInsertAt", createAggregateFunctionGroupArrayInsertAt);
}

}
View File

@ -0,0 +1,210 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnVector.h>
#include <Core/FieldVisitors.h>
#include <Interpreters/convertFieldToType.h>
#include <AggregateFunctions/IBinaryAggregateFunction.h>
#define AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE 0xFFFFFF
namespace DB
{
namespace ErrorCodes
{
extern const int TOO_LARGE_ARRAY_SIZE;
extern const int CANNOT_CONVERT_TYPE;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** Aggregate function, that takes two arguments: value and position,
* and as a result, builds an array with values are located at corresponding positions.
*
* If more than one value was inserted to single position, the any value (first in case of single thread) is stored.
* If no values was inserted to some position, then default value will be substituted.
*
* Aggregate function also accept optional parameters:
* - default value to substitute;
* - length to resize result arrays (if you want to have results of same length for all aggregation keys);
*
* If you want to pass length, default value should be also given.
*/
/// Generic case (inefficient).
/// Aggregation state: the array built so far. Positions that have not yet
/// received a value hold Null (see addImpl/merge, which test isNull()).
struct AggregateFunctionGroupArrayInsertAtDataGeneric
{
    Array value;    /// TODO Add MemoryTracker
};
/// Builds an array by writing each aggregated value at the position given by the
/// second argument. First value written to a position wins; unfilled positions are
/// padded with `default_value` when the result is produced.
class AggregateFunctionGroupArrayInsertAtGeneric final
    : public IBinaryAggregateFunction<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>
{
private:
    DataTypePtr type;               /// Type of the value argument; also the result array's element type.
    Field default_value;            /// Substituted for positions that received no value (parameter 1, or type default).
    size_t length_to_resize = 0;    /// zero means - do not do resizing.

public:
    String getName() const override { return "groupArrayInsertAt"; }

    /// Result is Array of the value argument's type.
    DataTypePtr getReturnType() const override
    {
        return std::make_shared<DataTypeArray>(type);
    }

    /// Checks argument types and fixes up `default_value`:
    /// if no default was given as a parameter, the type's own default is used;
    /// otherwise the given default is converted to the value type (error if impossible).
    void setArgumentsImpl(const DataTypes & arguments)
    {
        if (!arguments.at(1)->behavesAsNumber())    /// TODO filter out floating point types.
            throw Exception("Second argument of aggregate function " + getName() + " must be integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        type = arguments.front();

        if (default_value.isNull())
            default_value = type->getDefault();
        else
        {
            Field converted = convertFieldToType(default_value, *type);
            if (converted.isNull())
                throw Exception("Cannot convert parameter of aggregate function " + getName() + " (" + applyVisitor(FieldVisitorToString(), default_value) + ")"
                    " to type " + type->getName() + " to be used as default value in array", ErrorCodes::CANNOT_CONVERT_TYPE);
            default_value = converted;
        }
    }

    /// Optional parameters: [default_value[, result_length]].
    /// Note: conversion of the default to the value type happens in setArgumentsImpl.
    void setParameters(const Array & params) override
    {
        if (params.empty())
            return;

        if (params.size() > 2)
            throw Exception("Aggregate function " + getName() + " requires at most two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        default_value = params[0];

        if (params.size() == 2)
        {
            length_to_resize = applyVisitor(FieldVisitorConvertToNumber<size_t>(), params[1]);
        }
    }

    /// Inserts one (value, position) pair into the state array.
    /// Values at positions beyond `length_to_resize` (when set) are silently dropped.
    void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_position, size_t row_num, Arena *) const
    {
        /// TODO Do positions need to be 1-based for this function?
        size_t position = column_position.get64(row_num);

        /// If position is larger than size to which array will be cutted - simply ignore value.
        if (length_to_resize && position >= length_to_resize)
            return;

        if (position >= AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
            throw Exception("Too large array size: position argument (" + toString(position) + ")"
                " is greater or equals to limit (" + toString(AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE) + ")",
                ErrorCodes::TOO_LARGE_ARRAY_SIZE);

        Array & arr = data(place).value;

        if (arr.size() <= position)
            arr.resize(position + 1);
        else if (!arr[position].isNull())
            return; /// Element was already inserted to the specified position.

        column_value.get(row_num, arr[position]);
    }

    /// Merge keeps the left-hand value when a position is filled in both states;
    /// Null (unfilled) slots on the left take the right-hand value.
    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        Array & arr_lhs = data(place).value;
        const Array & arr_rhs = data(rhs).value;

        if (arr_lhs.size() < arr_rhs.size())
            arr_lhs.resize(arr_rhs.size());

        for (size_t i = 0, size = arr_rhs.size(); i < size; ++i)
            if (arr_lhs[i].isNull() && !arr_rhs[i].isNull())
                arr_lhs[i] = arr_rhs[i];
    }

    /// Wire format: varuint length, then for each slot a 1-byte "is null" flag
    /// followed by the binary value when the flag is 0.
    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
    {
        const Array & arr = data(place).value;
        size_t size = arr.size();
        writeVarUInt(size, buf);

        for (const Field & elem : arr)
        {
            if (elem.isNull())
            {
                writeBinary(UInt8(1), buf);
            }
            else
            {
                writeBinary(UInt8(0), buf);
                type->serializeBinary(elem, buf);
            }
        }
    }

    /// Inverse of serialize. Size is validated against the global limit so a
    /// corrupt stream cannot force an enormous allocation.
    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        if (size > AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
            throw Exception("Too large array size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);

        Array & arr = data(place).value;
        arr.resize(size);

        for (size_t i = 0; i < size; ++i)
        {
            UInt8 is_null = 0;
            readBinary(is_null, buf);
            if (!is_null)
                type->deserializeBinary(arr[i], buf);
        }
    }

    /// Emits the final array: Null slots become `default_value`, and the array is
    /// padded up to `length_to_resize` when that parameter was given.
    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
    {
        ColumnArray & to_array = static_cast<ColumnArray &>(to);
        IColumn & to_data = to_array.getData();
        ColumnArray::Offsets_t & to_offsets = to_array.getOffsets();

        const Array & arr = data(place).value;

        for (const Field & elem : arr)
        {
            if (!elem.isNull())
                to_data.insert(elem);
            else
                to_data.insert(default_value);
        }

        size_t result_array_size = length_to_resize ? length_to_resize : arr.size();

        /// Pad array if need.
        for (size_t i = arr.size(); i < result_array_size; ++i)
            to_data.insert(default_value);

        to_offsets.push_back((to_offsets.empty() ? 0 : to_offsets.back()) + result_array_size);
    }
};
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE
}

View File

@ -34,6 +34,11 @@ public:
return nested_func->getReturnType();
}
AggregateFunctionPtr getNestedFunction() const
{
return nested_func_owner;
}
void setArguments(const DataTypes & arguments) override
{
if (arguments.size() != 1)

View File

@ -286,7 +286,7 @@ private:
ParserString dot_p(".");
ParserNumber number_p;
auto pos = pattern.data();
const char * pos = pattern.data();
const auto begin = pos;
const auto end = pos + pattern.size();

View File

@ -1,8 +1,34 @@
#include <AggregateFunctions/AggregateFunctionState.h>
#include <AggregateFunctions/AggregateFunctionMerge.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
DataTypePtr AggregateFunctionState::getReturnType() const
{
auto ptr = std::make_shared<DataTypeAggregateFunction>(nested_func_owner, arguments, params);
/// Special case: it is -MergeState combinator
if (typeid_cast<const AggregateFunctionMerge *>(ptr->getFunction().get()))
{
if (arguments.size() != 1)
throw Exception("Combinator -MergeState expects only one argument", ErrorCodes::BAD_ARGUMENTS);
if (!typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get()))
throw Exception("Combinator -MergeState expects argument with AggregateFunction type", ErrorCodes::BAD_ARGUMENTS);
return arguments[0];
}
return ptr;
}
AggregateFunctionPtr createAggregateFunctionState(AggregateFunctionPtr & nested)
{
return std::make_shared<AggregateFunctionState>(nested);

View File

@ -1,3 +1,4 @@
#pragma once
#include <DataTypes/DataTypeAggregateFunction.h>
@ -30,10 +31,7 @@ public:
return nested_func->getName() + "State";
}
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeAggregateFunction>(nested_func_owner, arguments, params);
}
DataTypePtr getReturnType() const override;
void setArguments(const DataTypes & arguments_) override
{

View File

@ -0,0 +1,70 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionTopK.h>
#include <AggregateFunctions/Helpers.h>

namespace DB
{

namespace
{

/// Substitute return type for Date and DateTime
class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::FieldType>
{
    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()); }
};

class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateTime::FieldType>
{
    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()); }
};

/// Fallback for argument types that are not plain numbers: Date/DateTime get
/// specialized result types; everything else goes through the generic
/// (serialized-representation) implementation, choosing the "plain" variant
/// when the column stores elements contiguously.
static IAggregateFunction * createWithExtraTypes(const IDataType & argument_type)
{
    if (typeid_cast<const DataTypeDate *>(&argument_type)) return new AggregateFunctionTopKDate;
    else if (typeid_cast<const DataTypeDateTime *>(&argument_type)) return new AggregateFunctionTopKDateTime;
    else
    {
        /// Check that we can use plain version of AggregateFunctionTopKGeneric
        if (typeid_cast<const DataTypeString*>(&argument_type) || typeid_cast<const DataTypeFixedString*>(&argument_type))
            return new AggregateFunctionTopKGeneric<true>;

        auto * array_type = typeid_cast<const DataTypeArray *>(&argument_type);
        if (array_type)
        {
            auto nested_type = array_type->getNestedType();
            if (nested_type->isNumeric() || typeid_cast<DataTypeFixedString *>(nested_type.get()))
                return new AggregateFunctionTopKGeneric<true>;
        }

        return new AggregateFunctionTopKGeneric<false>;
    }
}

/// Creates topK for the single argument type: numeric types first, then the
/// extra/generic dispatch above.
AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const DataTypes & argument_types)
{
    if (argument_types.size() != 1)
        throw Exception("Incorrect number of arguments for aggregate function " + name,
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK>(*argument_types[0]));

    if (!res)
        res = AggregateFunctionPtr(createWithExtraTypes(*argument_types[0]));

    if (!res)
        throw Exception("Illegal type " + argument_types[0]->getName() +
            " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    return res;
}

}

/// Registers "topK" in the aggregate function factory.
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
{
    factory.registerFunction("topK", createAggregateFunctionTopK);
}

}

View File

@ -0,0 +1,261 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnArray.h>
#include <Common/SpaceSaving.h>
#include <Core/FieldVisitors.h>
#include <AggregateFunctions/AggregateFunctionGroupArray.h>
namespace DB
{
// Allow NxK more space before calculating top K to increase accuracy
#define TOP_K_DEFAULT 10
#define TOP_K_LOAD_FACTOR 3
#define TOP_K_MAX_SIZE 0xFFFFFF
/// Aggregation state for numeric topK: a SpaceSaving sketch keyed by the value
/// itself, with a small stack-allocated hash table to avoid heap allocation for
/// small states.
template <typename T>
struct AggregateFunctionTopKData
{
    using Set = SpaceSaving
    <
        T,
        T,
        HashCRC32<T>,
        HashTableGrower<4>,
        HashTableAllocatorWithStackMemory<sizeof(T) * (1 << 4)>
    >;
    Set value;
};
/// topK for numeric argument types. Keeps an approximate set of the most
/// frequent values using the SpaceSaving sketch; the sketch capacity is
/// TOP_K_LOAD_FACTOR times the requested K to improve accuracy.
template <typename T>
class AggregateFunctionTopK
    : public IUnaryAggregateFunction<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T>>
{
private:
    using State = AggregateFunctionTopKData<T>;

    size_t threshold = TOP_K_DEFAULT;                   /// Requested K (number of results).
    size_t reserved = TOP_K_LOAD_FACTOR * threshold;    /// Sketch capacity actually maintained.

public:
    String getName() const override { return "topK"; }

    DataTypePtr getReturnType() const override
    {
        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>());
    }

    /// Nothing to do: the element type is fixed by the template parameter.
    void setArgument(const DataTypePtr & argument)
    {
    }

    /// Single parameter: K, limited by TOP_K_MAX_SIZE.
    void setParameters(const Array & params) override
    {
        if (params.size() != 1)
            throw Exception("Aggregate function " + getName() + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        std::size_t k = applyVisitor(FieldVisitorConvertToNumber<size_t>(), params[0]);

        if (k > TOP_K_MAX_SIZE)
            throw Exception("Too large parameter for aggregate function " + getName() + ". Maximum: " + toString(TOP_K_MAX_SIZE),
                ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        threshold = k;
        reserved = TOP_K_LOAD_FACTOR * k;
    }

    void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const
    {
        auto & set = this->data(place).value;
        /// Lazily size the sketch on first use (or after parameter change).
        if (set.capacity() != reserved)
            set.resize(reserved);
        set.insert(static_cast<const ColumnVector<T> &>(column).getData()[row_num]);
    }

    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).value.merge(this->data(rhs).value);
    }

    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
    {
        this->data(place).value.write(buf);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
    {
        auto & set = this->data(place).value;
        set.resize(reserved);
        set.read(buf);
    }

    /// Appends the top `threshold` keys (at most) as one array row of the result column.
    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
    {
        ColumnArray & arr_to = static_cast<ColumnArray &>(to);
        ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets();

        const typename State::Set & set = this->data(place).value;
        auto resultVec = set.topK(threshold);
        size_t size = resultVec.size();

        offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + size);

        typename ColumnVector<T>::Container_t & data_to = static_cast<ColumnVector<T> &>(arr_to.getData()).getData();
        size_t old_size = data_to.size();
        data_to.resize(old_size + size);

        size_t i = 0;
        for (auto it = resultVec.begin(); it != resultVec.end(); ++it, ++i)
            data_to[old_size + i] = it->key;
    }
};
/// Generic implementation, it uses serialized representation as object descriptor.
/// The SpaceSaving sketch is keyed by the serialized bytes (owned std::string,
/// looked up via StringRef), so any column type can be aggregated.
struct AggregateFunctionTopKGenericData
{
    using Set = SpaceSaving
    <
        std::string,
        StringRef,
        StringRefHash,
        HashTableGrower<4>,
        HashTableAllocatorWithStackMemory<sizeof(StringRef) * (1 << 4)>
    >;

    Set value;
};
/** Template parameter with true value should be used for columns that store their elements in memory continuously.
 *  For such columns topK() can be implemented more efficiently (especially for small numeric arrays).
 */
template <bool is_plain_column = false>
class AggregateFunctionTopKGeneric : public IUnaryAggregateFunction<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column>>
{
private:
    using State = AggregateFunctionTopKGenericData;

    DataTypePtr input_data_type;                        /// Element type of the result array (the argument's type).
    size_t threshold = TOP_K_DEFAULT;                   /// Requested K (number of results).
    size_t reserved = TOP_K_LOAD_FACTOR * threshold;    /// Sketch capacity actually maintained.

    /// Converts a stored key (serialized bytes, or plain data for plain columns) back into the result column.
    static void deserializeAndInsert(StringRef str, IColumn & data_to);

public:
    String getName() const override { return "topK"; }

    void setArgument(const DataTypePtr & argument)
    {
        input_data_type = argument;
    }

    /// Single parameter: K, limited by TOP_K_MAX_SIZE.
    void setParameters(const Array & params) override
    {
        if (params.size() != 1)
            throw Exception("Aggregate function " + getName() + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        size_t k = applyVisitor(FieldVisitorConvertToNumber<size_t>(), params[0]);

        if (k > TOP_K_MAX_SIZE)
            throw Exception("Too large parameter for aggregate function " + getName() + ". Maximum: " + toString(TOP_K_MAX_SIZE),
                ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        threshold = k;
        reserved = TOP_K_LOAD_FACTOR * k;
    }

    DataTypePtr getReturnType() const override
    {
        return std::make_shared<DataTypeArray>(input_data_type->clone());
    }

    bool allocatesMemoryInArena() const override
    {
        return true;
    }

    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
    {
        this->data(place).value.write(buf);
    }

    /// Wire format (written by SpaceSaving::write): varuint element count, then for
    /// each element its key string followed by varuint count and varuint error.
    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
    {
        auto & set = this->data(place).value;
        set.resize(reserved);

        size_t size = 0;
        readVarUInt(size, buf);
        for (size_t i = 0; i < size; ++i)
        {
            std::string key_string;
            readStringBinary(key_string, buf);

            /// Per-element counter and overestimation error of the sketch entry.
            /// (Named distinctly: the original code shadowed the loop bound here.)
            UInt64 count;
            UInt64 error;
            readVarUInt(count, buf);
            readVarUInt(error, buf);
            set.insert(key_string, count, error);
        }
    }

    void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena * arena) const
    {
        auto & set = this->data(place).value;
        /// Lazily size the sketch on first use (or after parameter change).
        if (set.capacity() != reserved)
            set.resize(reserved);

        StringRef str_serialized = column.getDataAt(row_num);
        set.insert(str_serialized.toString());
    }

    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).value.merge(this->data(rhs).value);
    }

    /// Appends the top `threshold` keys (at most) as one array row of the result column.
    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
    {
        ColumnArray & arr_to = static_cast<ColumnArray &>(to);
        ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets();
        IColumn & data_to = arr_to.getData();

        auto resultVec = this->data(place).value.topK(threshold);
        offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + resultVec.size());

        for (auto & elem : resultVec)
        {
            deserializeAndInsert(elem.key, data_to);
        }
    }
};
/// Template parameter == false: the stored key is a serialized representation
/// of the value, so restore it through the column's own deserialization.
/// NOTE(review): assumes the key was produced by the column's matching
/// serialization path — confirm against the insertion side.
template <>
inline void AggregateFunctionTopKGeneric<false>::deserializeAndInsert(StringRef str, IColumn & data_to)
{
data_to.deserializeAndInsertFromArena(str.data);
}
/// Template parameter == true (plain column): the key bytes are the raw value
/// itself, so they can be inserted into the column directly.
template <>
inline void AggregateFunctionTopKGeneric<true>::deserializeAndInsert(StringRef str, IColumn & data_to)
{
data_to.insertData(str.data, str.size);
}
#undef TOP_K_DEFAULT
#undef TOP_K_MAX_SIZE
#undef TOP_K_LOAD_FACTOR
}

View File

@ -86,12 +86,11 @@ struct AggregateFunctionUniqExactData
using Key = T;
/// When creating, the hash table must be small.
typedef HashSet<
using Set = HashSet<
Key,
HashCRC32<Key>,
HashTableGrower<4>,
HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 4)>
> Set;
HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 4)>>;
Set set;
@ -105,12 +104,11 @@ struct AggregateFunctionUniqExactData<String>
using Key = UInt128;
/// When creating, the hash table must be small.
typedef HashSet<
using Set = HashSet<
Key,
UInt128TrivialHash,
HashTableGrower<3>,
HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 3)>
> Set;
HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 3)>>;
Set set;

View File

@ -0,0 +1,27 @@
# Glob all .h/.cpp files of this directory into
# clickhouse_aggregate_functions_{headers,sources}.
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(clickhouse_aggregate_functions .)
# Exclude these translation units from the library.
# NOTE(review): presumably they are compiled into another target — confirm.
list(REMOVE_ITEM clickhouse_aggregate_functions_sources
AggregateFunctionFactory.cpp
AggregateFunctionState.cpp
AggregateFunctionArray.cpp
AggregateFunctionNull.cpp
AggregateFunctionForEach.cpp
AggregateFunctionIf.cpp
AggregateFunctionMerge.cpp
AggregateFunctionCount.cpp
)
# Keep the excluded headers out of the target's file list as well.
list(REMOVE_ITEM clickhouse_aggregate_functions_headers
AggregateFunction.h
AggregateFunctionFactory.h
AggregateFunctionState.h
AggregateFunctionArray.h
AggregateFunctionNull.h
AggregateFunctionForEach.h
AggregateFunctionIf.h
AggregateFunctionMerge.h
AggregateFunctionCount.h
)
add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_sources})

View File

@ -19,8 +19,7 @@ public:
if (arguments.size() != 2)
throw Exception{
"Passed " + toString(arguments.size()) + " arguments to binary aggregate function " + this->getName(),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH
};
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
getDerived().setArgumentsImpl(arguments);
}

View File

@ -0,0 +1,54 @@
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
/// Forward declarations: each family of aggregate functions registers itself
/// in the factory from its own translation unit.
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory);
void registerAggregateFunctionCount(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileTiming(AggregateFunctionFactory & factory);
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory & factory);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory & factory);
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory);
void registerAggregateFunctionsStatistics(AggregateFunctionFactory & factory);
void registerAggregateFunctionSum(AggregateFunctionFactory & factory);
void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory);
void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory & factory);
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory);
void registerAggregateFunctionDebug(AggregateFunctionFactory & factory);
/// Registers all built-in aggregate functions in the singleton factory.
/// Call during program initialization, before the factory is used to
/// look up or create aggregate functions.
void registerAggregateFunctions()
{
auto & factory = AggregateFunctionFactory::instance();
registerAggregateFunctionAvg(factory);
registerAggregateFunctionCount(factory);
registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionsQuantile(factory);
registerAggregateFunctionsQuantileExact(factory);
registerAggregateFunctionsQuantileExactWeighted(factory);
registerAggregateFunctionsQuantileDeterministic(factory);
registerAggregateFunctionsQuantileTiming(factory);
registerAggregateFunctionsQuantileTDigest(factory);
registerAggregateFunctionsSequenceMatch(factory);
registerAggregateFunctionsMinMaxAny(factory);
registerAggregateFunctionsStatistics(factory);
registerAggregateFunctionSum(factory);
registerAggregateFunctionsUniq(factory);
registerAggregateFunctionUniqUpTo(factory);
registerAggregateFunctionTopK(factory);
registerAggregateFunctionDebug(factory);
}
}

View File

@ -0,0 +1,8 @@
#pragma once
namespace DB
{
/// Registers all built-in aggregate functions in AggregateFunctionFactory::instance().
void registerAggregateFunctions();
}

View File

@ -165,7 +165,7 @@ void processFunction(const String & column_name, ASTPtr & ast, TypeAndConstantIn
}
/// Aggregate function.
if (AggregateFunctionPtr aggregate_function_ptr = context.getAggregateFunctionFactory().tryGet(function->name, argument_types))
if (AggregateFunctionPtr aggregate_function_ptr = AggregateFunctionFactory::instance().tryGet(function->name, argument_types))
{
/// NOTE Not considering aggregate function parameters in type inference. It could become needed in future.
/// Note that aggregate function could never be constant expression.

View File

@ -2,22 +2,22 @@ add_executable(collect_aliases collect_aliases.cpp)
target_link_libraries(collect_aliases dbms)
add_executable(collect_tables collect_tables.cpp)
target_link_libraries(collect_tables dbms storages_system)
target_link_libraries(collect_tables dbms clickhouse_storages_system)
add_executable(analyze_columns analyze_columns.cpp)
target_link_libraries(analyze_columns dbms storages_system)
target_link_libraries(analyze_columns dbms clickhouse_storages_system)
add_executable(type_and_constant_inference type_and_constant_inference.cpp)
target_link_libraries(type_and_constant_inference storages_system clickhouse_functions dbms)
target_link_libraries(type_and_constant_inference clickhouse_storages_system clickhouse_functions dbms)
add_executable(analyze_result_of_query analyze_result_of_query.cpp)
target_link_libraries(analyze_result_of_query dbms storages_system)
target_link_libraries(analyze_result_of_query dbms clickhouse_storages_system)
add_executable(translate_positional_arguments translate_positional_arguments.cpp)
target_link_libraries(translate_positional_arguments dbms)
add_executable(optimize_group_order_limit_by optimize_group_order_limit_by.cpp)
target_link_libraries(optimize_group_order_limit_by dbms storages_system)
target_link_libraries(optimize_group_order_limit_by dbms clickhouse_storages_system)
add_executable(analyze_lambdas analyze_lambdas.cpp)
target_link_libraries(analyze_lambdas dbms)

View File

@ -1,5 +1,5 @@
add_library (clickhouse-client Client.cpp)
target_link_libraries (clickhouse-client dbms ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries (clickhouse-client dbms clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY})
install (FILES config.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client COMPONENT clickhouse-client)
add_library (clickhouse-benchmark Benchmark.cpp)

View File

@ -46,6 +46,7 @@
#include <Common/NetException.h>
#include <common/readline_use.h>
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
/// http://en.wikipedia.org/wiki/ANSI_escape_code
@ -191,6 +192,7 @@ private:
#undef EXTRACT_LIMIT
registerFunctions();
registerAggregateFunctions();
}

View File

@ -15,7 +15,7 @@
#include <unistd.h>
/** Небольшие обёртки для асинхронного ввода-вывода.
/** Small wrappers for asynchronous I/O.
*/

View File

@ -22,15 +22,15 @@ namespace ErrorCodes
}
/** Многие современные аллокаторы (например, tcmalloc) не умеют делать mremap для realloc,
* даже в случае достаточно больших кусков памяти.
* Хотя это позволяет увеличить производительность и уменьшить потребление памяти во время realloc-а.
* Чтобы это исправить, делаем mremap самостоятельно, если кусок памяти достаточно большой.
* Порог (64 МБ) выбран достаточно большим, так как изменение адресного пространства
* довольно сильно тормозит, особенно в случае наличия большого количества потоков.
* Рассчитываем, что набор операций mmap/что-то сделать/mremap может выполняться всего лишь около 1000 раз в секунду.
/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
* even in case of large enough chunks of memory.
* Although this allows you to increase performance and reduce memory consumption during realloc.
* To fix this, we do mremap manually if the chunk of memory is large enough.
* The threshold (64 MB) is chosen quite large, since changing the address space is
* very slow, especially in the case of a large number of threads.
* We expect that the set of operations mmap/something to do/mremap can only be performed about 1000 times per second.
*
* PS. Также это требуется, потому что tcmalloc не может выделить кусок памяти больше 16 GB.
* PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
*/
static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20);
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;

View File

@ -3,13 +3,13 @@
#include <string.h>
/** Отвечает за выделение/освобождение памяти. Используется, например, в PODArray, Arena.
* Также используется в хэш-таблицах.
* Интерфейс отличается от std::allocator
* - наличием метода realloc, который для больших кусков памяти использует mremap;
* - передачей размера в метод free;
* - наличием аргумента alignment;
* - возможностью зануления памяти (используется в хэш-таблицах);
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables.
* The interface is different from std::allocator
* - the presence of the method realloc, which for large chunks of memory uses mremap;
* - passing the size into the `free` method;
* - by the presence of the `alignment` argument;
* - the possibility of zeroing memory (used in hash tables);
*/
template <bool clear_memory_>
class Allocator
@ -38,9 +38,9 @@ protected:
};
/** При использовании AllocatorWithStackMemory, размещённом на стеке,
* GCC 4.9 ошибочно делает предположение, что мы можем вызывать free от указателя на стек.
* На самом деле, комбинация условий внутри AllocatorWithStackMemory этого не допускает.
/** When using AllocatorWithStackMemory, located on the stack,
* GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
* In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
*/
#if !__clang__
#pragma GCC diagnostic push

View File

@ -8,40 +8,40 @@ namespace DB
{
/** В отличие от Arena, позволяет освобождать (для последующего повторного использования)
* выделенные ранее (не обязательно только что) куски памяти.
* Для этого, запрашиваемый размер округляется вверх до степени двух
* (или до 8, если меньше; или используется выделение памяти вне Arena, если размер больше 65536).
* При освобождении памяти, для каждого размера (всего 14 вариантов: 8, 16... 65536),
* поддерживается односвязный список свободных блоков.
* При аллокации, мы берём голову списка свободных блоков,
* либо, если список пуст - выделяем новый блок, используя Arena.
/** Unlike Arena, allows you to release (for later re-use)
* previously allocated (not necessarily just recently) chunks of memory.
* For this, the requested size is rounded up to the power of two
* (or up to 8, if less, or using memory allocation outside Arena if the size is greater than 65536).
* When freeing memory, for each size (14 options in all: 8, 16 ... 65536),
* a singly-linked list of free blocks is maintained.
* When allocating, we take the head of the list of free blocks,
* or, if the list is empty - allocate a new block using Arena.
*/
class ArenaWithFreeLists : private Allocator<false>, private boost::noncopyable
{
private:
/// Если блок свободен, то в его начале хранится указатель на следующий свободный блок, либо nullptr, если свободных блоков больше нет.
/// Если блок используется, то в нём хранятся какие-то данные.
/// If the block is free, then the pointer to the next free block is stored at its beginning, or nullptr, if there are no more free blocks.
/// If the block is used, then some data is stored in it.
union Block
{
Block * next;
char data[0];
};
/// Максимальный размер куска памяти, который выделяется с помощью Arena. Иначе используем Allocator напрямую.
/// The maximum size of a piece of memory that is allocated with Arena. Otherwise, we use Allocator directly.
static constexpr size_t max_fixed_block_size = 65536;
/// Получить индекс в массиве freelist-ов для заданного размера.
/// Get the index in the freelist array for the specified size.
static size_t findFreeListIndex(const size_t size)
{
return size <= 8 ? 2 : bitScanReverse(size - 1);
}
/// Для выделения блоков не слишком большого размера используется Arena.
/// Arena is used to allocate blocks that are not too large.
Arena pool;
/// Списки свободных блоков. Каждый элемент указывает на голову соответствующего списка, либо равен nullptr.
/// Первые два элемента не используются, а предназначены для упрощения арифметики.
/// Lists of free blocks. Each element points to the head of the corresponding list, or is nullptr.
/// The first two elements are not used, but are intended to simplify arithmetic.
Block * free_lists[16] {};
public:
@ -60,10 +60,10 @@ public:
/// find list of required size
const auto list_idx = findFreeListIndex(size);
/// Если есть свободный блок.
/// If there is a free block.
if (auto & free_block_ptr = free_lists[list_idx])
{
/// Возьмём его. И поменяем голову списка на следующий элемент списка.
/// Let's take it. And change the head of the list to the next item in the list.
const auto res = free_block_ptr->data;
free_block_ptr = free_block_ptr->next;
return res;
@ -81,14 +81,14 @@ public:
/// find list of required size
const auto list_idx = findFreeListIndex(size);
/// Вставим освобождённый блок в голову списка.
/// Insert the released block into the head of the list.
auto & free_block_ptr = free_lists[list_idx];
const auto old_head = free_block_ptr;
free_block_ptr = reinterpret_cast<Block *>(ptr);
free_block_ptr->next = old_head;
}
/// Размер выделенного пула в байтах
/// Size of the allocated pool in bytes
size_t size() const
{
return pool.size();

View File

@ -8,30 +8,30 @@
namespace DB
{
/** Массив (почти) неизменяемого размера:
* размер задаётся в конструкторе;
* метод resize приводит к удалению старых данных и нужен лишь для того,
* чтобы можно было сначала создать пустой объект, используя конструктор по-умолчанию,
* а потом уже определиться с размером.
/** An array of (almost) unchangable size:
* the size is specified in the constructor;
* the `resize` method removes old data, and is needed only
* so that you can first create an empty object using the default constructor,
* and then decide on the size.
*
* Есть возможность не инициализировать элементы по-умолчанию, а создавать их inplace.
* Деструкторы элементов вызываются автоматически.
* There is a possibility to not initialize elements by default, but create them inplace.
* Member destructors are called automatically.
*
* sizeof равен размеру одного указателя.
* `sizeof` is equal to the size of one pointer.
*
* Не exception-safe.
* Копирование не поддерживается. Перемещение опустошает исходный объект.
* То есть, использовать этот массив во многих случаях неудобно.
* Not exception-safe.
* Copying is not supported. Moving empties the original object.
* That is, it is inconvenient to use this array in many cases.
*
* Предназначен для ситуаций, в которых создаётся много массивов одинакового небольшого размера,
* но при этом размер не известен во время компиляции.
* Также даёт существенное преимущество в случаях, когда важно, чтобы sizeof был минимальным.
* Например, если массивы кладутся в open-addressing хэш-таблицу с inplace хранением значений (как HashMap)
* Designed for situations in which many arrays of the same small size are created,
* but the size is not known at compile time.
* Also gives a significant advantage in cases where it is important that `sizeof` is minimal.
* For example, if arrays are put in an open-addressing hash table with inplace storage of values (like HashMap)
*
* В этом случае, по сравнению с std::vector:
* - для массивов размером в 1 элемент - преимущество примерно в 2 раза;
* - для массивов размером в 5 элементов - преимущество примерно в 1.5 раза
* (в качестве T использовались DB::Field, содержащие UInt64 и String);
* In this case, compared to std::vector:
* - for arrays of 1 element size - an advantage of about 2 times;
* - for arrays of 5 elements - an advantage of about 1.5 times
* (DB::Field, containing UInt64 and String, used as T);
*/
const size_t empty_auto_array_helper = 0;
@ -42,7 +42,7 @@ template <typename T>
class AutoArray
{
public:
/// Для отложенного создания.
/// For deferred creation.
AutoArray()
{
setEmpty();
@ -53,16 +53,16 @@ public:
init(size_, false);
}
/** Не будут вызваны конструкторы по-умолчанию для элементов.
* В этом случае, вы должны вставить все элементы с помощью функции place и placement new,
* так как для них потом будут вызваны деструкторы.
/** The default constructors for elements will not be called.
* In this case, you must insert all elements using the `place` and `placement new` functions,
* since destructors are then called for them.
*/
AutoArray(size_t size_, const DontInitElemsTag & tag)
{
init(size_, true);
}
/** Инициализирует все элементы копирующим конструктором с параметром value.
/** Initializes all elements with a copy constructor with the `value` parameter.
*/
AutoArray(size_t size_, const T & value)
{
@ -74,7 +74,7 @@ public:
}
}
/** resize удаляет все существующие элементы.
/** `resize` removes all existing items.
*/
void resize(size_t size_, bool dont_init_elems = false)
{
@ -82,7 +82,7 @@ public:
init(size_, dont_init_elems);
}
/** Премещение.
/** Move operations.
*/
AutoArray(AutoArray && src)
{
@ -125,10 +125,10 @@ public:
setEmpty();
}
/** Можно читать и модифицировать элементы с помощью оператора []
* только если элементы были инициализированы
* (то есть, в конструктор не был передан DontInitElemsTag,
* или вы их инициализировали с помощью place и placement new).
/** You can read and modify elements using the [] operator
* only if items were initialized
* (that is, into the constructor was not passed DontInitElemsTag,
* or you initialized them using `place` and `placement new`).
*/
T & operator[](size_t i)
{
@ -140,9 +140,9 @@ public:
return elem(i);
}
/** Получить кусок памяти, в котором должен быть расположен элемент.
* Функция предназначена, чтобы инициализировать элемент,
* который ещё не был инициализирован:
/** Get the piece of memory in which the element should be located.
* The function is intended to initialize an element,
* which has not yet been initialized
* new (arr.place(i)) T(args);
*/
char * place(size_t i)

View File

@ -23,9 +23,9 @@ static inline ContainerType max(const ContainerType & lhs, const ContainerType &
}
/** Для маленького количества ключей - массив фиксированного размера "на стеке".
* Для среднего - выделяется HashSet.
* Для большого - выделяется HyperLogLog.
/** For a small number of keys - an array of fixed size "on the stack".
* For the average, HashSet is allocated.
* For large, HyperLogLog is allocated.
*/
template
<
@ -146,7 +146,7 @@ public:
getContainer<Large>().merge(rhs.getContainer<Large>());
}
/// Можно вызывать только для пустого объекта.
/// You can only call for an empty object.
void read(DB::ReadBuffer & in)
{
UInt8 v;
@ -171,8 +171,8 @@ public:
{
auto container_type = getContainerType();
/// Если readAndMerge вызывается с пустым состоянием, просто десериализуем
/// состояние задано в качестве параметра.
/// If readAndMerge is called with an empty state, just deserialize
/// the state is specified as a parameter.
if ((container_type == details::ContainerType::SMALL) && small.empty())
{
read(in);

View File

@ -15,11 +15,11 @@ namespace ErrorCodes
}
/** Компактный массив для хранения данных, размер content_width, в битах, которых составляет
* меньше одного байта. Вместо того, чтобы хранить каждое значение в отдельный
* байт, что приводит к растрате 37.5% пространства для content_width=5, CompactArray хранит
* смежные content_width-битные значения в массиве байтов, т.е. фактически CompactArray
* симулирует массив content_width-битных значений.
/** Compact array for data storage, size `content_width`, in bits, of which is
* less than one byte. Instead of storing each value in a separate
* byte (which would waste 37.5% of the space for content_width = 5), CompactArray stores
* adjacent `content_width`-bit values in the byte array, that is actually CompactArray
* simulates an array of `content_width`-bit values.
*/
template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
class __attribute__ ((packed)) CompactArray final
@ -76,12 +76,12 @@ public:
}
private:
/// число байт в битсете
/// number of bytes in bitset
static constexpr size_t BITSET_SIZE = (static_cast<size_t>(bucket_count) * content_width + 7) / 8;
UInt8 bitset[BITSET_SIZE] = { 0 };
};
/** Класс для последовательного чтения ячеек из компактного массива на диске.
/** A class for sequentially reading cells from a compact array on a disk.
*/
template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
class CompactArray<BucketIndex, content_width, bucket_count>::Reader final
@ -135,7 +135,7 @@ public:
return true;
}
/** Вернуть текущий номер ячейки и соответствующее содержание.
/** Return the current cell number and the corresponding content.
*/
inline std::pair<BucketIndex, UInt8> get() const
{
@ -150,26 +150,26 @@ public:
private:
ReadBuffer & in;
/// Физическое расположение текущей ячейки.
/// The physical location of the current cell.
Locus locus;
/// Текущая позиция в файле в виде номера ячейки.
/// The current position in the file as a cell number.
BucketIndex current_bucket_index = 0;
/// Количество прочитанных байтов.
/// The number of bytes read.
size_t read_count = 0;
/// Содержание в текущей позиции.
/// The content in the current position.
UInt8 value_l;
UInt8 value_r;
///
bool is_eof = false;
/// Влезает ли ячейка полностью в один байт?
/// Does the cell fully fit into one byte?
bool fits_in_byte;
};
/** Структура Locus содержит необходимую информацию, чтобы найти для каждой ячейки
* соответствующие байт и смещение, в битах, от начала ячейки. Поскольку в общем
* случае размер одного байта не делится на размер одной ячейки, возможны случаи,
* когда одна ячейка перекрывает два байта. Поэтому структура Locus содержит две
* пары (индекс, смещение).
/** The `Locus` structure contains the necessary information to find for each cell
* the corresponding byte and offset, in bits, from the beginning of the cell. Since in general
* case the size of one byte is not divisible by the size of one cell, cases possible
* when one cell overlaps two bytes. Therefore, the `Locus` structure contains two
* pairs (index, offset).
*/
template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
class CompactArray<BucketIndex, content_width, bucket_count>::Locus final
@ -190,13 +190,13 @@ public:
{
if ((index_l == index_r) || (index_l == (BITSET_SIZE - 1)))
{
/// Ячейка полностью влезает в один байт.
/// The cell completely fits into one byte.
*content_l &= ~(((1 << content_width) - 1) << offset_l);
*content_l |= content << offset_l;
}
else
{
/// Ячейка перекрывает два байта.
/// The cell overlaps two bytes.
size_t left = 8 - offset_l;
*content_l &= ~(((1 << left) - 1) << offset_l);
@ -230,13 +230,13 @@ private:
UInt8 ALWAYS_INLINE read(UInt8 value_l) const
{
/// Ячейка полностью влезает в один байт.
/// The cell completely fits into one byte.
return (value_l >> offset_l) & ((1 << content_width) - 1);
}
UInt8 ALWAYS_INLINE read(UInt8 value_l, UInt8 value_r) const
{
/// Ячейка перекрывает два байта.
/// The cell overlaps two bytes.
return ((value_l >> offset_l) & ((1 << (8 - offset_l)) - 1))
| ((value_r & ((1 << offset_r) - 1)) << (8 - offset_l));
}
@ -250,7 +250,7 @@ private:
UInt8 * content_l;
UInt8 * content_r;
/// Проверки
/// Checks
static_assert((content_width > 0) && (content_width < 8), "Invalid parameter value");
static_assert(bucket_count <= (std::numeric_limits<size_t>::max() / content_width), "Invalid parameter value");
};

View File

@ -38,9 +38,9 @@ namespace detail
}
};
/** Очень простая thread-safe очередь ограниченной длины.
* Если пытаться вынуть элемент из пустой очереди, то поток блокируется, пока очередь не станет непустой.
* Если пытаться вставить элемент в переполненную очередь, то поток блокируется, пока в очереди не появится элемент.
/** A very simple thread-safe queue of limited length.
* If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty.
* If you try to push an element into an overflowed queue, the thread is blocked until space appears in the queue.
*/
template <typename T>
class ConcurrentBoundedQueue

View File

@ -22,24 +22,24 @@
#define SMALL_READ_WRITE_BUFFER_SIZE 16
/** Хранит в файле число.
* Предназначен для редких вызовов (не рассчитан на производительность).
/** Stores a number in the file.
* Designed for rare calls (not designed for performance).
*/
class CounterInFile
{
public:
/// path - имя файла, включая путь
/// path - the name of the file, including the path
CounterInFile(const std::string & path_) : path(path_) {}
/** Добавить delta к числу в файле и вернуть новое значение.
* Если параметр create_if_need не установлен в true, то
* в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
/** Add `delta` to the number in the file and return the new value.
* If the `create_if_need` parameter is not set to true, then
* the file should already have a number written (if not - create the file manually with zero).
*
* Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
* (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
* To protect against race conditions between different processes, file locks are used.
* (But when the first file is created, the race condition is possible, so it's better to create the file in advance.)
*
* locked_callback вызывается при заблокированном файле со счетчиком. В него передается новое значение.
* locked_callback можно использовать, чтобы делать что-нибудь атомарно с увеличением счетчика (например, переименовывать файлы).
* `locked_callback` is called when the counter file is locked. A new value is passed to it.
* `locked_callback` can be used to do something atomically with incrementing the counter (for example, renaming files).
*/
template <typename Callback>
Int64 add(Int64 delta, Callback && locked_callback, bool create_if_need = false)
@ -74,7 +74,7 @@ public:
}
catch (const DB::Exception & e)
{
/// Более понятное сообщение об ошибке.
/// A more understandable error message.
if (e.code() == DB::ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == DB::ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
throw DB::Exception("File " + path + " is empty. You must fill it manually with appropriate value.", e.code());
else
@ -118,13 +118,13 @@ public:
return path;
}
/// Изменить путь к файлу.
/// Change the path to the file.
void setPath(std::string path_)
{
path = path_;
}
// Не thread-safe и не синхронизирован между процессами.
// Not thread-safe and not synchronized between processes.
void fixIfBroken(UInt64 value)
{
bool file_exists = Poco::File(path).exists();

View File

@ -35,7 +35,7 @@ public:
DB::Exception * clone() const override { return new DB::Exception(*this); }
void rethrow() const override { throw *this; }
/// Дописать к существующему сообщению что-нибудь ещё.
/// Add something to the existing message.
void addMessage(const std::string & arg) { extendedMessage(arg); }
const StackTrace & getStackTrace() const { return trace; }
@ -45,7 +45,7 @@ private:
};
/// Содержит дополнительный член saved_errno. См. функцию throwFromErrno.
/// Contains an additional member `saved_errno`. See the throwFromErrno function.
class ErrnoException : public Exception
{
public:
@ -73,8 +73,8 @@ using Exceptions = std::vector<std::exception_ptr>;
void throwFromErrno(const std::string & s, int code = 0, int the_errno = errno);
/** Попробовать записать исключение в лог (и забыть про него).
* Можно использовать в деструкторах в блоке catch (...).
/** Try to write an exception to the log (and forget about it).
* Can be used in destructors in the catch-all block.
*/
void tryLogCurrentException(const char * log_name, const std::string & start_of_message = "");
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = "");

View File

@ -25,16 +25,16 @@ namespace ErrorCodes
}
/// Базовый класс содержащий основную информацию о внешней таблице и
/// основные функции для извлечения этой информации из текстовых полей.
/// The base class containing the basic information about external table and
/// basic functions for extracting this information from text fields.
class BaseExternalTable
{
public:
std::string file; /// Файл с данными или '-' если stdin
std::string name; /// Имя таблицы
std::string format; /// Название формата хранения данных
std::string file; /// File with data or '-' if stdin
std::string name; /// The name of the table
std::string format; /// Name of the data storage format
/// Описание структуры таблицы: (имя столбца, имя типа данных)
/// Description of the table structure: (column name, data type name)
std::vector<std::pair<std::string, std::string> > structure;
std::unique_ptr<ReadBuffer> read_buffer;
@ -42,10 +42,10 @@ public:
virtual ~BaseExternalTable() {};
/// Инициализировать read_buffer в зависимости от источника данных. По умолчанию не делает ничего.
/// Initialize read_buffer, depending on the data source. By default, does nothing.
virtual void initReadBuffer() {};
/// Инициализировать sample_block по структуре таблицы сохраненной в structure
/// Initialize sample_block according to the structure of the table stored in the `structure`
virtual void initSampleBlock(const Context & context)
{
const DataTypeFactory & data_type_factory = DataTypeFactory::instance();
@ -60,7 +60,7 @@ public:
}
}
/// Получить данные таблицы - пару (поток с содержимым таблицы, имя таблицы)
/// Get the table data - a pair (a thread with the contents of the table, the name of the table)
virtual ExternalTableData getData(const Context & context)
{
initReadBuffer();
@ -71,7 +71,7 @@ public:
}
protected:
/// Очистить всю накопленную информацию
/// Clear all accumulated information
void clean()
{
name = "";
@ -82,7 +82,7 @@ protected:
read_buffer.reset();
}
/// Функция для отладочного вывода информации
/// Function for debugging information output
void write()
{
std::cerr << "file " << file << std::endl;
@ -100,7 +100,7 @@ protected:
return res;
}
/// Построить вектор structure по текстовому полю structure
/// Construct the `structure` vector from the text field `structure`
virtual void parseStructureFromStructureField(const std::string & argument)
{
std::vector<std::string> vals = split(argument, " ,");
@ -112,7 +112,7 @@ protected:
structure.emplace_back(vals[i], vals[i + 1]);
}
/// Построить вектор structure по текстовому полю types
/// Construct the `structure` vector from the text field `types`
virtual void parseStructureFromTypesField(const std::string & argument)
{
std::vector<std::string> vals = split(argument, " ,");
@ -123,7 +123,7 @@ protected:
};
/// Парсинг внешей таблицы, используемый в tcp клиенте.
/// Parsing of external table used in the tcp client.
class ExternalTable : public BaseExternalTable
{
public:
@ -135,7 +135,7 @@ public:
read_buffer = std::make_unique<ReadBufferFromFile>(file);
}
/// Извлечение параметров из variables_map, которая строится по командной строке клиента
/// Extract parameters from variables_map, which is built on the client command line
ExternalTable(const boost::program_options::variables_map & external_options)
{
if (external_options.count("file"))
@ -162,9 +162,9 @@ public:
}
};
/// Парсинг внешей таблицы, используемый при отправке таблиц через http
/// Функция handlePart будет вызываться для каждой переданной таблицы,
/// поэтому так же необходимо вызывать clean в конце handlePart.
/// Parsing of external table used when sending tables via http
/// The `handlePart` function will be called for each table passed,
/// so it's also necessary to call `clean` at the end of the `handlePart`.
class ExternalTablesHandler : public Poco::Net::PartHandler, BaseExternalTable
{
public:
@ -174,15 +174,15 @@ public:
void handlePart(const Poco::Net::MessageHeader & header, std::istream & stream)
{
/// Буфер инициализируется здесь, а не в виртуальной функции initReadBuffer
/// The buffer is initialized here, not in the virtual function initReadBuffer
read_buffer = std::make_unique<ReadBufferFromIStream>(stream);
/// Извлекаем коллекцию параметров из MessageHeader
/// Retrieve a collection of parameters from MessageHeader
Poco::Net::NameValueCollection content;
std::string label;
Poco::Net::MessageHeader::splitParameters(header.get("Content-Disposition"), label, content);
/// Получаем параметры
/// Get parameters
name = content.get("name", "_data");
format = params.get(name + "_format", "TabSeparated");
@ -195,13 +195,13 @@ public:
ExternalTableData data = getData(context);
/// Создаем таблицу
/// Create table
NamesAndTypesListPtr columns = std::make_shared<NamesAndTypesList>(sample_block.getColumnsList());
StoragePtr storage = StorageMemory::create(data.second, columns);
context.addExternalTable(data.second, storage);
BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef());
/// Записываем данные
/// Write data
data.first->readPrefix();
output->writePrefix();
while(Block block = data.first->read())
@ -210,7 +210,7 @@ public:
output->writeSuffix();
names.push_back(name);
/// Подготавливаемся к приему следующего файла, для этого очищаем всю полученную информацию
/// Prepare to receive the next file; to do this, we clear all the received information
clean();
}

View File

@ -136,7 +136,7 @@ void FileChecker::load(Map & map) const
ReadBufferFromFile in(files_info_path);
WriteBufferFromString out(content);
/// The JSON library does not support whitespace. We delete them. Ineffective.
/// The JSON library does not support whitespace. We delete them. Inefficient.
while (!in.eof())
{
char c;

View File

@ -8,11 +8,11 @@
namespace DB
{
/// хранит размеры всех столбцов, и может проверять не побились ли столбцы
/// stores the sizes of all columns, and can check whether the columns are corrupted
class FileChecker
{
private:
/// Имя файла -> размер.
/// File name -> size.
using Map = std::map<std::string, size_t>;
public:
@ -23,7 +23,7 @@ public:
void update(const Poco::File & file);
void update(const Files::const_iterator & begin, const Files::const_iterator & end);
/// Проверяем файлы, параметры которых указаны в sizes.json
/// Check the files whose parameters are specified in sizes.json
bool check() const;
private:
@ -35,7 +35,7 @@ private:
std::string files_info_path;
std::string tmp_files_info_path;
/// Данные из файла читаются лениво.
/// The data from the file is read lazily.
Map map;
bool initialized = false;

View File

@ -4,12 +4,12 @@
#include <Common/HashTable/HashSet.h>
/** Хеш-таблица, позволяющая очищать таблицу за O(1).
* Еще более простая, чем HashSet: Key и Mapped должны быть POD-типами.
/** A hash table that allows you to clear the table in O(1).
* Even simpler than HashSet: Key and Mapped must be POD-types.
*
* Вместо этого класса можно было бы просто использовать в HashSet в качестве ключа пару <версия, ключ>,
* но тогда таблица накапливала бы все ключи, которые в нее когда-либо складывали, и неоправданно росла.
* Этот класс идет на шаг дальше и считает ключи со старой версией пустыми местами в хеш-таблице.
* Instead of this class, you could just use the pair (version, key) in the HashSet as the key
* but then the table would accumulate all the keys that were ever stored in it, and it would grow unreasonably.
* This class goes a step further and considers the keys with the old version empty in the hash table.
*/
@ -17,11 +17,11 @@ struct ClearableHashSetState
{
UInt32 version = 1;
/// Сериализация, в бинарном и текстовом виде.
/// Serialization, in binary and text form.
void write(DB::WriteBuffer & wb) const { DB::writeBinary(version, wb); }
void writeText(DB::WriteBuffer & wb) const { DB::writeText(version, wb); }
/// Десериализация, в бинарном и текстовом виде.
/// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb) { DB::readBinary(version, rb); }
void readText(DB::ReadBuffer & rb) { DB::readText(version, rb); }
};
@ -38,10 +38,10 @@ struct ClearableHashTableCell : public BaseCell
bool isZero(const State & state) const { return version != state.version; }
static bool isZero(const Key & key, const State & state) { return false; }
/// Установить значение ключа в ноль.
/// Set the key value to zero.
void setZero() { version = 0; }
/// Нужно ли хранить нулевой ключ отдельно (то есть, могут ли в хэш-таблицу вставить нулевой ключ).
/// Whether the zero key needs to be stored separately (that is, whether a zero key can be inserted into the hash table).
static constexpr bool need_zero_value_storage = false;
ClearableHashTableCell() {}

View File

@ -3,12 +3,19 @@
#include <Core/Types.h>
/** Хэш функции, которые лучше чем тривиальная функция std::hash.
* (при агрегации по идентификатору посетителя, прирост производительности более чем в 5 раз)
/** Hash functions that are better than the trivial function std::hash.
*
* Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
* This is because of following reasons:
* - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
* - in typical implementation of standard library, hash function for integers is trivial and just use lower bits;
* - traffic is non-uniformly distributed across a day;
* - we are using open-addressing linear probing hash tables that are most critical to hash function quality,
* and trivial hash function gives disastrous results.
*/
/** Взято из MurmurHash.
* Быстрее, чем intHash32 при вставке в хэш-таблицу UInt64 -> UInt64, где ключ - идентификатор посетителя.
/** Taken from MurmurHash. This is Murmur finalizer.
* Faster than intHash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
*/
inline DB::UInt64 intHash64(DB::UInt64 x)
{
@ -21,21 +28,22 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
return x;
}
/** CRC32C является не очень качественной в роли хэш функции,
* согласно avalanche и bit independence тестам, а также малым количеством бит,
* но может вести себя хорошо при использовании в хэш-таблицах,
* за счёт высокой скорости (latency 3 + 1 такт, througput 1 такт).
* Работает только при поддержке SSE 4.2.
* Используется asm вместо интринсика, чтобы не обязательно было собирать весь проект с -msse4.
/** CRC32C is not very high-quality as a hash function,
* according to avalanche and bit independence tests (see SMHasher software), as well as a small number of bits,
* but can behave well when used in hash tables,
* due to high speed (latency 3 + 1 clock cycle, throughput 1 clock cycle).
* Works only with SSE 4.2 support.
*/
#if __SSE4_2__
#include <nmmintrin.h>
#endif
inline DB::UInt64 intHashCRC32(DB::UInt64 x)
{
#if defined(__x86_64__)
DB::UInt64 crc = -1ULL;
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x));
return crc;
#if __SSE4_2__
return _mm_crc32_u64(-1ULL, x);
#else
/// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
/// On other platforms we do not have CRC32. NOTE This can be confusing.
return intHash64(x);
#endif
}
@ -117,7 +125,7 @@ DEFINE_HASH(DB::Float64)
#undef DEFINE_HASH
/// Разумно использовать для UInt8, UInt16 при достаточном размере хэш-таблицы.
/// It is reasonable to use for UInt8, UInt16 with sufficient hash table size.
struct TrivialHash
{
template <typename T>
@ -128,17 +136,22 @@ struct TrivialHash
};
/** Сравнительно неплохая некриптографическая хэш функция из UInt64 в UInt32.
* Но хуже (и по качеству и по скорости), чем просто срезка intHash64.
* Взята отсюда: http://www.concentric.net/~ttwang/tech/inthash.htm
/** A relatively good non-cryptographic hash function from UInt64 to UInt32.
* But worse (both in quality and speed) than just cutting intHash64.
* Taken from here: http://www.concentric.net/~ttwang/tech/inthash.htm
*
* Немного изменена по сравнению с функцией по ссылке: сдвиги вправо случайно заменены на цикличесвие сдвиги вправо.
* Это изменение никак не повлияло на результаты тестов smhasher.
* Slightly changed compared to the function from the link: right shifts were accidentally replaced with cyclic right shifts.
* This change did not affect the smhasher test results.
*
* Рекомендуется для разных задач использовать разные salt.
* А то был случай, что в БД значения сортировались по хэшу (для некачественного псевдослучайного разбрасывания),
* а в другом месте, в агрегатной функции, в хэш таблице использовался такой же хэш,
* в результате чего, эта агрегатная функция чудовищно тормозила из-за коллизий.
* It is recommended to use different salt for different tasks.
* There was a case where, in a database, values were sorted by hash (for low-quality pseudo-random scattering),
* while in another place, inside an aggregate function, the same hash was used in a hash table;
* as a result, that aggregate function was monstrously slowed down by collisions.
*
* NOTE Salting is far from perfect, because it commutes with first steps of calculation.
*
* NOTE As mentioned, this function is slower than intHash64.
* But occasionaly, it is faster, when written in a loop and loop is vectorized.
*/
template <DB::UInt64 salt>
inline DB::UInt32 intHash32(DB::UInt64 key)
@ -156,7 +169,7 @@ inline DB::UInt32 intHash32(DB::UInt64 key)
}
/// Для контейнеров.
/// For containers.
template <typename T, DB::UInt64 salt = 0>
struct IntHash32
{

View File

@ -13,7 +13,7 @@
struct NoInitTag {};
/// Пара, которая не инициализирует элементы, если не нужно.
/// A pair that does not initialize the elements, if not needed.
template <typename First, typename Second>
struct PairNoInit
{
@ -60,18 +60,18 @@ struct HashMapCell
bool isZero(const State & state) const { return isZero(value.first, state); }
static bool isZero(const Key & key, const State & state) { return ZeroTraits::check(key); }
/// Установить значение ключа в ноль.
/// Set the key value to zero.
void setZero() { ZeroTraits::set(value.first); }
/// Нужно ли хранить нулевой ключ отдельно (то есть, могут ли в хэш-таблицу вставить нулевой ключ).
/// Whether the zero key needs to be stored separately (that is, whether a zero key can be inserted into the hash table).
static constexpr bool need_zero_value_storage = true;
/// Является ли ячейка удалённой.
/// Whether the cell was deleted.
bool isDeleted() const { return false; }
void setMapped(const value_type & value_) { value.second = value_.second; }
/// Сериализация, в бинарном и текстовом виде.
/// Serialization, in binary and text form.
void write(DB::WriteBuffer & wb) const
{
DB::writeBinary(value.first, wb);
@ -85,7 +85,7 @@ struct HashMapCell
DB::writeDoubleQuoted(value.second, wb);
}
/// Десериализация, в бинарном и текстовом виде.
/// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb)
{
DB::readBinary(value.first, rb);
@ -141,19 +141,19 @@ public:
bool inserted;
this->emplace(x, it, inserted);
/** Может показаться, что инициализация не обязательна для POD-типов (или __has_trivial_constructor),
* так как кусок памяти для хэш-таблицы изначально инициализирован нулями.
* Но, на самом деле, пустая ячейка может быть не инициализирована нулями в следующих случаях:
* - ZeroValueStorage (в нём зануляется только ключ);
* - после ресайза и переноса части ячеек в новую половину хэш-таблицы, у старых ячеек, тоже зануляется только ключ.
/** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor),
* since the hash table memory is initially initialized with zeros.
* But, in fact, an empty cell may not be initialized with zeros in the following cases:
* - ZeroValueStorage (it only zeros the key);
* - after resizing and moving some of the cells to the new half of the hash table, only the key is zeroed in the old cells as well.
*
* По производительности, разницы почти всегда нет, за счёт того, что it->second как правило присваивается сразу
* после вызова operator[], и так как operator[] инлайнится, компилятор убирает лишнюю инициализацию.
* On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately
* after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization.
*
* Иногда из-за инициализации, производительность даже растёт. Это происходит в коде вида ++map[key].
* Когда мы делаем инициализацию, то для новых ячеек, достаточно сразу сделать store 1.
* А если бы мы не делали инициализацию, то не смотря на то, что в ячейке был ноль,
* компилятор не может об этом догадаться, и генерирует код load, increment, store.
* Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`.
* When we do the initialization, for new cells, it's enough to make `store 1` right away.
* And if we did not initialize, then even though there was zero in the cell,
* the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
*/
if (inserted)
new(&it->second) mapped_type();

View File

@ -44,27 +44,27 @@ namespace ErrorCodes
}
/** Состояние хэш-таблицы, которое влияет на свойства её ячеек.
* Используется в качестве параметра шаблона.
* Например, существует реализация мгновенно-очищаемой хэш-таблицы - ClearableHashMap.
* Для неё, в каждой ячейке хранится номер версии, и в самой хэш-таблице - текущая версия.
* При очистке, просто увеличивается текущая версия; все ячейки с несовпадающей версией считаются пустыми.
* Другой пример: для приближённого рассчёта количества уникальных посетителей, есть хэш-таблица UniquesHashSet.
* В ней имеется понятие "степень". При каждом переполнении, ячейки с ключами, не делящимися на соответствующую степень двух, удаляются.
/** The state of the hash table that affects the properties of its cells.
* Used as a template parameter.
* For example, there is an implementation of an instantly clearable hash table - ClearableHashMap.
* For it, each cell holds the version number, and in the hash table itself is the current version.
* When clearing, the current version is simply incremented; all cells with a mismatching version are considered empty.
* Another example: for approximate calculation of the number of unique visitors, there is the UniquesHashSet hash table.
* It has the concept of "degree". At each overflow, cells whose keys are not divisible by the corresponding power of two are deleted.
*/
struct HashTableNoState
{
/// Сериализация, в бинарном и текстовом виде.
/// Serialization, in binary and text form.
void write(DB::WriteBuffer & wb) const {}
void writeText(DB::WriteBuffer & wb) const {}
/// Десериализация, в бинарном и текстовом виде.
/// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb) {}
void readText(DB::ReadBuffer & rb) {}
};
/// Эти функции могут быть перегружены для пользовательских типов.
/// These functions can be overloaded for custom types.
namespace ZeroTraits
{
@ -77,11 +77,11 @@ void set(T & x) { x = 0; }
};
/** Compile-time интерфейс ячейки хэш-таблицы.
* Разные ячейки используются для реализации разных хэш-таблиц.
* Ячейка должна содержать ключ.
* Также может содержать значение и произвольные дополнительные данные
* (пример: запомненное значение хэш-функции; номер версии для ClearableHashMap).
/** Compile-time interface for cell of the hash table.
* Different cell types are used to implement different hash tables.
* The cell must contain a key.
* It can also contain a value and arbitrary additional data
* (example: the stored hash value; version number for ClearableHashMap).
*/
template <typename Key, typename Hash, typename TState = HashTableNoState>
struct HashTableCell
@ -93,89 +93,89 @@ struct HashTableCell
HashTableCell() {}
/// Создать ячейку с заданным ключём / ключём и значением.
/// Create a cell with the given key / key and value.
HashTableCell(const Key & key_, const State & state) : key(key_) {}
/// HashTableCell(const value_type & value_, const State & state) : key(value_) {}
/// HashTableCell(const value_type & value_, const State & state) : key(value_) {}
/// Получить то, что будет value_type контейнера.
/// Get what the value_type of the container will be.
value_type & getValue() { return key; }
const value_type & getValue() const { return key; }
/// Получить ключ.
/// Get the key.
static Key & getKey(value_type & value) { return value; }
static const Key & getKey(const value_type & value) { return value; }
/// Равны ли ключи у ячеек.
/// Are the keys at the cells equal?
bool keyEquals(const Key & key_) const { return key == key_; }
bool keyEquals(const Key & key_, size_t hash_) const { return key == key_; }
/// Если ячейка умеет запоминать в себе значение хэш-функции, то запомнить его.
/// If the cell can remember the value of the hash function, then remember it.
void setHash(size_t hash_value) {}
/// Если ячейка умеет запоминать в себе значение хэш-функции, то вернуть запомненное значение.
/// Оно должно быть хотя бы один раз вычислено до этого.
/// Если запоминание значения хэш-функции не предусмотрено, то просто вычислить хэш.
/// If the cell can store the hash value in itself, then return the stored value.
/// It must be at least once calculated before.
/// If storing the hash value is not provided, then just compute the hash.
size_t getHash(const Hash & hash) const { return hash(key); }
/// Является ли ключ нулевым. В основном буфере, ячейки с нулевым ключём, считаются пустыми.
/// Если нулевые ключи могут быть вставлены в таблицу, то ячейка для нулевого ключа хранится отдельно, не в основном буфере.
/// Нулевые ключи должны быть такими, что занулённый кусок памяти представляет собой нулевой ключ.
/// Whether the key is zero. In the main buffer, cells with a zero key are considered empty.
/// If zero keys can be inserted into the table, then the cell for the zero key is stored separately, not in the main buffer.
/// Zero keys must be such that the zeroed-down piece of memory is a zero key.
bool isZero(const State & state) const { return isZero(key, state); }
static bool isZero(const Key & key, const State & state) { return ZeroTraits::check(key); }
/// Установить значение ключа в ноль.
/// Set the key value to zero.
void setZero() { ZeroTraits::set(key); }
/// Нужно ли хранить нулевой ключ отдельно (то есть, могут ли в хэш-таблицу вставить нулевой ключ).
/// Does the hash table need to store the zero key separately (that is, can a zero key be inserted into the hash table)?
static constexpr bool need_zero_value_storage = true;
/// Является ли ячейка удалённой.
/// Whether the cell is deleted.
bool isDeleted() const { return false; }
/// Установить отображаемое значение, если есть (для HashMap), в соответствующиее из value.
/// Set the mapped value, if any (for HashMap), to the corresponding `value`.
void setMapped(const value_type & value) {}
/// Сериализация, в бинарном и текстовом виде.
/// Serialization, in binary and text form.
void write(DB::WriteBuffer & wb) const { DB::writeBinary(key, wb); }
void writeText(DB::WriteBuffer & wb) const { DB::writeDoubleQuoted(key, wb); }
/// Десериализация, в бинарном и текстовом виде.
/// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
void readText(DB::ReadBuffer & rb) { DB::writeDoubleQuoted(key, rb); }
};
/** Определяет размер хэш-таблицы, а также когда и во сколько раз её надо ресайзить.
/** Determines the size of the hash table, and when and how much it should be resized.
*/
template <size_t initial_size_degree = 8>
struct HashTableGrower
{
/// Состояние этой структуры достаточно, чтобы получить размер буфера хэш-таблицы.
/// The state of this structure is enough to get the buffer size of the hash table.
UInt8 size_degree = initial_size_degree;
/// Размер хэш-таблицы в ячейках.
/// The size of the hash table in the cells.
size_t bufSize() const { return 1 << size_degree; }
size_t maxFill() const { return 1 << (size_degree - 1); }
size_t mask() const { return bufSize() - 1; }
/// Из значения хэш-функции получить номер ячейки в хэш-таблице.
/// From the hash value, get the cell number in the hash table.
size_t place(size_t x) const { return x & mask(); }
/// Следующая ячейка в цепочке разрешения коллизий.
/// The next cell in the collision resolution chain.
size_t next(size_t pos) const { ++pos; return pos & mask(); }
/// Является ли хэш-таблица достаточно заполненной. Нужно увеличить размер хэш-таблицы, или удалить из неё что-нибудь ненужное.
/// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it.
bool overflow(size_t elems) const { return elems > maxFill(); }
/// Увеличить размер хэш-таблицы.
/// Increase the size of the hash table.
void increaseSize()
{
size_degree += size_degree >= 23 ? 1 : 2;
}
/// Установить размер буфера по количеству элементов хэш-таблицы. Используется при десериализации хэш-таблицы.
/// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
void set(size_t num_elems)
{
size_degree = num_elems <= 1
@ -192,17 +192,17 @@ struct HashTableGrower
};
/** При использовании в качестве Grower-а, превращает хэш-таблицу в что-то типа lookup-таблицы.
* Остаётся неоптимальность - в ячейках хранятся ключи.
* Также компилятору не удаётся полностью удалить код хождения по цепочке разрешения коллизий, хотя он не нужен.
* TODO Сделать полноценную lookup-таблицу.
/** When used as a Grower, it turns a hash table into something like a lookup table.
* It remains non-optimal - the cells store the keys.
* Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
* TODO Make a proper lookup table.
*/
template <size_t key_bits>
struct HashTableFixedGrower
{
size_t bufSize() const { return 1 << key_bits; }
size_t place(size_t x) const { return x; }
/// Тут можно было бы написать __builtin_unreachable(), но компилятор не до конца всё оптимизирует, и получается менее эффективно.
/// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently.
size_t next(size_t pos) const { return pos + 1; }
bool overflow(size_t elems) const { return false; }
@ -212,7 +212,7 @@ struct HashTableFixedGrower
};
/** Если нужно хранить нулевой ключ отдельно - место для его хранения. */
/** If you want to store the zero key separately - a place to store it. */
template <bool need_zero_value_storage, typename Cell>
struct ZeroValueStorage;
@ -271,15 +271,15 @@ protected:
using Self = HashTable<Key, Cell, Hash, Grower, Allocator>;
using cell_type = Cell;
size_t m_size = 0; /// Количество элементов
Cell * buf; /// Кусок памяти для всех элементов кроме элемента с ключём 0.
size_t m_size = 0; /// Amount of elements
Cell * buf; /// A piece of memory for all elements except the element with zero key.
Grower grower;
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
mutable size_t collisions = 0;
#endif
/// Найти ячейку с тем же ключём или пустую ячейку, начиная с заданного места и далее по цепочке разрешения коллизий.
/// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain.
size_t ALWAYS_INLINE findCell(const Key & x, size_t hash_value, size_t place_value) const
{
while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value))
@ -293,7 +293,7 @@ protected:
return place_value;
}
/// Найти пустую ячейку, начиная с заданного места и далее по цепочке разрешения коллизий.
/// Find an empty cell, starting with the specified position and further along the collision resolution chain.
size_t ALWAYS_INLINE findEmptyCell(const Key & x, size_t hash_value, size_t place_value) const
{
while (!buf[place_value].isZero(*this))
@ -323,7 +323,7 @@ protected:
}
/// Увеличить размер буфера.
/// Increase the size of the buffer.
void resize(size_t for_num_elems = 0, size_t for_buf_size = 0)
{
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
@ -332,10 +332,10 @@ protected:
size_t old_size = grower.bufSize();
/** Чтобы в случае исключения, объект остался в корректном состоянии,
* изменение переменной grower (определяющией размер буфера хэш-таблицы)
* откладываем на момент после реального изменения буфера.
* Временная переменная new_grower используется, чтобы определить новый размер.
/** In case of exception for the object to remain in the correct state,
* changing the variable `grower` (which determines the buffer size of the hash table)
* is postponed for a moment after a real buffer change.
* The temporary variable `new_grower` is used to determine the new size.
*/
Grower new_grower = grower;
@ -354,29 +354,29 @@ protected:
else
new_grower.increaseSize();
/// Расширим пространство.
/// Expand the space.
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, getBufferSizeInBytes(), new_grower.bufSize() * sizeof(Cell)));
grower = new_grower;
/** Теперь некоторые элементы может потребоваться переместить на новое место.
* Элемент может остаться на месте, или переместиться в новое место "справа",
* или переместиться левее по цепочке разрешения коллизий, из-за того, что элементы левее него были перемещены в новое место "справа".
/** Now some items may need to be moved to a new location.
* The element can stay in place, or move to a new location "on the right",
* or move to the left of the collision resolution chain, because the elements to the left of it have been moved to the new "right" location.
*/
size_t i = 0;
for (; i < old_size; ++i)
if (!buf[i].isZero(*this) && !buf[i].isDeleted())
reinsert(buf[i]);
reinsert(buf[i], buf[i].getHash(*this));
/** Также имеется особый случай:
* если элемент должен был быть в конце старого буфера, [ x]
* но находится в начале из-за цепочки разрешения коллизий, [o x]
* то после ресайза, он сначала снова окажется не на своём месте, [ xo ]
* и для того, чтобы перенести его куда надо,
* надо будет после переноса всех элементов из старой половинки [ o x ]
* обработать ещё хвостик из цепочки разрешения коллизий сразу после неё [ o x ]
/** There is also a special case:
* if the element was to be at the end of the old buffer, [ x]
* but is at the beginning because of the collision resolution chain, [o x]
* then after resizing, it will first be out of place again, [ xo ]
* and in order to transfer it where necessary,
* after transferring all the elements from the old halves you need to [ o x ]
* process tail from the collision resolution chain immediately after it [ o x ]
*/
for (; !buf[i].isZero(*this) && !buf[i].isDeleted(); ++i)
reinsert(buf[i]);
reinsert(buf[i], buf[i].getHash(*this));
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
watch.stop();
@ -387,30 +387,30 @@ protected:
}
/** Вставить в новый буфер значение, которое было в старом буфере.
* Используется при увеличении размера буфера.
/** Insert into the new buffer a value that was in the old buffer.
* Used when increasing the buffer size.
*/
void reinsert(Cell & x)
void reinsert(Cell & x, size_t hash_value)
{
size_t hash_value = x.getHash(*this);
size_t place_value = grower.place(hash_value);
/// Если элемент на своём месте.
/// If the element is in its place.
if (&x == &buf[place_value])
return;
/// Вычисление нового места, с учётом цепочки разрешения коллизий.
/// Compute a new location, taking into account the collision resolution chain.
place_value = findCell(Cell::getKey(x.getValue()), hash_value, place_value);
/// Если элемент остался на своём месте в старой цепочке разрешения коллизий.
/// If the item remains in its place in the old collision resolution chain.
if (!buf[place_value].isZero(*this))
return;
/// Копирование на новое место и зануление старого.
/// Copy to a new location and zero the old one.
x.setHash(hash_value);
memcpy(&buf[place_value], &x, sizeof(x));
x.setZero();
/// Потом на старое место могут переместиться элементы, которые раньше были в коллизии с этим.
/// Then the elements that previously were in collision with this can move to the old place.
}
@ -611,10 +611,10 @@ protected:
iterator iteratorToZero() { return iteratorTo(this->zeroValue()); }
/// Если ключ нулевой - вставить его в специальное место и вернуть true.
bool ALWAYS_INLINE emplaceIfZero(Key x, iterator & it, bool & inserted)
/// If the key is zero, insert it into a special place and return true.
bool ALWAYS_INLINE emplaceIfZero(Key x, iterator & it, bool & inserted, size_t hash_value)
{
/// Если утверждается, что нулевой ключ не могут вставить в таблицу.
/// If it is claimed that the zero key can not be inserted into the table.
if (!Cell::need_zero_value_storage)
return false;
@ -625,7 +625,7 @@ protected:
{
++m_size;
this->setHasZero();
it.ptr->setHash(hash(x));
it.ptr->setHash(hash_value);
inserted = true;
}
else
@ -638,7 +638,7 @@ protected:
}
/// Только для ненулевых ключей. Найти нужное место, вставить туда ключ, если его ещё нет, вернуть итератор на ячейку.
/// Only for non-zero keys. Find the right place and insert the key there if it does not already exist. The iterator to the cell is set in the output parameter.
void ALWAYS_INLINE emplaceNonZero(Key x, iterator & it, bool & inserted, size_t hash_value)
{
size_t place_value = findCell(x, hash_value, grower.place(hash_value));
@ -664,9 +664,9 @@ protected:
}
catch (...)
{
/** Если этого не делать, то будут проблемы.
* Ведь останется ключ, но неинициализированное mapped-значение,
* у которого, возможно, даже нельзя вызвать деструктор.
/** If we do not do this, there will be problems.
* A key would remain, but with an uninitialized mapped value,
* whose destructor, perhaps, cannot even be called.
*/
--m_size;
buf[place_value].setZero();
@ -679,13 +679,14 @@ protected:
public:
/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
/// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
{
std::pair<iterator, bool> res;
if (!emplaceIfZero(Cell::getKey(x), res.first, res.second))
emplaceNonZero(Cell::getKey(x), res.first, res.second, hash(Cell::getKey(x)));
size_t hash_value = hash(Cell::getKey(x));
if (!emplaceIfZero(Cell::getKey(x), res.first, res.second, hash_value))
emplaceNonZero(Cell::getKey(x), res.first, res.second, hash_value);
if (res.second)
res.first.ptr->setMapped(x);
@ -694,14 +695,21 @@ public:
}
/** Вставить ключ,
* вернуть итератор на позицию, которую можно использовать для placement new значения,
* а также флаг - был ли вставлен новый ключ.
/// Reinsert the node pointed to by the iterator (e.g. after the table has been resized).
/// Delegates to the cell-based overload; `hash_value` must be the precomputed hash of the cell's key.
void ALWAYS_INLINE reinsert(iterator & it, size_t hash_value)
{
    reinsert(*it.getPtr(), hash_value);
}
/** Insert the key,
* return an iterator to a position that can be used for `placement new` of value,
* as well as the flag - whether a new key was inserted.
*
* Вы обязаны сделать placement new значения, если был вставлен новый ключ,
* так как при уничтожении хэш-таблицы для него будет вызываться деструктор!
* You have to make `placement new` of value if you inserted a new key,
* since when destroying a hash table, it will call the destructor!
*
* Пример использования:
* Example usage:
*
* Map::iterator it;
* bool inserted;
@ -711,20 +719,21 @@ public:
*/
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted)
{
if (!emplaceIfZero(x, it, inserted))
emplaceNonZero(x, it, inserted, hash(x));
}
/// То же самое, но с заранее вычисленным значением хэш-функции.
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
{
if (!emplaceIfZero(x, it, inserted))
size_t hash_value = hash(x);
if (!emplaceIfZero(x, it, inserted, hash_value))
emplaceNonZero(x, it, inserted, hash_value);
}
/// Скопировать ячейку из другой хэш-таблицы. Предполагается, что ячейка не нулевая, а также, что такого ключа в таблице ещё не было.
/// Same as emplace(x, it, inserted), but with a precalculated value of the hash function.
/// `hash_value` must be hash(x); the caller is responsible for this invariant.
/// On return, `it` points to the cell for `x`, and `inserted` tells whether a new key was added.
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
{
    if (!emplaceIfZero(x, it, inserted, hash_value))
        emplaceNonZero(x, it, inserted, hash_value);
}
/// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.
void ALWAYS_INLINE insertUniqueNonZero(const Cell * cell, size_t hash_value)
{
size_t place_value = findEmptyCell(cell->getKey(cell->getValue()), hash_value, grower.place(hash_value));
@ -903,8 +912,8 @@ public:
memset(buf, 0, grower.bufSize() * sizeof(*buf));
}
/// После выполнения этой функции, таблицу можно только уничтожить,
/// а также можно использовать методы size, empty, begin, end.
/// After executing this function, the table can only be destroyed,
/// and also you can use the methods `size`, `empty`, `begin`, `end`.
void clearAndShrink()
{
destroyElements();

View File

@ -3,15 +3,15 @@
#include <Common/HashTable/HashMap.h>
/** Замена хэш-таблицы для маленького количества (единицы) ключей.
* Реализована в виде массива с линейным поиском.
* Массив расположен внутри объекта.
* Интерфейс является подмножеством интерфейса HashTable.
/** Replacement of the hash table for a small number (<10) of keys.
* Implemented as an array with linear search.
* The array is located inside the object.
* The interface is a subset of the HashTable interface.
*
* Вставка возможна только если метод full возвращает false.
* При неизвестном количестве различных ключей,
* вы должны проверять, не заполнена ли таблица,
* и делать fallback в этом случае (например, использовать полноценную хэш-таблицу).
* Insert is possible only if the `full` method returns false.
* With an unknown number of different keys,
* you should check if the table is not full,
* and do a `fallback` in this case (for example, use a real hash table).
*/
template
@ -32,11 +32,11 @@ protected:
using Self = SmallTable<Key, Cell, capacity>;
using cell_type = Cell;
size_t m_size = 0; /// Количество элементов.
Cell buf[capacity]; /// Кусок памяти для всех элементов.
size_t m_size = 0; /// Amount of elements.
Cell buf[capacity]; /// A piece of memory for all elements.
/// Найти ячейку с тем же ключём или пустую ячейку, начиная с заданного места и далее по цепочке разрешения коллизий.
/// Find a cell with the same key or an empty cell, starting from the specified position and then by the collision resolution chain.
const Cell * ALWAYS_INLINE findCell(const Key & x) const
{
const Cell * it = buf;
@ -188,8 +188,8 @@ protected:
public:
/** Таблица переполнена.
* В переполненную таблицу ничего нельзя вставлять.
/** The table is full.
* You can not insert anything into the full table.
*/
bool full()
{
@ -197,7 +197,7 @@ public:
}
/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
/// Insert the value. In the case of any more complex values, it is better to use the `emplace` function.
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
{
std::pair<iterator, bool> res;
@ -211,14 +211,14 @@ public:
}
/** Вставить ключ,
* вернуть итератор на позицию, которую можно использовать для placement new значения,
* а также флаг - был ли вставлен новый ключ.
/** Insert the key,
* return an iterator to a position that can be used for `placement new` of value,
* as well as the flag - whether a new key was inserted.
*
* Вы обязаны сделать placement new значения, если был вставлен новый ключ,
* так как при уничтожении хэш-таблицы для него будет вызываться деструктор!
* You have to make `placement new` of value if you inserted a new key,
* since when destroying a hash table, a destructor will be called for it!
*
* Пример использования:
* Example usage:
*
* Map::iterator it;
* bool inserted;
@ -239,7 +239,7 @@ public:
}
/// То же самое, но вернуть false, если переполнено.
/// Same, but return false if it's full.
bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted)
{
Cell * res = findCell(x);
@ -257,7 +257,7 @@ public:
}
/// Скопировать ячейку из другой хэш-таблицы. Предполагается, что такого ключа в таблице ещё не было.
/// Copy the cell from another hash table. It is assumed that there was no such key in the table yet.
void ALWAYS_INLINE insertUnique(const Cell * cell)
{
memcpy(&buf[m_size], cell, sizeof(*cell));

View File

@ -3,21 +3,21 @@
#include <Common/HashTable/HashTable.h>
/** Двухуровневая хэш-таблица.
* Представляет собой 256 (или 1 << BITS_FOR_BUCKET) маленьких хэш-таблиц (bucket-ов первого уровня).
* Для определения, какую из них использовать, берётся один из байтов хэш-функции.
/** Two-level hash table.
* Represents 256 (or 1 << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
* To determine which one to use, one of the bytes of the hash function is taken.
*
* Обычно работает чуть-чуть медленнее простой хэш-таблицы.
* Тем не менее, обладает преимуществами в некоторых случаях:
* - если надо мерджить две хэш-таблицы вместе, то это можно легко распараллелить по bucket-ам;
* - лаг при ресайзах размазан, так как маленькие хэш-таблицы ресайзятся по-отдельности;
* - по идее, ресайзы кэш-локальны в большем диапазоне размеров.
* Usually works a little slower than a simple hash table.
* However, it has advantages in some cases:
* - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
* - delay during resizes is amortized, since the small hash tables will be resized separately;
* - in theory, resizes are cache-local in a larger range of sizes.
*/
template <size_t initial_size_degree = 8>
struct TwoLevelHashTableGrower : public HashTableGrower<initial_size_degree>
{
/// Увеличить размер хэш-таблицы.
/// Increase the size of the hash table.
void increaseSize()
{
this->size_degree += this->size_degree >= 15 ? 1 : 2;
@ -52,7 +52,7 @@ public:
size_t hash(const Key & x) const { return Hash::operator()(x); }
/// NOTE Плохо для хэш-таблиц больше чем на 2^32 ячеек.
/// Pick the bucket index from the top BITS_FOR_BUCKET bits of the low 32 bits of the hash.
/// NOTE Bad for hash tables with more than 2^32 cells (only 32 bits of the hash are consumed).
static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
protected:
@ -89,13 +89,13 @@ public:
TwoLevelHashTable() {}
/// Скопировать данные из другой (обычной) хэш-таблицы. У неё должна быть такая же хэш-функция.
/// Copy the data from another (normal) hash table. It should have the same hash function.
template <typename Source>
TwoLevelHashTable(const Source & src)
{
typename Source::const_iterator it = src.begin();
/// Предполагается, что нулевой ключ (хранящийся отдельно) при итерировании идёт первым.
/// It is assumed that the zero key (stored separately) is first in iteration order.
if (it != src.end() && it.getPtr()->isZero(src))
{
insert(*it);
@ -205,7 +205,7 @@ public:
iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
/// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
{
size_t hash_value = hash(Cell::getKey(x));
@ -220,14 +220,14 @@ public:
}
/** Вставить ключ,
* вернуть итератор на позицию, которую можно использовать для placement new значения,
* а также флаг - был ли вставлен новый ключ.
/** Insert the key,
* return an iterator to a position that can be used for `placement new` of value,
* as well as the flag - whether a new key was inserted.
*
* Вы обязаны сделать placement new значения, если был вставлен новый ключ,
* так как при уничтожении хэш-таблицы для него будет вызываться деструктор!
* You have to do a `placement new` of the value if you inserted a new key,
* since when destroying a hash table, the destructor will be invoked for it!
*
* Пример использования:
* Example usage:
*
* Map::iterator it;
* bool inserted;
@ -242,7 +242,7 @@ public:
}
/// То же самое, но с заранее вычисленным значением хэш-функции.
/// Same, but with a precalculated value of the hash function.
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
{
size_t buck = getBucketFromHash(hash_value);

View File

@ -7,10 +7,10 @@
#include <tuple>
#include <type_traits>
/** Этот класс предоставляет способ, чтобы оценить погрешность результата применения алгоритма HyperLogLog.
* Эмирические наблюдения показывают, что большие погрешности возникают при E < 5 * 2^precision, где
* E - возвращаемое значение алгоритмом HyperLogLog, и precision - параметр точности HyperLogLog.
* См. "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm".
/** This class provides a way to evaluate the error in the result of applying the HyperLogLog algorithm.
* Empirical observations show that large errors occur at E < 5 * 2^precision, where
* E is the return value of the HyperLogLog algorithm, and `precision` is the HyperLogLog precision parameter.
* See "HyperLogLog in Practice: Algorithmic Engineering of a State of the Art Cardinality Estimation Algorithm".
* (S. Heule et al., Proceedings of the EDBT 2013 Conference).
*/
template <typename BiasData>
@ -22,14 +22,14 @@ public:
return false;
}
/// Предельное количество уникальных значений до которого должна примениться поправка
/// из алгоритма LinearCounting.
/// Maximum number of unique values up to which the correction
/// from the LinearCounting algorithm should be applied.
static double getThreshold()
{
    return BiasData::getThreshold();
}
/// Вернуть оценку погрешности.
/// Return the error estimate.
static double getBias(double raw_estimate)
{
const auto & estimates = BiasData::getRawEstimates();
@ -52,7 +52,7 @@ public:
}
else
{
/// Получаем оценку погрешности путём линейной интерполяции.
/// We get the error estimate by linear interpolation.
size_t index = std::distance(estimates.begin(), it);
double estimate1 = estimates[index - 1];
@ -60,7 +60,7 @@ public:
double bias1 = biases[index - 1];
double bias2 = biases[index];
/// Предполагается, что условие estimate1 < estimate2 всегда выполнено.
/// It is assumed that the estimate1 < estimate2 condition is always satisfied.
double slope = (bias2 - bias1) / (estimate2 - estimate1);
return bias1 + slope * (raw_estimate - estimate1);
@ -68,7 +68,7 @@ public:
}
private:
/// Статические проверки.
/// Static checks.
using TRawEstimatesRef = decltype(BiasData::getRawEstimates());
using TRawEstimates = typename std::remove_reference<TRawEstimatesRef>::type;
@ -82,10 +82,10 @@ private:
"Bias estimator has inconsistent data");
};
/** Тривиальный случай HyperLogLogBiasEstimator: употребляется, если не хотим исправить
* погрешность. Это имеет смысль при маленьких значениях параметра точности, например 5 или 12.
* Тогда применяются поправки из оригинальной версии алгоритма HyperLogLog.
* См. "HyperLogLog: The analysis of a near-optimal cardinality estimation algorithm"
/** Trivial case of HyperLogLogBiasEstimator: used if we do not want to correct
* the error. This makes sense for small values of the precision parameter, for example 5 or 12.
* Then the corrections from the original version of the HyperLogLog algorithm are applied.
* See "HyperLogLog: The analysis of a near-optimal cardinality estimation algorithm"
* (P. Flajolet et al., AOFA '07: Proceedings of the 2007 International Conference on Analysis
* of Algorithms)
*/

View File

@ -9,10 +9,10 @@ namespace DB
{
/** Для маленького количества ключей - массив фиксированного размера "на стеке".
* Для большого - выделяется HyperLogLog.
* Смотрите также более практичную реализацию в CombinedCardinalityEstimator.h,
* где используется также хэш-таблица для множеств среднего размера.
/** For a small number of keys - an array of fixed size "on the stack".
* For large, HyperLogLog is allocated.
* See also the more practical implementation in CombinedCardinalityEstimator.h,
* where a hash table is also used for medium-sized sets.
*/
template
<
@ -39,7 +39,7 @@ private:
{
CurrentMemoryTracker::alloc(sizeof(large));
/// На время копирования данных из tiny, устанавливать значение large ещё нельзя (иначе оно перезатрёт часть данных).
/// At the time of copying data from `tiny`, setting the value of `large` is still not possible (otherwise it will overwrite some data).
Large * tmp_large = new Large;
for (const auto & x : small)
@ -99,7 +99,7 @@ public:
}
}
/// Можно вызывать только для пустого объекта.
/// You can only call for an empty object.
void read(DB::ReadBuffer & in)
{
bool is_large;

View File

@ -3,24 +3,24 @@
#include <Common/CounterInFile.h>
/** Позволяет получать авто-инкрементное число, храня его в файле.
* Предназначен для редких вызовов (не рассчитан на производительность).
/** Allows to get an auto-increment number, storing it in a file.
* Intended for rare calls (not designed for performance).
*/
class Increment
{
public:
/// path - имя файла, включая путь
/// path - the name of the file, including the path
Increment(const std::string & path_) : counter(path_) {}
/** Получить следующее число.
* Если параметр create_if_need не установлен в true, то
* в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
/** Get the next number.
* If the `create_if_need` parameter is not set to true, then
* the file must already have a number written (if not - create the file manually with zero).
*
* Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
* (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
* To protect against race conditions between different processes, file locks are used.
* (But when the first file is created, the race condition is possible, so it's better to create the file in advance.)
*
* locked_callback вызывается при заблокированном файле со счетчиком. В него передается новое значение.
* locked_callback можно использовать, чтобы делать что-нибудь атомарно с увеличением счетчика (например, переименовывать файлы).
* `locked_callback` is called when the counter file is locked. A new value is passed to it.
* `locked_callback` can be used to do something atomically with the increment of the counter (for example, rename files).
*/
template <typename Callback>
UInt64 get(Callback && locked_callback, bool create_if_need = false)
@ -33,25 +33,25 @@ public:
return getBunch(1, create_if_need);
}
/// Посмотреть следующее значение.
/// Peek the next value without advancing the counter (implemented as getBunch with count = 0).
UInt64 peek(bool create_if_need = false)
{
    return getBunch(0, create_if_need);
}
/** Получить следующее число и увеличить счетчик на count.
* Если параметр create_if_need не установлен в true, то
* в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
*
* Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
* (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
*/
/** Get the next number and increase the counter by `count`.
* If the `create_if_need` parameter is not set to true, then
* the file should already have a number written (if not - create the file manually with zero).
*
* To protect against race conditions between different processes, file locks are used.
* (But when the first file is created, the race condition is possible, so it's better to create the file in advance.)
*/
/// Reserve `count` consecutive numbers by advancing the file-backed counter,
/// and return the first number of the reserved range.
/// With count = 0 this peeks the next value without advancing the counter.
UInt64 getBunch(UInt64 count, bool create_if_need = false)
{
    return static_cast<UInt64>(counter.add(static_cast<Int64>(count), create_if_need) - count + 1);
}
/// Изменить путь к файлу.
/// Change the path to the file.
void setPath(std::string path_)
{
counter.setPath(path_);
@ -65,23 +65,3 @@ public:
private:
CounterInFile counter;
};
/** The same as Increment, but without storing the counter in a file.
  */
struct SimpleIncrement : private boost::noncopyable
{
    /// Current counter value; atomic, so concurrent set()/get() calls are safe.
    std::atomic<UInt64> value;

    SimpleIncrement(UInt64 start = 0) : value(start) {}

    /// Reset the counter; the next get() will return new_value + 1.
    void set(UInt64 new_value)
    {
        value = new_value;
    }

    /// Atomically increment the counter and return the new value.
    UInt64 get()
    {
        return ++value;
    }
};

View File

@ -4,10 +4,11 @@
#include <Poco/Util/AbstractConfiguration.h>
#include <map>
namespace DB
{
/** Раскрывает в строке макросы из конфига.
/** Apply substitutions from the macros in config to the string.
*/
class Macros
{
@ -15,8 +16,8 @@ public:
Macros();
Macros(const Poco::Util::AbstractConfiguration & config, const String & key);
/** Заменить в строке подстроки вида {macro_name} на значение для macro_name, полученное из конфига.
* level - уровень рекурсии.
/** Replace the substring of the form {macro_name} with the value for macro_name, obtained from the config file.
* level - the level of recursion.
*/
String expand(const String & s, size_t level = 0) const;

View File

@ -102,10 +102,10 @@ public:
};
/** Объект MemoryTracker довольно трудно протащить во все места, где выделяются существенные объёмы памяти.
* Поэтому, используется thread-local указатель на используемый MemoryTracker или nullptr, если его не нужно использовать.
* Этот указатель выставляется, когда в данном потоке следует отслеживать потребление памяти.
* Таким образом, его нужно всего-лишь протащить во все потоки, в которых обрабатывается один запрос.
/** The MemoryTracker object is quite difficult to pass to all places where significant amounts of memory are allocated.
* Therefore, a thread-local pointer to used MemoryTracker is set, or nullptr if MemoryTracker does not need to be used.
* This pointer is set when memory consumption is monitored in current thread.
* So, you just need to pass it to all the threads that handle one request.
*/
extern __thread MemoryTracker * current_memory_tracker;

View File

@ -12,20 +12,22 @@
#endif
/** Использует два способа оптимизации регулярного выражения:
* 1. Если регулярное выражение является тривиальным (сводится к поиску подстроки в строке),
* то заменяет поиск на strstr или strcasestr.
* 2. Если регулярное выражение содержит безальтернативную подстроку достаточной длины,
* то перед проверкой используется strstr или strcasestr достаточной длины;
* регулярное выражение проверяется полностью только если подстрока найдена.
* 3. В остальных случаях, используется движок re2.
/** Uses two ways to optimize a regular expression:
* 1. If the regular expression is trivial (reduces to finding a substring in a string),
* then replaces the search with strstr or strcasestr.
* 2. If the regular expression contains a non-alternative substring of sufficient length,
* then before testing, strstr or strcasestr of sufficient length is used;
* regular expression is only fully checked if a substring is found.
* 3. In other cases, the re2 engine is used.
*
* Это имеет смысл, так как strstr и strcasestr в libc под Linux хорошо оптимизированы.
* This makes sense, since strstr and strcasestr in libc for Linux are well optimized.
*
* Подходит, если одновременно выполнены следующие условия:
* - если в большинстве вызовов, регулярное выражение не матчится;
* - если регулярное выражение совместимо с движком re2;
* - можете использовать на свой риск, так как, возможно, не все случаи учтены.
* Suitable if the following conditions are simultaneously met:
* - if in most calls, the regular expression does not match;
* - if the regular expression is compatible with the re2 engine;
* - you can use at your own risk, since, probably, not all cases are taken into account.
*
* NOTE: Multi-character metasymbols such as \Pl are handled incorrectly.
*/
namespace OptimizedRegularExpressionDetails
@ -82,7 +84,7 @@ public:
unsigned getNumberOfSubpatterns() const { return number_of_subpatterns; }
/// Получить регексп re2 или nullptr, если шаблон тривиален (для вывода в лог).
/// Get the regexp re2 or nullptr if the pattern is trivial (for output to the log).
const std::unique_ptr<RegexType>& getRE2() const { return re2; }
static void analyze(const std::string & regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix);
@ -105,4 +107,4 @@ private:
using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;
#include "OptimizedRegularExpression.inl"
#include "OptimizedRegularExpression.inl.h"

View File

@ -1,431 +0,0 @@
#include <iostream>
#include <Poco/Exception.h>
#include <Common/OptimizedRegularExpression.h>
#define MIN_LENGTH_FOR_STRSTR 3
#define MAX_SUBPATTERNS 5
/** Scan the pattern and extract: whether it is trivial (a plain substring search),
  * a mandatory ("non-alternative") substring that any match must contain,
  * and whether that substring is a prefix of any match.
  */
template <bool b>
void OptimizedRegularExpressionImpl<b>::analyze(
    const std::string & regexp,
    std::string & required_substring,
    bool & is_trivial,
    bool & required_substring_is_prefix)
{
    /** The expression is trivial if all its metacharacters are escaped.
      * A non-alternative string is
      *  a string outside of brackets,
      *  in which all metacharacters are escaped,
      *  and also if there is no '|' outside of brackets,
      *  and substrings like http:// or www. are avoided.
      */
    const char * begin = regexp.data();
    const char * pos = begin;
    const char * end = regexp.data() + regexp.size();
    int depth = 0;
    is_trivial = true;
    required_substring_is_prefix = false;
    required_substring.clear();
    bool has_alternative_on_depth_0 = false;

    /// A substring together with its position in the pattern.
    typedef std::pair<std::string, size_t> Substring;
    typedef std::vector<Substring> Substrings;

    Substrings trivial_substrings(1);
    Substring * last_substring = &trivial_substrings.back();

    bool in_curly_braces = false;
    bool in_square_braces = false;

    while (pos != end)
    {
        switch (*pos)
        {
            case '\0':
                pos = end;
                break;

            case '\\':
            {
                ++pos;
                if (pos == end)
                    break;

                switch (*pos)
                {
                    case '|': case '(': case ')': case '^': case '$': case '.': case '[': case '?': case '*': case '+': case '{':
                        /// An escaped metacharacter is a literal character: it may extend the current substring.
                        if (depth == 0 && !in_curly_braces && !in_square_braces)
                        {
                            if (last_substring->first.empty())
                                last_substring->second = pos - begin;
                            last_substring->first.push_back(*pos);
                        }
                        break;
                    default:
                        /// all other escape sequences are not supported
                        is_trivial = false;
                        if (!last_substring->first.empty())
                        {
                            trivial_substrings.resize(trivial_substrings.size() + 1);
                            last_substring = &trivial_substrings.back();
                        }
                        break;
                }

                ++pos;
                break;
            }

            case '|':
                if (depth == 0)
                    has_alternative_on_depth_0 = true;
                is_trivial = false;
                if (!in_square_braces && !last_substring->first.empty())
                {
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            case '(':
                if (!in_square_braces)
                {
                    ++depth;
                    is_trivial = false;
                    if (!last_substring->first.empty())
                    {
                        trivial_substrings.resize(trivial_substrings.size() + 1);
                        last_substring = &trivial_substrings.back();
                    }
                }
                ++pos;
                break;

            case '[':
                in_square_braces = true;
                ++depth;
                is_trivial = false;
                if (!last_substring->first.empty())
                {
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            case ']':
                /// ']' outside a character class is an ordinary character.
                if (!in_square_braces)
                    goto ordinary;

                in_square_braces = false;
                --depth;
                is_trivial = false;
                if (!last_substring->first.empty())
                {
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            case ')':
                if (!in_square_braces)
                {
                    --depth;
                    is_trivial = false;
                    if (!last_substring->first.empty())
                    {
                        trivial_substrings.resize(trivial_substrings.size() + 1);
                        last_substring = &trivial_substrings.back();
                    }
                }
                ++pos;
                break;

            case '^': case '$': case '.': case '+':
                is_trivial = false;
                if (!last_substring->first.empty() && !in_square_braces)
                {
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            /// Quantifiers that allow a zero number of occurrences.
            case '{':
                in_curly_braces = true;
                /// NOTE: intentional fallthrough — '{' is treated like '?'/'*' for the preceding character.
            case '?': case '*':
                is_trivial = false;
                if (!last_substring->first.empty() && !in_square_braces)
                {
                    /// Drop the last collected character: the quantifier may make it optional.
                    last_substring->first.resize(last_substring->first.size() - 1);
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            case '}':
                if (!in_curly_braces)
                    goto ordinary;

                in_curly_braces = false;
                ++pos;
                break;

            ordinary:   /// An ordinary, not escaped character.
            default:
                if (depth == 0 && !in_curly_braces && !in_square_braces)
                {
                    if (last_substring->first.empty())
                        last_substring->second = pos - begin;
                    last_substring->first.push_back(*pos);
                }
                ++pos;
                break;
        }
    }

    if (last_substring && last_substring->first.empty())
        trivial_substrings.pop_back();

    if (!is_trivial)
    {
        if (!has_alternative_on_depth_0)
        {
            /** Choose the non-alternative substring of the maximum length among the prefixes,
              *  or the non-alternative substring of the maximum length.
              */
            size_t max_length = 0;
            Substrings::const_iterator candidate_it = trivial_substrings.begin();
            for (Substrings::const_iterator it = trivial_substrings.begin(); it != trivial_substrings.end(); ++it)
            {
                if (((it->second == 0 && candidate_it->second != 0)
                        || ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
                    /// Domain-specific tuning: never pick typical URL prefixes as the required substring.
                    && (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
                    && (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
                    && (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
                    && (it->first.size() > strlen("Windows ") || strncmp(it->first.data(), "Windows ", strlen("Windows "))))
                {
                    max_length = it->first.size();
                    candidate_it = it;
                }
            }

            if (max_length >= MIN_LENGTH_FOR_STRSTR)
            {
                required_substring = candidate_it->first;
                required_substring_is_prefix = candidate_it->second == 0;
            }
        }
    }
    else
    {
        /// Trivial pattern: the whole pattern is one literal substring.
        required_substring = trivial_substrings.front().first;
        required_substring_is_prefix = trivial_substrings.front().second == 0;
    }

/*    std::cerr
        << "regexp: " << regexp
        << ", is_trivial: " << is_trivial
        << ", required_substring: " << required_substring
        << ", required_substring_is_prefix: " << required_substring_is_prefix
        << std::endl;*/
}
/** Construct the optimized regexp: analyze the pattern for a trivial form / required substring,
  * validate the option bits, and compile an re2 engine only when the pattern is non-trivial.
  * Throws Poco::Exception on unsupported options, re2 compilation failure, or too many subpatterns.
  */
template <bool b>
OptimizedRegularExpressionImpl<b>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
{
    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);

    /// Only 3 options are supported.
    if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
        throw Poco::Exception("OptimizedRegularExpression: Unsupported option.");

    is_case_insensitive = options & RE_CASELESS;
    bool is_no_capture = options & RE_NO_CAPTURE;
    bool is_dot_nl = options & RE_DOT_NL;

    number_of_subpatterns = 0;
    if (!is_trivial)
    {
        /// Compile the re2 regular expression.
        /// Renamed from `options` to avoid shadowing the `int options` parameter above.
        typename RegexType::Options regexp_options;

        if (is_case_insensitive)
            regexp_options.set_case_sensitive(false);

        if (is_dot_nl)
            regexp_options.set_dot_nl(true);

        re2 = std::make_unique<RegexType>(regexp_, regexp_options);
        if (!re2->ok())
            throw Poco::Exception("OptimizedRegularExpression: cannot compile re2: " + regexp_ + ", error: " + re2->error());

        if (!is_no_capture)
        {
            number_of_subpatterns = re2->NumberOfCapturingGroups();
            if (number_of_subpatterns > MAX_SUBPATTERNS)
                throw Poco::Exception("OptimizedRegularExpression: too many subpatterns in regexp: " + regexp_);
        }
    }
}
/** Test whether the subject matches the pattern anywhere (unanchored).
  * The subject must be null-terminated, since strstr/strcasestr are used for the substring pre-filter.
  */
template <bool b>
bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size) const
{
    if (is_trivial)
    {
        /// The pattern reduces to a plain substring search.
        const char * found = is_case_insensitive
            ? strcasestr(subject, required_substring.data())
            : strstr(subject, required_substring.data());
        return found != nullptr;
    }

    /// Fast pre-filter: if a mandatory substring is known and absent, the regexp cannot match.
    if (!required_substring.empty())
    {
        const char * found = is_case_insensitive
            ? strcasestr(subject, required_substring.data())
            : strstr(subject, required_substring.data());
        if (found == nullptr)
            return false;
    }

    return re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
}
/** Find the first match of the pattern in the subject and report its offset and length in `match`.
  * Returns true if a match was found. The subject must be null-terminated
  * (strstr/strcasestr are used for the trivial case and the substring pre-filter).
  */
template <bool b>
bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size, Match & match) const
{
    /// Locate the required substring, case-sensitively or not.
    auto find_substring = [this](const char * haystack) -> const char *
    {
        return is_case_insensitive
            ? strcasestr(haystack, required_substring.data())
            : strstr(haystack, required_substring.data());
    };

    if (is_trivial)
    {
        /// The pattern is a plain substring: report its first occurrence.
        const char * found = find_substring(subject);
        if (found == nullptr)
            return false;

        match.offset = found - subject;
        match.length = required_substring.size();
        return true;
    }

    /// Pre-filter by the mandatory substring before running the regexp engine.
    if (!required_substring.empty() && find_substring(subject) == nullptr)
        return false;

    StringPieceType piece;
    if (!RegexType::PartialMatch(StringPieceType(subject, subject_size), *re2, &piece))
        return false;

    match.offset = piece.data() - subject;
    match.length = piece.length();
    return true;
}
/** Find the first match and its capturing subgroups.
  * Fills `matches` with up to `limit` entries (capped at number_of_subpatterns + 1,
  * i.e. the whole match plus one entry per capturing group) and returns how many were filled.
  * Unmatched groups get offset = std::string::npos and length = 0.
  * The subject must be null-terminated (strstr/strcasestr are used for the pre-filter).
  */
template <bool b>
unsigned OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
{
    matches.clear();

    if (limit == 0)
        return 0;

    /// Never ask re2 for more groups than the pattern has (whole match + subpatterns).
    if (limit > number_of_subpatterns + 1)
        limit = number_of_subpatterns + 1;

    if (is_trivial)
    {
        /// Trivial pattern: a single "match" is the first occurrence of the literal substring.
        const char * pos;
        if (is_case_insensitive)
            pos = strcasestr(subject, required_substring.data());
        else
            pos = strstr(subject, required_substring.data());

        if (pos == nullptr)
            return 0;
        else
        {
            Match match;
            match.offset = pos - subject;
            match.length = required_substring.size();
            matches.push_back(match);
            return 1;
        }
    }
    else
    {
        /// Pre-filter by the mandatory substring before running the regexp engine.
        if (!required_substring.empty())
        {
            const char * pos;
            if (is_case_insensitive)
                pos = strcasestr(subject, required_substring.data());
            else
                pos = strstr(subject, required_substring.data());

            if (nullptr == pos)
                return 0;
        }

        StringPieceType pieces[MAX_SUBPATTERNS];

        if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces, limit))
            return 0;
        else
        {
            matches.resize(limit);
            for (size_t i = 0; i < limit; ++i)
            {
                if (pieces[i] != nullptr)
                {
                    matches[i].offset = pieces[i].data() - subject;
                    matches[i].length = pieces[i].length();
                }
                else
                {
                    /// Group did not participate in the match.
                    matches[i].offset = std::string::npos;
                    matches[i].length = 0;
                }
            }
            return limit;
        }
    }
}
#undef MIN_LENGTH_FOR_STRSTR
#undef MAX_SUBPATTERNS

View File

@ -0,0 +1,433 @@
#include <iostream>
#include <Poco/Exception.h>
#include <Common/OptimizedRegularExpression.h>
#define MIN_LENGTH_FOR_STRSTR 3
#define MAX_SUBPATTERNS 5
/** Analyze the pattern and derive data for fast pre-filtering:
  *  - is_trivial: the whole regexp is a plain literal string (every metacharacter is escaped);
  *  - required_substring: a literal that must occur in any match (empty if none of sufficient length was found);
  *  - required_substring_is_prefix: whether that literal starts at position 0 of the pattern.
  */
template <bool thread_safe>
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
    const std::string & regexp,
    std::string & required_substring,
    bool & is_trivial,
    bool & required_substring_is_prefix)
{
    /** The expression is trivial if all the metacharacters in it are escaped.
      * The non-alternative string is
      *  a string outside parentheses,
      *  in which all metacharacters are escaped,
      *  and also if there are no '|' outside the brackets,
      *  and also avoid substrings of the form `http://` or `www` and some other
      *   (this is the hack for typical use case in Yandex.Metrica).
      */
    const char * begin = regexp.data();
    const char * pos = begin;
    const char * end = regexp.data() + regexp.size();
    int depth = 0;          /// Nesting level of '(' / '[' groups.
    is_trivial = true;
    required_substring_is_prefix = false;
    required_substring.clear();
    bool has_alternative_on_depth_0 = false;

    /// Substring with a position (offset of its first character in the pattern).
    using Substring = std::pair<std::string, size_t>;
    using Substrings = std::vector<Substring>;

    Substrings trivial_substrings(1);
    Substring * last_substring = &trivial_substrings.back();

    bool in_curly_braces = false;
    bool in_square_braces = false;

    /// Single pass over the pattern; literal runs are accumulated into trivial_substrings,
    /// any unescaped metacharacter terminates the current run.
    while (pos != end)
    {
        switch (*pos)
        {
            case '\0':
                pos = end;
                break;

            case '\\':
            {
                ++pos;
                if (pos == end)
                    break;

                switch (*pos)
                {
                    /// An escaped metacharacter is an ordinary literal character.
                    case '|': case '(': case ')': case '^': case '$': case '.': case '[': case '?': case '*': case '+': case '{':
                        if (depth == 0 && !in_curly_braces && !in_square_braces)
                        {
                            if (last_substring->first.empty())
                                last_substring->second = pos - begin;
                            last_substring->first.push_back(*pos);
                        }
                        break;
                    default:
                        /// all other escape sequences are not supported
                        is_trivial = false;
                        if (!last_substring->first.empty())
                        {
                            trivial_substrings.resize(trivial_substrings.size() + 1);
                            last_substring = &trivial_substrings.back();
                        }
                        break;
                }

                ++pos;
                break;
            }

            case '|':
                if (depth == 0)
                    has_alternative_on_depth_0 = true;
                is_trivial = false;
                if (!in_square_braces && !last_substring->first.empty())
                {
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            case '(':
                if (!in_square_braces)
                {
                    ++depth;
                    is_trivial = false;
                    if (!last_substring->first.empty())
                    {
                        trivial_substrings.resize(trivial_substrings.size() + 1);
                        last_substring = &trivial_substrings.back();
                    }
                }
                ++pos;
                break;

            case '[':
                in_square_braces = true;
                ++depth;
                is_trivial = false;
                if (!last_substring->first.empty())
                {
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            case ']':
                if (!in_square_braces)
                    goto ordinary;

                in_square_braces = false;
                --depth;
                is_trivial = false;
                if (!last_substring->first.empty())
                {
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            case ')':
                if (!in_square_braces)
                {
                    --depth;
                    is_trivial = false;
                    if (!last_substring->first.empty())
                    {
                        trivial_substrings.resize(trivial_substrings.size() + 1);
                        last_substring = &trivial_substrings.back();
                    }
                }
                ++pos;
                break;

            case '^': case '$': case '.': case '+':
                is_trivial = false;
                if (!last_substring->first.empty() && !in_square_braces)
                {
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            /// Quantifiers that allow a zero number of occurrences.
            case '{':
                in_curly_braces = true;
                /// NOTE: intentional fallthrough into '?' / '*' handling — '{' starts a quantifier like {0,3}.
            case '?': case '*':
                is_trivial = false;
                if (!last_substring->first.empty() && !in_square_braces)
                {
                    /// The previous character may occur zero times — drop it from the literal run.
                    last_substring->first.resize(last_substring->first.size() - 1);
                    trivial_substrings.resize(trivial_substrings.size() + 1);
                    last_substring = &trivial_substrings.back();
                }
                ++pos;
                break;

            case '}':
                if (!in_curly_braces)
                    goto ordinary;

                in_curly_braces = false;
                ++pos;
                break;

            ordinary:   /// Normal, not escaped symbol.
            default:
                if (depth == 0 && !in_curly_braces && !in_square_braces)
                {
                    if (last_substring->first.empty())
                        last_substring->second = pos - begin;
                    last_substring->first.push_back(*pos);
                }
                ++pos;
                break;
        }
    }

    if (last_substring && last_substring->first.empty())
        trivial_substrings.pop_back();

    if (!is_trivial)
    {
        if (!has_alternative_on_depth_0)
        {
            /** We choose the non-alternative substring of the maximum length, among the prefixes,
              * or a non-alternative substring of maximum length.
              */
            size_t max_length = 0;
            Substrings::const_iterator candidate_it = trivial_substrings.begin();
            for (Substrings::const_iterator it = trivial_substrings.begin(); it != trivial_substrings.end(); ++it)
            {
                if (((it->second == 0 && candidate_it->second != 0)
                        || ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
                    /// Tuning for typical usage domain: reject common web boilerplate like "http", "www", "://".
                    && (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
                    && (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
                    && (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
                    && (it->first.size() > strlen("Windows ") || strncmp(it->first.data(), "Windows ", strlen("Windows "))))
                {
                    max_length = it->first.size();
                    candidate_it = it;
                }
            }

            if (max_length >= MIN_LENGTH_FOR_STRSTR)
            {
                required_substring = candidate_it->first;
                required_substring_is_prefix = candidate_it->second == 0;
            }
        }
    }
    else if (!trivial_substrings.empty())
    {
        /// The whole pattern is a single literal string.
        /// The emptiness check fixes undefined behaviour for an empty pattern:
        /// the only (empty) substring was popped above, and front() on an empty vector is UB.
        /// For an empty pattern, required_substring stays empty (matches everything).
        required_substring = trivial_substrings.front().first;
        required_substring_is_prefix = trivial_substrings.front().second == 0;
    }

/*    std::cerr
        << "regexp: " << regexp
        << ", is_trivial: " << is_trivial
        << ", required_substring: " << required_substring
        << ", required_substring_is_prefix: " << required_substring_is_prefix
        << std::endl;*/
}
/** Construct from a pattern and an (optional) bitmask of RE_* options.
  * Throws Poco::Exception on an unsupported option, an uncompilable pattern,
  * or a pattern with more than MAX_SUBPATTERNS capturing groups.
  * For trivial patterns the re2 engine is not created at all.
  */
template <bool thread_safe>
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
{
    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);

    /// Just three following options are supported.
    if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
        throw Poco::Exception("OptimizedRegularExpression: Unsupported option.");

    is_case_insensitive = options & RE_CASELESS;
    bool is_no_capture = options & RE_NO_CAPTURE;
    bool is_dot_nl = options & RE_DOT_NL;

    number_of_subpatterns = 0;
    if (!is_trivial)
    {
        /// Compile the re2 regular expression.
        /// Named `regexp_options` so it does not shadow the `options` function parameter.
        typename RegexType::Options regexp_options;

        if (is_case_insensitive)
            regexp_options.set_case_sensitive(false);

        if (is_dot_nl)
            regexp_options.set_dot_nl(true);

        re2 = std::make_unique<RegexType>(regexp_, regexp_options);
        if (!re2->ok())
            throw Poco::Exception("OptimizedRegularExpression: cannot compile re2: " + regexp_ + ", error: " + re2->error());

        if (!is_no_capture)
        {
            number_of_subpatterns = re2->NumberOfCapturingGroups();
            if (number_of_subpatterns > MAX_SUBPATTERNS)
                throw Poco::Exception("OptimizedRegularExpression: too many subpatterns in regexp: " + regexp_);
        }
    }
}
/** Whole-string check: does the subject contain a match of the pattern?
  * Submatches are not extracted. NOTE: strstr/strcasestr rely on `subject` being NUL-terminated.
  */
template <bool thread_safe>
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size) const
{
    if (is_trivial)
    {
        /// The pattern is a plain literal: a substring search decides the result.
        const char * found = is_case_insensitive
            ? strcasestr(subject, required_substring.data())
            : strstr(subject, required_substring.data());
        return found != nullptr;
    }

    /// Fast rejection: the required literal must be present before running re2.
    if (!required_substring.empty())
    {
        const char * found = is_case_insensitive
            ? strcasestr(subject, required_substring.data())
            : strstr(subject, required_substring.data());
        if (found == nullptr)
            return false;
    }

    return re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
}
/** Find the first match and report its offset/length via `match`.
  * Returns true if a match was found. `match` is untouched on failure.
  */
template <bool thread_safe>
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, Match & match) const
{
    if (is_trivial)
    {
        /// Literal pattern: the match is the first occurrence of the literal itself.
        const char * found = is_case_insensitive
            ? strcasestr(subject, required_substring.data())
            : strstr(subject, required_substring.data());
        if (found == nullptr)
            return false;

        match.offset = found - subject;
        match.length = required_substring.size();
        return true;
    }

    /// Fast rejection via the required literal before invoking re2.
    if (!required_substring.empty())
    {
        const char * found = is_case_insensitive
            ? strcasestr(subject, required_substring.data())
            : strstr(subject, required_substring.data());
        if (found == nullptr)
            return false;
    }

    StringPieceType piece;
    if (!RegexType::PartialMatch(StringPieceType(subject, subject_size), *re2, &piece))
        return false;

    match.offset = piece.data() - subject;
    match.length = piece.length();
    return true;
}
/** Find the first match and its capturing subpatterns.
  * `matches[0]` is the whole match, `matches[i]` (i >= 1) the i-th capture group;
  * unmatched groups get offset == std::string::npos. Returns the number of entries
  * written (0 if no match). At most `limit` entries are produced.
  */
template <bool thread_safe>
unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
{
    matches.clear();

    if (limit == 0)
        return 0;

    /// One entry for the whole match plus one per capturing subpattern.
    if (limit > number_of_subpatterns + 1)
        limit = number_of_subpatterns + 1;

    if (is_trivial)
    {
        const char * pos;
        if (is_case_insensitive)
            pos = strcasestr(subject, required_substring.data());
        else
            pos = strstr(subject, required_substring.data());

        if (pos == nullptr)
            return 0;
        else
        {
            Match match;
            match.offset = pos - subject;
            match.length = required_substring.size();
            matches.push_back(match);
            return 1;
        }
    }
    else
    {
        if (!required_substring.empty())
        {
            const char * pos;
            if (is_case_insensitive)
                pos = strcasestr(subject, required_substring.data());
            else
                pos = strstr(subject, required_substring.data());

            if (nullptr == pos)
                return 0;
        }

        /// +1: pieces[0] holds the whole match, followed by up to MAX_SUBPATTERNS capture groups.
        /// The constructor permits exactly MAX_SUBPATTERNS groups, so `limit` may reach
        /// MAX_SUBPATTERNS + 1; an array of only MAX_SUBPATTERNS elements would be overrun
        /// by re2->Match writing `limit` pieces (stack buffer overflow).
        StringPieceType pieces[MAX_SUBPATTERNS + 1];

        if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces, limit))
            return 0;
        else
        {
            matches.resize(limit);
            for (size_t i = 0; i < limit; ++i)
            {
                if (pieces[i] != nullptr)
                {
                    matches[i].offset = pieces[i].data() - subject;
                    matches[i].length = pieces[i].length();
                }
                else
                {
                    /// This capture group did not participate in the match.
                    matches[i].offset = std::string::npos;
                    matches[i].length = 0;
                }
            }
            return limit;
        }
    }
}
#undef MIN_LENGTH_FOR_STRSTR
#undef MAX_SUBPATTERNS

View File

@ -19,46 +19,46 @@
namespace DB
{
/** Динамический массив для POD-типов.
* Предназначен для небольшого количества больших массивов (а не большого количества маленьких).
* А точнее - для использования в ColumnVector.
* Отличается от std::vector тем, что не инициализирует элементы.
/** A dynamic array for POD types.
* Designed for a small number of large arrays (rather than a lot of small ones).
* To be more precise - for use in ColumnVector.
* It differs from std::vector in that it does not initialize the elements.
*
* Сделан некопируемым, чтобы не было случайных копий. Скопировать данные можно с помощью метода assign.
* Made noncopyable so that there are no accidential copies. You can copy the data using `assign` method.
*
* Поддерживается только часть интерфейса std::vector.
* Only part of the std::vector interface is supported.
*
* Конструктор по-умолчанию создаёт пустой объект, который не выделяет память.
* Затем выделяется память минимум в INITIAL_SIZE байт.
* The default constructor creates an empty object that does not allocate memory.
* Then the memory is allocated at least INITIAL_SIZE bytes.
*
* Если вставлять элементы push_back-ом, не делая reserve, то PODArray примерно в 2.5 раза быстрее std::vector.
* If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
*
* Шаблонный параметр pad_right - всегда выделять в конце массива столько неиспользуемых байт.
* Может использоваться для того, чтобы делать оптимистичное чтение, запись, копирование невыровненными SIMD-инструкциями.
* The template parameter `pad_right` - always allocate at the end of the array as many unused bytes.
* Can be used to make optimistic reading, writing, copying with unaligned SIMD instructions.
*/
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0>
class PODArray : private boost::noncopyable, private TAllocator /// empty base optimization
{
private:
/// Округление padding-а вверх до целого количества элементов, чтобы упростить арифметику.
/// Round padding up to an whole number of elements to simplify arithmetic.
static constexpr size_t pad_right = (pad_right_ + sizeof(T) - 1) / sizeof(T) * sizeof(T);
char * c_start = nullptr;
char * c_end = nullptr;
char * c_end_of_storage = nullptr; /// Не включает в себя pad_right.
char * c_start = nullptr;
char * c_end = nullptr;
char * c_end_of_storage = nullptr; /// Does not include pad_right.
T * t_start() { return reinterpret_cast<T *>(c_start); }
T * t_end() { return reinterpret_cast<T *>(c_end); }
T * t_end_of_storage() { return reinterpret_cast<T *>(c_end_of_storage); }
T * t_start() { return reinterpret_cast<T *>(c_start); }
T * t_end() { return reinterpret_cast<T *>(c_end); }
T * t_end_of_storage() { return reinterpret_cast<T *>(c_end_of_storage); }
const T * t_start() const { return reinterpret_cast<const T *>(c_start); }
const T * t_end() const { return reinterpret_cast<const T *>(c_end); }
const T * t_end_of_storage() const { return reinterpret_cast<const T *>(c_end_of_storage); }
const T * t_start() const { return reinterpret_cast<const T *>(c_start); }
const T * t_end() const { return reinterpret_cast<const T *>(c_end); }
const T * t_end_of_storage() const { return reinterpret_cast<const T *>(c_end_of_storage); }
/// Количество памяти, занимаемое num_elements элементов.
/// The amount of memory occupied by the num_elements of the elements.
static size_t byte_size(size_t num_elements) { return num_elements * sizeof(T); }
/// Минимальное количество памяти, которое нужно выделить для num_elements элементов, включая padding.
/// Minimum amount of memory to allocate for num_elements, including padding.
static size_t minimum_memory_for_elements(size_t num_elements) { return byte_size(num_elements) + pad_right; }
void alloc_for_num_elements(size_t num_elements)
@ -112,7 +112,7 @@ public:
size_t allocated_size() const { return c_end_of_storage - c_start + pad_right; }
/// Просто typedef нельзя, так как возникает неоднозначность для конструкторов и функций assign.
/// You can not just use `typedef`, because there is ambiguity for the constructors and `assign` functions.
struct iterator : public boost::iterator_adaptor<iterator, T*>
{
iterator() {}
@ -173,16 +173,16 @@ public:
const T & operator[] (size_t n) const { return t_start()[n]; }
T & front() { return t_start()[0]; }
T & back() { return t_end()[-1]; }
T & back() { return t_end()[-1]; }
const T & front() const { return t_start()[0]; }
const T & back() const { return t_end()[-1]; }
iterator begin() { return t_start(); }
iterator end() { return t_end(); }
const_iterator begin() const { return t_start(); }
const_iterator end() const { return t_end(); }
const_iterator cbegin() const { return t_start(); }
const_iterator cend() const { return t_end(); }
iterator begin() { return t_start(); }
iterator end() { return t_end(); }
const_iterator begin() const { return t_start(); }
const_iterator end() const { return t_end(); }
const_iterator cbegin() const { return t_start(); }
const_iterator cend() const { return t_end(); }
void reserve(size_t n)
{
@ -209,7 +209,7 @@ public:
c_end = c_start + byte_size(n);
}
/// Как resize, но обнуляет новые элементы.
/// Same as resize, but zeroes new elements.
void resize_fill(size_t n)
{
size_t old_size = size();
@ -261,7 +261,7 @@ public:
c_end -= byte_size(1);
}
/// Не вставляйте в массив кусок самого себя. Потому что при ресайзе, итераторы на самого себя могут инвалидироваться.
/// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated.
template <typename It1, typename It2>
void insert(It1 from_begin, It2 from_end)
{
@ -458,7 +458,7 @@ void swap(PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & lhs, PODArray<T, I
lhs.swap(rhs);
}
/** Для столбцов. Padding-а хватает, чтобы читать и писать xmm-регистр по адресу последнего элемента. */
/** For columns. Padding is enough to read and write xmm-register at the address of the last element. */
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>>
using PaddedPODArray = PODArray<T, INITIAL_SIZE, TAllocator, 15>;

View File

@ -8,8 +8,17 @@
#include <common/logger_useful.h>
#include <Common/Exception.h>
/** Класс, от которого можно унаследоваться и получить пул чего-нибудь. Используется для пулов соединений с БД.
* Наследник должен предоставить метод для создания нового объекта для помещения в пул.
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
}
/** A class from which you can inherit and get a pool of something. Used for database connection pools.
* Descendant class must provide a method for creating a new object to place in the pool.
*/
template <typename TObject>
@ -22,7 +31,7 @@ public:
private:
/** Объект с флагом, используется ли он сейчас. */
/** The object with the flag, whether it is currently used. */
struct PooledObject
{
PooledObject(ObjectPtr object_, PoolBase & pool_)
@ -37,8 +46,8 @@ private:
using Objects = std::vector<std::shared_ptr<PooledObject>>;
/** Помощник, который устанавливает флаг использования объекта, а в деструкторе - снимает,
* а также уведомляет о событии с помощью condvar-а.
/** The helper, which sets the flag for using the object, and in the destructor - removes,
* and also notifies the event using condvar.
*/
struct PoolEntryHelper
{
@ -54,36 +63,36 @@ private:
};
public:
/** То, что выдаётся пользователю. */
/** What is given to the user. */
class Entry
{
public:
friend class PoolBase<Object>;
Entry() {} /// Для отложенной инициализации.
Entry() {} /// For deferred initialization.
/** Объект Entry защищает ресурс от использования другим потоком.
* Следующие методы запрещены для rvalue, чтобы нельзя было написать подобное
*
* auto q = pool.Get()->query("SELECT .."); // Упс, после этой строчки Entry уничтожился
* q.execute(); // Кто-то еще может использовать этот Connection
*/
/** The `Entry` object protects the resource from being used by another thread.
* The following methods are forbidden for `rvalue`, so you can not write a similar to
*
* auto q = pool.Get()->query("SELECT .."); // Oops, after this line Entry was destroyed
* q.execute (); // Someone else can use this Connection
*/
Object * operator->() && = delete;
const Object * operator->() const && = delete;
Object & operator*() && = delete;
const Object & operator*() const && = delete;
Object * operator->() & { return &*data->data.object; }
const Object * operator->() const & { return &*data->data.object; }
Object & operator*() & { return *data->data.object; }
const Object & operator*() const & { return *data->data.object; }
Object * operator->() & { return &*data->data.object; }
const Object * operator->() const & { return &*data->data.object; }
Object & operator*() & { return *data->data.object; }
const Object & operator*() const & { return *data->data.object; }
bool isNull() const { return data == nullptr; }
PoolBase * getPool() const
{
if (!data)
throw DB::Exception("attempt to get pool from uninitialized entry");
throw DB::Exception("Attempt to get pool from uninitialized entry", DB::ErrorCodes::LOGICAL_ERROR);
return &data->data.pool;
}
@ -95,7 +104,7 @@ public:
virtual ~PoolBase() {}
/** Выделяет объект для работы. При timeout < 0 таймаут бесконечный. */
/** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */
Entry get(Poco::Timespan::TimeDiff timeout)
{
std::unique_lock<std::mutex> lock(mutex);
@ -131,13 +140,13 @@ public:
}
private:
/** Максимальный размер пула. */
/** The maximum size of the pool. */
unsigned max_items;
/** Пул. */
/** Pool. */
Objects items;
/** Блокировка для доступа к пулу. */
/** Lock to access the pool. */
std::mutex mutex;
std::condition_variable available;
@ -151,7 +160,7 @@ protected:
items.reserve(max_items);
}
/** Создает новый объект для помещения в пул. */
/** Creates a new object to put into the pool. */
virtual ObjectPtr allocObject() = 0;
};

View File

@ -13,18 +13,18 @@
#include <Core/Defines.h>
/** Поразрядная сортировка, обладает следующей функциональностью:
* Может сортировать unsigned, signed числа, а также float-ы.
* Может сортировать массив элементов фиксированной длины, которые содержат что-то ещё кроме ключа.
* Настраиваемый размер разряда.
/** Radix sort, has the following functionality:
* Can sort unsigned, signed numbers, and floats.
* Can sort an array of fixed length elements that contain something else besides the key.
* Customizable radix size.
*
* LSB, stable.
* NOTE Для некоторых приложений имеет смысл добавить MSB-radix-sort,
* а также алгоритмы radix-select, radix-partial-sort, radix-get-permutation на его основе.
* NOTE For some applications it makes sense to add MSB-radix-sort,
* as well as radix-select, radix-partial-sort, radix-get-permutation algorithms based on it.
*/
/** Используется в качестве параметра шаблона. См. ниже.
/** Used as a template parameter. See below.
*/
struct RadixSortMallocAllocator
{
@ -40,16 +40,16 @@ struct RadixSortMallocAllocator
};
/** Преобразование, которое переводит битовое представление ключа в такое целое беззнаковое число,
* что отношение порядка над ключами будет соответствовать отношению порядка над полученными беззнаковыми числами.
* Для float-ов это преобразование делает следующее:
* если выставлен знаковый бит, то переворачивает все остальные биты.
* При этом, NaN-ы оказываются больше всех нормальных чисел.
/** A transformation that transforms the bit representation of a key into an unsigned integer number,
* that the order relation over the keys will match the order relation over the obtained unsigned numbers.
* For floats this conversion does the following:
* if the signed bit is set, it flips all other bits.
* In this case, NaN-s are bigger than all normal numbers.
*/
template <typename KeyBits>
struct RadixSortFloatTransform
{
/// Стоит ли записывать результат в память, или лучше делать его каждый раз заново?
/// Is it worth writing the result in memory, or is it better to do calculation every time again?
static constexpr bool transform_is_simple = false;
static KeyBits forward(KeyBits x)
@ -67,24 +67,24 @@ struct RadixSortFloatTransform
template <typename Float>
struct RadixSortFloatTraits
{
using Element = Float; /// Тип элемента. Это может быть структура с ключём и ещё каким-то payload-ом. Либо просто ключ.
using Key = Float; /// Ключ, по которому нужно сортировать.
using CountType = uint32_t; /// Тип для подсчёта гистограмм. В случае заведомо маленького количества элементов, может быть меньше чем size_t.
using Element = Float; /// The type of the element. It can be a structure with a key and some other payload. Or just a key.
using Key = Float; /// The key to sort.
using CountType = uint32_t; /// Type for calculating histograms. In the case of a known small number of elements, it can be less than size_t.
/// Тип, в который переводится ключ, чтобы делать битовые операции. Это UInt такого же размера, как ключ.
/// The type to which the key is transformed to do bit operations. This UInt is the same size as the key.
using KeyBits = typename std::conditional<sizeof(Float) == 8, uint64_t, uint32_t>::type;
static constexpr size_t PART_SIZE_BITS = 8; /// Какими кусочками ключа в количестве бит делать один проход - перестановку массива.
static constexpr size_t PART_SIZE_BITS = 8; /// With what pieces of the key, in bits, to do one pass - reshuffle of the array.
/// Преобразования ключа в KeyBits такое, что отношение порядка над ключём соответствует отношению порядка над KeyBits.
/// Converting a key into KeyBits is such that the order relation over the key corresponds to the order relation over KeyBits.
using Transform = RadixSortFloatTransform<KeyBits>;
/// Объект с функциями allocate и deallocate.
/// Может быть использован, например, чтобы выделить память для временного массива на стеке.
/// Для этого сам аллокатор создаётся на стеке.
/// An object with the functions allocate and deallocate.
/// Can be used, for example, to allocate memory for a temporary array on the stack.
/// To do this, the allocator itself is created on the stack.
using Allocator = RadixSortMallocAllocator;
/// Функция получения ключа из элемента массива.
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem; }
};
@ -95,7 +95,7 @@ struct RadixSortIdentityTransform
static constexpr bool transform_is_simple = true;
static KeyBits forward(KeyBits x) { return x; }
static KeyBits backward(KeyBits x) { return x; }
static KeyBits backward(KeyBits x) { return x; }
};
@ -105,7 +105,7 @@ struct RadixSortSignedTransform
static constexpr bool transform_is_simple = true;
static KeyBits forward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
static KeyBits backward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
static KeyBits backward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
};
@ -122,7 +122,7 @@ struct RadixSortUIntTraits
using Transform = RadixSortIdentityTransform<KeyBits>;
using Allocator = RadixSortMallocAllocator;
/// Функция получения ключа из элемента массива.
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem; }
};
@ -139,7 +139,7 @@ struct RadixSortIntTraits
using Transform = RadixSortSignedTransform<KeyBits>;
using Allocator = RadixSortMallocAllocator;
/// Функция получения ключа из элемента массива.
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem; }
};
@ -150,7 +150,7 @@ struct RadixSort
private:
using Element = typename Traits::Element;
using Key = typename Traits::Key;
using CountType = typename Traits::CountType;
using CountType = typename Traits::CountType;
using KeyBits = typename Traits::KeyBits;
static constexpr size_t HISTOGRAM_SIZE = 1 << Traits::PART_SIZE_BITS;
@ -172,19 +172,19 @@ private:
public:
static void execute(Element * arr, size_t size)
{
/// Если массив имеет размер меньше 256, то лучше использовать другой алгоритм.
/// If the array is smaller than 256, then it is better to use another algorithm.
/// Здесь есть циклы по NUM_PASSES. Очень важно, что они разворачиваются в compile-time.
/// There are loops of NUM_PASSES. It is very important that they are unfolded at compile-time.
/// Для каждого из NUM_PASSES кусков бит ключа, считаем, сколько раз каждое значение этого куска встретилось.
/// For each of the NUM_PASSES bit ranges of the key, consider how many times each value of this bit range met.
CountType histograms[HISTOGRAM_SIZE * NUM_PASSES] = {0};
typename Traits::Allocator allocator;
/// Будем делать несколько проходов по массиву. На каждом проходе, данные перекладываются в другой массив. Выделим этот временный массив.
/// We will do several passes through the array. On each pass, the data is transferred to another array. Let's allocate this temporary array.
Element * swap_buffer = reinterpret_cast<Element *>(allocator.allocate(size * sizeof(Element)));
/// Трансформируем массив и вычисляем гистограмму.
/// Transform the array and calculate the histogram.
for (size_t i = 0; i < size; ++i)
{
if (!Traits::Transform::transform_is_simple)
@ -195,7 +195,7 @@ public:
}
{
/// Заменяем гистограммы на суммы с накоплением: значение в позиции i равно сумме в предыдущих позициях минус один.
/// Replace the histograms with the accumulated sums: the value in position i is the sum of the previous positions minus one.
size_t sums[NUM_PASSES] = {0};
for (size_t i = 0; i < HISTOGRAM_SIZE; ++i)
@ -209,7 +209,7 @@ public:
}
}
/// Перекладываем элементы в порядке начиная от младшего куска бит, и далее делаем несколько проходов по количеству кусков.
/// Move the elements in the order starting from the least bit piece, and then do a few passes on the number of pieces.
for (size_t j = 0; j < NUM_PASSES; ++j)
{
Element * writer = j % 2 ? arr : swap_buffer;
@ -219,17 +219,18 @@ public:
{
size_t pos = getPart(j, keyToBits(Traits::extractKey(reader[i])));
/// Размещаем элемент на следующей свободной позиции.
/// Place the element on the next free position.
auto & dest = writer[++histograms[j * HISTOGRAM_SIZE + pos]];
dest = reader[i];
/// На последнем перекладывании, делаем обратную трансформацию.
/// On the last pass, we do the reverse transformation.
if (!Traits::Transform::transform_is_simple && j == NUM_PASSES - 1)
Traits::extractKey(dest) = bitsToKey(Traits::Transform::backward(keyToBits(Traits::extractKey(reader[i]))));
}
}
/// Если число проходов нечётное, то результирующий массив находится во временном буфере. Скопируем его на место исходного массива.
/// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
/// NOTE Sometimes it will be more optimal to provide non-destructive interface, that will not modify original array.
if (NUM_PASSES % 2)
memcpy(arr, swap_buffer, size * sizeof(Element));

View File

@ -9,19 +9,19 @@ namespace DB
{
/** Позволяет запустить команду,
* читать её stdout, stderr, писать в stdin,
* дождаться завершения.
/** Lets you run the command,
* read it stdout and stderr; write to stdin;
* wait for completion.
*
* Реализация похожа на функцию popen из POSIX (посмотреть можно в исходниках libc).
* The implementation is similar to the popen function from POSIX (see libc source code).
*
* Наиболее важное отличие: использует vfork вместо fork.
* Это сделано, потому что fork не работает (с ошибкой о нехватке памяти),
* при некоторых настройках overcommit-а, если размер адресного пространства процесса больше половины количества доступной памяти.
* Также, изменение memory map-ов - довольно ресурсоёмкая операция.
* The most important difference: uses vfork instead of fork.
* This is done because fork does not work (with a memory shortage error),
* with some overcommit settings, if the address space of the process is more than half the amount of available memory.
* Also, changing memory maps - a fairly resource-intensive operation.
*
* Второе отличие - позволяет работать одновременно и с stdin, и с stdout, и с stderr запущенного процесса,
* а также узнать код и статус завершения.
* The second difference - allows to work simultaneously with stdin, and with stdout, and with stderr of running process,
* and also to obtain the return code and completion status.
*/
class ShellCommand
{
@ -34,20 +34,20 @@ private:
static std::unique_ptr<ShellCommand> executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only);
public:
WriteBufferFromFile in; /// Если команда читает из stdin, то не забудьте вызвать in.close() после записи туда всех данных.
WriteBufferFromFile in; /// If the command reads from stdin, do not forget to call in.close() after writing all the data there.
ReadBufferFromFile out;
ReadBufferFromFile err;
/// Выполнить команду с использованием /bin/sh -c
/// Run the command using /bin/sh -c
static std::unique_ptr<ShellCommand> execute(const std::string & command, bool pipe_stdin_only = false);
/// Выполнить исполняемый файл с указаннами аргументами. arguments - без argv[0].
/// Run the executable with the specified arguments. `arguments` - without argv[0].
static std::unique_ptr<ShellCommand> executeDirect(const std::string & path, const std::vector<std::string> & arguments);
/// Подождать завершения процесса, кинуть исключение, если код не 0 или если процесс был завершён не самостоятельно.
/// Wait for the process to end, throw an exception if the code is not 0 or if the process was not completed by itself.
void wait();
/// Подождать завершения процесса, узнать код возврата. Кинуть исключение, если процесс был завершён не самостоятельно.
/// Wait for the process to finish, see the return code. To throw an exception if the process was not completed independently.
int tryWait();
};

View File

@ -6,13 +6,13 @@
#include <ext/function_traits.hpp>
/** Простейший кэш для свободной функции.
* Можете также передать статический метод класса или лямбду без захвата.
* Размер неограничен. Значения не устаревают.
* Для синхронизации используется mutex.
* Подходит только для простейших случаев.
/** The simplest cache for a free function.
* You can also pass a static class method or lambda without captures.
* The size is unlimited. Values are stored permanently and never evicted.
* Mutex is used for synchronization.
* Suitable only for the simplest cases.
*
* Использование:
* Usage
*
* SimpleCache<decltype(func), &func> func_cached;
* std::cerr << func_cached(args...);
@ -41,7 +41,7 @@ public:
return it->second;
}
/// Сами вычисления делаются не под mutex-ом.
/// The calculations themselves are not done under mutex.
Result res = f(std::forward<Args>(args)...);
{

View File

@ -0,0 +1,24 @@
#pragma once
#include <common/Types.h>
#include <atomic>
/** Is used for numbering of files.
*/
struct SimpleIncrement
{
std::atomic<UInt64> value;
SimpleIncrement(UInt64 start = 0) : value(start) {}
void set(UInt64 new_value)
{
value = new_value;
}
UInt64 get()
{
return ++value;
}
};

View File

@ -1,57 +1,54 @@
#pragma once
/** SipHash - быстрая криптографическая хэш функция для коротких строк.
* Взято отсюда: https://www.131002.net/siphash/
/** SipHash is a fast cryptographic hash function for short strings.
* Taken from here: https://www.131002.net/siphash/
*
* Сделано два изменения:
* - возвращает 128 бит, а не 64;
* - сделано потоковой (можно вычислять по частям).
* This is SipHash 2-4 variant.
*
* На коротких строках (URL, поисковые фразы) более чем в 3 раза быстрее MD5 от OpenSSL.
* (~ 700 МБ/сек., 15 млн. строк в секунду)
* Two changes are made:
* - returns also 128 bits, not only 64;
* - done streaming (can be calculated in parts).
*
* On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
* (~ 700 MB/sec, 15 million strings per second)
*/
#include <cstdint>
#include <cstddef>
#include <Core/Types.h>
#include <common/Types.h>
#define ROTL(x,b) static_cast<u64>( ((x) << (b)) | ( (x) >> (64 - (b))) )
#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
#define SIPROUND \
do \
{ \
v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \
v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \
v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \
v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \
#define SIPROUND \
do \
{ \
v0 += v1; v1 = ROTL(v1, 13); v1 ^= v0; v0 = ROTL(v0, 32); \
v2 += v3; v3 = ROTL(v3, 16); v3 ^= v2; \
v0 += v3; v3 = ROTL(v3, 21); v3 ^= v0; \
v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \
} while(0)
class SipHash
{
private:
using u64 = DB::UInt64;
using u8 = DB::UInt8;
/// State.
UInt64 v0;
UInt64 v1;
UInt64 v2;
UInt64 v3;
/// Состояние.
u64 v0;
u64 v1;
u64 v2;
u64 v3;
/// How many bytes have been processed.
UInt64 cnt;
/// Сколько байт обработано.
u64 cnt;
/// Текущие 8 байт входных данных.
/// The current 8 bytes of input data.
union
{
u64 current_word;
u8 current_bytes[8];
UInt64 current_word;
UInt8 current_bytes[8];
};
void finalize()
{
/// В последний свободный байт пишем остаток от деления длины на 256.
/// In the last free byte, we write the remainder of the division by 256.
current_bytes[7] = cnt;
v3 ^= current_word;
@ -67,10 +64,10 @@ private:
}
public:
/// Аргументы - seed.
SipHash(u64 k0 = 0, u64 k1 = 0)
/// Arguments - seed.
SipHash(UInt64 k0 = 0, UInt64 k1 = 0)
{
/// Инициализируем состояние некоторыми случайными байтами и seed-ом.
/// Initialize the state with some random bytes and seed.
v0 = 0x736f6d6570736575ULL ^ k0;
v1 = 0x646f72616e646f6dULL ^ k1;
v2 = 0x6c7967656e657261ULL ^ k0;
@ -80,11 +77,11 @@ public:
current_word = 0;
}
void update(const char * data, u64 size)
void update(const char * data, UInt64 size)
{
const char * end = data + size;
/// Дообработаем остаток от предыдущего апдейта, если есть.
/// We'll finish to process the remainder of the previous update, if any.
if (cnt & 7)
{
while (cnt & 7 && data < end)
@ -94,7 +91,7 @@ public:
++cnt;
}
/// Если всё ещё не хватает байт до восьмибайтового слова.
/// If we still do not have enough bytes to an 8-byte word.
if (cnt & 7)
return;
@ -108,7 +105,7 @@ public:
while (data + 8 <= end)
{
current_word = *reinterpret_cast<const u64 *>(data);
current_word = *reinterpret_cast<const UInt64 *>(data);
v3 ^= current_word;
SIPROUND;
@ -118,7 +115,7 @@ public:
data += 8;
}
/// Заполняем остаток, которого не хватает до восьмибайтового слова.
/// Pad the remainder, which is missing up to an 8-byte word.
current_word = 0;
switch (end - data)
{
@ -133,23 +130,23 @@ public:
}
}
/// Получить результат в некотором виде. Это можно сделать только один раз!
/// Get the result in some form. This can only be done once!
void get128(char * out)
{
finalize();
reinterpret_cast<u64 *>(out)[0] = v0 ^ v1;
reinterpret_cast<u64 *>(out)[1] = v2 ^ v3;
reinterpret_cast<UInt64 *>(out)[0] = v0 ^ v1;
reinterpret_cast<UInt64 *>(out)[1] = v2 ^ v3;
}
void get128(u64 & lo, u64 & hi)
void get128(UInt64 & lo, UInt64 & hi)
{
finalize();
lo = v0 ^ v1;
hi = v2 ^ v3;
}
u64 get64()
UInt64 get64()
{
finalize();
return v0 ^ v1 ^ v2 ^ v3;
@ -160,6 +157,7 @@ public:
#undef ROTL
#undef SIPROUND
#include <cstddef>
inline void sipHash128(const char * data, const size_t size, char * out)
{
@ -168,7 +166,7 @@ inline void sipHash128(const char * data, const size_t size, char * out)
hash.get128(out);
}
inline DB::UInt64 sipHash64(const char * data, const size_t size)
inline UInt64 sipHash64(const char * data, const size_t size)
{
SipHash hash;
hash.update(data, size);
@ -177,7 +175,7 @@ inline DB::UInt64 sipHash64(const char * data, const size_t size)
#include <string>
inline DB::UInt64 sipHash64(const std::string & s)
inline UInt64 sipHash64(const std::string & s)
{
return sipHash64(s.data(), s.size());
}

View File

@ -73,7 +73,7 @@ public:
free_list = block;
}
/// Размер выделенного пула в байтах
/// The size of the allocated pool in bytes
size_t size() const
{
return pool.size();

View File

@ -0,0 +1,288 @@
#pragma once
#include <iostream>
#include <vector>
#include <boost/range/adaptor/reversed.hpp>
#include <Common/UInt128.h>
#include <Common/HashTable/Hash.h>
#include <Common/HashTable/HashMap.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
/*
* Implementation of the Filtered Space-Saving for TopK streaming analysis.
* http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf
* It implements suggested reduce-and-combine algorithm from Parallel Space Saving:
* https://arxiv.org/pdf/1401.0702.pdf
*/
namespace DB
{
template
<
    typename TKey,
    typename HashKey = TKey,
    typename Hash = DefaultHash<HashKey>,
    typename Grower = HashTableGrower<>,
    typename Allocator = HashTableAllocator
>
class SpaceSaving
{
private:
    // Suggested constants in the paper "Finding top-k elements in data streams", chap 6. equation (24)
    // Round to nearest power of 2 for cheaper binning without modulo
    /// Returns the power of two strictly greater than x * 6; used to size `alpha_map`
    /// so that bucket selection can be done with a bitwise AND instead of modulo.
    constexpr uint64_t nextAlphaSize (uint64_t x)
    {
        constexpr uint64_t ALPHA_MAP_ELEMENTS_PER_COUNTER = 6;
        return 1ULL<<(sizeof(uint64_t) * 8 - __builtin_clzll(x * ALPHA_MAP_ELEMENTS_PER_COUNTER));
    }

public:
    using Self = SpaceSaving<TKey, HashKey, Hash, Grower, Allocator>;

    /// One monitored element: the key together with its estimated count and error bound.
    struct Counter
    {
        Counter() {}

        Counter(const TKey & k, UInt64 c = 0, UInt64 e = 0, size_t h = 0)
          : key(k), slot(0), hash(h), count(c), error(e) {}

        /// Serialize key, count and error (slot and hash are recomputed on read).
        void write(WriteBuffer & wb) const
        {
            writeBinary(key, wb);
            writeVarUInt(count, wb);
            writeVarUInt(error, wb);
        }

        void read(ReadBuffer & rb)
        {
            readBinary(key, rb);
            readVarUInt(count, rb);
            readVarUInt(error, rb);
        }

        // greater() taking slot error into account
        /// Of two counters with equal count, the one with the smaller error is "greater"
        /// (its estimate is more trustworthy), so it sorts closer to the front.
        bool operator> (const Counter & b) const
        {
            return (count > b.count) || (count == b.count && error < b.error);
        }

        TKey key;
        size_t slot, hash;   /// slot: index of this counter in counter_list; hash: cached hash of key.
        UInt64 count;        /// Estimated occurrence count.
        UInt64 error;        /// Upper bound on how much `count` may overestimate the true count.
    };

    SpaceSaving(size_t c = 10) : alpha_map(nextAlphaSize(c)), m_capacity(c) {}

    ~SpaceSaving() { destroyElements(); }

    /// Number of keys currently monitored (<= capacity).
    inline size_t size() const
    {
        return counter_list.size();
    }

    /// Maximum number of keys that can be monitored.
    inline size_t capacity() const
    {
        return m_capacity;
    }

    /// Grow (or shrink) the capacity; alpha_map is re-sized accordingly.
    void resize(size_t new_capacity)
    {
        counter_list.reserve(new_capacity);
        alpha_map.resize(nextAlphaSize(new_capacity));
        m_capacity = new_capacity;
    }

    /// Account `increment` occurrences of `key` (with an optional pre-existing error).
    /// Implements the Filtered Space-Saving update: unmonitored keys first accumulate
    /// weight in the per-bucket `alpha_map` and only displace the minimum counter
    /// once their bucket weight would exceed it.
    void insert(const TKey & key, UInt64 increment = 1, UInt64 error = 0)
    {
        // Increase weight of a key that already exists
        // It uses hashtable for both value mapping as a presence test (c_i != 0)
        auto hash = counter_map.hash(key);
        auto it = counter_map.find(key, hash);
        if (it != counter_map.end())
        {
            auto c = it->second;
            c->count += increment;
            c->error += error;
            percolate(c);
            return;
        }
        // Key doesn't exist, but can fit in the top K
        else if (unlikely(size() < capacity()))
        {
            auto c = new Counter(key, increment, error, hash);
            push(c);
            return;
        }

        auto min = counter_list.back();
        const size_t alpha_mask = alpha_map.size() - 1;
        auto & alpha = alpha_map[hash & alpha_mask];
        /// Not heavy enough to displace the minimum yet: just accumulate in the bucket.
        if (alpha + increment < min->count)
        {
            alpha += increment;
            return;
        }

        // Erase the current minimum element
        /// Remember the evicted counter's count in its bucket, so a later re-insertion
        /// of that key starts from this estimate rather than from zero.
        alpha_map[min->hash & alpha_mask] = min->count;
        it = counter_map.find(min->key, min->hash);

        // Replace minimum with newly inserted element
        /// The evicted Counter object is reused in place; the bucket weight `alpha`
        /// becomes both the count base and the error bound of the new key.
        if (it != counter_map.end())
        {
            min->hash = hash;
            min->key = key;
            min->count = alpha + increment;
            min->error = alpha + error;
            percolate(min);

            it->second = min;
            it->first = key;
            counter_map.reinsert(it, hash);
        }
    }

    /*
     * Parallel Space Saving reduction and combine step from:
     *  https://arxiv.org/pdf/1401.0702.pdf
     */
    /// Merge another SpaceSaving sketch into this one. m1/m2 are the minimum counts
    /// of the two sketches (the worst-case count of any unmonitored key in each).
    void merge(const Self & rhs)
    {
        UInt64 m1 = 0;
        UInt64 m2 = 0;

        if (size() == capacity())
        {
            m1 = counter_list.back()->count;
        }

        if (rhs.size() == rhs.capacity())
        {
            m2 = rhs.counter_list.back()->count;
        }

        /*
         * Updated algorithm to mutate current table in place
         * without mutating rhs table or creating new one
         * in the first step we expect that no elements overlap
         * and in the second sweep we correct the error if they do.
         */
        if (m2 > 0)
        {
            for (auto counter : counter_list)
            {
                counter->count += m2;
                counter->error += m2;
            }
        }

        // The list is sorted in descending order, we have to scan in reverse
        for (auto counter : boost::adaptors::reverse(rhs.counter_list))
        {
            if (counter_map.find(counter->key) != counter_map.end())
            {
                // Subtract m2 previously added, guaranteed not negative
                insert(counter->key, counter->count - m2, counter->error - m2);
            }
            else
            {
                // Counters not monitored in S1
                insert(counter->key, counter->count + m1, counter->error + m1);
            }
        }
    }

    /// Return up to k counters with the highest estimated counts
    /// (counter_list is kept sorted in descending order).
    std::vector<Counter> topK(size_t k) const
    {
        std::vector<Counter> res;
        for (auto counter : counter_list)
        {
            res.push_back(*counter);
            if (res.size() == k)
                break;
        }
        return res;
    }

    /// Serialize all counters followed by the alpha_map buckets.
    void write(WriteBuffer & wb) const
    {
        writeVarUInt(size(), wb);
        for (auto counter : counter_list)
            counter->write(wb);

        for (auto alpha : alpha_map)
            writeVarUInt(alpha, wb);
    }

    /// Deserialize: replaces current contents. Hashes are recomputed from the keys.
    /// NOTE(review): assumes m_capacity matches the capacity used when writing,
    /// since exactly nextAlphaSize(m_capacity) alpha values are read back.
    void read(ReadBuffer & rb)
    {
        destroyElements();
        size_t count = 0;
        readVarUInt(count, rb);

        for (size_t i = 0; i < count; ++i)
        {
            auto counter = new Counter();
            counter->read(rb);
            counter->hash = counter_map.hash(counter->key);
            push(counter);
        }

        for (size_t i = 0; i < nextAlphaSize(m_capacity); ++i)
        {
            UInt64 alpha = 0;
            readVarUInt(alpha, rb);
            alpha_map.push_back(alpha);
        }
    }

protected:
    /// Append a newly allocated counter to the tail of the list (the minimum position),
    /// register it in the lookup map and bubble it up to its sorted place.
    void push(Counter * counter)
    {
        counter->slot = counter_list.size();
        counter_list.push_back(counter);
        counter_map[counter->key] = counter;
        percolate(counter);
    }

    // This is equivalent to one step of bubble sort
    /// Move the counter toward the front of counter_list while it outranks its neighbor,
    /// keeping the list sorted in descending order after a count increase.
    void percolate(Counter * counter)
    {
        while (counter->slot > 0)
        {
            auto next = counter_list[counter->slot - 1];
            if (*counter > *next)
            {
                std::swap(next->slot, counter->slot);
                std::swap(counter_list[next->slot], counter_list[counter->slot]);
            }
            else
                break;
        }
    }

private:
    /// Free all heap-allocated counters and reset all containers.
    void destroyElements()
    {
        for (auto counter : counter_list)
            delete counter;

        counter_map.clear();
        counter_list.clear();
        alpha_map.clear();
    }

    HashMap<HashKey, Counter *, Hash, Grower, Allocator> counter_map;  /// key -> counter, for O(1) presence test and lookup.
    std::vector<Counter *> counter_list;                               /// Counters sorted by count, descending; back() is the minimum.
    std::vector<UInt64> alpha_map;                                     /// Per-bucket accumulated weight of unmonitored keys (size is a power of two).
    size_t m_capacity;
};
};

View File

@ -6,14 +6,14 @@
#define STACK_TRACE_MAX_DEPTH 32
/// Позволяет получить стек-трейс
/// Lets you get a stacktrace
class StackTrace
{
public:
/// Стектрейс снимается в момент создания объекта
/// The stacktrace is captured when the object is created
StackTrace();
/// Вывести в строку
/// Print to string
std::string toString() const;
private:

View File

@ -19,15 +19,14 @@
namespace DB
{
namespace ErrorCodes
{
extern const int UNSUPPORTED_PARAMETER;
}
/** Варианты поиска подстроки в строке.
* В большинстве случаев, менее производительные, чем Volnitsky (см. Volnitsky.h).
/** Variants for searching a substring in a string.
* In most cases, performance is less than Volnitsky (see Volnitsky.h).
*/
@ -37,7 +36,7 @@ struct StringSearcherBase
static constexpr auto n = sizeof(__m128i);
const int page_size = getpagesize();
bool page_safe(const void * const ptr) const
bool pageSafe(const void * const ptr) const
{
return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
}
@ -55,7 +54,7 @@ class StringSearcher<false, false> : private StringSearcherBase
private:
using UTF8SequenceBuffer = UInt8[6];
/// string to be searched for
/// substring to be searched for
const UInt8 * const needle;
const std::size_t needle_size;
const UInt8 * const needle_end = needle + needle_size;
@ -135,8 +134,7 @@ public:
if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
throw DB::Exception{
"UTF8 sequences with different lowercase and uppercase lengths are not supported",
DB::ErrorCodes::UNSUPPORTED_PARAMETER
};
DB::ErrorCodes::UNSUPPORTED_PARAMETER};
cache_actual_len += src_len;
if (cache_actual_len < n)
@ -165,7 +163,7 @@ public:
static const Poco::UTF8Encoding utf8;
#if __SSE4_1__
if (page_safe(pos))
if (pageSafe(pos))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
@ -230,7 +228,7 @@ public:
while (haystack < haystack_end)
{
#if __SSE4_1__
if (haystack + n <= haystack_end && page_safe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
@ -249,7 +247,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
@ -377,7 +375,7 @@ public:
bool compare(const UInt8 * pos) const
{
#if __SSE4_1__
if (page_safe(pos))
if (pageSafe(pos))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
@ -429,7 +427,7 @@ public:
while (haystack < haystack_end)
{
#if __SSE4_1__
if (haystack + n <= haystack_end && page_safe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
@ -447,7 +445,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
@ -559,7 +557,7 @@ public:
bool compare(const UInt8 * pos) const
{
#if __SSE4_1__
if (page_safe(pos))
if (pageSafe(pos))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache);
@ -609,7 +607,7 @@ public:
while (haystack < haystack_end)
{
#if __SSE4_1__
if (haystack + n <= haystack_end && page_safe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
/// find first character
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
@ -627,7 +625,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
{
/// check for first 16 octets
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
@ -693,10 +691,10 @@ using UTF8CaseSensitiveStringSearcher = StringSearcher<true, false>;
using UTF8CaseInsensitiveStringSearcher = StringSearcher<false, false>;
/** Используют функции из libc.
* Имеет смысл использовать для коротких строк, когда требуется дешёвая инициализация.
* Нет варианта для регистронезависимого поиска UTF-8 строк.
* Требуется, чтобы за концом строк был нулевой байт.
/** Uses functions from libc.
* It makes sense to use only with short haystacks when cheap initialization is required.
* There is no option for case-insensitive search for UTF-8 strings.
* It is required that strings are zero-terminated.
*/
struct LibCASCIICaseSensitiveStringSearcher

View File

@ -1,11 +1,13 @@
#pragma once
#include <time.h> /// nanosleep
#include <mutex>
#include <memory>
#include <Common/Stopwatch.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
namespace DB
{
@ -15,12 +17,12 @@ namespace ErrorCodes
}
/** Позволяет ограничить скорость чего либо (в штуках в секунду) с помощью sleep.
* Особенности работы:
* - считается только средняя скорость, от момента первого вызова функции add;
* если были периоды с низкой скоростью, то в течение промежутка времени после них, скорость будет выше;
/** Allows you to limit the speed of something (in entities per second) using sleep.
* Specifics of work:
* - only the average speed is considered, from the moment of the first call of `add` function;
* if there were periods with low speed, then during some time after them, the speed will be higher;
*
* Также позволяет задать ограничение на максимальное количество в штуках. При превышении кидается исключение.
* Also allows you to set a limit on the maximum number of entities. If exceeded, an exception will be thrown.
*/
class Throttler
{
@ -56,7 +58,7 @@ public:
if (max_speed)
{
/// Сколько должно было бы пройти времени, если бы скорость была равна max_speed.
/// How much time to wait for the average speed to become `max_speed`.
UInt64 desired_ns = new_count * 1000000000 / max_speed;
if (desired_ns > elapsed_ns)
@ -65,7 +67,7 @@ public:
timespec sleep_ts;
sleep_ts.tv_sec = sleep_ns / 1000000000;
sleep_ts.tv_nsec = sleep_ns % 1000000000;
nanosleep(&sleep_ts, nullptr); /// NOTE Завершается раньше в случае сигнала. Это считается нормальным.
nanosleep(&sleep_ts, nullptr); /// NOTE Returns early in case of a signal. This is considered normal.
}
}
}
@ -73,7 +75,7 @@ public:
private:
size_t max_speed = 0;
size_t count = 0;
size_t limit = 0; /// 0 - не ограничено.
size_t limit = 0; /// 0 - not limited.
const char * limit_exceeded_exception_message = nullptr;
Stopwatch watch {CLOCK_MONOTONIC_COARSE};
std::mutex mutex;

View File

@ -4,12 +4,16 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#if __SSE4_2__
#include <nmmintrin.h>
#endif
namespace DB
{
/// Для агрегации по SipHash или конкатенации нескольких полей.
/// For aggregation by SipHash or concatenation of several fields.
struct UInt128
{
/// Suppress gcc7 warnings: 'prev_key.DB::UInt128::first' may be used uninitialized in this function
@ -42,22 +46,22 @@ struct UInt128Hash
}
};
#if defined(__x86_64__)
#if __SSE4_2__
struct UInt128HashCRC32
{
size_t operator()(UInt128 x) const
{
UInt64 crc = -1ULL;
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.first));
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.second));
crc = _mm_crc32_u64(crc, x.first);
crc = _mm_crc32_u64(crc, x.second);
return crc;
}
};
#else
/// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
/// On other platforms we do not use CRC32. NOTE This can be confusing.
struct UInt128HashCRC32 : public UInt128Hash {};
#endif
@ -71,7 +75,7 @@ inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
/** Используется при агрегации, для укладки большого количества ключей постоянной длины в хэш-таблицу.
/** Used for aggregation, for putting a large number of constant-length keys in a hash table.
*/
struct UInt256
{
@ -91,7 +95,7 @@ struct UInt256
{
return a == rhs.a && b == rhs.b && c == rhs.c && d == rhs.d;
/* Так получается не лучше.
/* So it's no better.
return 0xFFFF == _mm_movemask_epi8(_mm_and_si128(
_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(&a)),
@ -122,30 +126,30 @@ struct UInt256Hash
}
};
#if defined(__x86_64__)
#if __SSE4_2__
struct UInt256HashCRC32
{
size_t operator()(UInt256 x) const
{
UInt64 crc = -1ULL;
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.a));
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.b));
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.c));
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.d));
crc = _mm_crc32_u64(crc, x.a);
crc = _mm_crc32_u64(crc, x.b);
crc = _mm_crc32_u64(crc, x.c);
crc = _mm_crc32_u64(crc, x.d);
return crc;
}
};
#else
/// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
/// We do not need to use CRC32 on other platforms. NOTE This can be confusing.
struct UInt256HashCRC32
{
DefaultHash<UInt64> hash64;
size_t operator()(UInt256 x) const
{
/// TODO Это не оптимально.
/// TODO This is not optimal.
return hash64(hash64(hash64(hash64(x.a) ^ x.b) ^ x.c) ^ x.d);
}
};

View File

@ -8,7 +8,7 @@
#define UNICODE_BAR_CHAR_SIZE (strlen("█"))
/** Позволяет нарисовать unicode-art полоску, ширина которой отображается с разрешением 1/8 символа.
/** Allows you to draw a unicode-art bar whose width is displayed with a resolution of 1/8 character.
*/
@ -32,7 +32,7 @@ namespace UnicodeBar
return ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE;
}
/// В dst должно быть место для barWidthInBytes(width) символов и завершающего нуля.
/// In `dst` there must be a space for barWidthInBytes(width) characters and a trailing zero.
inline void render(double width, char * dst)
{
size_t floor_width = floor(width);

View File

@ -16,23 +16,23 @@ class Context;
namespace VirtualColumnUtils
{
/// Вычислить минимальный числовый суффикс, который надо добавить к строке, чтобы она не присутствовала в множестве
/// Calculate the minimum numeric suffix to add to the string so that it is not present in the set
String chooseSuffix(const NamesAndTypesList & columns, const String & name);
/// Вычислить минимальный общий числовый суффикс, который надо добавить к каждой строке,
/// чтобы ни одна не присутствовала в множестве.
/// Calculate the minimum total numeric suffix to add to each string,
/// so that none is present in the set.
String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector<String> & names);
/// Добавляет в селект запрос секцию select column_name as value
/// Например select _port as 9000.
/// Adds to the select query section `select column_name as value`
/// For example select _port as 9000.
void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value);
/// Оставить в блоке только строки, подходящие под секции WHERE и PREWHERE запроса.
/// Рассматриваются только элементы внешней конъюнкции, зависящие только от столбцов, присутствующих в блоке.
/// Возвращает true, если хоть одна строка выброшена.
/// Leave in the block only the rows that fit under the WHERE clause and the PREWHERE clause of the query.
/// Only elements of the outer conjunction are considered, depending only on the columns present in the block.
/// Returns true if at least one row is discarded.
bool filterBlockWithQuery(ASTPtr query, Block & block, const Context & context);
/// Извлечь из входного потока множество значений столбца name
/// Extract from the input stream a set of `name` column values
template<typename T1>
std::multiset<T1> extractSingleValueFromBlock(const Block & block, const String & name)
{

View File

@ -2,6 +2,7 @@
#include <Common/StringSearcher.h>
#include <Common/StringUtils.h>
#include <Core/Types.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Unicode.h>
#include <ext/range.hpp>
@ -9,24 +10,24 @@
#include <string.h>
/** Поиск подстроки в строке по алгоритму Вольницкого:
/** Search for a substring in a string by Volnitsky's algorithm
* http://volnitsky.com/project/str_search/
*
* haystack и needle могут содержать нулевые байты.
* `haystack` and `needle` can contain zero bytes.
*
* Алгоритм:
* - при слишком маленьком или слишком большом размере needle, или слишком маленьком haystack, используем std::search или memchr;
* - при инициализации, заполняем open-addressing linear probing хэш-таблицу вида:
* хэш от биграммы из needle -> позиция этой биграммы в needle + 1.
* (прибавлена единица только чтобы отличить смещение ноль от пустой ячейки)
* - в хэш-таблице ключи не хранятся, хранятся только значения;
* - биграммы могут быть вставлены несколько раз, если они встречаются в needle несколько раз;
* - при поиске, берём из haystack биграмму, которая должна соответствовать последней биграмме needle (сравниваем с конца);
* - ищем её в хэш-таблице, если нашли - достаём смещение из хэш-таблицы и сравниваем строку побайтово;
* - если сравнить не получилось - проверяем следующую ячейку хэш-таблицы из цепочки разрешения коллизий;
* - если не нашли, пропускаем в haystack почти размер needle байт;
* Algorithm:
* - if the `needle` is too small or too large, or too small `haystack`, use std::search or memchr;
* - when initializing, fill in an open-addressing linear probing hash table of the form
* hash from the bigram of needle -> the position of this bigram in needle + 1.
* (one is added only to distinguish zero offset from an empty cell)
* - the keys are not stored in the hash table, only the values are stored;
* - bigrams can be inserted several times if they occur in the needle several times;
* - when searching, take from haystack bigram, which should correspond to the last bigram of needle (comparing from the end);
* - look for it in the hash table, if found - get the offset from the hash table and compare the string bytewise;
* - if it did not match, we check the next cell of the hash table from the collision resolution chain;
* - if not found, skip to haystack almost the size of the needle bytes;
*
* Используется невыровненный доступ к памяти.
* Unaligned memory access is used.
*/
@ -39,34 +40,35 @@ template <typename CRTP>
class VolnitskyBase
{
protected:
using offset_t = uint8_t; /// Смещение в needle. Для основного алгоритма, длина needle не должна быть больше 255.
using ngram_t = uint16_t; /// n-грамма (2 байта).
using Offset = UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
using Ngram = UInt16; /// n-gram (2 bytes).
const UInt8 * const needle;
const size_t needle_size;
const UInt8 * const needle_end = needle + needle_size;
/// На сколько двигаемся, если n-грамма из haystack не нашлась в хэш-таблице.
const size_t step = needle_size - sizeof(ngram_t) + 1;
/// For how long we move, if the n-gram from haystack is not found in the hash table.
const size_t step = needle_size - sizeof(Ngram) + 1;
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
static const size_t hash_size = 64 * 1024; /// Помещается в L2-кэш.
offset_t hash[hash_size]; /// Хэш-таблица.
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
static const size_t hash_size = 64 * 1024; /// Fits into the L2 cache (of common Intel CPUs).
Offset hash[hash_size]; /// Hash table.
/// min haystack size to use main algorithm instead of fallback
static constexpr auto min_haystack_size_for_algorithm = 20000;
const bool fallback; /// Нужно ли использовать fallback алгоритм.
const bool fallback; /// Do we need to use the fallback algorithm.
public:
/** haystack_size_hint - ожидаемый суммарный размер haystack при вызовах search. Можно не указывать.
* Если указать его достаточно маленьким, то будет использован fallback алгоритм,
* так как считается, что тратить время на инициализацию хэш-таблицы не имеет смысла.
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
* If you specify it small enough, the fallback algorithm will be used,
* since it is considered that it's useless to waste time initializing the hash table.
*/
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
: needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
fallback{
needle_size < 2 * sizeof(ngram_t) || needle_size >= std::numeric_limits<offset_t>::max() ||
(haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
needle_size < 2 * sizeof(Ngram)
|| needle_size >= std::numeric_limits<Offset>::max()
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
{
if (fallback)
return;
@ -74,12 +76,12 @@ public:
memset(hash, 0, sizeof(hash));
/// int is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
for (auto i = static_cast<int>(needle_size - sizeof(ngram_t)); i >= 0; --i)
for (auto i = static_cast<int>(needle_size - sizeof(Ngram)); i >= 0; --i)
self().putNGram(this->needle + i, i + 1, this->needle);
}
/// Если не найдено - возвращается конец haystack.
/// If not found, the end of the haystack is returned.
const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
{
if (needle_size == 0)
@ -90,15 +92,15 @@ public:
if (needle_size == 1 || fallback || haystack_size <= needle_size)
return self().search_fallback(haystack, haystack_end);
/// Будем "прикладывать" needle к haystack и сравнивать n-грам из конца needle.
const auto * pos = haystack + needle_size - sizeof(ngram_t);
/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
const auto * pos = haystack + needle_size - sizeof(Ngram);
for (; pos <= haystack_end - needle_size; pos += step)
{
/// Смотрим все ячейки хэш-таблицы, которые могут соответствовать n-граму из haystack.
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
for (size_t cell_num = toNGram(pos) % hash_size; hash[cell_num];
cell_num = (cell_num + 1) % hash_size)
{
/// Когда нашли - сравниваем побайтово, используя смещение из хэш-таблицы.
/// When found - compare bytewise, using the offset from the hash table.
const auto res = pos - (hash[cell_num] - 1);
if (self().compare(res))
@ -106,7 +108,7 @@ public:
}
}
/// Оставшийся хвостик.
/// The remaining tail.
return self().search_fallback(pos - step + 1, haystack_end);
}
@ -119,18 +121,18 @@ protected:
CRTP & self() { return static_cast<CRTP &>(*this); }
const CRTP & self() const { return const_cast<VolnitskyBase *>(this)->self(); }
static const ngram_t & toNGram(const UInt8 * const pos)
static const Ngram & toNGram(const UInt8 * const pos)
{
return *reinterpret_cast<const ngram_t *>(pos);
return *reinterpret_cast<const Ngram *>(pos);
}
void putNGramBase(const ngram_t ngram, const int offset)
void putNGramBase(const Ngram ngram, const int offset)
{
/// Кладём смещение для n-грама в соответствующую ему ячейку или ближайшую свободную.
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
size_t cell_num = ngram % hash_size;
while (hash[cell_num])
cell_num = (cell_num + 1) % hash_size; /// Поиск следующей свободной ячейки.
cell_num = (cell_num + 1) % hash_size; /// Search for the next free cell.
hash[cell_num] = offset;
}
@ -145,7 +147,7 @@ protected:
union
{
ngram_t n;
Ngram n;
Chars chars;
};
@ -260,7 +262,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
union
{
ngram_t n;
Ngram n;
Chars chars;
};
@ -272,15 +274,17 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
}
else
{
/** n-грам (в случае n = 2)
* может быть целиком расположен внутри одной кодовой точки,
* либо пересекаться с двумя кодовыми точками.
/** n-gram (in the case of n = 2)
* can be entirely located within one code point,
* or intersect with two code points.
*
* В первом случае, нужно рассматривать до двух альтернатив - эта кодовая точка в верхнем и нижнем регистре,
* а во втором случае - до четырёх альтернатив - фрагменты двух кодовых точек во всех комбинациях регистров.
* In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
* and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
*
* При этом не учитывается зависимость перевода между регистрами от локали (пример - турецкие Ii)
* а также композиция/декомпозиция и другие особенности.
* It does not take into account the dependence of the case-transformation from the locale (for example - Turkish `Ii`)
* as well as composition / decomposition and other features.
*
* It also does not work if characters with lower and upper cases are represented by different number of bytes or code points.
*/
using Seq = UInt8[6];
@ -302,12 +306,12 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
putNGramBase(n, offset);
else
{
/// where is the given ngram in respect to UTF-8 sequence start?
/// where is the given ngram in respect to the start of UTF-8 sequence?
const auto seq_ngram_offset = pos - seq_pos;
Seq seq;
/// put ngram from lowercase
/// put ngram for lowercase
utf8.convert(l_u32, seq, sizeof(seq));
chars.c0 = seq[seq_ngram_offset];
chars.c1 = seq[seq_ngram_offset + 1];
@ -326,7 +330,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
/// first sequence may start before u_pos if it is not ASCII
auto first_seq_pos = pos;
UTF8::syncBackward(first_seq_pos, begin);
/// where is the given ngram in respect to the first UTF-8 sequence start?
/// where is the given ngram in respect to the start of first UTF-8 sequence?
const auto seq_ngram_offset = pos - first_seq_pos;
const auto first_u32 = utf8.convert(first_seq_pos);

View File

@ -4,14 +4,14 @@
#include <IO/WriteBuffer.h>
/// Выводит переданный размер в байтах в виде 123.45 GiB.
/// Displays the passed size in bytes as 123.45 GiB.
void formatReadableSizeWithBinarySuffix(double value, DB::WriteBuffer & out, int precision = 2);
std::string formatReadableSizeWithBinarySuffix(double value, int precision = 2);
/// Выводит переданный размер в байтах в виде 132.55 GB.
/// Displays the passed size in bytes as 132.55 GB.
void formatReadableSizeWithDecimalSuffix(double value, DB::WriteBuffer & out, int precision = 2);
std::string formatReadableSizeWithDecimalSuffix(double value, int precision = 2);
/// Выводит число в виде 123.45 billion.
/// Prints the number as 123.45 billion.
void formatReadableQuantity(double value, DB::WriteBuffer & out, int precision = 2);
std::string formatReadableQuantity(double value, int precision = 2);

View File

@ -2,7 +2,7 @@
#include <string>
/** Получить FQDN для локального сервера путём DNS-резолвинга hostname - аналогично вызову утилиты hostname с флагом -f.
* Если не получилось отрезолвить, то вернуть hostname - аналогично вызову утилиты hostname без флагов или uname -n.
/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the 'hostname' tool with the -f flag.
* If it does not work, return hostname - similar to calling 'hostname' without flags or 'uname -n'.
*/
const std::string & getFQDNOrHostName();

View File

@ -39,10 +39,7 @@ std::ostream & operator<<(std::ostream & stream, const DB::IDataType & what)
std::ostream & operator<<(std::ostream & stream, const DB::IStorage & what)
{
stream << "IStorage(name = " << what.getName() << ", tableName = " << what.getTableName() << ") {"
// TODO: uncomment #if and fix me:
#if !defined(__APPLE__)
<< what.getColumnsList()
#endif
<< what.getColumnsList().toString()
<< "}";
// isRemote supportsSampling supportsFinal supportsPrewhere supportsParallelReplicas
return stream;
@ -64,10 +61,7 @@ std::ostream & operator<<(std::ostream & stream, const DB::IFunction & what)
std::ostream & operator<<(std::ostream & stream, const DB::Block & what)
{
stream << "Block("
// TODO: uncomment #if and fix me:
#if !defined(__APPLE__)
<< "data = " << what.getColumns()
#endif
<< "size = " << what.getColumns().size()
<< ")";
return stream;
}

View File

@ -12,13 +12,13 @@ namespace Poco
namespace DB
{
/** Позволяет проверить, похож ли адрес на localhost.
* Цель этой проверки обычно состоит в том, чтобы сделать предположение,
* что при хождении на этот адрес через интернет, мы попадём на себя.
* Следует иметь ввиду, что эта проверка делается неточно:
* - адрес просто сравнивается с адресами сетевых интерфейсов;
* - для каждого сетевого интерфейса берётся только первый адрес;
* - не проверяются правила маршрутизации, которые влияют, через какой сетевой интерфейс мы пойдём на заданный адрес.
/** Lets you check if the address is similar to `localhost`.
* The purpose of this check is usually to make an assumption,
* that when we go to this address via the Internet, we'll get to ourselves.
* Please note that this check is not accurate:
* - the address is simply compared to the addresses of the network interfaces;
* - only the first address is taken for each network interface;
 * - the routing rules that determine which network interface is used to reach the specified address are not checked.
*/
bool isLocalAddress(const Poco::Net::SocketAddress & address);

View File

@ -3,14 +3,14 @@
#include <Poco/Path.h>
/** Создаёт локальный (в той же точке монтирования) бэкап (снэпшот) директории.
/** Creates a local (at the same mount point) backup (snapshot) directory.
*
* В указанной destination-директории создаёт hard link-и на все файлы source-директории
* и во всех вложенных директориях, с сохранением (созданием) всех относительных путей;
* а также делает chown, снимая разрешение на запись.
 * In the specified destination directory, it creates hard links to all the files of the source directory
 * and of all nested directories, preserving (recreating) all relative paths;
 * it also does `chown`, removing write permission.
*
* Это защищает данные от случайного удаления или модификации,
* и предназначено для использования как простое средство защиты от человеческой или программной ошибки,
* но не от аппаратного сбоя.
* This protects data from accidental deletion or modification,
* and is intended to be used as a simple means of protection against a human or program error,
* but not from a hardware failure.
*/
void localBackup(Poco::Path source_path, Poco::Path destination_path);

View File

@ -1,7 +1,7 @@
#pragma once
/** Устанавливает имя потока (максимальная длина - 15 байт),
* которое будет видно в ps, gdb, /proc,
* для удобства наблюдений и отладки.
/** Sets the thread name (maximum length is 15 bytes),
* which will be visible in ps, gdb, /proc,
* for convenience of observation and debugging.
*/
void setThreadName(const char * name);

View File

@ -54,3 +54,6 @@ target_link_libraries (thread_pool dbms)
add_executable (array_cache array_cache.cpp)
target_link_libraries (array_cache dbms)
add_executable (space_saving space_saving.cpp)
target_link_libraries (space_saving dbms)

View File

@ -0,0 +1,101 @@
#include <iostream>
#include <iomanip>
#include <string>
#include <map>
#include <Core/StringRef.h>
#include <Common/SpaceSaving.h>
/** Smoke test for DB::SpaceSaving — the Space-Saving top-K heavy-hitter sketch
  * (Metwally et al.). Exercises three things:
  *   1. biased insertion of int keys and exact retention of the heavy hitters;
  *   2. merging of two sketches;
  *   3. the same biased-insertion scenario with string keys (lossier hashing,
  *      so only a lower bound on the counts is checked).
  * Exits via abort() on any mismatch so that the test harness notices failure.
  */
int main(int argc, char ** argv)
{
    {
        using Cont = DB::SpaceSaving<int>;
        Cont first(10);

        /* Biased insertion: each of 0..199 once, plus keys 0..4 forty extra times. */
        for (int i = 0; i < 200; ++i) {
            first.insert(i);
            int k = i % 5; // Bias towards 0-4
            first.insert(k);
        }

        /* The biased keys 0..4 must be retained with exact counts: 1 + 40 = 41. */
        std::map<int, UInt64> expect;
        for (int i = 0; i < 5; ++i) {
            expect[i] = 41;
        }

        for (auto x : first.topK(5)) {
            if (expect[x.key] != x.count) {
                std::cerr << "key: " << x.key << " value: " << x.count << " expected: " << expect[x.key] << std::endl;
                abort(); // fail loudly instead of silently logging (consistent with the string-key section)
            } else {
                std::cout << "key: " << x.key << " value: " << x.count << std::endl;
            }
            expect.erase(x.key);
        }

        if (!expect.empty()) {
            std::cerr << "expected to find all heavy hitters" << std::endl;
            abort();
        }

        /* Create another sketch and test merging.
         * NOTE: the insertions must go into `second` (previously they went into
         * `first`, which left `second` empty and made the merge a no-op). */
        Cont second(10);
        for (int i = 0; i < 200; ++i) {
            second.insert(i);
        }

        /* After the merge, keys 0..4 gain one more occurrence each: 41 + 1 = 42. */
        for (int i = 0; i < 5; ++i) {
            expect[i] = 42;
        }

        first.merge(second);

        for (auto x : first.topK(5)) {
            if (expect[x.key] != x.count) {
                std::cerr << "key: " << x.key << " value: " << x.count << " expected: " << expect[x.key] << std::endl;
                abort();
            } else {
                std::cout << "key: " << x.key << " value: " << x.count << std::endl;
            }
            expect.erase(x.key);
        }
    }

    {
        /* Same test with string keys. */
        using Cont = DB::SpaceSaving<std::string, StringRef, StringRefHash>;
        Cont cont(10);

        for (int i = 0; i < 400; ++i) {
            cont.insert(std::to_string(i));
            cont.insert(std::to_string(i % 5)); // Bias towards 0-4
        }

        // The hashing is going to be more lossy, so only require
        // at least ~10% of the true count for each heavy hitter.
        std::map<std::string, UInt64> expect;
        for (int i = 0; i < 5; ++i) {
            expect[std::to_string(i)] = 38;
        }

        for (auto x : cont.topK(5)) {
            auto key = x.key;
            if (x.count < expect[key]) {
                std::cerr << "key: " << key << " value: " << x.count << " expected: " << expect[key] << std::endl;
                abort();
            } else {
                std::cout << "key: " << key << " value: " << x.count << std::endl;
            }
            expect.erase(key);
        }

        if (!expect.empty()) {
            std::cerr << "expected to find all heavy hitters" << std::endl;
            abort();
        }
    }

    return 0;
}

View File

@ -16,9 +16,9 @@ namespace DB
}
/** Проверяет совпадение типа путём сравнения typeid-ов.
* Проверяется точное совпадение типа. То есть, cast в предка будет неуспешным.
* В остальном, ведёт себя как dynamic_cast.
/** Checks the type by comparing typeids.
  * An exact type match is required; that is, casting to an ancestor type will fail.
  * Otherwise, it behaves like dynamic_cast.
*/
template <typename To, typename From>
typename std::enable_if<std::is_reference<To>::value, To>::type typeid_cast(From & from)

Some files were not shown because too many files have changed in this diff Show More