Merge remote-tracking branch 'origin/master' into add_ttl_option_for_syslog

2024-11-21 15:12:02 +00:00 · 2020-11-27 10:02:57 +08:00 · 2020-11-27 10:02:57 +08:00 · 17e83cbb8d
commit 17e83cbb8d
parent 4cc6594cb8 7f75062d38
136 changed files with 5600 additions and 3444 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -475,9 +475,6 @@ find_contrib_lib(cityhash)

 find_contrib_lib(farmhash)

-set (USE_INTERNAL_BTRIE_LIBRARY ON CACHE INTERNAL "")
-find_contrib_lib(btrie)
-
 if (ENABLE_TESTS)
    include (cmake/find/gtest.cmake)
 endif ()
--- a/cmake/Modules/Findbtrie.cmake
+++ b/cmake/Modules/Findbtrie.cmake
@ -1,44 +0,0 @@
-# - Try to find btrie headers and libraries.
-#
-# Usage of this module as follows:
-#
-#     find_package(btrie)
-#
-# Variables used by this module, they can change the default behaviour and need
-# to be set before calling find_package:
-#
-#  BTRIE_ROOT_DIR Set this variable to the root installation of
-#                    btrie if the module has problems finding
-#                    the proper installation path.
-#
-# Variables defined by this module:
-#
-#  BTRIE_FOUND             System has btrie libs/headers
-#  BTRIE_LIBRARIES         The btrie library/libraries
-#  BTRIE_INCLUDE_DIR       The location of btrie headers
-
-find_path(BTRIE_ROOT_DIR
-    NAMES include/btrie.h
-)
-
-find_library(BTRIE_LIBRARIES
-    NAMES btrie
-    PATHS ${BTRIE_ROOT_DIR}/lib ${BTRIE_LIBRARIES_PATHS}
-)
-
-find_path(BTRIE_INCLUDE_DIR
-    NAMES btrie.h
-    PATHS ${BTRIE_ROOT_DIR}/include ${BTRIE_INCLUDE_PATHS}
-)
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(btrie DEFAULT_MSG
-    BTRIE_LIBRARIES
-    BTRIE_INCLUDE_DIR
-)
-
-mark_as_advanced(
-    BTRIE_ROOT_DIR
-    BTRIE_LIBRARIES
-    BTRIE_INCLUDE_DIR
-)
--- a/cmake/find/avro.cmake
+++ b/cmake/find/avro.cmake
@ -1,3 +1,4 @@
+# Needed when using Apache Avro serialization format
 option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES})

 if (NOT ENABLE_AVRO)
--- a/cmake/find/ssl.cmake
+++ b/cmake/find/ssl.cmake
@ -1,3 +1,5 @@
+# Needed when securely connecting to an external server, e.g.
+# clickhouse-client --host ... --secure
 option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})

 if(NOT ENABLE_SSL)
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -66,10 +66,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
    add_subdirectory (libfarmhash)
 endif ()

-if (USE_INTERNAL_BTRIE_LIBRARY)
-    add_subdirectory (libbtrie)
-endif ()
-
 if (USE_INTERNAL_ZLIB_LIBRARY)
    set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
    set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")
--- a/contrib/libbtrie/CMakeLists.txt
+++ b/contrib/libbtrie/CMakeLists.txt
@ -1,6 +0,0 @@
-add_library(btrie
-    src/btrie.c
-    include/btrie.h
-)
-
-target_include_directories (btrie SYSTEM PUBLIC include)
--- a/contrib/libbtrie/LICENSE
+++ b/contrib/libbtrie/LICENSE
@ -1,23 +0,0 @@
-Copyright (c) 2013, CobbLiu
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-  Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-  Redistributions in binary form must reproduce the above copyright notice, this
-  list of conditions and the following disclaimer in the documentation and/or
-  other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/contrib/libbtrie/include/btrie.h
+++ b/contrib/libbtrie/include/btrie.h
@ -1,160 +0,0 @@
-#pragma once
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include <stdlib.h>
-#include <stdint.h>
-
-/**
- * In btrie, each leaf means one bit in ip tree.
- * Left means 0, and right means 1.
- */
-
-#define BTRIE_NULL   (uintptr_t) -1
-
-#if !defined(BTRIE_MAX_PAGES)
-/// 54 ip per page. 8 bytes memory per page when empty
-#define BTRIE_MAX_PAGES    1024 * 2048  /// 128m ips , ~16mb ram when empty
-// #define BTRIE_MAX_PAGES    1024 * 65535 /// 4g ips (whole ipv4), ~512mb ram when empty
-#endif
-
-typedef struct btrie_node_s btrie_node_t;
-
-struct btrie_node_s {
-    btrie_node_t  *right;
-    btrie_node_t  *left;
-    btrie_node_t  *parent;
-    uintptr_t         value;
-};
-
-
-typedef struct btrie_s {
-    btrie_node_t  *root;
-
-    btrie_node_t  *free;    /* free list of btrie */
-    char             *start;
-    size_t            size;
-
-    /*
-     * memory pool.
-     * memory management(esp free) will be so easy by using this facility.
-     */
-    char             *pools[BTRIE_MAX_PAGES];
-    size_t            len;
-} btrie_t;
-
-
-/**
- * Create an empty btrie
- *
- * @Return:
- * An ip radix_tree created.
- * NULL if creation failed.
- */
-
-btrie_t *btrie_create();
-
-/**
- * Destroy the ip radix_tree
- *
- * @Return:
- * OK if deletion succeed.
- * ERROR if error occurs while deleting.
- */
-int btrie_destroy(btrie_t *tree);
-
-/**
- * Count the nodes in the radix tree.
- */
-size_t btrie_count(btrie_t *tree);
-
-/**
- * Return the allocated number of bytes.
- */
-size_t btrie_allocated(btrie_t *tree);
-
-
-/**
- * Add an ipv4 into btrie
- *
- * @Args:
- * key: ip address
- * mask: key's mask
- * value: value of this IP, may be NULL.
- *
- * @Return:
- * OK for success.
- * ERROR for failure.
- */
-int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
-    uintptr_t value);
-
-
-/**
- * Delete an ipv4 from btrie
- *
- * @Args:
- *
- * @Return:
- * OK for success.
- * ERROR for failure.
- */
-int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
-
-
-/**
- * Find an ipv4 from btrie
- *
-
- * @Args:
- *
- * @Return:
- * Value if succeed.
- * NULL if failed.
- */
-uintptr_t btrie_find(btrie_t *tree, uint32_t key);
-
-
-/**
- * Add an ipv6 into btrie
- *
- * @Args:
- * key: ip address
- * mask: key's mask
- * value: value of this IP, may be NULL.
- *
- * @Return:
- * OK for success.
- * ERROR for failure.
- */
-int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
-          uintptr_t value);
-
-/**
- * Delete an ipv6 from btrie
- *
- * @Args:
- *
- * @Return:
- * OK for success.
- * ERROR for failure.
- */
-int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
-
-/**
- * Find an ipv6 from btrie
- *
-
- * @Args:
- *
- * @Return:
- * Value if succeed.
- * NULL if failed.
- */
-uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
-
-#if defined (__cplusplus)
-}
-#endif
--- a/contrib/libbtrie/src/btrie.c
+++ b/contrib/libbtrie/src/btrie.c
@ -1,460 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include <btrie.h>
-
-#define PAGE_SIZE 4096
-
-
-static btrie_node_t *
-btrie_alloc(btrie_t *tree)
-{
-    btrie_node_t  *p;
-
-    if (tree->free) {
-        p = tree->free;
-        tree->free = tree->free->right;
-        return p;
-    }
-
-    if (tree->size < sizeof(btrie_node_t)) {
-        tree->start = (char *) calloc(sizeof(char), PAGE_SIZE);
-        if (tree->start == NULL) {
-            return NULL;
-        }
-
-        tree->pools[tree->len++] = tree->start;
-        tree->size = PAGE_SIZE;
-    }
-
-    p = (btrie_node_t *) tree->start;
-
-    tree->start += sizeof(btrie_node_t);
-    tree->size -= sizeof(btrie_node_t);
-
-    return p;
-}
-
-
-btrie_t *
-btrie_create()
-{
-    btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
-    if (tree == NULL) {
-        return NULL;
-    }
-
-    tree->free  = NULL;
-    tree->start = NULL;
-    tree->size  = 0;
-    memset(tree->pools, 0, sizeof(btrie_t *) * BTRIE_MAX_PAGES);
-    tree->len = 0;
-
-    tree->root = btrie_alloc(tree);
-    if (tree->root == NULL) {
-        return NULL;
-    }
-
-    tree->root->right  = NULL;
-    tree->root->left   = NULL;
-    tree->root->parent = NULL;
-    tree->root->value  = BTRIE_NULL;
-
-    return tree;
-}
-
-static size_t
-subtree_weight(btrie_node_t *node)
-{
-    size_t weight = 1;
-    if (node->left) {
-        weight += subtree_weight(node->left);
-    }
-    if (node->right) {
-        weight += subtree_weight(node->right);
-    }
-    return weight;
-}
-
-size_t
-btrie_count(btrie_t *tree)
-{
-    if (tree->root == NULL) {
-        return 0;
-    }
-
-    return subtree_weight(tree->root);
-}
-
-size_t
-btrie_allocated(btrie_t *tree)
-{
-    return tree->len * PAGE_SIZE;
-}
-
-
-int
-btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
-    uintptr_t value)
-{
-    uint32_t          bit;
-    btrie_node_t  *node, *next;
-
-    bit = 0x80000000;
-
-    node = tree->root;
-    next = tree->root;
-
-    while (bit & mask) {
-        if (key & bit) {
-            next = node->right;
-
-        } else {
-            next = node->left;
-        }
-
-        if (next == NULL) {
-            break;
-        }
-
-        bit >>= 1;
-        node = next;
-    }
-
-    if (next) {
-        if (node->value != BTRIE_NULL) {
-            return -1;
-        }
-
-        node->value = value;
-        return 0;
-    }
-
-    while (bit & mask) {
-        next = btrie_alloc(tree);
-        if (next == NULL) {
-            return -1;
-        }
-
-        next->right = NULL;
-        next->left = NULL;
-        next->parent = node;
-        next->value = BTRIE_NULL;
-
-        if (key & bit) {
-            node->right = next;
-
-        } else {
-            node->left = next;
-        }
-
-        bit >>= 1;
-        node = next;
-    }
-
-    node->value = value;
-
-    return 0;
-}
-
-
-int
-btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
-{
-    uint32_t          bit;
-    btrie_node_t  *node;
-
-    bit = 0x80000000;
-    node = tree->root;
-
-    while (node && (bit & mask)) {
-        if (key & bit) {
-            node = node->right;
-
-        } else {
-            node = node->left;
-        }
-
-        bit >>= 1;
-    }
-
-    if (node == NULL) {
-        return -1;
-    }
-
-    if (node->right || node->left) {
-        if (node->value != BTRIE_NULL) {
-            node->value = BTRIE_NULL;
-            return 0;
-        }
-
-        return -1;
-    }
-
-    for ( ;; ) {
-        if (node->parent->right == node) {
-            node->parent->right = NULL;
-
-        } else {
-            node->parent->left = NULL;
-        }
-
-        node->right = tree->free;
-        tree->free = node;
-
-        node = node->parent;
-
-        if (node->right || node->left) {
-            break;
-        }
-
-        if (node->value != BTRIE_NULL) {
-            break;
-        }
-
-        if (node->parent == NULL) {
-            break;
-        }
-    }
-
-    return 0;
-}
-
-
-uintptr_t
-btrie_find(btrie_t *tree, uint32_t key)
-{
-    uint32_t          bit;
-    uintptr_t         value;
-    btrie_node_t  *node;
-
-    bit = 0x80000000;
-    value = BTRIE_NULL;
-    node = tree->root;
-
-    while (node) {
-        if (node->value != BTRIE_NULL) {
-            value = node->value;
-        }
-
-        if (key & bit) {
-            node = node->right;
-
-        } else {
-            node = node->left;
-        }
-
-        bit >>= 1;
-    }
-
-    return value;
-}
-
-
-int
-btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
-    uintptr_t value)
-{
-    uint8_t             bit;
-    unsigned int        i;
-    btrie_node_t  *node, *next;
-
-    i = 0;
-    bit = 0x80;
-
-    node = tree->root;
-    next = tree->root;
-
-    while (bit & mask[i]) {
-        if (key[i] & bit) {
-            next = node->right;
-
-        } else {
-            next = node->left;
-        }
-
-        if (next == NULL) {
-            break;
-        }
-
-        bit >>= 1;
-        node = next;
-
-        if (bit == 0) {
-            if (++i == 16) {
-                break;
-            }
-
-            bit = 0x80;
-        }
-    }
-
-    if (next) {
-        if (node->value != BTRIE_NULL) {
-            return -1;
-        }
-
-        node->value = value;
-        return 0;
-    }
-
-    while (bit & mask[i]) {
-        next = btrie_alloc(tree);
-        if (next == NULL) {
-            return -1;
-        }
-
-        next->right = NULL;
-        next->left = NULL;
-        next->parent = node;
-        next->value = BTRIE_NULL;
-
-        if (key[i] & bit) {
-            node->right = next;
-
-        } else {
-            node->left = next;
-        }
-
-        bit >>= 1;
-        node = next;
-
-        if (bit == 0) {
-            if (++i == 16) {
-                break;
-            }
-
-            bit = 0x80;
-        }
-    }
-
-    node->value = value;
-
-    return 0;
-}
-
-
-int
-btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
-{
-    uint8_t             bit;
-    unsigned int        i;
-    btrie_node_t  *node;
-
-    i = 0;
-    bit = 0x80;
-    node = tree->root;
-
-    while (node && (bit & mask[i])) {
-        if (key[i] & bit) {
-            node = node->right;
-
-        } else {
-            node = node->left;
-        }
-
-        bit >>= 1;
-
-        if (bit == 0) {
-            if (++i == 16) {
-                break;
-            }
-
-            bit = 0x80;
-        }
-    }
-
-    if (node == NULL) {
-        return -1;
-    }
-
-    if (node->right || node->left) {
-        if (node->value != BTRIE_NULL) {
-            node->value = BTRIE_NULL;
-            return 0;
-        }
-
-        return -1;
-    }
-
-    for ( ;; ) {
-        if (node->parent->right == node) {
-            node->parent->right = NULL;
-
-        } else {
-            node->parent->left = NULL;
-        }
-
-        node->right = tree->free;
-        tree->free = node;
-
-        node = node->parent;
-
-        if (node->right || node->left) {
-            break;
-        }
-
-        if (node->value != BTRIE_NULL) {
-            break;
-        }
-
-        if (node->parent == NULL) {
-            break;
-        }
-    }
-
-    return 0;
-}
-
-
-uintptr_t
-btrie_find_a6(btrie_t *tree, const uint8_t *key)
-{
-    uint8_t             bit;
-    uintptr_t          value;
-    unsigned int        i;
-    btrie_node_t  *node;
-
-    i = 0;
-    bit = 0x80;
-    value = BTRIE_NULL;
-    node = tree->root;
-
-    while (node) {
-        if (node->value != BTRIE_NULL) {
-            value = node->value;
-        }
-
-        if (key[i] & bit) {
-            node = node->right;
-
-        } else {
-            node = node->left;
-        }
-
-        bit >>= 1;
-
-        if (bit == 0) {
-            i++;
-            bit = 0x80;
-        }
-    }
-
-    return value;
-}
-
-
-int
-btrie_destroy(btrie_t *tree)
-{
-    size_t    i;
-
-
-    /* free memory pools */
-    for (i = 0; i < tree->len; i++) {
-        free(tree->pools[i]);
-    }
-
-    free(tree);
-
-    return 0;
-}
--- a/contrib/libbtrie/test/test_btrie.c
+++ b/contrib/libbtrie/test/test_btrie.c
@ -1,103 +0,0 @@
-#include <stdio.h>
-#include <btrie.h>
-
-int main()
-{
-    btrie_t *it;
-    int            ret;
-
-    uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
-    uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
-    uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};
-
-    it = btrie_create();
-    if (it == NULL) {
-        printf("create error!\n");
-        return 0;
-    }
-
-    //add 101.45.69.50/16
-    ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
-    if (ret != 0) {
-        printf("insert 1 error.\n");
-        goto error;
-    }
-
-    //add 10.45.69.50/16
-    ret = btrie_insert(it, 170738994, 0xffff0000, 1);
-    if (ret != 0) {
-        printf("insert 2 error.\n");
-        goto error;
-    }
-
-    //add 10.45.79.50/16
-    ret = btrie_insert(it, 170741554, 0xffff0000, 1);
-    if (ret == 0) {
-        printf("insert 3 error.\n");
-        goto error;
-    }
-
-    //add 102.45.79.50/24
-    ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
-    if (ret != 0) {
-        printf("insert 4 error.\n");
-        goto error;
-    }
-
-    ret = btrie_find(it, 170741554);
-    if (ret == 1) {
-        printf("test case 1 passed\n");
-    } else {
-        printf("test case 1 error\n");
-    }
-
-    ret = btrie_find(it, 170786817);
-    if (ret != 1) {
-        printf("test case 2 passed\n");
-    } else {
-        printf("test case 2 error\n");
-    }
-
-    ret = btrie_delete(it, 1714245426, 0xffffff00);
-    if (ret != 0) {
-        printf("delete 1 error\n");
-        goto error;
-    }
-    
-    ret = btrie_find(it, 1714245426);
-    if (ret != 1) {
-        printf("test case 3 passed\n");
-    } else {
-        printf("test case 3 error\n");
-    }
-
-    //add dead:beef::/32
-    ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
-    if (ret != 0) {
-        printf("insert 5 error\n");
-        goto error;
-    }
-
-    ret = btrie_find_a6(it, ip_v6);
-    if (ret == 1) {
-        printf("test case 4 passed\n");
-    } else {
-        printf("test case 4 error\n");
-    }
-
-    // insert 4m ips
-    for (size_t ip = 1; ip < 1024 * 1024 * 4; ++ip) {
-        ret = btrie_insert(it, ip, 0xffffffff, 1);
-        if (ret != 0) {
-            printf("insert 5 error (%d) (%zu) .\n", ret, ip);
-            goto error;
-        }
-    }
-
-    return 0;
-    
- error:
-    btrie_destroy(it);
-    printf("test failed\n");
-    return 1;
-}
--- a/debian/clickhouse-server.init
+++ b/debian/clickhouse-server.init
@ -67,26 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
 fi


-is_running()
-{
-    pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null
-}
-
-
-wait_for_done()
-{
-    timeout=$1
-    attempts=0
-    while is_running; do
-        attempts=$(($attempts + 1))
-        if [ -n "$timeout" ] && [ $attempts -gt $timeout ]; then
-            return 1
-        fi
-        sleep 1
-    done
-}
-
-
 die()
 {
    echo $1 >&2
@ -105,49 +85,7 @@ check_config()

 initdb()
 {
-    if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
-        CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
-        if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then
-            die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}";
-        fi
-        echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
-    else
-        CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR
-    fi
-
-    if ! getent passwd ${CLICKHOUSE_USER} >/dev/null; then
-        echo "Can't chown to non-existing user ${CLICKHOUSE_USER}"
-        return
-    fi
-    if ! getent group ${CLICKHOUSE_GROUP} >/dev/null; then
-        echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}"
-        return
-    fi
-
-    if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then
-        echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]"
-    fi
-
-    if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then
-        if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") = "/" ]; then
-            echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown."
-        else
-            if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then
-                echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
-                mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
-            fi
-
-            echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
-            chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
-        fi
-    fi
-
-    if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then
-        echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
-        chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/*
-        echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]"
-        chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
-    fi
+    ${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
 }


@ -171,17 +109,7 @@ restart()

 forcestop()
 {
-    local EXIT_STATUS
-    EXIT_STATUS=0
-
-    echo -n "Stop forcefully $PROGRAM service: "
-
-    kill -KILL $(cat "$CLICKHOUSE_PIDFILE")
-
-    wait_for_done
-
-    echo "DONE"
-    return $EXIT_STATUS
+    ${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
 }


@ -261,16 +189,16 @@ main()
        service_or_func restart
        ;;
    condstart)
-        is_running || service_or_func start
+        service_or_func start
        ;;
    condstop)
-        is_running && service_or_func stop
+        service_or_func stop
        ;;
    condrestart)
-        is_running && service_or_func restart
+        service_or_func restart
        ;;
    condreload)
-        is_running && service_or_func restart
+        service_or_func restart
        ;;
    initdb)
        initdb
@ -293,17 +221,7 @@ main()

 status()
 {
-    if is_running; then
-        echo "$PROGRAM service is running"
-        exit 0
-    else
-        if is_cron_disabled; then
-            echo "$PROGRAM service is stopped";
-        else
-            echo "$PROGRAM: process unexpectedly terminated"
-        fi
-        exit 3
-    fi
+    ${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
 }


--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@ -288,6 +288,7 @@ TESTS_TO_SKIP=(

    # Require python libraries like scipy, pandas and numpy
    01322_ttest_scipy
+    01561_mann_whitney_scipy

    01545_system_errors
    # Checks system.errors
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@ -415,4 +415,4 @@ if not args.keep_created_tables and not args.use_existing_tables:
            c.execute(q)
            print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')

-reportStageEnd('drop-2')
+    reportStageEnd('drop-2')
--- a/docs/_includes/cmake_in_clickhouse_header.md
+++ b/docs/_includes/cmake_in_clickhouse_header.md
@ -13,9 +13,9 @@ cmake .. \
    -DENABLE_CLICKHOUSE_SERVER=ON \
    -DENABLE_CLICKHOUSE_CLIENT=ON \
    -DUSE_STATIC_LIBRARIES=OFF \
-    -DCLICKHOUSE_SPLIT_BINARY=ON \
    -DSPLIT_SHARED_LIBRARIES=ON \
    -DENABLE_LIBRARIES=OFF \
+    -DUSE_UNWIND=ON \
    -DENABLE_UTILS=OFF \
    -DENABLE_TESTS=OFF
 ```
--- a/docs/en/development/contrib.md
+++ b/docs/en/development/contrib.md
@ -17,7 +17,6 @@ toc_title: Third-Party Libraries Used
 | googletest          | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE)                                                             |
 | h3                  | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE)                                                                         |
 | hyperscan           | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE)                                                               |
-| libbtrie            | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE)                                        |
 | libcxxabi           | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT)                          |
 | libdivide           | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt)                                           |
 | libgsasl            | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE)                             |
--- a/docs/en/sql-reference/aggregate-functions/reference/avg.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md
@ -4,4 +4,59 @@ toc_priority: 5

 # avg {#agg_function-avg}

-Calculates the average. Only works for numbers. The result is always Float64.
+Calculates the arithmetic mean.
+
+**Syntax**
+
+``` sql
+avg(x)
+```
+
+**Parameter**
+
+-   `x` — Values.
+
+`x` must be
+[Integer](../../../sql-reference/data-types/int-uint.md),
+[floating-point](../../../sql-reference/data-types/float.md), or 
+[Decimal](../../../sql-reference/data-types/decimal.md).
+
+**Returned value**
+
+- `NaN` if the supplied parameter is empty.
+- Mean otherwise.
+
+**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT avg(x) FROM values('x Int8', 0, 1, 2, 3, 4, 5)
+```
+
+Result:
+
+``` text
+┌─avg(x)─┐
+│    2.5 │
+└────────┘
+```
+
+**Example**
+
+Query:
+
+``` sql
+CREATE table test (t UInt8) ENGINE = Memory;
+SELECT avg(t) FROM test
+```
+
+Result:
+
+``` text
+┌─avg(t)─┐
+│    nan │
+└────────┘
+```
--- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md
@ -14,17 +14,21 @@ avgWeighted(x, weight)

 **Parameters**

-   `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
-   `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
+-   `x` — Values.
+-   `weight` — Weights of the values.

-Type of `x` and `weight` must be the same.
+`x` and `weight` must both be
+[Integer](../../../sql-reference/data-types/int-uint.md),
+[floating-point](../../../sql-reference/data-types/float.md), or 
+[Decimal](../../../sql-reference/data-types/decimal.md),
+but may have different types.

 **Returned value**

-   Weighted mean.
-   `NaN`. If all the weights are equal to 0.
+-   `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty.
+-   Weighted mean otherwise.

-Type: [Float64](../../../sql-reference/data-types/float.md).
+**Return type** is always [Float64](../../../sql-reference/data-types/float.md).

 **Example**

@ -42,3 +46,54 @@ Result:
 │                      8 │
 └────────────────────────┘
 ```
+
+**Example**
+
+Query:
+
+``` sql
+SELECT avgWeighted(x, w)
+FROM values('x Int8, w Float64', (4, 1), (1, 0), (10, 2))
+```
+
+Result:
+
+``` text
+┌─avgWeighted(x, w)─┐
+│                 8 │
+└───────────────────┘
+```
+
+**Example**
+
+Query:
+
+``` sql
+SELECT avgWeighted(x, w)
+FROM values('x Int8, w Int8', (0, 0), (1, 0), (10, 0))
+```
+
+Result:
+
+``` text
+┌─avgWeighted(x, w)─┐
+│               nan │
+└───────────────────┘
+```
+
+**Example**
+
+Query:
+
+``` sql
+CREATE table test (t UInt8) ENGINE = Memory;
+SELECT avgWeighted(t, t) FROM test
+```
+
+Result:
+
+``` text
+┌─avgWeighted(t, t)─┐
+│               nan │
+└───────────────────┘
+```
--- a/docs/es/development/contrib.md
+++ b/docs/es/development/contrib.md
@ -19,7 +19,6 @@ toc_title: Bibliotecas de terceros utilizadas
 | Más información    | [Licencia de 3 cláusulas BSD](https://github.com/google/googletest/blob/master/LICENSE)                                                          |
 | H3                 | [Licencia Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE)                                                                            |
 | hyperscan          | [Licencia de 3 cláusulas BSD](https://github.com/intel/hyperscan/blob/master/LICENSE)                                                            |
-| libbtrie           | [Licencia BSD de 2 cláusulas](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE)                                     |
 | libcxxabi          | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT)                              |
 | libdivide          | [Licencia Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt)                                              |
 | libgsasl           | [Información adicional](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE)                     |
--- a/docs/fa/development/contrib.md
+++ b/docs/fa/development/contrib.md
@ -21,7 +21,6 @@ toc_title: "\u06A9\u062A\u0627\u0628\u062E\u0627\u0646\u0647 \u0647\u0627\u06CC
 | googletest      | [لیسانس 3 بند](https://github.com/google/googletest/blob/master/LICENSE)                                                                    |
 | اچ 3            | [نمایی مجوز 2.0](https://github.com/uber/h3/blob/master/LICENSE)                                                                            |
 | hyperscan       | [لیسانس 3 بند](https://github.com/intel/hyperscan/blob/master/LICENSE)                                                                      |
-| لیبتری          | [لیسانس 2 بند](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE)                                               |
 | شکنجه نوجوان    | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT)                         |
 | لیبیدوید        | [مجوز زلب](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt)                                              |
 | نوشیدن شراب     | [الجی پی ال2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE)                        |
--- a/docs/fr/development/contrib.md
+++ b/docs/fr/development/contrib.md
@ -19,7 +19,6 @@ toc_title: "Biblioth\xE8ques Tierces Utilis\xE9es"
 | googletest           | [Licence BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE)                                                               |
 | h3                   | [Licence Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE)                                                                           |
 | hyperscan            | [Licence BSD 3-Clause](https://github.com/intel/hyperscan/blob/master/LICENSE)                                                                 |
-| libbtrie             | [Licence BSD 2-Clause](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE)                                          |
 | libcxxabi            | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT)                            |
 | libdivide            | [Licence Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt)                                             |
 | libgsasl             | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE)                               |
--- a/docs/ja/development/contrib.md
+++ b/docs/ja/development/contrib.md
@ -20,7 +20,6 @@ toc_title: "\u30B5\u30FC\u30C9\u30D1\u30FC\u30C6\u30A3\u88FD\u30E9\u30A4\u30D6\u
 | googletest         | [BSD3条項ライセンス](https://github.com/google/googletest/blob/master/LICENSE)                                                              |
 | h3                 | [Apacheライセンス2.0](https://github.com/uber/h3/blob/master/LICENSE)                                                                       |
 | hyperscan          | [BSD3条項ライセンス](https://github.com/intel/hyperscan/blob/master/LICENSE)                                                                |
-| libbtrie           | [BSD2条項ライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE)                                         |
 | libcxxabi          | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT)                         |
 | libdivide          | [Zlibライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt)                                        |
 | libgsasl           | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE)                            |
--- a/docs/ru/development/contrib.md
+++ b/docs/ru/development/contrib.md
@ -18,7 +18,6 @@ toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u
 | googletest          | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE)                                                             |
 | h3                  | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE)                                                                         |
 | hyperscan           | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE)                                                               |
-| libbtrie            | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE)                                        |
 | libcxxabi           | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT)                          |
 | libdivide           | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt)                                           |
 | libgsasl            | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE)                             |
--- a/docs/tr/development/contrib.md
+++ b/docs/tr/development/contrib.md
@ -19,7 +19,6 @@ toc_title: "Kullan\u0131lan \xDC\xE7\xFCnc\xFC Taraf K\xFCt\xFCphaneleri"
 | googletest          | [BSD 3-Clause Lisansı](https://github.com/google/googletest/blob/master/LICENSE)                                                                       |
 | h33                 | [Apache Lic 2.0ense 2.0](https://github.com/uber/h3/blob/master/LICENSE)                                                                               |
 | hyperscan           | [BSD 3-Clause Lisansı](https://github.com/intel/hyperscan/blob/master/LICENSE)                                                                         |
-| libbtrie            | [BSD 2-Clause Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE)                                                  |
 | libcxxabi           | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT)                                    |
 | libdivide           | [Zlib Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt)                                                     |
 | libgsasl            | [LGPL v2. 1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE)                                      |
--- a/docs/zh/development/contrib.md
+++ b/docs/zh/development/contrib.md
@ -11,7 +11,6 @@
 | FastMemcpy         | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE)                                             |
 | googletest         | [BSD3-条款许可](https://github.com/google/googletest/blob/master/LICENSE)                                                           |
 | 超扫描             | [BSD3-条款许可](https://github.com/intel/hyperscan/blob/master/LICENSE)                                                             |
-| libbtrie           | [BSD2-条款许可](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE)                                      |
 | libcxxabi          | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT)                 |
 | libdivide          | [Zlib许可证](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt)                                    |
 | libgsasl           | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE)                    |
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@ -43,13 +43,81 @@ else ()
        ${ENABLE_CLICKHOUSE_ALL})
 endif ()

+message(STATUS "ClickHouse modes:")
+
+if (NOT ENABLE_CLICKHOUSE_SERVER)
+    message(WARNING "ClickHouse server mode is not going to be built.")
+else()
+    message(STATUS "Server mode: ON")
+endif()
+
+if (NOT ENABLE_CLICKHOUSE_CLIENT)
+    # Report whether the client mode will be built.
+    # The warning text is split into adjacent quoted arguments: message() joins them
+    # without a separator, so no line break or indentation leaks into the output.
+    message(WARNING "ClickHouse client mode is not going to be built. "
+                    "You won't be able to connect to the server and run tests")
+else()
+    message(STATUS "Client mode: ON")
+endif()
+
+if (ENABLE_CLICKHOUSE_LOCAL)
+    message(STATUS "Local mode: ON")
+else()
+    message(STATUS "Local mode: OFF")
+endif()
+
+if (ENABLE_CLICKHOUSE_BENCHMARK)
+    message(STATUS "Benchmark mode: ON")
+else()
+    message(STATUS "Benchmark mode: OFF")
+endif()
+
+if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
+    message(STATUS "Extract from config mode: ON")
+else()
+    message(STATUS "Extract from config mode: OFF")
+endif()
+
+if (ENABLE_CLICKHOUSE_COMPRESSOR)
+    message(STATUS "Compressor mode: ON")
+else()
+    message(STATUS "Compressor mode: OFF")
+endif()
+
+if (ENABLE_CLICKHOUSE_COPIER)
+    message(STATUS "Copier mode: ON")
+else()
+    message(STATUS "Copier mode: OFF")
+endif()
+
+if (ENABLE_CLICKHOUSE_FORMAT)
+    message(STATUS "Format mode: ON")
+else()
+    message(STATUS "Format mode: OFF")
+endif()
+
+if (ENABLE_CLICKHOUSE_OBFUSCATOR)
+    message(STATUS "Obfuscator mode: ON")
+else()
+    message(STATUS "Obfuscator mode: OFF")
+endif()
+
+if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
+    message(STATUS "ODBC bridge mode: ON")
+else()
+    message(STATUS "ODBC bridge mode: OFF")
+endif()
+
+if (ENABLE_CLICKHOUSE_INSTALL)
+    message(STATUS "ClickHouse install: ON")
+else()
+    message(STATUS "ClickHouse install: OFF")
+endif()
+
 if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES))
    set(CLICKHOUSE_ONE_SHARED ON)
 endif()

 configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h)

-
 macro(clickhouse_target_link_split_lib target name)
    if(NOT CLICKHOUSE_ONE_SHARED)
        target_link_libraries(${target} PRIVATE clickhouse-${name}-lib)
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@ -21,6 +21,7 @@
 #include <IO/WriteBufferFromFileDescriptor.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/WriteBufferFromFile.h>
+#include <IO/MMapReadBufferFromFile.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/copyData.h>
 #include <IO/Operators.h>
@ -70,7 +71,7 @@ namespace po = boost::program_options;
 namespace fs = std::filesystem;


-auto executeScript(const std::string & command, bool throw_on_error = false)
+static auto executeScript(const std::string & command, bool throw_on_error = false)
 {
    auto sh = ShellCommand::execute(command);
    WriteBufferFromFileDescriptor wb_stdout(STDOUT_FILENO);
@ -87,7 +88,7 @@ auto executeScript(const std::string & command, bool throw_on_error = false)
        return sh->tryWait();
 }

-bool ask(std::string question)
+static bool ask(std::string question)
 {
    while (true)
    {
@ -104,6 +105,16 @@ bool ask(std::string question)
    }
 }

+/// Returns true if the files at path1 and path2 have identical contents.
+/// Opens both files with MMapReadBufferFromFile, compares the buffer sizes first
+/// and only then the bytes. Paths are taken by const reference as they are only read.
+static bool filesEqual(const std::string & path1, const std::string & path2)
+{
+    MMapReadBufferFromFile in1(path1, 0);
+    MMapReadBufferFromFile in2(path2, 0);
+
+    /// memcmp is faster than hashing and comparing hashes
+    return in1.buffer().size() == in2.buffer().size()
+        && 0 == memcmp(in1.buffer().begin(), in2.buffer().begin(), in1.buffer().size());
+}
+

 int mainEntryClickHouseInstall(int argc, char ** argv)
 {
@ -143,57 +154,89 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary from {}, file doesn't exist",
                            binary_self_path.string());

+        fs::path binary_self_canonical_path = fs::canonical(binary_self_path);
+
        /// Copy binary to the destination directory.

        /// TODO An option to link instead of copy - useful for developers.
-        /// TODO Check if the binary is the same.
-
-        size_t binary_size = fs::file_size(binary_self_path);

        fs::path prefix = fs::path(options["prefix"].as<std::string>());
        fs::path bin_dir = prefix / fs::path(options["binary-path"].as<std::string>());

-        size_t available_space = fs::space(bin_dir).available;
-        if (available_space < binary_size)
-            throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
-                bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
-
        fs::path main_bin_path = bin_dir / "clickhouse";
        fs::path main_bin_tmp_path = bin_dir / "clickhouse.new";
        fs::path main_bin_old_path = bin_dir / "clickhouse.old";

-        fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
+        size_t binary_size = fs::file_size(binary_self_path);

-        try
+        bool old_binary_exists = fs::exists(main_bin_path);
+        bool already_installed = false;
+
+        /// Check if the binary is the same file (already installed).
+        if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
        {
-            ReadBufferFromFile in(binary_self_path.string());
-            WriteBufferFromFile out(main_bin_tmp_path.string());
-            copyData(in, out);
-            out.sync();
-
-            if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
-                throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
-
-            out.finalize();
+            already_installed = true;
+            fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
        }
-        catch (const Exception & e)
+        /// Check if binary has the same content.
+        else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
        {
-            if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
-                std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
-            throw;
+            fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
+                main_bin_path.string());
+
+            if (filesEqual(binary_self_path.string(), main_bin_path.string()))
+            {
+                already_installed = true;
+                fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
+                    main_bin_path.string(), binary_self_canonical_path.string());
+            }
        }

-        if (fs::exists(main_bin_path))
+        if (already_installed)
        {
-            fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
-                       main_bin_path.string(), main_bin_old_path.string());
-
-            /// There is file exchange operation in Linux but it's not portable.
-            fs::rename(main_bin_path, main_bin_old_path);
+            if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
+                throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
        }
+        else
+        {
+            size_t available_space = fs::space(bin_dir).available;
+            if (available_space < binary_size)
+                throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
+                    bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));

-        fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
-        fs::rename(main_bin_tmp_path, main_bin_path);
+            fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
+
+            try
+            {
+                ReadBufferFromFile in(binary_self_path.string());
+                WriteBufferFromFile out(main_bin_tmp_path.string());
+                copyData(in, out);
+                out.sync();
+
+                if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
+                    throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
+
+                out.finalize();
+            }
+            catch (const Exception & e)
+            {
+                if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
+                    std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
+                throw;
+            }
+
+            if (old_binary_exists)
+            {
+                fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
+                        main_bin_path.string(), main_bin_old_path.string());
+
+                /// There is file exchange operation in Linux but it's not portable.
+                fs::rename(main_bin_path, main_bin_old_path);
+            }
+
+            fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
+            fs::rename(main_bin_tmp_path, main_bin_path);
+        }

        /// Create symlinks.

@ -401,8 +444,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
            ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));

            if (!configuration->getString("users.default.password", "").empty()
-                || configuration->getString("users.default.password_sha256_hex", "").empty()
-                || configuration->getString("users.default.password_double_sha1_hex", "").empty())
+                || !configuration->getString("users.default.password_sha256_hex", "").empty()
+                || !configuration->getString("users.default.password_double_sha1_hex", "").empty())
            {
                has_password_for_default_user = true;
            }
@ -576,7 +619,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
            " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
                " This is optional. Taskstats accounting will be disabled."
                " To enable taskstats accounting you may add the required capability later manually.\"",
-            "/tmp/test_setcap.sh", main_bin_path.string());
+            "/tmp/test_setcap.sh", fs::canonical(main_bin_path).string());
        fmt::print(" {}\n", command);
        executeScript(command);
 #endif
@ -597,10 +640,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
            }
        }

-        std::string maybe_sudo;
-        if (getuid() != 0)
-            maybe_sudo = "sudo ";
-
        std::string maybe_password;
        if (has_password_for_default_user)
            maybe_password = " --password";
@ -608,10 +647,19 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
        fmt::print(
            "\nClickHouse has been successfully installed.\n"
            "\nStart clickhouse-server with:\n"
-            " {}clickhouse start\n"
+            " sudo clickhouse start\n"
            "\nStart clickhouse-client with:\n"
            " clickhouse-client{}\n\n",
-            maybe_sudo, maybe_password);
+            maybe_password);
+    }
+    catch (const fs::filesystem_error &)
+    {
+        std::cerr << getCurrentExceptionMessage(false) << '\n';
+
+        if (getuid() != 0)
+            std::cerr << "\nRun with sudo.\n";
+
+        return getCurrentExceptionCode();
    }
    catch (...)
    {
@ -783,17 +831,20 @@ namespace
        return pid;
    }

-    int stop(const fs::path & pid_file)
+    int stop(const fs::path & pid_file, bool force)
    {
        UInt64 pid = isRunning(pid_file);

        if (!pid)
            return 0;

-        if (0 == kill(pid, 15)) /// Terminate
-            fmt::print("Sent termination signal.\n", pid);
+        int signal = force ? SIGKILL : SIGTERM;
+        const char * signal_name = force ? "kill" : "terminate";
+
+        if (0 == kill(pid, signal))
+            fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid);
        else
-            throwFromErrno("Cannot send termination signal", ErrorCodes::SYSTEM_ERROR);
+            throwFromErrno(fmt::format("Cannot send {} signal", signal_name), ErrorCodes::SYSTEM_ERROR);

        size_t try_num = 0;
        constexpr size_t num_tries = 60;
@ -869,6 +920,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
    desc.add_options()
        ("help,h", "produce help message")
        ("pid-path", po::value<std::string>()->default_value("/var/run/clickhouse-server"), "directory for pid file")
+        ("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
    ;

    po::variables_map options;
@ -887,7 +939,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
    {
        fs::path pid_file = fs::path(options["pid-path"].as<std::string>()) / "clickhouse-server.pid";

-        return stop(pid_file);
+        return stop(pid_file, options["force"].as<bool>());
    }
    catch (...)
    {
@ -940,6 +992,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
        ("config-path", po::value<std::string>()->default_value("/etc/clickhouse-server"), "directory with configs")
        ("pid-path", po::value<std::string>()->default_value("/var/run/clickhouse-server"), "directory for pid file")
        ("user", po::value<std::string>()->default_value("clickhouse"), "clickhouse user")
+        ("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
    ;

    po::variables_map options;
@ -962,7 +1015,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
        fs::path config = fs::path(options["config-path"].as<std::string>()) / "config.xml";
        fs::path pid_file = fs::path(options["pid-path"].as<std::string>()) / "clickhouse-server.pid";

-        if (int res = stop(pid_file))
+        if (int res = stop(pid_file, options["force"].as<bool>()))
            return res;
        return start(user, executable, config, pid_file);
    }
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -709,7 +709,7 @@


    <!-- Configuration of external dictionaries. See:
-         https://clickhouse.yandex/docs/en/dicts/external_dicts/
+         https://clickhouse.tech/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts
    -->
    <dictionaries_config>*_dictionary.xml</dictionaries_config>

--- a/src/AggregateFunctions/AggregateFunctionAvg.cpp
+++ b/src/AggregateFunctions/AggregateFunctionAvg.cpp
@ -1,3 +1,4 @@
+#include <memory>
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/AggregateFunctionAvg.h>
 #include <AggregateFunctions/Helpers.h>
@ -13,43 +14,37 @@ namespace ErrorCodes

 namespace
 {
-
-template <typename T>
-struct Avg
+bool allowType(const DataTypePtr& type) noexcept
 {
-    using FieldType = std::conditional_t<IsDecimalNumber<T>,
-                                        std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
-                                        NearestFieldType<T>>;
-    // using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
-    using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType, UInt64>>;
-};
-
-template <typename T>
-using AggregateFuncAvg = typename Avg<T>::Function;
+    const WhichDataType t(type);
+    return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
+}

 AggregateFunctionPtr createAggregateFunctionAvg(const std::string & name, const DataTypes & argument_types, const Array & parameters)
 {
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

-    AggregateFunctionPtr res;
-    DataTypePtr data_type = argument_types[0];
-    if (isDecimal(data_type))
-        res.reset(createWithDecimalType<AggregateFuncAvg>(*data_type, *data_type, argument_types));
-    else
-        res.reset(createWithNumericType<AggregateFuncAvg>(*data_type, argument_types));
+    const DataTypePtr& data_type = argument_types[0];
+
+    if (!allowType(data_type))
+        throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+    AggregateFunctionPtr res;
+
+    if (isDecimal(data_type))
+        res.reset(createWithDecimalType<AggregateFunctionAvg>(
+            *data_type, argument_types, getDecimalScale(*data_type)));
+    else
+        res.reset(createWithNumericType<AggregateFunctionAvg>(*data_type, argument_types));

-    if (!res)
-        throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name,
-                        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    return res;
 }
-
 }

 void registerAggregateFunctionAvg(AggregateFunctionFactory & factory)
 {
    factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::CaseInsensitive);
 }
-
 }
--- a/src/AggregateFunctions/AggregateFunctionAvg.h
+++ b/src/AggregateFunctions/AggregateFunctionAvg.h
@ -1,78 +1,102 @@
 #pragma once

+#include <type_traits>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
-
 #include <Columns/ColumnsNumber.h>
 #include <DataTypes/DataTypesDecimal.h>
 #include <DataTypes/DataTypesNumber.h>
-
 #include <AggregateFunctions/IAggregateFunction.h>
+#include "Core/DecimalFunctions.h"


 namespace DB
 {
-namespace ErrorCodes
-{
-}
+template <class T>
+using DecimalOrVectorCol = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;

-template <typename T, typename Denominator>
-struct AggregateFunctionAvgData
-{
-    using NumeratorType = T;
-    using DenominatorType = Denominator;
+template <class T> constexpr bool DecimalOrExtendedInt =
+    IsDecimalNumber<T>
+    || std::is_same_v<T, Int128>
+    || std::is_same_v<T, Int256>
+    || std::is_same_v<T, UInt128>
+    || std::is_same_v<T, UInt256>;

-    T numerator{0};
+/**
+ * Helper class to encapsulate values conversion for avg and avgWeighted.
+ */
+template <class Numerator, class Denominator>
+struct AvgFraction
+{
+    Numerator numerator{0};
    Denominator denominator{0};

-    template <typename ResultT>
-    ResultT NO_SANITIZE_UNDEFINED result() const
+    /// Allow division by zero as sometimes we need to return NaN.
+    /// Invoked only if either Numerator or Denominator is Decimal.
+    Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale) const
    {
-        if constexpr (std::is_floating_point_v<ResultT>)
-            if constexpr (std::numeric_limits<ResultT>::is_iec559)
-            {
-                if constexpr (is_big_int_v<Denominator>)
-                    return static_cast<ResultT>(numerator) / static_cast<ResultT>(denominator);
-                else
-                    return static_cast<ResultT>(numerator) / denominator; /// allow division by zero
-            }
+        if constexpr (IsDecimalNumber<Numerator> && IsDecimalNumber<Denominator>)
+        {
+            // According to the docs, num(S1) / denom(S2) would have scale S1

-        if (denominator == static_cast<Denominator>(0))
-            return static_cast<ResultT>(0);
+            if constexpr (std::is_same_v<Numerator, Decimal256> && std::is_same_v<Denominator, Decimal128>)
+                /// Special case: Decimal256 / Decimal128 is a compile error, because Decimal128 is parametrized
+                /// by __int128 rather than by a wide int, so the denominator is converted to Decimal256 first.
+                return DecimalUtils::convertTo<Float64>(
+                    numerator / (denominator.template convertTo<Decimal256>()), num_scale);
+            else
+                return DecimalUtils::convertTo<Float64>(numerator / denominator, num_scale);
+        }

-        if constexpr (std::is_same_v<T, Decimal256>)
-            return static_cast<ResultT>(numerator / static_cast<T>(denominator));
+        /// Numerator is always cast to Float64 to divide correctly if the denominator is not Float64.
+        Float64 num_converted;
+
+        if constexpr (IsDecimalNumber<Numerator>)
+            num_converted = DecimalUtils::convertTo<Float64>(numerator, num_scale);
        else
-            return static_cast<ResultT>(numerator / denominator);
+            num_converted = static_cast<Float64>(numerator); /// all other types, including extended integral.
+
+        std::conditional_t<DecimalOrExtendedInt<Denominator>,
+            Float64, Denominator> denom_converted;
+
+        if constexpr (IsDecimalNumber<Denominator>)
+            denom_converted = DecimalUtils::convertTo<Float64>(denominator, denom_scale);
+        else if constexpr (DecimalOrExtendedInt<Denominator>)
+            /// no way to divide Float64 and extended integral type without an explicit cast.
+            denom_converted = static_cast<Float64>(denominator);
+        else
+            denom_converted = denominator; /// can divide on float, no cast required.
+
+        return num_converted / denom_converted;
+    }
+
+    Float64 NO_SANITIZE_UNDEFINED divide() const
+    {
+        if constexpr (DecimalOrExtendedInt<Denominator>) /// if extended int
+            return static_cast<Float64>(numerator) / static_cast<Float64>(denominator);
+        else
+            return static_cast<Float64>(numerator) / denominator;
    }
 };

-/// Calculates arithmetic mean of numbers.
-template <typename T, typename Data, typename Derived>
-class AggregateFunctionAvgBase : public IAggregateFunctionDataHelper<Data, Derived>
+
+/**
+ * @tparam Derived When deriving from this class, use the child class name as in CRTP, e.g.
+ *         class Self : AggregateFunctionAvgBase<Numerator, Denominator, Self>.
+ */
+template <class Numerator, class Denominator, class Derived>
+class AggregateFunctionAvgBase : public
+        IAggregateFunctionDataHelper<AvgFraction<Numerator, Denominator>, Derived>
 {
 public:
-    using ResultType = std::conditional_t<IsDecimalNumber<T>, T, Float64>;
-    using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<T>, DataTypeNumber<Float64>>;
-    using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
-    using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<Float64>>;
+    using Fraction = AvgFraction<Numerator, Denominator>;
+    using Base = IAggregateFunctionDataHelper<Fraction, Derived>;

-    /// ctor for native types
-    AggregateFunctionAvgBase(const DataTypes & argument_types_) : IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(0) {}
+    explicit AggregateFunctionAvgBase(const DataTypes & argument_types_,
+        UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
+        : Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {}

-    /// ctor for Decimals
-    AggregateFunctionAvgBase(const IDataType & data_type, const DataTypes & argument_types_)
-        : IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(getDecimalScale(data_type))
-    {
-    }
-
-    DataTypePtr getReturnType() const override
-    {
-        if constexpr (IsDecimalNumber<T>)
-            return std::make_shared<ResultDataType>(ResultDataType::maxPrecision(), scale);
-        else
-            return std::make_shared<ResultDataType>();
-    }
+    DataTypePtr getReturnType() const final { return std::make_shared<DataTypeNumber<Float64>>(); }

    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
    {
@ -84,7 +108,7 @@ public:
    {
        writeBinary(this->data(place).numerator, buf);

-        if constexpr (std::is_unsigned_v<typename Data::DenominatorType>)
+        if constexpr (std::is_unsigned_v<Denominator>)
            writeVarUInt(this->data(place).denominator, buf);
        else /// Floating point denominator type can be used
            writeBinary(this->data(place).denominator, buf);
@ -94,7 +118,7 @@ public:
    {
        readBinary(this->data(place).numerator, buf);

-        if constexpr (std::is_unsigned_v<typename Data::DenominatorType>)
+        if constexpr (std::is_unsigned_v<Denominator>)
            readVarUInt(this->data(place).denominator, buf);
        else /// Floating point denominator type can be used
            readBinary(this->data(place).denominator, buf);
@ -102,29 +126,34 @@ public:

    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
    {
-        auto & column = static_cast<ColVecResult &>(to);
-        column.getData().push_back(this->data(place).template result<ResultType>());
+        if constexpr (IsDecimalNumber<Numerator> || IsDecimalNumber<Denominator>)
+            static_cast<ColumnVector<Float64> &>(to).getData().push_back(
+                this->data(place).divideIfAnyDecimal(num_scale, denom_scale));
+        else
+            static_cast<ColumnVector<Float64> &>(to).getData().push_back(this->data(place).divide());
    }
-
-protected:
-    UInt32 scale;
+private:
+    UInt32 num_scale;
+    UInt32 denom_scale;
 };

-template <typename T, typename Data>
-class AggregateFunctionAvg final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>
+template <class T>
+using AvgFieldType = std::conditional_t<IsDecimalNumber<T>,
+    std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
+    NearestFieldType<T>>;
+
+template <class T>
+class AggregateFunctionAvg final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>
 {
 public:
-    using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>::AggregateFunctionAvgBase;
+    using AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>::AggregateFunctionAvgBase;

-    using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const final
    {
-        const auto & column = static_cast<const ColVecType &>(*columns[0]);
-        this->data(place).numerator += column.getData()[row_num];
-        this->data(place).denominator += 1;
+        this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
+        ++this->data(place).denominator;
    }

-    String getName() const override { return "avg"; }
+    String getName() const final { return "avg"; }
 };
-
 }
--- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp
+++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp
@ -1,3 +1,5 @@
+#include <memory>
+#include <type_traits>
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/AggregateFunctionAvgWeighted.h>
 #include <AggregateFunctions/Helpers.h>
@ -13,47 +15,91 @@ namespace ErrorCodes

 namespace
 {
-
-template <typename T>
-struct AvgWeighted
+bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
 {
-    using FieldType = std::conditional_t<IsDecimalNumber<T>,
-                                         std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
-                                         NearestFieldType<T>>;
-    // using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
-    using Function = AggregateFunctionAvgWeighted<T, AggregateFunctionAvgData<FieldType, FieldType>>;
-};
+    const WhichDataType l_dt(left), r_dt(right);

-template <typename T>
-using AggregateFuncAvgWeighted = typename AvgWeighted<T>::Function;
+    constexpr auto allow = [](WhichDataType t)
+    {
+        return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
+    };
+
+    return allow(l_dt) && allow(r_dt);
+}
+
+#define AT_SWITCH(LINE) \
+    switch (which.idx) \
+    { \
+        LINE(Int8); LINE(Int16); LINE(Int32); LINE(Int64); LINE(Int128); LINE(Int256); \
+        LINE(UInt8); LINE(UInt16); LINE(UInt32); LINE(UInt64); LINE(UInt128); LINE(UInt256); \
+        LINE(Decimal32); LINE(Decimal64); LINE(Decimal128); LINE(Decimal256); \
+        LINE(Float32); LINE(Float64); \
+        default: return nullptr; \
+    }
+
+template <class First, class ... TArgs>
+static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
+{
+    const WhichDataType which(second_type);
+
+#define LINE(Type) \
+    case TypeIndex::Type:       return new AggregateFunctionAvgWeighted<First, Type>(std::forward<TArgs>(args)...)
+    AT_SWITCH(LINE)
+#undef LINE
+}
+
+// Not using helper functions because there are no templates for binary decimal/numeric function.
+template <class... TArgs>
+static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
+{
+    const WhichDataType which(first_type);
+
+#define LINE(Type) \
+    case TypeIndex::Type:       return create<Type, TArgs...>(second_type, std::forward<TArgs>(args)...)
+    AT_SWITCH(LINE)
+#undef LINE
+}

 AggregateFunctionPtr createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & argument_types, const Array & parameters)
 {
    assertNoParameters(name, parameters);
    assertBinary(name, argument_types);

-    AggregateFunctionPtr res;
    const auto data_type = static_cast<const DataTypePtr>(argument_types[0]);
    const auto data_type_weight = static_cast<const DataTypePtr>(argument_types[1]);
-    if (!data_type->equals(*data_type_weight))
-        throw Exception("Different types " + data_type->getName() + " and " + data_type_weight->getName() + " of arguments for aggregate function " + name,
-                        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-    if (isDecimal(data_type))
-        res.reset(createWithDecimalType<AggregateFuncAvgWeighted>(*data_type, *data_type, argument_types));
+
+    if (!allowTypes(data_type, data_type_weight))
+        throw Exception(
+            "Types " + data_type->getName() +
+            " and " + data_type_weight->getName() +
+            " are non-conforming as arguments for aggregate function " + name,
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+    AggregateFunctionPtr ptr;
+
+    const bool left_decimal = isDecimal(data_type);
+    const bool right_decimal = isDecimal(data_type_weight);
+
+    if (left_decimal && right_decimal)
+        ptr.reset(create(*data_type, *data_type_weight,
+            argument_types,
+            getDecimalScale(*data_type), getDecimalScale(*data_type_weight)));
+    else if (left_decimal)
+        ptr.reset(create(*data_type, *data_type_weight, argument_types,
+            getDecimalScale(*data_type)));
+    else if (right_decimal)
+        ptr.reset(create(*data_type, *data_type_weight, argument_types,
+            // numerator is not decimal, so its scale is 0
+            0, getDecimalScale(*data_type_weight)));
    else
-        res.reset(createWithNumericType<AggregateFuncAvgWeighted>(*data_type, argument_types));
+        ptr.reset(create(*data_type, *data_type_weight, argument_types));

-    if (!res)
-        throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
-                        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-    return res;
+    return ptr;
 }
-
 }

 void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory & factory)
 {
    factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted, AggregateFunctionFactory::CaseSensitive);
 }
-
 }
--- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h
+++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h
@ -1,26 +1,44 @@
 #pragma once

+#include <type_traits>
 #include <AggregateFunctions/AggregateFunctionAvg.h>

 namespace DB
 {
-template <typename T, typename Data>
-class AggregateFunctionAvgWeighted final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>
+template <class T>
+using AvgWeightedFieldType = std::conditional_t<IsDecimalNumber<T>,
+    std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
+    std::conditional_t<DecimalOrExtendedInt<T>,
+        Float64, // no way to do UInt128 * UInt128, better cast to Float64
+        NearestFieldType<T>>>;
+
+template <class T, class U>
+using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)),
+    AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>;
+
+template <class Value, class Weight>
+class AggregateFunctionAvgWeighted final :
+    public AggregateFunctionAvgBase<
+        MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>
 {
 public:
-    using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>::AggregateFunctionAvgBase;
+    using Base = AggregateFunctionAvgBase<
+        MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
+    using Base::Base;
+
+    using ValueT = MaxFieldType<Value, Weight>;

-    using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
-        const auto & values = static_cast<const ColVecType &>(*columns[0]);
-        const auto & weights = static_cast<const ColVecType &>(*columns[1]);
+        const auto& weights = static_cast<const DecimalOrVectorCol<Weight> &>(*columns[1]);

-        this->data(place).numerator += static_cast<typename Data::NumeratorType>(values.getData()[row_num]) * weights.getData()[row_num];
-        this->data(place).denominator += weights.getData()[row_num];
+        this->data(place).numerator += static_cast<ValueT>(
+            static_cast<const DecimalOrVectorCol<Value> &>(*columns[0]).getData()[row_num]) *
+            static_cast<ValueT>(weights.getData()[row_num]);
+
+        this->data(place).denominator += static_cast<AvgWeightedFieldType<Weight>>(weights.getData()[row_num]);
    }

    String getName() const override { return "avgWeighted"; }
 };
-
 }
--- a/src/AggregateFunctions/AggregateFunctionFactory.h
+++ b/src/AggregateFunctions/AggregateFunctionFactory.h
@ -21,7 +21,8 @@ class IDataType;
 using DataTypePtr = std::shared_ptr<const IDataType>;
 using DataTypes = std::vector<DataTypePtr>;

-/** Creator have arguments: name of aggregate function, types of arguments, values of parameters.
+/**
+ * The invoker has arguments: name of aggregate function, types of arguments, values of parameters.
 * Parameters are for "parametric" aggregate functions.
 * For example, in quantileWeighted(0.9)(x, weight), 0.9 is "parameter" and x, weight are "arguments".
 */
@ -87,7 +88,6 @@ private:

    std::optional<AggregateFunctionProperties> tryGetPropertiesImpl(const String & name) const;

-private:
    using AggregateFunctions = std::unordered_map<String, Value>;

    AggregateFunctions aggregate_functions;
--- a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp
@ -0,0 +1,37 @@
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/AggregateFunctionMannWhitney.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+#include "registerAggregateFunctions.h"
+#include <AggregateFunctions/Helpers.h>
+
+
+namespace DB
+{
+
+/// Error codes must be declared inside namespace DB: a declaration placed in the
+/// global namespace introduces an unrelated `::ErrorCodes::NOT_IMPLEMENTED` symbol
+/// instead of referring to `DB::ErrorCodes::NOT_IMPLEMENTED` used below.
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+namespace
+{
+
+/// Factory callback for `mannWhitneyUTest`.
+/// Requires exactly two arguments, both of a numeric type; `parameters` are forwarded
+/// unchanged to the AggregateFunctionMannWhitney constructor.
+/// Throws NOT_IMPLEMENTED when either argument is not a number.
+AggregateFunctionPtr createAggregateFunctionMannWhitneyUTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
+{
+    assertBinary(name, argument_types);
+
+    if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
+        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
+
+    return std::make_shared<AggregateFunctionMannWhitney>(argument_types, parameters);
+}
+
+}
+
+
+/// Registers the function in the factory under the name "mannWhitneyUTest".
+void registerAggregateFunctionMannWhitney(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("mannWhitneyUTest", createAggregateFunctionMannWhitneyUTest);
+}
+
+}
--- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h
+++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h
@ -0,0 +1,246 @@
+#pragma once
+
+#include <AggregateFunctions/IAggregateFunction.h>
+#include <AggregateFunctions/StatCommon.h>
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnTuple.h>
+#include <Common/assert_cast.h>
+#include <Common/FieldVisitors.h>
+#include <Common/PODArray_fwd.h>
+#include <common/types.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <limits>
+
+#include <DataTypes/DataTypeArray.h>
+
+#include <Common/ArenaAllocator.h>
+
+#include <iostream>
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int BAD_ARGUMENTS;
+}
+
+
+/// Aggregation state and result computation for the Mann-Whitney U test.
+/// The two samples are stored by the StatisticalSample base (accessed here as x / y / size_x / size_y).
+struct MannWhitneyData : public StatisticalSample<Float64, Float64>
+{
+    /*Since null hypothesis is "for randomly selected values X and Y from two populations,
+     *the probability of X being greater than Y is equal to the probability of Y being greater than X".
+     *Or "the distribution F of first sample equals to the distribution G of second sample".
+     *Then alternative for this hypothesis (H1) is "two-sided"(F != G), "less"(F < G), "greater" (F > G). */
+    enum class Alternative
+    {
+        TwoSided,
+        Less,
+        Greater
+    };
+
+    /// Returns the pair {u2, p_value} for the requested alternative hypothesis.
+    /// `continuity_correction` adds 0.5 to the mean of the U statistic.
+    /// The behaviour equals to the similar function from scipy.
+    /// https://github.com/scipy/scipy/blob/ab9e9f17e0b7b2d618c4d4d8402cd4c0c200d6c0/scipy/stats/stats.py#L6978
+    std::pair<Float64, Float64> getResult(Alternative alternative, bool continuity_correction)
+    {
+        ConcatenatedSamples both(this->x, this->y);
+        RanksArray ranks;
+        Float64 tie_correction;
+
+        /// Compute ranks according to both samples.
+        std::tie(ranks, tie_correction) = computeRanksAndTieCorrection(both);
+
+        const Float64 n1 = this->size_x;
+        const Float64 n2 = this->size_y;
+
+        /// Sum of the ranks that belong to the first sample (it occupies the first n1 positions).
+        Float64 r1 = 0;
+        for (size_t i = 0; i < n1; ++i)
+            r1 += ranks[i];
+
+        const Float64 u1 = n1 * n2 + (n1 * (n1 + 1.)) / 2. - r1;
+        const Float64 u2 = n1 * n2 - u1;
+
+        /// The distribution of U-statistic under null hypothesis H0  is symmetric with respect to meanrank.
+        const Float64 meanrank = n1 * n2 /2. + 0.5 * continuity_correction;
+        const Float64 sd = std::sqrt(tie_correction * n1 * n2 * (n1 + n2 + 1) / 12.0);
+
+        Float64 u = 0;
+        if (alternative == Alternative::TwoSided)
+            /// There is no difference which u_i to take as u, because z will be differ only in sign and we take std::abs() from it.
+            u = std::max(u1, u2);
+        else if (alternative == Alternative::Less)
+            u = u1;
+        else if (alternative == Alternative::Greater)
+            u = u2;
+
+        Float64 z = (u - meanrank) / sd;
+        if (alternative == Alternative::TwoSided)
+            z = std::abs(z);
+
+        /// In fact cdf is a probability function, so it is integral of density from (-inf, z].
+        /// But since standard normal distribution is symmetric, cdf(0) = 0.5 and we have to compute integral from [0, z].
+        const Float64 cdf = integrateSimpson(0, z, [] (Float64 t) { return std::pow(M_E, -0.5 * t * t) / std::sqrt(2 * M_PI);});
+
+        Float64 p_value = 0;
+        if (alternative == Alternative::TwoSided)
+            p_value = 1 - 2 * cdf;
+        else
+            p_value = 0.5 - cdf;
+
+        return {u2, p_value};
+    }
+
+private:
+    using Sample = typename StatisticalSample<Float64, Float64>::SampleX;
+
+    /// We need to compute ranks according to all samples. Use this class to avoid extra copy and memory allocation.
+    /// It presents `first` followed by `second` as one read-only indexable sequence.
+    class ConcatenatedSamples
+    {
+        public:
+            ConcatenatedSamples(const Sample & first_, const Sample & second_)
+                : first(first_), second(second_) {}
+
+            /// Element `ind` of the concatenation; `ind` must be less than size().
+            const Float64 & operator[](size_t ind) const
+            {
+                if (ind < first.size())
+                    return first[ind];
+                /// Offset into the second sample. A modulo here would wrap back to the
+                /// beginning of `second` as soon as ind >= 2 * first.size() and would
+                /// divide by zero for an empty first sample.
+                return second[ind - first.size()];
+            }
+
+            size_t size() const
+            {
+                return first.size() + second.size();
+            }
+
+        private:
+            const Sample & first;
+            const Sample & second;
+    };
+};
+
+/// Aggregate function `mannWhitneyUTest(alternative, continuity_correction)(value, sample_index)`.
+/// Rows whose second argument is zero go to the first sample, all other rows to the second one.
+/// The result is a named tuple (u_statistic Float64, p_value Float64).
+class AggregateFunctionMannWhitney final:
+    public IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney>
+{
+private:
+    using Alternative = typename MannWhitneyData::Alternative;
+    /// Defaults used when the corresponding parameter is omitted.
+    Alternative alternative{Alternative::TwoSided};
+    bool continuity_correction{true};
+
+public:
+    /// `params` may hold up to two values:
+    ///   [0] String: 'two-sided', 'less' or 'greater';
+    ///   [1] UInt64: non-zero enables the continuity correction.
+    /// Throws on too many parameters, wrong parameter types or an unknown alternative.
+    explicit AggregateFunctionMannWhitney(const DataTypes & arguments, const Array & params)
+        :IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney> ({arguments}, {})
+    {
+        if (params.size() > 2)
+            throw Exception("Aggregate function " + getName() + " require two parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        if (params.empty())
+        {
+            alternative = Alternative::TwoSided;
+            return;
+        }
+
+        if (params[0].getType() != Field::Types::String)
+            throw Exception("Aggregate function " + getName() + " require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        auto param = params[0].get<String>();
+        if (param == "two-sided")
+            alternative = Alternative::TwoSided;
+        else if (param == "less")
+            alternative = Alternative::Less;
+        else if (param == "greater")
+            alternative = Alternative::Greater;
+        else
+            /// The hint must spell the values exactly as they are accepted above.
+            throw Exception("Unknown parameter in aggregate function " + getName() +
+                    ". It must be one of: 'two-sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
+
+        if (params.size() != 2)
+            return;
+
+        if (params[1].getType() != Field::Types::UInt64)
+            throw Exception("Aggregate function " + getName() + " require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        continuity_correction = static_cast<bool>(params[1].get<UInt64>());
+    }
+
+    String getName() const override
+    {
+        return "mannWhitneyUTest";
+    }
+
+    /// Tuple(u_statistic Float64, p_value Float64).
+    DataTypePtr getReturnType() const override
+    {
+        DataTypes types
+        {
+            std::make_shared<DataTypeNumber<Float64>>(),
+            std::make_shared<DataTypeNumber<Float64>>(),
+        };
+
+        Strings names
+        {
+            "u_statistic",
+            "p_value"
+        };
+
+        return std::make_shared<DataTypeTuple>(
+            std::move(types),
+            std::move(names)
+        );
+    }
+
+    /// columns[0] is the observed value, columns[1] selects the sample (0 - first, otherwise second).
+    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    {
+        Float64 value = columns[0]->getFloat64(row_num);
+        UInt8 is_second = columns[1]->getUInt(row_num);
+
+        if (is_second)
+            this->data(place).addY(value, arena);
+        else
+            this->data(place).addX(value, arena);
+    }
+
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    {
+        auto & a = this->data(place);
+        auto & b = this->data(rhs);
+
+        a.merge(b, arena);
+    }
+
+    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    {
+        this->data(place).write(buf);
+    }
+
+    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    {
+        this->data(place).read(buf, arena);
+    }
+
+    /// Appends one (u_statistic, p_value) row to the tuple column `to`.
+    /// Throws BAD_ARGUMENTS when either sample is empty.
+    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    {
+        if (!this->data(place).size_x || !this->data(place).size_y)
+            throw Exception("Aggregate function " + getName() + " require both samples to be non empty", ErrorCodes::BAD_ARGUMENTS);
+
+        auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction);
+
+        /// Because p-value is a probability.
+        p_value = std::min(1.0, std::max(0.0, p_value));
+
+        auto & column_tuple = assert_cast<ColumnTuple &>(to);
+        auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
+        auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
+
+        column_stat.getData().push_back(u_statistic);
+        column_value.getData().push_back(p_value);
+    }
+
+};
+
+};
--- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp
+++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp
@ -21,23 +21,10 @@ AggregateFunctionPtr createAggregateFunctionRankCorrelation(const std::string &
    assertBinary(name, argument_types);
    assertNoParameters(name, parameters);

-    AggregateFunctionPtr res;
-
-    if (isDecimal(argument_types[0]) || isDecimal(argument_types[1]))
-    {
+    if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
-    }
-    else
-    {
-        res.reset(createWithTwoNumericTypes<AggregateFunctionRankCorrelation>(*argument_types[0], *argument_types[1], argument_types));
-    }

-    if (!res)
-    {
-        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
-    }
-
-    return res;
+    return std::make_shared<AggregateFunctionRankCorrelation>(argument_types);
 }

 }
--- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h
+++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h
@ -1,73 +1,56 @@
 #pragma once

 #include <AggregateFunctions/IAggregateFunction.h>
+#include <AggregateFunctions/StatCommon.h>
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnVector.h>
 #include <Columns/ColumnTuple.h>
 #include <Common/assert_cast.h>
-#include <Common/FieldVisitors.h>
+#include <Common/PODArray_fwd.h>
 #include <common/types.h>
 #include <DataTypes/DataTypesDecimal.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeTuple.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteHelpers.h>
-#include <limits>
-
 #include <DataTypes/DataTypeArray.h>

 #include <Common/ArenaAllocator.h>

-#include <type_traits>
-
-
 namespace DB
 {

-template <template <typename> class Comparator>
-struct ComparePairFirst final
+
+struct RankCorrelationData : public StatisticalSample<Float64, Float64>
 {
-    template <typename X, typename Y>
-    bool operator()(const std::pair<X, Y> & lhs, const std::pair<X, Y> & rhs) const
+    /// Returns 1 - 6 * sum(d_j^2) / (n * (n^2 - 1)), where d_j is the difference between
+    /// the ranks of the j-th elements of the two samples and n is the sample size.
+    Float64 getResult()
    {
-        return Comparator<X>{}(lhs.first, rhs.first);
+        RanksArray ranks_x;
+        std::tie(ranks_x, std::ignore) = computeRanksAndTieCorrection(this->x);
+
+        RanksArray ranks_y;
+        std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y);
+
+        /// In our case sizes of both samples are equal.
+        const auto size = this->size_x;
+
+        /// Count d^2 sum
+        Float64 answer = 0;
+        for (size_t j = 0; j < size; ++j)
+            answer += (ranks_x[j] - ranks_y[j]) * (ranks_x[j] - ranks_y[j]);
+
+        /// Evaluate the denominator in floating point: n * (n^2 - 1) computed in the
+        /// integer type of `size` wraps around for samples of a few million rows.
+        const Float64 n = static_cast<Float64>(size);
+
+        answer *= 6;
+        answer /= n * (n * n - 1);
+        answer = 1 - answer;
+        return answer;
    }
 };

-
-template <template <typename> class Comparator>
-struct ComparePairSecond final
-{
-    template <typename X, typename Y>
-    bool operator()(const std::pair<X, Y> & lhs, const std::pair<X, Y> & rhs) const
-    {
-        return Comparator<Y>{}(lhs.second, rhs.second);
-    }
-};
-
-template <typename X = Float64, typename Y = Float64>
-struct AggregateFunctionRankCorrelationData final
-{
-    size_t size_x = 0;
-
-    using Allocator = MixedAlignedArenaAllocator<alignof(std::pair<X, Y>), 4096>;
-    using Array = PODArray<std::pair<X, Y>, 32, Allocator>;
-
-    Array values;
-};
-
-template <typename X, typename Y>
 class AggregateFunctionRankCorrelation :
-    public IAggregateFunctionDataHelper<AggregateFunctionRankCorrelationData<X, Y>, AggregateFunctionRankCorrelation<X, Y>>
+    public IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation>
 {
-    using Data = AggregateFunctionRankCorrelationData<X, Y>;
-    using Allocator = MixedAlignedArenaAllocator<alignof(std::pair<Float64, Float64>), 4096>;
-    using Array = PODArray<std::pair<Float64, Float64>, 32, Allocator>;
-
 public:
    explicit AggregateFunctionRankCorrelation(const DataTypes & arguments)
-        :IAggregateFunctionDataHelper<AggregateFunctionRankCorrelationData<X, Y>,AggregateFunctionRankCorrelation<X, Y>> ({arguments}, {})
+        :IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation> ({arguments}, {})
    {}

    String getName() const override
@ -80,24 +63,12 @@ public:
        return std::make_shared<DataTypeNumber<Float64>>();
    }

-    void insert(Data & a, const std::pair<X, Y> & x, Arena * arena) const
-    {
-        ++a.size_x;
-        a.values.push_back(x, arena);
-    }
-
    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
-        auto & a = this->data(place);
-
-        auto new_x = assert_cast<const ColumnVector<X> &>(*columns[0]).getData()[row_num];
-        auto new_y = assert_cast<const ColumnVector<Y> &>(*columns[1]).getData()[row_num];
-
-        auto new_arg = std::make_pair(new_x, new_y);
-
-        a.size_x += 1;
-
-        a.values.push_back(new_arg, arena);
+        Float64 new_x = columns[0]->getFloat64(row_num);
+        Float64 new_y = columns[1]->getFloat64(row_num);
+        this->data(place).addX(new_x, arena);
+        this->data(place).addY(new_y, arena);
    }

    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
@ -105,116 +76,22 @@ public:
        auto & a = this->data(place);
        auto & b = this->data(rhs);

-        if (b.size_x)
-            for (size_t i = 0; i < b.size_x; ++i)
-                insert(a, b.values[i], arena);
+        a.merge(b, arena);
    }

    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
    {
-        const auto & value = this->data(place).values;
-        size_t size = this->data(place).size_x;
-        writeVarUInt(size, buf);
-        buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
+        this->data(place).write(buf);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
    {
-        size_t size = 0;
-        readVarUInt(size, buf);
-
-        auto & value = this->data(place).values;
-
-        value.resize(size, arena);
-        buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
+        this->data(place).read(buf, arena);
    }

-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override
+    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
    {
-        const auto & value = this->data(place).values;
-        size_t size = this->data(place).size_x;
-
-        // create a copy of values not to format data
-        PODArrayWithStackMemory<std::pair<Float64, Float64>, 32> tmp_values;
-        tmp_values.resize(size);
-        for (size_t j = 0; j < size; ++ j)
-            tmp_values[j] = static_cast<std::pair<Float64, Float64>>(value[j]);
-
-        // sort x_values
-        std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairFirst<std::greater>{});
-
-        for (size_t j = 0; j < size;)
-        {
-            // replace x_values with their ranks
-            size_t rank = j + 1;
-            size_t same = 1;
-            size_t cur_sum = rank;
-            size_t cur_start = j;
-
-            while (j < size - 1)
-            {
-                if (tmp_values[j].first == tmp_values[j + 1].first)
-                {
-                    // rank of (j + 1)th number
-                    rank += 1;
-                    ++same;
-                    cur_sum += rank;
-                    ++j;
-                }
-                else
-                    break;
-            }
-
-            // insert rank is calculated as average of ranks of equal values
-            Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
-            for (size_t i = cur_start; i <= j; ++i)
-                tmp_values[i].first = insert_rank;
-            ++j;
-        }
-
-        // sort y_values
-        std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairSecond<std::greater>{});
-
-        // replace y_values with their ranks
-        for (size_t j = 0; j < size;)
-        {
-            // replace x_values with their ranks
-            size_t rank = j + 1;
-            size_t same = 1;
-            size_t cur_sum = rank;
-            size_t cur_start = j;
-
-            while (j < size - 1)
-            {
-                if (tmp_values[j].second == tmp_values[j + 1].second)
-                {
-                    // rank of (j + 1)th number
-                    rank += 1;
-                    ++same;
-                    cur_sum += rank;
-                    ++j;
-                }
-                else
-                {
-                    break;
-                }
-            }
-
-            // insert rank is calculated as average of ranks of equal values
-            Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
-            for (size_t i = cur_start; i <= j; ++i)
-                tmp_values[i].second = insert_rank;
-            ++j;
-        }
-
-        // count d^2 sum
-        Float64 answer = static_cast<Float64>(0);
-        for (size_t j = 0; j < size; ++ j)
-            answer += (tmp_values[j].first - tmp_values[j].second) * (tmp_values[j].first - tmp_values[j].second);
-
-        answer *= 6;
-        answer /= size * (size * size - 1);
-        answer = 1 - answer;
+        auto answer = this->data(place).getResult();

        auto & column = static_cast<ColumnVector<Float64> &>(to);
        column.getData().push_back(answer);
--- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h
+++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h
@ -8,6 +8,7 @@
 #include <IO/ReadHelpers.h>

 #include <AggregateFunctions/IAggregateFunction.h>
+#include <AggregateFunctions/Moments.h>

 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypesDecimal.h>
@ -30,310 +31,6 @@
 namespace DB
 {

-namespace ErrorCodes
-{
-    extern const int DECIMAL_OVERFLOW;
-}
-
-
-/**
-    Calculating univariate central moments
-    Levels:
-        level 2 (pop & samp): var, stddev
-        level 3: skewness
-        level 4: kurtosis
-    References:
-        https://en.wikipedia.org/wiki/Moment_(mathematics)
-        https://en.wikipedia.org/wiki/Skewness
-        https://en.wikipedia.org/wiki/Kurtosis
-*/
-template <typename T, size_t _level>
-struct VarMoments
-{
-    T m[_level + 1]{};
-
-    void add(T x)
-    {
-        ++m[0];
-        m[1] += x;
-        m[2] += x * x;
-        if constexpr (_level >= 3) m[3] += x * x * x;
-        if constexpr (_level >= 4) m[4] += x * x * x * x;
-    }
-
-    void merge(const VarMoments & rhs)
-    {
-        m[0] += rhs.m[0];
-        m[1] += rhs.m[1];
-        m[2] += rhs.m[2];
-        if constexpr (_level >= 3) m[3] += rhs.m[3];
-        if constexpr (_level >= 4) m[4] += rhs.m[4];
-    }
-
-    void write(WriteBuffer & buf) const
-    {
-        writePODBinary(*this, buf);
-    }
-
-    void read(ReadBuffer & buf)
-    {
-        readPODBinary(*this, buf);
-    }
-
-    T getPopulation() const
-    {
-        if (m[0] == 0)
-            return std::numeric_limits<T>::quiet_NaN();
-
-        /// Due to numerical errors, the result can be slightly less than zero,
-        /// but it should be impossible. Trim to zero.
-
-        return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / m[0]);
-    }
-
-    T getSample() const
-    {
-        if (m[0] <= 1)
-            return std::numeric_limits<T>::quiet_NaN();
-        return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / (m[0] - 1));
-    }
-
-    T getMoment3() const
-    {
-        if (m[0] == 0)
-            return std::numeric_limits<T>::quiet_NaN();
-        // to avoid accuracy problem
-        if (m[0] == 1)
-            return 0;
-        return (m[3]
-            - (3 * m[2]
-                - 2 * m[1] * m[1] / m[0]
-            ) * m[1] / m[0]
-        ) / m[0];
-    }
-
-    T getMoment4() const
-    {
-        if (m[0] == 0)
-            return std::numeric_limits<T>::quiet_NaN();
-        // to avoid accuracy problem
-        if (m[0] == 1)
-            return 0;
-        return (m[4]
-            - (4 * m[3]
-                - (6 * m[2]
-                    - 3 * m[1] * m[1] / m[0]
-                ) * m[1] / m[0]
-            ) * m[1] / m[0]
-        ) / m[0];
-    }
-};
-
-template <typename T, size_t _level>
-class VarMomentsDecimal
-{
-public:
-    using NativeType = typename T::NativeType;
-
-    void add(NativeType x)
-    {
-        ++m0;
-        getM(1) += x;
-
-        NativeType tmp;
-        bool overflow = common::mulOverflow(x, x, tmp) || common::addOverflow(getM(2), tmp, getM(2));
-        if constexpr (_level >= 3)
-            overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(3), tmp, getM(3));
-        if constexpr (_level >= 4)
-            overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(4), tmp, getM(4));
-
-        if (overflow)
-            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
-    }
-
-    void merge(const VarMomentsDecimal & rhs)
-    {
-        m0 += rhs.m0;
-        getM(1) += rhs.getM(1);
-
-        bool overflow = common::addOverflow(getM(2), rhs.getM(2), getM(2));
-        if constexpr (_level >= 3)
-            overflow = overflow || common::addOverflow(getM(3), rhs.getM(3), getM(3));
-        if constexpr (_level >= 4)
-            overflow = overflow || common::addOverflow(getM(4), rhs.getM(4), getM(4));
-
-        if (overflow)
-            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
-    }
-
-    void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
-    void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
-
-    Float64 getPopulation(UInt32 scale) const
-    {
-        if (m0 == 0)
-            return std::numeric_limits<Float64>::infinity();
-
-        NativeType tmp;
-        if (common::mulOverflow(getM(1), getM(1), tmp) ||
-            common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
-            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
-        return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / m0), scale));
-    }
-
-    Float64 getSample(UInt32 scale) const
-    {
-        if (m0 == 0)
-            return std::numeric_limits<Float64>::quiet_NaN();
-        if (m0 == 1)
-            return std::numeric_limits<Float64>::infinity();
-
-        NativeType tmp;
-        if (common::mulOverflow(getM(1), getM(1), tmp) ||
-            common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
-            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
-        return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / (m0 - 1)), scale));
-    }
-
-    Float64 getMoment3(UInt32 scale) const
-    {
-        if (m0 == 0)
-            return std::numeric_limits<Float64>::infinity();
-
-        NativeType tmp;
-        if (common::mulOverflow(2 * getM(1), getM(1), tmp) ||
-            common::subOverflow(3 * getM(2), NativeType(tmp / m0), tmp) ||
-            common::mulOverflow(tmp, getM(1), tmp) ||
-            common::subOverflow(getM(3), NativeType(tmp / m0), tmp))
-            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
-        return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
-    }
-
-    Float64 getMoment4(UInt32 scale) const
-    {
-        if (m0 == 0)
-            return std::numeric_limits<Float64>::infinity();
-
-        NativeType tmp;
-        if (common::mulOverflow(3 * getM(1), getM(1), tmp) ||
-            common::subOverflow(6 * getM(2), NativeType(tmp / m0), tmp) ||
-            common::mulOverflow(tmp, getM(1), tmp) ||
-            common::subOverflow(4 * getM(3), NativeType(tmp / m0), tmp) ||
-            common::mulOverflow(tmp, getM(1), tmp) ||
-            common::subOverflow(getM(4), NativeType(tmp / m0), tmp))
-            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
-        return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
-    }
-
-private:
-    UInt64 m0{};
-    NativeType m[_level]{};
-
-    NativeType & getM(size_t i) { return m[i - 1]; }
-    const NativeType & getM(size_t i) const { return m[i - 1]; }
-};
-
-/**
-    Calculating multivariate central moments
-    Levels:
-        level 2 (pop & samp): covar
-    References:
-        https://en.wikipedia.org/wiki/Moment_(mathematics)
-*/
-template <typename T>
-struct CovarMoments
-{
-    T m0{};
-    T x1{};
-    T y1{};
-    T xy{};
-
-    void add(T x, T y)
-    {
-        ++m0;
-        x1 += x;
-        y1 += y;
-        xy += x * y;
-    }
-
-    void merge(const CovarMoments & rhs)
-    {
-        m0 += rhs.m0;
-        x1 += rhs.x1;
-        y1 += rhs.y1;
-        xy += rhs.xy;
-    }
-
-    void write(WriteBuffer & buf) const
-    {
-        writePODBinary(*this, buf);
-    }
-
-    void read(ReadBuffer & buf)
-    {
-        readPODBinary(*this, buf);
-    }
-
-    T NO_SANITIZE_UNDEFINED getPopulation() const
-    {
-        return (xy - x1 * y1 / m0) / m0;
-    }
-
-    T NO_SANITIZE_UNDEFINED getSample() const
-    {
-        if (m0 == 0)
-            return std::numeric_limits<T>::quiet_NaN();
-        return (xy - x1 * y1 / m0) / (m0 - 1);
-    }
-};
-
-template <typename T>
-struct CorrMoments
-{
-    T m0{};
-    T x1{};
-    T y1{};
-    T xy{};
-    T x2{};
-    T y2{};
-
-    void add(T x, T y)
-    {
-        ++m0;
-        x1 += x;
-        y1 += y;
-        xy += x * y;
-        x2 += x * x;
-        y2 += y * y;
-    }
-
-    void merge(const CorrMoments & rhs)
-    {
-        m0 += rhs.m0;
-        x1 += rhs.x1;
-        y1 += rhs.y1;
-        xy += rhs.xy;
-        x2 += rhs.x2;
-        y2 += rhs.y2;
-    }
-
-    void write(WriteBuffer & buf) const
-    {
-        writePODBinary(*this, buf);
-    }
-
-    void read(ReadBuffer & buf)
-    {
-        readPODBinary(*this, buf);
-    }
-
-    T NO_SANITIZE_UNDEFINED get() const
-    {
-        return (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1 * x1) * (m0 * y2 - y1 * y1));
-    }
-};
-
-
 enum class StatisticsFunctionKind
 {
    varPop, varSamp,
--- a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp
+++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp
@ -0,0 +1,77 @@
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/AggregateFunctionTTest.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+#include <AggregateFunctions/Moments.h>
+
+#include "registerAggregateFunctions.h"
+
+
+namespace DB
+{
+
+/// Declared inside namespace DB (as in Moments.h) so that the name refers to DB::ErrorCodes::BAD_ARGUMENTS.
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+namespace
+{
+
+/** Student T-test applies to two samples of independent random variables
+  * that have normal distributions with equal (but unknown) variances.
+  * It allows answering the question of whether the means of the distributions differ.
+  *
+  * If variances are not considered equal, Welch T-test should be used instead.
+  *
+  * NOTE(review): x1, x2, nx, y1, y2, ny are inherited from TTestMoments (Moments.h);
+  * presumably they are the per-sample sum, sum of squares and count - confirm there.
+  */
+struct StudentTTestData : public TTestMoments<Float64>
+{
+    static constexpr auto name = "studentTTest";
+
+    /// Returns the pair (t-statistic, p-value).
+    /// No special handling of empty or single-element samples is done here.
+    std::pair<Float64, Float64> getResult() const
+    {
+        Float64 mean_x = x1 / nx;
+        Float64 mean_y = y1 / ny;
+
+        /// To estimate the variance we first estimate two means.
+        /// That's why the number of degrees of freedom is the total number of values of both samples minus 2.
+        Float64 degrees_of_freedom = nx + ny - 2;
+
+        /// Calculate s^2
+        /// The original formula looks like
+        /// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2}
+        /// But we made some mathematical transformations so as not to store the original sequences.
+        /// Also we dropped sqrt, because it will be squared later.
+
+        Float64 all_x = x2 + nx * mean_x * mean_x - 2 * mean_x * x1;
+        Float64 all_y = y2 + ny * mean_y * mean_y - 2 * mean_y * y1;
+
+        /// Pooled variance estimate and the squared standard error of the difference of means.
+        Float64 s2 = (all_x + all_y) / degrees_of_freedom;
+        Float64 std_err2 = s2 * (1. / nx + 1. / ny);
+
+        /// t-statistic
+        Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2);
+
+        /// getPValue takes the squared t-statistic.
+        return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)};
+    }
+};
+
+/// Factory callback for studentTTest.
+/// Runs the assertBinary and assertNoParameters checks, rejects non-numeric argument types
+/// with BAD_ARGUMENTS and then creates the aggregate function for StudentTTestData.
+AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
+{
+    assertBinary(name, argument_types);
+    assertNoParameters(name, parameters);
+
+    /// Both the value column and the sample index column must be numeric.
+    const bool both_numeric = isNumber(argument_types[0]) && isNumber(argument_types[1]);
+    if (!both_numeric)
+        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS);
+
+    using Function = AggregateFunctionTTest<StudentTTestData>;
+    return std::make_shared<Function>(argument_types);
+}
+
+}
+
+/// Registers the creator function above in the factory under the name "studentTTest".
+void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest);
+}
+
+}
--- a/src/AggregateFunctions/AggregateFunctionTTest.h
+++ b/src/AggregateFunctions/AggregateFunctionTTest.h
@ -0,0 +1,154 @@
+#pragma once
+
+#include <AggregateFunctions/IAggregateFunction.h>
+#include <AggregateFunctions/StatCommon.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnTuple.h>
+#include <Common/assert_cast.h>
+#include <Core/Types.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <cmath>
+
+
+/// This function is used in implementations of different T-Tests.
+/// On Darwin it's unavailable in math.h but actually exists in the library (can be linked successfully).
+#if defined(OS_DARWIN)
+extern "C"
+{
+    double lgamma_r(double x, int * signgamp);
+}
+#endif
+
+
+namespace DB
+{
+
+class ReadBuffer;
+class WriteBuffer;
+
+/**
+ * If you have a cumulative distribution function F, then calculating the p-value for given statistic T is simply 1−F(T)
+ * In our case p-value is two-sided, so we multiply it by 2.
+ * So cumulative distribution function F equals to
+ * \[ F(t) = \int_{-\infty}^{t} f(u)du = 1 - \frac{1}{2} I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \]
+ * where \[ x(t) = \frac{v}{t^2 + v} \]: https://en.wikipedia.org/wiki/Student%27s_t-distribution#Cumulative_distribution_function
+ *
+ * so our resulting \[ p-value = I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \].
+ *
+ * And I is regularized incomplete beta function: https://en.wikipedia.org/wiki/Beta_function#Incomplete_beta_function
+ *
+ * Keeping in mind that \[ \mathrm {B} (x;a,b)=\int _{0}^{x}r^{a-1}\,(1-r)^{b-1}\,\mathrm {d} r.\! \]
+ * and
+ * \[ \mathrm {B} (x,y)={\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}}=\
+ * \exp(\ln {\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}})=\exp((\ln(\Gamma (x))+\ln(\Gamma (y))-\ln(\Gamma (x+y))) \]
+ *
+ * p-value can be calculated in terms of gamma functions and integrals more simply:
+ * \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}r^{{\frac {\nu }{2}}-1}\,(1-r)^{-0.5}\,\mathrm {d} r}\
+ * {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \]
+ *
+ * which simplifies to:
+ *
+ * \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}{\frac {r^{{\frac {\nu }{2}}-1}}{\sqrt {1-r}}}\,\mathrm {d} r}\
+ * {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \]
+ *
+ * Read here for details https://rosettacode.org/wiki/Welch%27s_t-test#
+ *
+ * Both WelchTTest and StudentTTest have a t-statistic with Student's distribution but with different degrees of freedom.
+ * So the procedure of computing p-value is the same.
+*/
+static inline Float64 getPValue(Float64 degrees_of_freedom, Float64 t_stat2)
+{
+    const Float64 half_dof = degrees_of_freedom / 2;
+    const Float64 upper_bound = degrees_of_freedom / (t_stat2 + degrees_of_freedom);
+
+    /// Numerator: integral of r^(v/2 - 1) / sqrt(1 - r) over [0, v / (t^2 + v)].
+    Float64 integral = integrateSimpson(0, upper_bound,
+        [half_dof](double r) { return std::pow(r, half_dof - 1) / std::sqrt(1 - r); });
+
+    /// Denominator: exp(ln Gamma(v/2) + ln Gamma(1/2) - ln Gamma(v/2 + 1/2)).
+    /// The sign output of lgamma_r is not needed.
+    int sign_unused;
+    Float64 log_beta = lgamma_r(half_dof, &sign_unused) + lgamma_r(0.5, &sign_unused) - lgamma_r(half_dof + 0.5, &sign_unused);
+    Float64 beta = std::exp(log_beta);
+
+    /// A p-value is a probability, so it is kept within [0, 1].
+    /// min/max is used on purpose: std::clamp would handle NaN differently.
+    return std::min(1.0, std::max(0.0, integral / beta));
+}
+
+
+/// Aggregate function shared by the T-test variants; the statistics themselves are supplied by Data.
+/// Returns tuple of (t-statistic, p-value)
+/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
+///
+/// Data must provide: name, addX(), addY(), merge(), write(), read() and getResult().
+template <typename Data>
+class AggregateFunctionTTest :
+    public IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>
+{
+public:
+    AggregateFunctionTTest(const DataTypes & arguments)
+        : IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>({arguments}, {})
+    {
+    }
+
+    String getName() const override
+    {
+        return Data::name;
+    }
+
+    /// The result type is a named tuple: (t_statistic Float64, p_value Float64).
+    DataTypePtr getReturnType() const override
+    {
+        DataTypes types
+        {
+            std::make_shared<DataTypeNumber<Float64>>(),
+            std::make_shared<DataTypeNumber<Float64>>(),
+        };
+
+        Strings names
+        {
+            "t_statistic",
+            "p_value"
+        };
+
+        return std::make_shared<DataTypeTuple>(
+            std::move(types),
+            std::move(names)
+        );
+    }
+
+    /// columns[0] holds the observed value, columns[1] selects the sample:
+    /// zero means the first sample (X), any non-zero value means the second sample (Y).
+    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    {
+        Float64 value = columns[0]->getFloat64(row_num);
+
+        /// Compare the full-width value with zero. Narrowing it to UInt8 first would wrap
+        /// non-zero multiples of 256 to 0 and put such rows into the wrong sample.
+        const bool is_second = columns[1]->getUInt(row_num) != 0;
+
+        if (is_second)
+            this->data(place).addY(value);
+        else
+            this->data(place).addX(value);
+    }
+
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    {
+        this->data(place).merge(this->data(rhs));
+    }
+
+    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    {
+        this->data(place).write(buf);
+    }
+
+    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    {
+        this->data(place).read(buf);
+    }
+
+    /// Appends one (t-statistic, p-value) row to the tuple column `to`.
+    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    {
+        auto [t_statistic, p_value] = this->data(place).getResult();
+
+        /// Because p-value is a probability.
+        p_value = std::min(1.0, std::max(0.0, p_value));
+
+        auto & column_tuple = assert_cast<ColumnTuple &>(to);
+        auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
+        auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
+
+        column_stat.getData().push_back(t_statistic);
+        column_value.getData().push_back(p_value);
+    }
+};
+
+};
--- a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.cpp
+++ b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.cpp
@ -1,35 +0,0 @@
-#include "AggregateFunctionTimeSeriesGroupSum.h"
-#include "AggregateFunctionFactory.h"
-#include "FactoryHelpers.h"
-#include "Helpers.h"
-#include "registerAggregateFunctions.h"
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-}
-namespace
-{
-    template <bool rate>
-    AggregateFunctionPtr createAggregateFunctionTimeSeriesGroupSum(const std::string & name, const DataTypes & arguments, const Array & params)
-    {
-        assertNoParameters(name, params);
-
-        if (arguments.size() < 3)
-            throw Exception("Not enough event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
-        return std::make_shared<AggregateFunctionTimeSeriesGroupSum<rate>>(arguments);
-    }
-
-}
-
-void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory & factory)
-{
-    factory.registerFunction("timeSeriesGroupSum", createAggregateFunctionTimeSeriesGroupSum<false>);
-    factory.registerFunction("timeSeriesGroupRateSum", createAggregateFunctionTimeSeriesGroupSum<true>);
-}
-
-}
--- a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h
+++ b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h
@ -1,291 +0,0 @@
-#pragma once
-
-#include <bitset>
-#include <map>
-#include <queue>
-#include <unordered_set>
-#include <utility>
-#include <Columns/ColumnArray.h>
-#include <Columns/ColumnTuple.h>
-#include <Columns/ColumnsNumber.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeTuple.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteHelpers.h>
-#include <Common/ArenaAllocator.h>
-#include <Common/assert_cast.h>
-#include <ext/range.h>
-#include "IAggregateFunction.h"
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-}
-
-template <bool rate>
-struct AggregateFunctionTimeSeriesGroupSumData
-{
-    using DataPoint = std::pair<Int64, Float64>;
-    struct Points
-    {
-        using Dps = std::queue<DataPoint>;
-        Dps dps;
-        void add(Int64 t, Float64 v)
-        {
-            dps.push(std::make_pair(t, v));
-            if (dps.size() > 2)
-                dps.pop();
-        }
-        Float64 getval(Int64 t)
-        {
-            Int64 t1, t2;
-            Float64 v1, v2;
-            if (rate)
-            {
-                if (dps.size() < 2)
-                    return 0;
-                t1 = dps.back().first;
-                t2 = dps.front().first;
-                v1 = dps.back().second;
-                v2 = dps.front().second;
-                return (v1 - v2) / Float64(t1 - t2);
-            }
-            else
-            {
-                if (dps.size() == 1 && t == dps.front().first)
-                    return dps.front().second;
-                t1 = dps.back().first;
-                t2 = dps.front().first;
-                v1 = dps.back().second;
-                v2 = dps.front().second;
-                return v2 + ((v1 - v2) * Float64(t - t2)) / Float64(t1 - t2);
-            }
-        }
-    };
-
-    typedef std::map<UInt64, Points> Series;
-    typedef PODArrayWithStackMemory<DataPoint, 128> AggSeries;
-    Series ss;
-    AggSeries result;
-
-    void add(UInt64 uid, Int64 t, Float64 v)
-    { //suppose t is coming asc
-        typename Series::iterator it_ss;
-        if (ss.count(uid) == 0)
-        { //time series not exist, insert new one
-            Points tmp;
-            tmp.add(t, v);
-            ss.emplace(uid, tmp);
-            it_ss = ss.find(uid);
-        }
-        else
-        {
-            it_ss = ss.find(uid);
-            it_ss->second.add(t, v);
-        }
-        if (result.size() > 0 && t < result.back().first)
-            throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc.", ErrorCodes::LOGICAL_ERROR};
-        if (result.size() > 0 && t == result.back().first)
-        {
-            //do not add new point
-            if (rate)
-                result.back().second += it_ss->second.getval(t);
-            else
-                result.back().second += v;
-        }
-        else
-        {
-            if (rate)
-                result.emplace_back(std::make_pair(t, it_ss->second.getval(t)));
-            else
-                result.emplace_back(std::make_pair(t, v));
-        }
-        ssize_t i = result.size() - 1;
-        //reverse find out the index of timestamp that more than previous timestamp of t
-        while (result[i].first > it_ss->second.dps.front().first && i >= 0)
-            i--;
-
-        i++;
-        while (i < ssize_t(result.size()) - 1)
-        {
-            result[i].second += it_ss->second.getval(result[i].first);
-            i++;
-        }
-    }
-
-    void merge(const AggregateFunctionTimeSeriesGroupSumData & other)
-    {
-        //if ts has overlap, then aggregate two series by interpolation;
-        AggSeries tmp;
-        tmp.reserve(other.result.size() + result.size());
-        size_t i = 0, j = 0;
-        Int64 t1, t2;
-        Float64 v1, v2;
-        while (i < result.size() && j < other.result.size())
-        {
-            if (result[i].first < other.result[j].first)
-            {
-                if (j == 0)
-                {
-                    tmp.emplace_back(result[i]);
-                }
-                else
-                {
-                    t1 = other.result[j].first;
-                    t2 = other.result[j - 1].first;
-                    v1 = other.result[j].second;
-                    v2 = other.result[j - 1].second;
-                    Float64 value = result[i].second + v2 + (v1 - v2) * (Float64(result[i].first - t2)) / Float64(t1 - t2);
-                    tmp.emplace_back(std::make_pair(result[i].first, value));
-                }
-                i++;
-            }
-            else if (result[i].first > other.result[j].first)
-            {
-                if (i == 0)
-                {
-                    tmp.emplace_back(other.result[j]);
-                }
-                else
-                {
-                    t1 = result[i].first;
-                    t2 = result[i - 1].first;
-                    v1 = result[i].second;
-                    v2 = result[i - 1].second;
-                    Float64 value = other.result[j].second + v2 + (v1 - v2) * (Float64(other.result[j].first - t2)) / Float64(t1 - t2);
-                    tmp.emplace_back(std::make_pair(other.result[j].first, value));
-                }
-                j++;
-            }
-            else
-            {
-                tmp.emplace_back(std::make_pair(result[i].first, result[i].second + other.result[j].second));
-                i++;
-                j++;
-            }
-        }
-        while (i < result.size())
-        {
-            tmp.emplace_back(result[i]);
-            i++;
-        }
-        while (j < other.result.size())
-        {
-            tmp.push_back(other.result[j]);
-            j++;
-        }
-        swap(result, tmp);
-    }
-
-    void serialize(WriteBuffer & buf) const
-    {
-        size_t size = result.size();
-        writeVarUInt(size, buf);
-        if (size > 0)
-        {
-            buf.write(reinterpret_cast<const char *>(result.data()), size * sizeof(result[0]));
-        }
-    }
-
-    void deserialize(ReadBuffer & buf)
-    {
-        size_t size = 0;
-        readVarUInt(size, buf);
-        result.resize(size);
-        if (size > 0)
-        {
-            buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
-        }
-    }
-};
-template <bool rate>
-class AggregateFunctionTimeSeriesGroupSum final
-    : public IAggregateFunctionDataHelper<AggregateFunctionTimeSeriesGroupSumData<rate>, AggregateFunctionTimeSeriesGroupSum<rate>>
-{
-private:
-public:
-    String getName() const override { return rate ? "timeSeriesGroupRateSum" : "timeSeriesGroupSum"; }
-
-    AggregateFunctionTimeSeriesGroupSum(const DataTypes & arguments)
-        : IAggregateFunctionDataHelper<AggregateFunctionTimeSeriesGroupSumData<rate>, AggregateFunctionTimeSeriesGroupSum<rate>>(arguments, {})
-    {
-        if (!WhichDataType(arguments[0].get()).isUInt64())
-            throw Exception{"Illegal type " + arguments[0].get()->getName() + " of argument 1 of aggregate function " + getName()
-                                + ", must be UInt64",
-                            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!WhichDataType(arguments[1].get()).isInt64())
-            throw Exception{"Illegal type " + arguments[1].get()->getName() + " of argument 2 of aggregate function " + getName()
-                                + ", must be Int64",
-                            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!WhichDataType(arguments[2].get()).isFloat64())
-            throw Exception{"Illegal type " + arguments[2].get()->getName() + " of argument 3 of aggregate function " + getName()
-                                + ", must be Float64",
-                            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-    }
-
-    DataTypePtr getReturnType() const override
-    {
-        auto datatypes = std::vector<DataTypePtr>();
-        datatypes.push_back(std::make_shared<DataTypeInt64>());
-        datatypes.push_back(std::make_shared<DataTypeFloat64>());
-
-        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(datatypes));
-    }
-
-    void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
-    {
-        auto uid = assert_cast<const ColumnVector<UInt64> *>(columns[0])->getData()[row_num];
-        auto ts = assert_cast<const ColumnVector<Int64> *>(columns[1])->getData()[row_num];
-        auto val = assert_cast<const ColumnVector<Float64> *>(columns[2])->getData()[row_num];
-        if (uid && ts && val)
-        {
-            this->data(place).add(uid, ts, val);
-        }
-    }
-
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); }
-
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).serialize(buf); }
-
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); }
-
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
-    {
-        const auto & value = this->data(place).result;
-        size_t size = value.size();
-
-        ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
-        ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
-        size_t old_size = offsets_to.back();
-
-        offsets_to.push_back(offsets_to.back() + size);
-
-        if (size)
-        {
-            typename ColumnInt64::Container & ts_to
-                = assert_cast<ColumnInt64 &>(assert_cast<ColumnTuple &>(arr_to.getData()).getColumn(0)).getData();
-            typename ColumnFloat64::Container & val_to
-                = assert_cast<ColumnFloat64 &>(assert_cast<ColumnTuple &>(arr_to.getData()).getColumn(1)).getData();
-            ts_to.reserve(old_size + size);
-            val_to.reserve(old_size + size);
-            size_t i = 0;
-            while (i < this->data(place).result.size())
-            {
-                ts_to.push_back(this->data(place).result[i].first);
-                val_to.push_back(this->data(place).result[i].second);
-                i++;
-            }
-        }
-    }
-
-    bool allocatesMemoryInArena() const override { return true; }
-};
-}
--- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
+++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
@ -0,0 +1,74 @@
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/AggregateFunctionTTest.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+#include <AggregateFunctions/Moments.h>
+
+#include "registerAggregateFunctions.h"
+
+
+namespace DB
+{
+
+/// Declared inside namespace DB (as in Moments.h) so that the name refers to DB::ErrorCodes::BAD_ARGUMENTS.
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+namespace
+{
+
+/// Data for the Welch T-test: compares the means of two samples; each sample gets its own variance estimate.
+/// NOTE(review): x1, x2, nx, y1, y2, ny are inherited from TTestMoments (Moments.h);
+/// presumably they are the per-sample sum, sum of squares and count - confirm there.
+struct WelchTTestData : public TTestMoments<Float64>
+{
+    static constexpr auto name = "welchTTest";
+
+    /// Returns the pair (t-statistic, p-value).
+    /// No special handling of empty or single-element samples is done here.
+    std::pair<Float64, Float64> getResult() const
+    {
+        Float64 mean_x = x1 / nx;
+        Float64 mean_y = y1 / ny;
+
+        /// s_x^2, s_y^2
+
+        /// The original formula looks like  \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2}
+        /// But we made some mathematical transformations so as not to store the original sequences.
+        /// Also we dropped sqrt, because it will be squared later.
+
+        Float64 sx2 = (x2 + nx * mean_x * mean_x - 2 * mean_x * x1) / (nx - 1);
+        Float64 sy2 = (y2 + ny * mean_y * mean_y - 2 * mean_y * y1) / (ny - 1);
+
+        /// t-statistic
+        Float64 t_stat = (mean_x - mean_y) / sqrt(sx2 / nx + sy2 / ny);
+
+        /// degrees of freedom:
+        /// (s_x^2 / n_x + s_y^2 / n_y)^2 / (s_x^4 / (n_x^2 (n_x - 1)) + s_y^4 / (n_y^2 (n_y - 1)))
+
+        Float64 numerator_sqrt = sx2 / nx + sy2 / ny;
+        Float64 numerator = numerator_sqrt * numerator_sqrt;
+
+        Float64 denominator_x = sx2 * sx2 / (nx * nx * (nx - 1));
+        Float64 denominator_y = sy2 * sy2 / (ny * ny * (ny - 1));
+
+        Float64 degrees_of_freedom = numerator / (denominator_x + denominator_y);
+
+        /// getPValue takes the squared t-statistic.
+        return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)};
+    }
+};
+
+/// Factory callback for welchTTest.
+/// Runs the assertBinary and assertNoParameters checks, rejects non-numeric argument types
+/// with BAD_ARGUMENTS and then creates the aggregate function for WelchTTestData.
+AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
+{
+    assertBinary(name, argument_types);
+    assertNoParameters(name, parameters);
+
+    /// Both the value column and the sample index column must be numeric.
+    const bool both_numeric = isNumber(argument_types[0]) && isNumber(argument_types[1]);
+    if (!both_numeric)
+        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS);
+
+    using Function = AggregateFunctionTTest<WelchTTestData>;
+    return std::make_shared<Function>(argument_types);
+}
+
+}
+
+/// Registers the creator function above in the factory under the name "welchTTest".
+void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest);
+}
+
+}
--- a/src/AggregateFunctions/Helpers.h
+++ b/src/AggregateFunctions/Helpers.h
@ -15,6 +15,7 @@
    M(Float32) \
    M(Float64)

+// No UInt128 here because of the name conflict
 #define FOR_NUMERIC_TYPES(M) \
    M(UInt8) \
    M(UInt16) \
--- a/src/AggregateFunctions/Moments.h
+++ b/src/AggregateFunctions/Moments.h
@ -0,0 +1,361 @@
+#pragma once
+
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int DECIMAL_OVERFLOW;
+}
+
+
+/**
+    Calculating univariate central moments
+    Levels:
+        level 2 (pop & samp): var, stddev
+        level 3: skewness
+        level 4: kurtosis
+    References:
+        https://en.wikipedia.org/wiki/Moment_(mathematics)
+        https://en.wikipedia.org/wiki/Skewness
+        https://en.wikipedia.org/wiki/Kurtosis
+*/
+template <typename T, size_t _level>
+struct VarMoments
+{
+    /// Raw power sums: m[0] counts the added values, m[k] holds the sum of x^k for k = 1.._level.
+    T m[_level + 1]{};
+
+    /// Accumulates one value into the counter and into every power sum up to _level.
+    void add(T x)
+    {
+        ++m[0];
+        m[1] += x;
+        m[2] += x * x;
+        if constexpr (_level >= 3) m[3] += x * x * x;
+        if constexpr (_level >= 4) m[4] += x * x * x * x;
+    }
+
+    /// Combines another partial state into this one by adding the sums element-wise.
+    void merge(const VarMoments & rhs)
+    {
+        m[0] += rhs.m[0];
+        m[1] += rhs.m[1];
+        m[2] += rhs.m[2];
+        if constexpr (_level >= 3) m[3] += rhs.m[3];
+        if constexpr (_level >= 4) m[4] += rhs.m[4];
+    }
+
+    /// Serializes the whole object through writePODBinary
+    /// (presumably a raw byte copy, so the member layout is the stored format - confirm before changing members).
+    void write(WriteBuffer & buf) const
+    {
+        writePODBinary(*this, buf);
+    }
+
+    /// Restores the whole object through readPODBinary; counterpart of write().
+    void read(ReadBuffer & buf)
+    {
+        readPODBinary(*this, buf);
+    }
+
+    /// Population variance: (m2 - m1^2 / m0) / m0, clamped at zero. NaN when no values were added.
+    T getPopulation() const
+    {
+        if (m[0] == 0)
+            return std::numeric_limits<T>::quiet_NaN();
+
+        /// Due to numerical errors, the result can be slightly less than zero,
+        /// but it should be impossible. Trim to zero.
+
+        return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / m[0]);
+    }
+
+    /// Sample variance: same numerator as getPopulation() divided by (m0 - 1), clamped at zero.
+    /// NaN when fewer than two values were added.
+    T getSample() const
+    {
+        if (m[0] <= 1)
+            return std::numeric_limits<T>::quiet_NaN();
+        return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / (m[0] - 1));
+    }
+
+    /// Third central moment computed from the raw sums. NaN for an empty state, exactly 0 for a single value.
+    T getMoment3() const
+    {
+        if (m[0] == 0)
+            return std::numeric_limits<T>::quiet_NaN();
+        // to avoid accuracy problem
+        if (m[0] == 1)
+            return 0;
+        /// \[ \frac{1}{m_0} (m_3 - (3 * m_2 - \frac{2 * {m_1}^2}{m_0}) * \frac{m_1}{m_0});\]
+        return (m[3]
+            - (3 * m[2]
+                - 2 * m[1] * m[1] / m[0]
+            ) * m[1] / m[0]
+        ) / m[0];
+    }
+
+    /// Fourth central moment computed from the raw sums. NaN for an empty state, exactly 0 for a single value.
+    T getMoment4() const
+    {
+        if (m[0] == 0)
+            return std::numeric_limits<T>::quiet_NaN();
+        // to avoid accuracy problem
+        if (m[0] == 1)
+            return 0;
+        /// \[ \frac{1}{m_0}(m_4 - (4 * m_3 - (6 * m_2 - \frac{3 * m_1^2}{m_0} ) \frac{m_1}{m_0})\frac{m_1}{m_0})\]
+        return (m[4]
+            - (4 * m[3]
+                - (6 * m[2]
+                    - 3 * m[1] * m[1] / m[0]
+                ) * m[1] / m[0]
+            ) * m[1] / m[0]
+        ) / m[0];
+    }
+};
+
+/// Power-sum accumulator for decimal values: sums are kept in the decimal's native integer type
+/// and the results are converted to Float64 with the scale supplied by the caller.
+/// Most integer operations are overflow-checked and raise DECIMAL_OVERFLOW.
+template <typename T, size_t _level>
+class VarMomentsDecimal
+{
+public:
+    using NativeType = typename T::NativeType;
+
+    /// Adds one value. The counter and the first-power sum are updated with plain arithmetic;
+    /// the sums of powers 2.._level use overflow-checked multiply/add.
+    void add(NativeType x)
+    {
+        ++m0;
+        getM(1) += x;
+
+        /// tmp holds x^2, then (for higher levels) x^3 and x^4 in turn.
+        /// Once `overflow` is true the remaining operations are short-circuited.
+        NativeType tmp;
+        bool overflow = common::mulOverflow(x, x, tmp) || common::addOverflow(getM(2), tmp, getM(2));
+        if constexpr (_level >= 3)
+            overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(3), tmp, getM(3));
+        if constexpr (_level >= 4)
+            overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(4), tmp, getM(4));
+
+        if (overflow)
+            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
+    }
+
+    /// Adds the sums of another partial state; sums of powers 2.._level are overflow-checked.
+    void merge(const VarMomentsDecimal & rhs)
+    {
+        m0 += rhs.m0;
+        getM(1) += rhs.getM(1);
+
+        bool overflow = common::addOverflow(getM(2), rhs.getM(2), getM(2));
+        if constexpr (_level >= 3)
+            overflow = overflow || common::addOverflow(getM(3), rhs.getM(3), getM(3));
+        if constexpr (_level >= 4)
+            overflow = overflow || common::addOverflow(getM(4), rhs.getM(4), getM(4));
+
+        if (overflow)
+            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
+    }
+
+    /// (De)serialization of the whole object through writePODBinary / readPODBinary.
+    void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
+    void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
+
+    /// Population variance (m2 - m1^2 / m0) / m0 converted to Float64, clamped at zero.
+    /// Returns +infinity for an empty state.
+    Float64 getPopulation(UInt32 scale) const
+    {
+        if (m0 == 0)
+            return std::numeric_limits<Float64>::infinity();
+
+        NativeType tmp;
+        if (common::mulOverflow(getM(1), getM(1), tmp) ||
+            common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
+            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
+        return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / m0), scale));
+    }
+
+    /// Sample variance (m2 - m1^2 / m0) / (m0 - 1) converted to Float64, clamped at zero.
+    /// Returns NaN for an empty state and +infinity for a single value.
+    Float64 getSample(UInt32 scale) const
+    {
+        if (m0 == 0)
+            return std::numeric_limits<Float64>::quiet_NaN();
+        if (m0 == 1)
+            return std::numeric_limits<Float64>::infinity();
+
+        NativeType tmp;
+        if (common::mulOverflow(getM(1), getM(1), tmp) ||
+            common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
+            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
+        return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / (m0 - 1)), scale));
+    }
+
+    /// Third central moment: (m3 - (3 * m2 - 2 * m1^2 / m0) * m1 / m0) / m0, evaluated step by step in `tmp`.
+    /// Returns +infinity for an empty state.
+    Float64 getMoment3(UInt32 scale) const
+    {
+        if (m0 == 0)
+            return std::numeric_limits<Float64>::infinity();
+
+        NativeType tmp;
+        if (common::mulOverflow(2 * getM(1), getM(1), tmp) ||
+            common::subOverflow(3 * getM(2), NativeType(tmp / m0), tmp) ||
+            common::mulOverflow(tmp, getM(1), tmp) ||
+            common::subOverflow(getM(3), NativeType(tmp / m0), tmp))
+            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
+        return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
+    }
+
+    /// Fourth central moment: (m4 - (4 * m3 - (6 * m2 - 3 * m1^2 / m0) * m1 / m0) * m1 / m0) / m0,
+    /// evaluated step by step in `tmp`. Returns +infinity for an empty state.
+    Float64 getMoment4(UInt32 scale) const
+    {
+        if (m0 == 0)
+            return std::numeric_limits<Float64>::infinity();
+
+        NativeType tmp;
+        if (common::mulOverflow(3 * getM(1), getM(1), tmp) ||
+            common::subOverflow(6 * getM(2), NativeType(tmp / m0), tmp) ||
+            common::mulOverflow(tmp, getM(1), tmp) ||
+            common::subOverflow(4 * getM(3), NativeType(tmp / m0), tmp) ||
+            common::mulOverflow(tmp, getM(1), tmp) ||
+            common::subOverflow(getM(4), NativeType(tmp / m0), tmp))
+            throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
+        return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
+    }
+
+private:
+    /// Number of added values.
+    UInt64 m0{};
+    /// Sums of powers 1.._level; m[k - 1] holds the sum of x^k.
+    NativeType m[_level]{};
+
+    /// 1-based access to the power sums: getM(k) is the sum of x^k.
+    NativeType & getM(size_t i) { return m[i - 1]; }
+    const NativeType & getM(size_t i) const { return m[i - 1]; }
+};
+
+/**
+    Calculating multivariate central moments
+    Levels:
+        level 2 (pop & samp): covar
+    References:
+        https://en.wikipedia.org/wiki/Moment_(mathematics)
+*/
+template <typename T>
+struct CovarMoments
+{
+    T m0{};     /// number of (x, y) pairs
+    T x1{};     /// sum of x
+    T y1{};     /// sum of y
+    T xy{};     /// sum of x * y
+
+    /// Accumulates one (x, y) pair.
+    void add(T x, T y)
+    {
+        ++m0;
+        x1 += x;
+        y1 += y;
+        xy += x * y;
+    }
+
+    /// Combines another partial state into this one by adding the sums.
+    void merge(const CovarMoments & rhs)
+    {
+        m0 += rhs.m0;
+        x1 += rhs.x1;
+        y1 += rhs.y1;
+        xy += rhs.xy;
+    }
+
+    /// Serializes the whole object through writePODBinary.
+    void write(WriteBuffer & buf) const
+    {
+        writePODBinary(*this, buf);
+    }
+
+    /// Restores the whole object through readPODBinary; counterpart of write().
+    void read(ReadBuffer & buf)
+    {
+        readPODBinary(*this, buf);
+    }
+
+    /// Population covariance: (xy - x1 * y1 / m0) / m0.
+    /// There is no guard for an empty state: with m0 == 0 the divisions by zero are performed as is.
+    T NO_SANITIZE_UNDEFINED getPopulation() const
+    {
+        return (xy - x1 * y1 / m0) / m0;
+    }
+
+    /// Sample covariance: (xy - x1 * y1 / m0) / (m0 - 1). NaN for an empty state.
+    /// With m0 == 1 the final division is by zero and is performed as is.
+    T NO_SANITIZE_UNDEFINED getSample() const
+    {
+        if (m0 == 0)
+            return std::numeric_limits<T>::quiet_NaN();
+        return (xy - x1 * y1 / m0) / (m0 - 1);
+    }
+};
+
+/// Running sums for the correlation coefficient of paired values (x, y).
+template <typename T>
+struct CorrMoments
+{
+    T m0{};     /// number of (x, y) pairs
+    T x1{};     /// sum of x
+    T y1{};     /// sum of y
+    T xy{};     /// sum of x * y
+    T x2{};     /// sum of x^2
+    T y2{};     /// sum of y^2
+
+    /// Accumulates one (x, y) pair.
+    void add(T x, T y)
+    {
+        ++m0;
+        x1 += x;
+        y1 += y;
+        xy += x * y;
+        x2 += x * x;
+        y2 += y * y;
+    }
+
+    /// Combines another partial state into this one by adding the sums.
+    void merge(const CorrMoments & rhs)
+    {
+        m0 += rhs.m0;
+        x1 += rhs.x1;
+        y1 += rhs.y1;
+        xy += rhs.xy;
+        x2 += rhs.x2;
+        y2 += rhs.y2;
+    }
+
+    /// Serializes the whole object through writePODBinary.
+    void write(WriteBuffer & buf) const
+    {
+        writePODBinary(*this, buf);
+    }
+
+    /// Restores the whole object through readPODBinary; counterpart of write().
+    void read(ReadBuffer & buf)
+    {
+        readPODBinary(*this, buf);
+    }
+
+    /// Pearson correlation coefficient expressed through the raw sums:
+    /// (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1^2) * (m0 * y2 - y1^2)).
+    /// No guard is applied when the expression under the root is zero (e.g. an empty state).
+    T NO_SANITIZE_UNDEFINED get() const
+    {
+        return (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1 * x1) * (m0 * y2 - y1 * y1));
+    }
+};
+
+/// Running sums shared by the Student and Welch t-tests.
+/// Two independent samples, X and Y, are tracked: for each one the number of values,
+/// the sum of the values and the sum of their squares.
+template <typename T>
+struct TTestMoments
+{
+    T nx{};     /// number of values in sample X
+    T ny{};     /// number of values in sample Y
+    T x1{};     /// sum of X values
+    T y1{};     /// sum of Y values
+    T x2{};     /// sum of squared X values
+    T y2{};     /// sum of squared Y values
+
+    /// Accounts for one more value of sample X.
+    void addX(T value)
+    {
+        x2 += value * value;
+        x1 += value;
+        ++nx;
+    }
+
+    /// Accounts for one more value of sample Y.
+    void addY(T value)
+    {
+        y2 += value * value;
+        y1 += value;
+        ++ny;
+    }
+
+    /// Folds another partial state into this one, sample by sample.
+    void merge(const TTestMoments & rhs)
+    {
+        /// Sample X.
+        nx += rhs.nx;
+        x1 += rhs.x1;
+        x2 += rhs.x2;
+
+        /// Sample Y.
+        ny += rhs.ny;
+        y1 += rhs.y1;
+        y2 += rhs.y2;
+    }
+
+    /// The state is (de)serialized as a whole through writePODBinary / readPODBinary.
+    void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
+
+    void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
+};
+
+}
--- a/src/AggregateFunctions/QuantileTDigest.h
+++ b/src/AggregateFunctions/QuantileTDigest.h
@ -114,7 +114,7 @@ class QuantileTDigest
        static constexpr size_t PART_SIZE_BITS = 8;

        using Transform = RadixSortFloatTransform<KeyBits>;
-        using Allocator = RadixSortMallocAllocator;
+        using Allocator = RadixSortAllocator;

        /// The function to get the key from an array element.
        static Key & extractKey(Element & elem) { return elem.mean; }
--- a/src/AggregateFunctions/StatCommon.h
+++ b/src/AggregateFunctions/StatCommon.h
@ -0,0 +1,114 @@
+#pragma once
+
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+#include <Common/ArenaAllocator.h>
+
+#include <numeric>
+#include <algorithm>
+#include <utility>
+
+namespace DB
+{
+
+/// Numerically integrates `func` over [a, b] with the composite Simpson's rule.
+/// The grid has at least 1e6 sub-intervals, or 1e4 per unit of distance between the rounded bounds if that is larger.
+template <typename F>
+static Float64 integrateSimpson(Float64 a, Float64 b, F && func)
+{
+    const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b) - std::round(a)));
+    const long double h = (b - a) / iterations;
+
+    /// Interior nodes with an odd index get weight 4 in the final sum.
+    Float64 sum_odds = 0.0;
+    size_t node = 1;
+    while (node < iterations)
+    {
+        sum_odds += func(a + node * h);
+        node += 2;
+    }
+
+    /// Interior nodes with an even index get weight 2.
+    Float64 sum_evens = 0.0;
+    node = 2;
+    while (node < iterations)
+    {
+        sum_evens += func(a + node * h);
+        node += 2;
+    }
+
+    return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3;
+}
+
+/// Because ranks are adjusted, we have to store each of them in Float type.
+using RanksArray = std::vector<Float64>;
+
+/// Computes 1-based fractional ranks of `values` together with the tie correction factor.
+///
+/// Equal values share the average of the ranks they would occupy in sorted order.
+/// The second element of the result is 1 - sum(t^3 - t) / (n^3 - n), where t runs over the sizes
+/// of the groups of equal values and n is the total number of values.
+/// NOTE: for n <= 1 the denominator is zero, so the correction comes out as NaN.
+template <typename Values>
+std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & values)
+{
+    const size_t size = values.size();
+    /// Save initial positions, then sort indices according to the values.
+    std::vector<size_t> indexes(size);
+    std::iota(indexes.begin(), indexes.end(), 0);
+    std::sort(indexes.begin(), indexes.end(),
+                [&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
+
+    size_t left = 0;
+    Float64 tie_numerator = 0;
+    RanksArray out(size);
+    while (left < size)
+    {
+        /// [left, right) is the maximal run of equal values in sorted order.
+        size_t right = left;
+        while (right < size && values[indexes[left]] == values[indexes[right]])
+            ++right;
+        /// Average of the 1-based ranks left + 1 .. right.
+        auto adjusted = (left + right + 1.) / 2.;
+        auto count_equal = right - left;
+        tie_numerator += std::pow(count_equal, 3) - count_equal;
+        for (size_t iter = left; iter < right; ++iter)
+            out[indexes[iter]] = adjusted;
+        left = right;
+    }
+    /// Move the ranks into the result: passing `out` as an lvalue would copy the whole vector.
+    return {std::move(out), 1 - (tie_numerator / (std::pow(size, 3) - size))};
+}
+
+
+/// Stores two complete samples (X and Y) for statistical tests that need the original values.
+/// The arrays use arena-aware allocators, so every growing operation takes the arena to allocate from.
+template <typename X, typename Y>
+struct StatisticalSample
+{
+    using AllocatorXSample = MixedAlignedArenaAllocator<alignof(X), 4096>;
+    using SampleX = PODArray<X, 32, AllocatorXSample>;
+
+    using AllocatorYSample = MixedAlignedArenaAllocator<alignof(Y), 4096>;
+    using SampleY = PODArray<Y, 32, AllocatorYSample>;
+
+    SampleX x{};
+    SampleY y{};
+    /// Element counts of `x` and `y`; kept in sync with the arrays by every method below.
+    size_t size_x{0};
+    size_t size_y{0};
+
+    /// Appends one value to sample X.
+    void addX(X value, Arena * arena)
+    {
+        ++size_x;
+        x.push_back(value, arena);
+    }
+
+    /// Appends one value to sample Y.
+    void addY(Y value, Arena * arena)
+    {
+        ++size_y;
+        y.push_back(value, arena);
+    }
+
+    /// Appends all values of `rhs` to the corresponding samples of this state.
+    void merge(const StatisticalSample & rhs, Arena * arena)
+    {
+        size_x += rhs.size_x;
+        size_y += rhs.size_y;
+        x.insert(rhs.x.begin(), rhs.x.end(), arena);
+        y.insert(rhs.y.begin(), rhs.y.end(), arena);
+    }
+
+    /// Format: both sizes as VarUInt, then the raw bytes of X values, then the raw bytes of Y values.
+    void write(WriteBuffer & buf) const
+    {
+        writeVarUInt(size_x, buf);
+        writeVarUInt(size_y, buf);
+        buf.write(reinterpret_cast<const char *>(x.data()), size_x * sizeof(x[0]));
+        buf.write(reinterpret_cast<const char *>(y.data()), size_y * sizeof(y[0]));
+    }
+
+    /// Counterpart of write(): reads both sizes, resizes the arrays and fills them from the buffer.
+    void read(ReadBuffer & buf, Arena * arena)
+    {
+        readVarUInt(size_x, buf);
+        readVarUInt(size_y, buf);
+        x.resize(size_x, arena);
+        y.resize(size_y, arena);
+        /// readStrict instead of read: a plain read may return fewer bytes than requested,
+        /// which would silently leave the tail of the freshly resized arrays unfilled.
+        buf.readStrict(reinterpret_cast<char *>(x.data()), size_x * sizeof(x[0]));
+        buf.readStrict(reinterpret_cast<char *>(y.data()), size_y * sizeof(y[0]));
+    }
+};
+
+}
+
--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@ -32,16 +32,16 @@ void registerAggregateFunctionsBitmap(AggregateFunctionFactory &);
 void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
 void registerAggregateFunctionHistogram(AggregateFunctionFactory &);
 void registerAggregateFunctionRetention(AggregateFunctionFactory &);
-void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory &);
 void registerAggregateFunctionMLMethod(AggregateFunctionFactory &);
 void registerAggregateFunctionEntropy(AggregateFunctionFactory &);
 void registerAggregateFunctionSimpleLinearRegression(AggregateFunctionFactory &);
 void registerAggregateFunctionMoving(AggregateFunctionFactory &);
 void registerAggregateFunctionCategoricalIV(AggregateFunctionFactory &);
 void registerAggregateFunctionAggThrow(AggregateFunctionFactory &);
+void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
+void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &);
 void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
 void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
-void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);

 class AggregateFunctionCombinatorFactory;
 void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -86,7 +86,6 @@ void registerAggregateFunctions()
        registerAggregateFunctionsMaxIntersections(factory);
        registerAggregateFunctionHistogram(factory);
        registerAggregateFunctionRetention(factory);
-        registerAggregateFunctionTimeSeriesGroupSum(factory);
        registerAggregateFunctionMLMethod(factory);
        registerAggregateFunctionEntropy(factory);
        registerAggregateFunctionSimpleLinearRegression(factory);
@ -94,6 +93,9 @@ void registerAggregateFunctions()
        registerAggregateFunctionCategoricalIV(factory);
        registerAggregateFunctionAggThrow(factory);
        registerAggregateFunctionRankCorrelation(factory);
+        registerAggregateFunctionMannWhitney(factory);
+        registerAggregateFunctionWelchTTest(factory);
+        registerAggregateFunctionStudentTTest(factory);
    }

    {
--- a/src/AggregateFunctions/tests/gtest_ranks.cpp
+++ b/src/AggregateFunctions/tests/gtest_ranks.cpp
@ -0,0 +1,27 @@
+#include <IO/WriteBufferFromString.h>
+#include <IO/ReadBufferFromString.h>
+#include <Common/PODArray.h>
+#include <AggregateFunctions/StatCommon.h>
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+
+/// Checks rank assignment and the tie correction on a sample that contains several pairs of equal values.
+TEST(Ranks, Simple)
+{
+    using namespace DB;
+
+    const RanksArray sample = {310, 195, 480, 530, 155, 530, 245, 385, 450, 450, 465, 545, 170, 180, 125, 180, 230, 170, 75, 430, 480, 495, 295};
+    const RanksArray expected{12.0, 8.0, 18.5, 21.5, 3.0, 21.5, 10.0, 13.0, 15.5, 15.5, 17.0, 23.0, 4.5, 6.5, 2.0, 6.5, 9.0, 4.5, 1.0, 14.0, 18.5, 20.0, 11.0};
+
+    const auto [ranks, t] = computeRanksAndTieCorrection(sample);
+
+    /// Sizes must agree before comparing element by element.
+    ASSERT_EQ(ranks.size(), expected.size());
+
+    size_t pos = 0;
+    while (pos < ranks.size())
+    {
+        ASSERT_DOUBLE_EQ(ranks[pos], expected[pos]);
+        ++pos;
+    }
+
+    ASSERT_DOUBLE_EQ(t, 0.9975296442687747);
+}
--- a/src/AggregateFunctions/ya.make
+++ b/src/AggregateFunctions/ya.make
@ -29,6 +29,7 @@ SRCS(
    AggregateFunctionHistogram.cpp
    AggregateFunctionIf.cpp
    AggregateFunctionMLMethod.cpp
+    AggregateFunctionMannWhitney.cpp
    AggregateFunctionMaxIntersections.cpp
    AggregateFunctionMerge.cpp
    AggregateFunctionMinMaxAny.cpp
@ -43,13 +44,14 @@ SRCS(
    AggregateFunctionState.cpp
    AggregateFunctionStatistics.cpp
    AggregateFunctionStatisticsSimple.cpp
+    AggregateFunctionStudentTTest.cpp
    AggregateFunctionSum.cpp
    AggregateFunctionSumMap.cpp
-    AggregateFunctionTimeSeriesGroupSum.cpp
    AggregateFunctionTopK.cpp
    AggregateFunctionUniq.cpp
    AggregateFunctionUniqCombined.cpp
    AggregateFunctionUniqUpTo.cpp
+    AggregateFunctionWelchTTest.cpp
    AggregateFunctionWindowFunnel.cpp
    UniqCombinedBiasData.cpp
    UniqVariadicHash.cpp
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -223,7 +223,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW
        Dictionaries/FlatDictionary.cpp
        Dictionaries/HashedDictionary.cpp
        Dictionaries/CacheDictionary.cpp
-        Dictionaries/TrieDictionary.cpp
+        Dictionaries/IPAddressDictionary.cpp
        Dictionaries/RangeHashedDictionary.cpp
        Dictionaries/ComplexKeyHashedDictionary.cpp
        Dictionaries/ComplexKeyCacheDictionary.cpp
@ -305,7 +305,6 @@ endif()

 dbms_target_link_libraries (
    PRIVATE
-        ${BTRIE_LIBRARIES}
        boost::filesystem
        boost::program_options
        clickhouse_common_config
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@ -82,7 +82,7 @@ public:

    bool isNumeric() const override { return false; }
    bool canBeInsideNullable() const override { return true; }
-    bool isFixedAndContiguous() const override { return true; }
+    bool isFixedAndContiguous() const final { return true; }
    size_t sizeOfValueIfFixed() const override { return sizeof(T); }

    size_t size() const override { return data.size(); }
--- a/src/Common/IPv6ToBinary.cpp
+++ b/src/Common/IPv6ToBinary.cpp
@ -1,30 +1,35 @@
 #include "IPv6ToBinary.h"
 #include <Poco/Net/IPAddress.h>
+#include <Poco/ByteOrder.h>
+
 #include <cstring>


 namespace DB
 {

-std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
+void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res)
 {
-    std::array<char, 16> res;
-
    if (Poco::Net::IPAddress::IPv6 == address.family())
    {
-        memcpy(res.data(), address.addr(), 16);
+        memcpy(res, address.addr(), 16);
    }
    else if (Poco::Net::IPAddress::IPv4 == address.family())
    {
        /// Convert to IPv6-mapped address.
-        memset(res.data(), 0, 10);
+        memset(res, 0, 10);
        res[10] = '\xFF';
        res[11] = '\xFF';
        memcpy(&res[12], address.addr(), 4);
    }
    else
-        memset(res.data(), 0, 16);
+        memset(res, 0, 16);
+}

+std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
+{
+    std::array<char, 16> res;
+    IPv6ToRawBinary(address, res.data());
    return res;
 }

--- a/src/Common/IPv6ToBinary.h
+++ b/src/Common/IPv6ToBinary.h
@ -1,11 +1,16 @@
 #pragma once
 #include <array>
+#include <common/types.h>

 namespace Poco { namespace Net { class IPAddress; }}

 namespace DB
 {

+/// Convert IP address to raw binary with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
+/// Saves result into the first 16 bytes of `res`.
+void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res);
+
 /// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
 std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address);

--- a/src/Common/RadixSort.h
+++ b/src/Common/RadixSort.h
@ -35,16 +35,16 @@

 /** Used as a template parameter. See below.
  */
-struct RadixSortMallocAllocator
+struct RadixSortAllocator
 {
    void * allocate(size_t size)
    {
-        return malloc(size);
+        return ::operator new(size);
    }

-    void deallocate(void * ptr, size_t /*size*/)
+    void deallocate(void * ptr, size_t size)
    {
-        return free(ptr);
+        ::operator delete(ptr, size);
    }
 };

@ -100,7 +100,7 @@ struct RadixSortFloatTraits
    /// An object with the functions allocate and deallocate.
    /// Can be used, for example, to allocate memory for a temporary array on the stack.
    /// To do this, the allocator itself is created on the stack.
-    using Allocator = RadixSortMallocAllocator;
+    using Allocator = RadixSortAllocator;

    /// The function to get the key from an array element.
    static Key & extractKey(Element & elem) { return elem; }
@ -139,7 +139,7 @@ struct RadixSortUIntTraits
    static constexpr size_t PART_SIZE_BITS = 8;

    using Transform = RadixSortIdentityTransform<KeyBits>;
-    using Allocator = RadixSortMallocAllocator;
+    using Allocator = RadixSortAllocator;

    static Key & extractKey(Element & elem) { return elem; }
    static Result & extractResult(Element & elem) { return elem; }
@ -173,7 +173,7 @@ struct RadixSortIntTraits
    static constexpr size_t PART_SIZE_BITS = 8;

    using Transform = RadixSortSignedTransform<KeyBits>;
-    using Allocator = RadixSortMallocAllocator;
+    using Allocator = RadixSortAllocator;

    static Key & extractKey(Element & elem) { return elem; }
    static Result & extractResult(Element & elem) { return elem; }
--- a/src/Dictionaries/CMakeLists.txt
+++ b/src/Dictionaries/CMakeLists.txt
@ -11,7 +11,6 @@ add_library(clickhouse_dictionaries ${clickhouse_dictionaries_sources})

 target_link_libraries(clickhouse_dictionaries
    PRIVATE
-        ${BTRIE_LIBRARIES}
        clickhouse_common_io
        dbms
        Poco::Data
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@ -7,6 +7,9 @@
 #include <Columns/ColumnString.h>
 #include <Common/Arena.h>
 #include <Common/HashTable/HashMap.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnVector.h>
+#include <Poco/Net/IPAddress.h>
 #include <common/StringRef.h>
 #include <common/logger_useful.h>
 #include <ext/range.h>
@ -14,23 +17,18 @@
 #include "IDictionary.h"
 #include "IDictionarySource.h"

-struct btrie_s;
-typedef struct btrie_s btrie_t;
-
 namespace DB
 {
-class TrieDictionary final : public IDictionaryBase
+class IPAddressDictionary final : public IDictionaryBase
 {
 public:
-    TrieDictionary(
+    IPAddressDictionary(
        const StorageID & dict_id_,
        const DictionaryStructure & dict_struct_,
        DictionarySourcePtr source_ptr_,
        const DictionaryLifetime dict_lifetime_,
        bool require_nonempty_);

-    ~TrieDictionary() override;
-
    std::string getKeyDescription() const { return key_description; }

    std::string getTypeName() const override { return "Trie"; }
@ -47,7 +45,7 @@ public:

    std::shared_ptr<const IExternalLoadable> clone() const override
    {
-        return std::make_shared<TrieDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
+        return std::make_shared<IPAddressDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
    }

    const IDictionarySource * getSource() const override { return source_ptr.get(); }
@ -150,9 +148,17 @@ public:
    BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;

 private:
+
    template <typename Value>
    using ContainerType = std::vector<Value>;

+    using IPAddress = Poco::Net::IPAddress;
+
+    using IPv4Container = PODArray<UInt32>;
+    using IPv6Container = PaddedPODArray<UInt8>;
+    using IPMaskContainer = PODArray<UInt8>;
+    using RowIdxConstIter = ContainerType<size_t>::const_iterator;
+
    struct Attribute final
    {
        AttributeUnderlyingType type;
@ -207,16 +213,18 @@ private:

    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);

+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+    void getItemsByTwoKeyColumnsImpl(
+        const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;

    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
    void
    getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;

-
    template <typename T>
-    bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
+    void setAttributeValueImpl(Attribute & attribute, const T value);

-    bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
+    void setAttributeValue(Attribute & attribute, const Field & value);

    const Attribute & getAttribute(const std::string & attribute_name) const;

@ -224,6 +232,14 @@ private:
    void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;

    Columns getKeyColumns() const;
+    RowIdxConstIter ipNotFound() const;
+    RowIdxConstIter tryLookupIPv4(UInt32 addr, uint8_t * buf) const;
+    RowIdxConstIter tryLookupIPv6(const uint8_t * addr) const;
+
+    template <typename IPContainerType, typename IPValueType>
+    RowIdxConstIter lookupIP(IPValueType target) const;
+
+    static const uint8_t * getIPv6FromOffset(const IPv6Container & ipv6_col, size_t i);

    const DictionaryStructure dict_struct;
    const DictionarySourcePtr source_ptr;
@ -231,8 +247,22 @@ private:
    const bool require_nonempty;
    const std::string key_description{dict_struct.getKeyDescription()};

+    /// Contains sorted IP subnetworks. If some addresses are equal, the subnet with the lower mask is placed first.
+    std::variant<IPv4Container, IPv6Container> ip_column;
+
+    /// Prefix lengths corresponding to ip_column.
+    IPMaskContainer mask_column;
+
+    /** Contains links to parent subnetworks in ip_column.
+      * Array holds such ip_column's (and mask_column's) indices that
+      * - if parent_subnet[i] < i, then ip_column[i] is subnetwork of ip_column[parent_subnet[i]],
+      * - if parent_subnet[i] == i, then ip_column[i] doesn't belong to any other subnet.
+      */
+    ContainerType<size_t> parent_subnet;
+
+    /// Contains corresponding indices in attributes array.
+    ContainerType<size_t> row_idx;

-    btrie_t * trie = nullptr;
    std::map<std::string, size_t> attribute_index_by_name;
    std::vector<Attribute> attributes;

--- a/src/Dictionaries/TrieDictionary.cpp
+++ b/src/Dictionaries/TrieDictionary.cpp
@ -1,779 +0,0 @@
-#include "TrieDictionary.h"
-#include <stack>
-#include <Columns/ColumnFixedString.h>
-#include <Columns/ColumnVector.h>
-#include <Common/assert_cast.h>
-#include <DataTypes/DataTypeFixedString.h>
-#include <DataTypes/DataTypeString.h>
-#include <IO/WriteIntText.h>
-#include <Poco/ByteOrder.h>
-#include <Poco/Net/IPAddress.h>
-#include <Common/formatIPv6.h>
-#include <common/itoa.h>
-#include <ext/map.h>
-#include <ext/range.h>
-#include "DictionaryBlockInputStream.h"
-#include "DictionaryFactory.h"
-
-#ifdef __clang__
-    #pragma clang diagnostic ignored "-Wold-style-cast"
-    #pragma clang diagnostic ignored "-Wnewline-eof"
-#endif
-
-#include <btrie.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-    extern const int TYPE_MISMATCH;
-    extern const int BAD_ARGUMENTS;
-    extern const int DICTIONARY_IS_EMPTY;
-    extern const int NOT_IMPLEMENTED;
-}
-
-static void validateKeyTypes(const DataTypes & key_types)
-{
-    if (key_types.size() != 1)
-        throw Exception{"Expected a single IP address", ErrorCodes::TYPE_MISMATCH};
-
-    const auto & actual_type = key_types[0]->getName();
-
-    if (actual_type != "UInt32" && actual_type != "FixedString(16)")
-        throw Exception{"Key does not match, expected either UInt32 or FixedString(16)", ErrorCodes::TYPE_MISMATCH};
-}
-
-
-TrieDictionary::TrieDictionary(
-    const StorageID & dict_id_,
-    const DictionaryStructure & dict_struct_,
-    DictionarySourcePtr source_ptr_,
-    const DictionaryLifetime dict_lifetime_,
-    bool require_nonempty_)
-    : IDictionaryBase(dict_id_)
-    , dict_struct(dict_struct_)
-    , source_ptr{std::move(source_ptr_)}
-    , dict_lifetime(dict_lifetime_)
-    , require_nonempty(require_nonempty_)
-    , logger(&Poco::Logger::get("TrieDictionary"))
-{
-    createAttributes();
-    trie = btrie_create();
-    loadData();
-    calculateBytesAllocated();
-}
-
-TrieDictionary::~TrieDictionary()
-{
-    btrie_destroy(trie);
-}
-
-#define DECLARE(TYPE) \
-    void TrieDictionary::get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
-    { \
-        validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        const auto null_value = std::get<TYPE>(attribute.null_values); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, \
-            key_columns, \
-            [&](const size_t row, const auto value) { out[row] = value; }, \
-            [&](const size_t) { return null_value; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void TrieDictionary::getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
-{
-    validateKeyTypes(key_types);
-
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return null_value; });
-}
-
-#define DECLARE(TYPE) \
-    void TrieDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, \
-            key_columns, \
-            [&](const size_t row, const auto value) { out[row] = value; }, \
-            [&](const size_t row) { return def[row]; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void TrieDictionary::getString(
-    const std::string & attribute_name,
-    const Columns & key_columns,
-    const DataTypes & key_types,
-    const ColumnString * const def,
-    ColumnString * const out) const
-{
-    validateKeyTypes(key_types);
-
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t row) { return def->getDataAt(row); });
-}
-
-#define DECLARE(TYPE) \
-    void TrieDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void TrieDictionary::getString(
-    const std::string & attribute_name,
-    const Columns & key_columns,
-    const DataTypes & key_types,
-    const String & def,
-    ColumnString * const out) const
-{
-    validateKeyTypes(key_types);
-
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return StringRef{def}; });
-}
-
-void TrieDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
-{
-    validateKeyTypes(key_types);
-
-    const auto & attribute = attributes.front();
-
-    switch (attribute.type)
-    {
-        case AttributeUnderlyingType::utUInt8:
-            has<UInt8>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            has<UInt16>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            has<UInt32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            has<UInt64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            has<UInt128>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            has<Int8>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            has<Int16>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            has<Int32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            has<Int64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            has<Float32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            has<Float64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utString:
-            has<StringRef>(attribute, key_columns, out);
-            break;
-
-        case AttributeUnderlyingType::utDecimal32:
-            has<Decimal32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            has<Decimal64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            has<Decimal128>(attribute, key_columns, out);
-            break;
-    }
-}
-
-void TrieDictionary::createAttributes()
-{
-    const auto size = dict_struct.attributes.size();
-    attributes.reserve(size);
-
-    for (const auto & attribute : dict_struct.attributes)
-    {
-        attribute_index_by_name.emplace(attribute.name, attributes.size());
-        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
-
-        if (attribute.hierarchical)
-            throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
-                            ErrorCodes::TYPE_MISMATCH};
-    }
-}
-
-void TrieDictionary::loadData()
-{
-    auto stream = source_ptr->loadAll();
-    stream->readPrefix();
-
-    /// created upfront to avoid excess allocations
-    const auto keys_size = dict_struct.key->size();
-    StringRefs keys(keys_size);
-
-    const auto attributes_size = attributes.size();
-
-    while (const auto block = stream->read())
-    {
-        const auto rows = block.rows();
-        element_count += rows;
-
-        const auto key_column_ptrs = ext::map<Columns>(
-            ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
-
-        const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
-        {
-            return block.safeGetByPosition(keys_size + attribute_idx).column;
-        });
-
-        for (const auto row_idx : ext::range(0, rows))
-        {
-            /// calculate key once per row
-            const auto key_column = key_column_ptrs.front();
-
-            for (const auto attribute_idx : ext::range(0, attributes_size))
-            {
-                const auto & attribute_column = *attribute_column_ptrs[attribute_idx];
-                auto & attribute = attributes[attribute_idx];
-                setAttributeValue(attribute, key_column->getDataAt(row_idx), attribute_column[row_idx]);
-            }
-        }
-    }
-
-    stream->readSuffix();
-
-    if (require_nonempty && 0 == element_count)
-        throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY};
-}
-
-template <typename T>
-void TrieDictionary::addAttributeSize(const Attribute & attribute)
-{
-    const auto & vec = std::get<ContainerType<T>>(attribute.maps);
-    bytes_allocated += sizeof(ContainerType<T>) + (vec.capacity() * sizeof(T));
-    bucket_count = vec.size();
-}
-
-void TrieDictionary::calculateBytesAllocated()
-{
-    bytes_allocated += attributes.size() * sizeof(attributes.front());
-
-    for (const auto & attribute : attributes)
-    {
-        switch (attribute.type)
-        {
-            case AttributeUnderlyingType::utUInt8:
-                addAttributeSize<UInt8>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt16:
-                addAttributeSize<UInt16>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt32:
-                addAttributeSize<UInt32>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt64:
-                addAttributeSize<UInt64>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt128:
-                addAttributeSize<UInt128>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt8:
-                addAttributeSize<Int8>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt16:
-                addAttributeSize<Int16>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt32:
-                addAttributeSize<Int32>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt64:
-                addAttributeSize<Int64>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat32:
-                addAttributeSize<Float32>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat64:
-                addAttributeSize<Float64>(attribute);
-                break;
-
-            case AttributeUnderlyingType::utDecimal32:
-                addAttributeSize<Decimal32>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal64:
-                addAttributeSize<Decimal64>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal128:
-                addAttributeSize<Decimal128>(attribute);
-                break;
-
-            case AttributeUnderlyingType::utString:
-            {
-                addAttributeSize<StringRef>(attribute);
-                bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
-
-                break;
-            }
-        }
-    }
-
-    bytes_allocated += btrie_allocated(trie);
-}
-
-
-template <typename T>
-void TrieDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
-{
-    attribute.null_values = T(null_value.get<NearestFieldType<T>>());
-    attribute.maps.emplace<ContainerType<T>>();
-}
-
-TrieDictionary::Attribute TrieDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
-{
-    Attribute attr{type, {}, {}, {}};
-
-    switch (type)
-    {
-        case AttributeUnderlyingType::utUInt8:
-            createAttributeImpl<UInt8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            createAttributeImpl<UInt16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            createAttributeImpl<UInt32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            createAttributeImpl<UInt64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            createAttributeImpl<UInt128>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            createAttributeImpl<Int8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            createAttributeImpl<Int16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            createAttributeImpl<Int32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            createAttributeImpl<Int64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            createAttributeImpl<Float32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            createAttributeImpl<Float64>(attr, null_value);
-            break;
-
-        case AttributeUnderlyingType::utDecimal32:
-            createAttributeImpl<Decimal32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            createAttributeImpl<Decimal64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            createAttributeImpl<Decimal128>(attr, null_value);
-            break;
-
-        case AttributeUnderlyingType::utString:
-        {
-            attr.null_values = null_value.get<String>();
-            attr.maps.emplace<ContainerType<StringRef>>();
-            attr.string_arena = std::make_unique<Arena>();
-            break;
-        }
-    }
-
-    return attr;
-}
-
-
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
-void TrieDictionary::getItemsImpl(
-    const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
-{
-    auto & vec = std::get<ContainerType<AttributeType>>(attribute.maps);
-
-    const auto first_column = key_columns.front();
-    const auto rows = first_column->size();
-    if (first_column->isNumeric())
-    {
-        for (const auto i : ext::range(0, rows))
-        {
-            auto addr = Int32(first_column->get64(i));
-            uintptr_t slot = btrie_find(trie, addr);
-#pragma GCC diagnostic push
-#pragma GCC diagnostic warning "-Wold-style-cast"
-            set_value(i, slot != BTRIE_NULL ? static_cast<OutputType>(vec[slot]) : get_default(i));
-#pragma GCC diagnostic pop
-        }
-    }
-    else
-    {
-        for (const auto i : ext::range(0, rows))
-        {
-            auto addr = first_column->getDataAt(i);
-            if (addr.size != 16)
-                throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
-
-            uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const uint8_t *>(addr.data));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic warning "-Wold-style-cast"
-            set_value(i, slot != BTRIE_NULL ? static_cast<OutputType>(vec[slot]) : get_default(i));
-#pragma GCC diagnostic pop
-        }
-    }
-
-    query_count.fetch_add(rows, std::memory_order_relaxed);
-}
-
-
-template <typename T>
-bool TrieDictionary::setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value)
-{
-    // Insert value into appropriate vector type
-    auto & vec = std::get<ContainerType<T>>(attribute.maps);
-    size_t row = vec.size();
-    vec.push_back(value);
-
-    // Parse IP address and subnet length from string (e.g. 2a02:6b8::3/64)
-    Poco::Net::IPAddress addr, mask;
-    std::string addr_str(key.toString());
-    size_t pos = addr_str.find('/');
-    if (pos != std::string::npos)
-    {
-        addr = Poco::Net::IPAddress(addr_str.substr(0, pos));
-        mask = Poco::Net::IPAddress(std::stoi(addr_str.substr(pos + 1), nullptr, 10), addr.family());
-    }
-    else
-    {
-        addr = Poco::Net::IPAddress(addr_str);
-        mask = Poco::Net::IPAddress(addr.length() * 8, addr.family());
-    }
-
-    /*
-     * Here we might overwrite the same key with the same slot as each key can map to multiple attributes.
-     * However, all columns have equal number of rows so it is okay to store only row number for each key
-     * instead of building a trie for each column. This comes at the cost of additional lookup in attribute
-     * vector on lookup time to return cell from row + column. The reason for this is to save space,
-     * and build only single trie instead of trie for each column.
-     */
-    if (addr.family() == Poco::Net::IPAddress::IPv4)
-    {
-        UInt32 addr_v4 = Poco::ByteOrder::toNetwork(*reinterpret_cast<const UInt32 *>(addr.addr()));
-        UInt32 mask_v4 = Poco::ByteOrder::toNetwork(*reinterpret_cast<const UInt32 *>(mask.addr()));
-        return btrie_insert(trie, addr_v4, mask_v4, row) == 0;
-    }
-
-    const uint8_t * addr_v6 = reinterpret_cast<const uint8_t *>(addr.addr());
-    const uint8_t * mask_v6 = reinterpret_cast<const uint8_t *>(mask.addr());
-    return btrie_insert_a6(trie, addr_v6, mask_v6, row) == 0;
-}
-
-bool TrieDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
-{
-    switch (attribute.type)
-    {
-        case AttributeUnderlyingType::utUInt8:
-            return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt16:
-            return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt32:
-            return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt64:
-            return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt128:
-            return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
-        case AttributeUnderlyingType::utInt8:
-            return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
-        case AttributeUnderlyingType::utInt16:
-            return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
-        case AttributeUnderlyingType::utInt32:
-            return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
-        case AttributeUnderlyingType::utInt64:
-            return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
-        case AttributeUnderlyingType::utFloat32:
-            return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
-        case AttributeUnderlyingType::utFloat64:
-            return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
-
-        case AttributeUnderlyingType::utDecimal32:
-            return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
-        case AttributeUnderlyingType::utDecimal64:
-            return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
-        case AttributeUnderlyingType::utDecimal128:
-            return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
-
-        case AttributeUnderlyingType::utString:
-        {
-            const auto & string = value.get<String>();
-            const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
-            setAttributeValueImpl<StringRef>(attribute, key, StringRef{string_in_arena, string.size()});
-            return true;
-        }
-    }
-
-    return {};
-}
-
-const TrieDictionary::Attribute & TrieDictionary::getAttribute(const std::string & attribute_name) const
-{
-    const auto it = attribute_index_by_name.find(attribute_name);
-    if (it == std::end(attribute_index_by_name))
-        throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
-
-    return attributes[it->second];
-}
-
-template <typename T>
-void TrieDictionary::has(const Attribute &, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
-{
-    const auto first_column = key_columns.front();
-    const auto rows = first_column->size();
-    if (first_column->isNumeric())
-    {
-        for (const auto i : ext::range(0, rows))
-        {
-            auto addr = Int32(first_column->get64(i));
-            uintptr_t slot = btrie_find(trie, addr);
-#pragma GCC diagnostic push
-#pragma GCC diagnostic warning "-Wold-style-cast"
-            out[i] = (slot != BTRIE_NULL);
-#pragma GCC diagnostic pop
-        }
-    }
-    else
-    {
-        for (const auto i : ext::range(0, rows))
-        {
-            auto addr = first_column->getDataAt(i);
-            if (unlikely(addr.size != 16))
-                throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
-
-            uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const uint8_t *>(addr.data));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic warning "-Wold-style-cast"
-            out[i] = (slot != BTRIE_NULL);
-#pragma GCC diagnostic pop
-        }
-    }
-
-    query_count.fetch_add(rows, std::memory_order_relaxed);
-}
-
-template <typename Getter, typename KeyType>
-static void trieTraverse(const btrie_t * trie, Getter && getter)
-{
-    KeyType key = 0;
-    const KeyType high_bit = ~((~key) >> 1);
-
-    btrie_node_t * node;
-    node = trie->root;
-
-    std::stack<btrie_node_t *> stack;
-    while (node)
-    {
-        stack.push(node);
-        node = node->left;
-    }
-
-    auto get_bit = [&high_bit](size_t size) { return size ? (high_bit >> (size - 1)) : 0; };
-
-    while (!stack.empty())
-    {
-        node = stack.top();
-        stack.pop();
-#pragma GCC diagnostic push
-#pragma GCC diagnostic warning "-Wold-style-cast"
-        if (node && node->value != BTRIE_NULL)
-#pragma GCC diagnostic pop
-            getter(key, stack.size());
-
-        if (node && node->right)
-        {
-            stack.push(nullptr);
-            key |= get_bit(stack.size());
-            stack.push(node->right);
-            while (stack.top()->left)
-                stack.push(stack.top()->left);
-        }
-        else
-            key &= ~get_bit(stack.size());
-    }
-}
-
-Columns TrieDictionary::getKeyColumns() const
-{
-    auto ip_column = ColumnFixedString::create(IPV6_BINARY_LENGTH);
-    auto mask_column = ColumnVector<UInt8>::create();
-
-#if defined(__SIZEOF_INT128__)
-    auto getter = [&ip_column, &mask_column](__uint128_t ip, size_t mask)
-    {
-        Poco::UInt64 * ip_array = reinterpret_cast<Poco::UInt64 *>(&ip); // Poco:: for old poco + macos
-        ip_array[0] = Poco::ByteOrder::fromNetwork(ip_array[0]);
-        ip_array[1] = Poco::ByteOrder::fromNetwork(ip_array[1]);
-        std::swap(ip_array[0], ip_array[1]);
-        ip_column->insertData(reinterpret_cast<const char *>(ip_array), IPV6_BINARY_LENGTH);
-        mask_column->insertValue(static_cast<UInt8>(mask));
-    };
-
-    trieTraverse<decltype(getter), __uint128_t>(trie, std::move(getter));
-#else
-    throw Exception("TrieDictionary::getKeyColumns is not implemented for 32bit arch", ErrorCodes::NOT_IMPLEMENTED);
-#endif
-    return {std::move(ip_column), std::move(mask_column)};
-}
-
-BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
-{
-    using BlockInputStreamType = DictionaryBlockInputStream<TrieDictionary, UInt64>;
-
-    auto get_keys = [](const Columns & columns, const std::vector<DictionaryAttribute> & dict_attributes)
-    {
-        const auto & attr = dict_attributes.front();
-        return ColumnsWithTypeAndName(
-            {ColumnWithTypeAndName(columns.front(), std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH), attr.name)});
-    };
-    auto get_view = [](const Columns & columns, const std::vector<DictionaryAttribute> & dict_attributes)
-    {
-        auto column = ColumnString::create();
-        const auto & ip_column = assert_cast<const ColumnFixedString &>(*columns.front());
-        const auto & mask_column = assert_cast<const ColumnVector<UInt8> &>(*columns.back());
-        char buffer[48];
-        for (size_t row : ext::range(0, ip_column.size()))
-        {
-            UInt8 mask = mask_column.getElement(row);
-            char * ptr = buffer;
-            formatIPv6(reinterpret_cast<const unsigned char *>(ip_column.getDataAt(row).data), ptr);
-            *(ptr - 1) = '/';
-            ptr = itoa(mask, ptr);
-            column->insertData(buffer, ptr - buffer);
-        }
-        return ColumnsWithTypeAndName{
-            ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), dict_attributes.front().name)};
-    };
-    return std::make_shared<BlockInputStreamType>(
-        shared_from_this(), max_block_size, getKeyColumns(), column_names, std::move(get_keys), std::move(get_view));
-}
-
-
-void registerDictionaryTrie(DictionaryFactory & factory)
-{
-    auto create_layout = [=](const std::string &,
-                             const DictionaryStructure & dict_struct,
-                             const Poco::Util::AbstractConfiguration & config,
-                             const std::string & config_prefix,
-                             DictionarySourcePtr source_ptr) -> DictionaryPtr
-    {
-        if (!dict_struct.key)
-            throw Exception{"'key' is required for dictionary of layout 'ip_trie'", ErrorCodes::BAD_ARGUMENTS};
-
-        const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
-        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
-        const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
-        // This is specialised trie for storing IPv4 and IPv6 prefixes.
-        return std::make_unique<TrieDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
-    };
-    factory.registerLayout("ip_trie", create_layout, true);
-}
-
-}
--- a/src/Dictionaries/registerDictionaries.cpp
+++ b/src/Dictionaries/registerDictionaries.cpp
@ -27,9 +27,7 @@ void registerDictionaries()
        registerDictionaryComplexKeyHashed(factory);
        registerDictionaryComplexKeyCache(factory);
        registerDictionaryComplexKeyDirect(factory);
-#if !defined(ARCADIA_BUILD)
        registerDictionaryTrie(factory);
-#endif
        registerDictionaryFlat(factory);
        registerDictionaryHashed(factory);
        registerDictionaryCache(factory);
--- a/src/Dictionaries/ya.make
+++ b/src/Dictionaries/ya.make
@ -53,6 +53,7 @@ SRCS(
    FlatDictionary.cpp
    HTTPDictionarySource.cpp
    HashedDictionary.cpp
+    IPAddressDictionary.cpp
    LibraryDictionarySource.cpp
    LibraryDictionarySourceExternal.cpp
    MongoDBDictionarySource.cpp
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@ -91,6 +91,9 @@ inline UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column)
    return field.get<UInt32>();
 }

+/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
+struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };
+

 /** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment.
  *  (Date is represented internally as number of days from some day; DateTime - as unix timestamp)
@ -111,6 +114,13 @@ struct ConvertImpl
        using ColVecFrom = typename FromDataType::ColumnType;
        using ColVecTo = typename ToDataType::ColumnType;

+        if (std::is_same_v<Name, NameToUnixTimestamp>)
+        {
+            if (isDate(named_from.type))
+                throw Exception("Illegal column " + named_from.column->getName() + " of first argument of function " + Name::name,
+                    ErrorCodes::ILLEGAL_COLUMN);
+        }
+
        if constexpr ((IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
            && !(std::is_same_v<DataTypeDateTime64, FromDataType> || std::is_same_v<DataTypeDateTime64, ToDataType>))
        {
@ -923,9 +933,6 @@ struct ConvertImplGenericFromString
 };


-/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
-struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };
-
 template <>
 struct ConvertImpl<DataTypeString, DataTypeUInt32, NameToUnixTimestamp>
    : ConvertImpl<DataTypeString, DataTypeDateTime, NameToUnixTimestamp> {};
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@ -37,7 +37,7 @@
 #include <Dictionaries/ComplexKeyCacheDictionary.h>
 #include <Dictionaries/ComplexKeyDirectDictionary.h>
 #include <Dictionaries/RangeHashedDictionary.h>
-#include <Dictionaries/TrieDictionary.h>
+#include <Dictionaries/IPAddressDictionary.h>
 #include <Dictionaries/PolygonDictionaryImplementations.h>
 #include <Dictionaries/DirectDictionary.h>

@ -192,7 +192,7 @@ private:
            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
 #endif
 #if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
+            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
 #endif
            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
@ -346,7 +346,7 @@ private:
            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
 #endif
 #if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
+            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
 #endif
            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
@ -524,7 +524,7 @@ private:
            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
 #endif
 #if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
+            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
 #endif
            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
@ -861,7 +861,7 @@ private:
            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
 #endif
 #if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
+            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
 #endif
            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
@ -1116,7 +1116,7 @@ private:
            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
 #endif
 #if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
+            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
 #endif
            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@ -1177,7 +1177,7 @@ void InterpreterSelectQuery::executeFetchColumns(
        const auto & func = desc.function;
        std::optional<UInt64> num_rows{};
        if (!query.prewhere() && !query.where())
-            num_rows = storage->totalRows();
+            num_rows = storage->totalRows(settings);
        else // It's possible to optimize count() given only partition predicates
        {
            SelectQueryInfo temp_query_info;
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@ -463,7 +463,7 @@ public:
    /// - For total_rows column in system.tables
    ///
    /// Does takes underlying Storage (if any) into account.
-    virtual std::optional<UInt64> totalRows() const { return {}; }
+    virtual std::optional<UInt64> totalRows(const Settings &) const { return {}; }

    /// Same as above but also take partition predicate into account.
    virtual std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo &, const Context &) const { return {}; }
@ -481,7 +481,7 @@ public:
    /// Memory part should be estimated as a resident memory size.
    /// In particular, alloctedBytes() is preferable over bytes()
    /// when considering in-memory blocks.
-    virtual std::optional<UInt64> totalBytes() const { return {}; }
+    virtual std::optional<UInt64> totalBytes(const Settings &) const { return {}; }

    /// Number of rows INSERTed since server start.
    ///
--- a/src/Storages/MergeTree/BackgroundJobsExecutor.cpp
+++ b/src/Storages/MergeTree/BackgroundJobsExecutor.cpp
@ -36,34 +36,31 @@ double IBackgroundJobExecutor::getSleepRandomAdd()
    return std::uniform_real_distribution<double>(0, sleep_settings.task_sleep_seconds_when_no_work_random_part)(rng);
 }

-void IBackgroundJobExecutor::scheduleTask(bool job_done, bool with_backoff)
+void IBackgroundJobExecutor::runTaskWithoutDelay()
 {
-    if (job_done)
-    {
-        no_work_done_count = 0;
-        /// We have background jobs, schedule task as soon as possible
-        scheduling_task->schedule();
+    no_work_done_count = 0;
+    /// We have background jobs, schedule task as soon as possible
+    scheduling_task->schedule();
+}

+void IBackgroundJobExecutor::scheduleTask(bool with_backoff)
+{
+    size_t next_time_to_execute;
+    if (with_backoff)
+    {
+        auto no_work_done_times = no_work_done_count.fetch_add(1, std::memory_order_relaxed);
+
+        next_time_to_execute = 1000 * (std::min(
+                sleep_settings.task_sleep_seconds_when_no_work_max,
+                sleep_settings.thread_sleep_seconds_if_nothing_to_do * std::pow(sleep_settings.task_sleep_seconds_when_no_work_multiplier, no_work_done_times))
+            + getSleepRandomAdd());
    }
    else
    {
-        size_t next_time_to_execute;
-        if (with_backoff)
-        {
-            auto no_work_done_times = no_work_done_count.fetch_add(1, std::memory_order_relaxed);
-
-            next_time_to_execute = 1000 * (std::min(
-                    sleep_settings.task_sleep_seconds_when_no_work_max,
-                    sleep_settings.thread_sleep_seconds_if_nothing_to_do * std::pow(sleep_settings.task_sleep_seconds_when_no_work_multiplier, no_work_done_times))
-                + getSleepRandomAdd());
-        }
-        else
-        {
-            next_time_to_execute = 1000 * sleep_settings.thread_sleep_seconds_if_nothing_to_do;
-        }
-
-        scheduling_task->scheduleAfter(next_time_to_execute, false);
+        next_time_to_execute = 1000 * sleep_settings.thread_sleep_seconds_if_nothing_to_do;
    }
+
+    scheduling_task->scheduleAfter(next_time_to_execute, false);
 }

 namespace
@ -105,42 +102,42 @@ try
                        /// Job done, decrement metric and reset no_work counter
                        CurrentMetrics::values[pool_config.tasks_metric]--;
                        /// Job done, new empty space in pool, schedule background task
-                        scheduleTask(true);
+                        runTaskWithoutDelay();
                    }
                    catch (...)
                    {
                        tryLogCurrentException(__PRETTY_FUNCTION__);
                        CurrentMetrics::values[pool_config.tasks_metric]--;
-                        scheduleTask(false);
+                        scheduleTask(/* with_backoff = */ true);
                    }
                });
                /// We've scheduled task in the background pool and when it will finish we will be triggered again. But this task can be
                /// extremely long and we may have a lot of other small tasks to do, so we schedule ourselves here.
-                scheduleTask(true);
+                runTaskWithoutDelay();
            }
            catch (...)
            {
                /// With our Pool settings scheduleOrThrowOnError shouldn't throw exceptions, but for safety catch added here
                tryLogCurrentException(__PRETTY_FUNCTION__);
                CurrentMetrics::values[pool_config.tasks_metric]--;
-                scheduleTask(false);
+                scheduleTask(/* with_backoff = */ true);
            }
        }
        else /// Pool is full and we have some work to do
        {
-            scheduleTask(false, /* with_backoff = */ false);
+            scheduleTask(/* with_backoff = */ false);
        }
    }
    else /// Nothing to do, no jobs
    {
-        scheduleTask(false);
+        scheduleTask(/* with_backoff = */ true);
    }

 }
 catch (...) /// Exception while we looking for a task, reschedule
 {
    tryLogCurrentException(__PRETTY_FUNCTION__);
-    scheduleTask(false);
+    scheduleTask(/* with_backoff = */ true);
 }

 void IBackgroundJobExecutor::start()
--- a/src/Storages/MergeTree/BackgroundJobsExecutor.h
+++ b/src/Storages/MergeTree/BackgroundJobsExecutor.h
@ -117,7 +117,9 @@ private:
    /// Function that executes in background scheduling pool
    void jobExecutingTask();
    /// Recalculate timeouts when we have to check for a new job
-    void scheduleTask(bool job_done, bool with_backoff=false);
+    void scheduleTask(bool with_backoff);
+    /// Run background task as fast as possible and reset errors counter
+    void runTaskWithoutDelay();
    /// Return random add for sleep in case of error
    double getSleepRandomAdd();
 };
--- a/src/Storages/StorageBuffer.cpp
+++ b/src/Storages/StorageBuffer.cpp
@ -867,13 +867,13 @@ void StorageBuffer::checkAlterIsPossible(const AlterCommands & commands, const S
    }
 }

-std::optional<UInt64> StorageBuffer::totalRows() const
+std::optional<UInt64> StorageBuffer::totalRows(const Settings & settings) const
 {
    std::optional<UInt64> underlying_rows;
    auto underlying = DatabaseCatalog::instance().tryGetTable(destination_id, global_context);

    if (underlying)
-        underlying_rows = underlying->totalRows();
+        underlying_rows = underlying->totalRows(settings);
    if (!underlying_rows)
        return underlying_rows;

@ -886,7 +886,7 @@ std::optional<UInt64> StorageBuffer::totalRows() const
    return rows + *underlying_rows;
 }

-std::optional<UInt64> StorageBuffer::totalBytes() const
+std::optional<UInt64> StorageBuffer::totalBytes(const Settings & /*settings*/) const
 {
    UInt64 bytes = 0;
    for (const auto & buffer : buffers)
--- a/src/Storages/StorageBuffer.h
+++ b/src/Storages/StorageBuffer.h
@ -109,8 +109,8 @@ public:
    /// The structure of the subordinate table is not checked and does not change.
    void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override;

-    std::optional<UInt64> totalRows() const override;
-    std::optional<UInt64> totalBytes() const override;
+    std::optional<UInt64> totalRows(const Settings & settings) const override;
+    std::optional<UInt64> totalBytes(const Settings & settings) const override;

    std::optional<UInt64> lifetimeRows() const override { return writes.rows; }
    std::optional<UInt64> lifetimeBytes() const override { return writes.bytes; }
--- a/src/Storages/StorageJoin.cpp
+++ b/src/Storages/StorageJoin.cpp
@ -102,8 +102,8 @@ HashJoinPtr StorageJoin::getJoin(std::shared_ptr<TableJoin> analyzed_join) const
 void StorageJoin::insertBlock(const Block & block) { join->addJoinedBlock(block, true); }

 size_t StorageJoin::getSize() const { return join->getTotalRowCount(); }
-std::optional<UInt64> StorageJoin::totalRows() const { return join->getTotalRowCount(); }
-std::optional<UInt64> StorageJoin::totalBytes() const { return join->getTotalByteCount(); }
+std::optional<UInt64> StorageJoin::totalRows(const Settings &) const { return join->getTotalRowCount(); }
+std::optional<UInt64> StorageJoin::totalBytes(const Settings &) const { return join->getTotalByteCount(); }


 void registerStorageJoin(StorageFactory & factory)
--- a/src/Storages/StorageJoin.h
+++ b/src/Storages/StorageJoin.h
@ -46,8 +46,8 @@ public:
        size_t max_block_size,
        unsigned num_streams) override;

-    std::optional<UInt64> totalRows() const override;
-    std::optional<UInt64> totalBytes() const override;
+    std::optional<UInt64> totalRows(const Settings & settings) const override;
+    std::optional<UInt64> totalBytes(const Settings & settings) const override;

 private:
    Block sample_block;
--- a/src/Storages/StorageMemory.cpp
+++ b/src/Storages/StorageMemory.cpp
@ -1,9 +1,11 @@
+#include <cassert>
 #include <Common/Exception.h>

 #include <DataStreams/IBlockInputStream.h>

-#include <Storages/StorageMemory.h>
+#include <Interpreters/MutationsInterpreter.h>
 #include <Storages/StorageFactory.h>
+#include <Storages/StorageMemory.h>

 #include <IO/WriteHelpers.h>
 #include <Processors/Sources/SourceWithProgress.h>
@ -21,7 +23,7 @@ namespace ErrorCodes

 class MemorySource : public SourceWithProgress
 {
-    using InitializerFunc = std::function<void(BlocksList::const_iterator &, size_t &)>;
+    using InitializerFunc = std::function<void(BlocksList::const_iterator &, size_t &, std::shared_ptr<const BlocksList> &)>;
 public:
    /// Blocks are stored in std::list which may be appended in another thread.
    /// We use pointer to the beginning of the list and its current size.
@ -34,11 +36,13 @@ public:
        size_t num_blocks_,
        const StorageMemory & storage,
        const StorageMetadataPtr & metadata_snapshot,
-        InitializerFunc initializer_func_ = [](BlocksList::const_iterator &, size_t &) {})
+        std::shared_ptr<const BlocksList> data_,
+        InitializerFunc initializer_func_ = [](BlocksList::const_iterator &, size_t &, std::shared_ptr<const BlocksList> &) {})
        : SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID()))
        , column_names(std::move(column_names_))
        , current_it(first_)
        , num_blocks(num_blocks_)
+        , data(data_)
        , initializer_func(std::move(initializer_func_))
    {
    }
@ -50,7 +54,7 @@ protected:
    {
        if (!postponed_init_done)
        {
-            initializer_func(current_it, num_blocks);
+            initializer_func(current_it, num_blocks, data);
            postponed_init_done = true;
        }

@ -77,6 +81,7 @@ private:
    size_t num_blocks;
    size_t current_block_idx = 0;

+    std::shared_ptr<const BlocksList> data;
    bool postponed_init_done = false;
    InitializerFunc initializer_func;
 };
@ -102,7 +107,9 @@ public:
        metadata_snapshot->check(block, true);
        {
            std::lock_guard lock(storage.mutex);
-            storage.data.push_back(block);
+            auto new_data = std::make_unique<BlocksList>(*(storage.data.get()));
+            new_data->push_back(block);
+            storage.data.set(std::move(new_data));

            storage.total_size_bytes.fetch_add(size_bytes_diff, std::memory_order_relaxed);
            storage.total_size_rows.fetch_add(size_rows_diff, std::memory_order_relaxed);
@ -116,7 +123,7 @@ private:


 StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_)
-    : IStorage(table_id_)
+    : IStorage(table_id_), data(std::make_unique<const BlocksList>())
 {
    StorageInMemoryMetadata storage_metadata;
    storage_metadata.setColumns(std::move(columns_description_));
@ -146,30 +153,31 @@ Pipe StorageMemory::read(
        /// set for IN or hash table for JOIN, which can't be done concurrently.
        /// Since no other manipulation with data is done, multiple sources shouldn't give any profit.

-        return Pipe(
-                std::make_shared<MemorySource>(
-                        column_names, data.end(), 0, *this, metadata_snapshot,
-                        /// This hack is needed for global subqueries.
-                        /// It allows to set up this Source for read AFTER Storage::read() has been called and just before actual reading
-                        [this](BlocksList::const_iterator & current_it, size_t & num_blocks)
-                        {
-                            std::lock_guard guard(mutex);
-                            current_it = data.begin();
-                            num_blocks = data.size();
-                        }
-                ));
+        return Pipe(std::make_shared<MemorySource>(
+            column_names,
+            data.get()->end(),
+            0,
+            *this,
+            metadata_snapshot,
+            data.get(),
+            [this](BlocksList::const_iterator & current_it, size_t & num_blocks, std::shared_ptr<const BlocksList> & current_data)
+            {
+                current_data = data.get();
+                current_it = current_data->begin();
+                num_blocks = current_data->size();
+            }));
    }

-    std::lock_guard lock(mutex);
+    auto current_data = data.get();

-    size_t size = data.size();
+    size_t size = current_data->size();

    if (num_streams > size)
        num_streams = size;

    Pipes pipes;

-    BlocksList::const_iterator it = data.begin();
+    BlocksList::const_iterator it = current_data->begin();

    size_t offset = 0;
    for (size_t stream = 0; stream < num_streams; ++stream)
@ -179,7 +187,7 @@ Pipe StorageMemory::read(

        assert(num_blocks > 0);

-        pipes.emplace_back(std::make_shared<MemorySource>(column_names, it, num_blocks, *this, metadata_snapshot));
+        pipes.emplace_back(std::make_shared<MemorySource>(column_names, it, num_blocks, *this, metadata_snapshot, current_data));

        while (offset < next_offset)
        {
@ -200,37 +208,99 @@ BlockOutputStreamPtr StorageMemory::write(const ASTPtr & /*query*/, const Storag

 void StorageMemory::drop()
 {
-    std::lock_guard lock(mutex);
-    data.clear();
+    data.set(std::make_unique<BlocksList>());
    total_size_bytes.store(0, std::memory_order_relaxed);
    total_size_rows.store(0, std::memory_order_relaxed);
 }

+/// Overwrites the column data of `old_block` with the data of the same-named columns of `new_block`.
+/// Only the columns present in `new_block` are looked up (by name) and replaced.
+static inline void updateBlockData(Block & old_block, const Block & new_block)
+{
+    for (const auto & updated_column : new_block)
+        old_block.getByName(updated_column.name).column = updated_column.column;
+}
+
+/// Applies mutation commands to the blocks stored in memory.
+///
+/// The commands are executed through MutationsInterpreter over this table, the produced blocks
+/// are assembled into a fresh BlocksList, and that list is published as the new version of `data`.
+/// The row and byte counters are recalculated from the new list.
+void StorageMemory::mutate(const MutationCommands & commands, const Context & context)
+{
+    /// Inserts also copy-and-replace `data` under this mutex, so hold it for the whole mutation.
+    std::lock_guard lock(mutex);
+    auto metadata_snapshot = getInMemoryMetadataPtr();
+    auto storage = getStorageID();
+    auto storage_ptr = DatabaseCatalog::instance().getTable(storage, context);
+    auto interpreter = std::make_unique<MutationsInterpreter>(storage_ptr, metadata_snapshot, commands, context, true);
+    auto in = interpreter->execute();
+
+    /// Materialize the whole result of the mutation.
+    in->readPrefix();
+    BlocksList out;
+    Block block;
+    while ((block = in->read()))
+    {
+        out.push_back(block);
+    }
+    in->readSuffix();
+
+    std::unique_ptr<BlocksList> new_data;
+
+    /// All columns are affected: the result of the mutation is the new data as is.
+    if (interpreter->isAffectingAllColumns())
+    {
+        /// `out` is not used after this point, so it can be moved instead of copied.
+        new_data = std::make_unique<BlocksList>(std::move(out));
+    }
+    else
+    {
+        /// Only some of the columns are affected: copy the current blocks and replace the changed columns in them.
+        new_data = std::make_unique<BlocksList>(*(data.get()));
+        auto data_it = new_data->begin();
+        auto out_it = out.begin();
+
+        while (data_it != new_data->end())
+        {
+            /// Mutation does not change the number of blocks
+            assert(out_it != out.end());
+
+            updateBlockData(*data_it, *out_it);
+            ++data_it;
+            ++out_it;
+        }
+
+        assert(out_it == out.end());
+    }
+
+    size_t rows = 0;
+    size_t bytes = 0;
+    for (const auto & buffer : *new_data)
+    {
+        rows += buffer.rows();
+        bytes += buffer.bytes();
+    }
+    /// Each counter must receive its own quantity: bytes go to total_size_bytes, rows to total_size_rows.
+    total_size_bytes.store(bytes, std::memory_order_relaxed);
+    total_size_rows.store(rows, std::memory_order_relaxed);
+    data.set(std::move(new_data));
+}
+

 void StorageMemory::truncate(
    const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &)
 {
-    std::lock_guard lock(mutex);
-    data.clear();
+    data.set(std::make_unique<BlocksList>());
    total_size_bytes.store(0, std::memory_order_relaxed);
    total_size_rows.store(0, std::memory_order_relaxed);
 }

-
-std::optional<UInt64> StorageMemory::totalRows() const
+std::optional<UInt64> StorageMemory::totalRows(const Settings &) const
 {
    /// All modifications of these counters are done under mutex which automatically guarantees synchronization/consistency
    /// When run concurrently we are fine with any value: "before" or "after"
    return total_size_rows.load(std::memory_order_relaxed);
 }

-
-std::optional<UInt64> StorageMemory::totalBytes() const
+std::optional<UInt64> StorageMemory::totalBytes(const Settings &) const
 {
    return total_size_bytes.load(std::memory_order_relaxed);
 }

-
 void registerStorageMemory(StorageFactory & factory)
 {
    factory.registerStorage("Memory", [](const StorageFactory::Arguments & args)
--- a/src/Storages/StorageMemory.h
+++ b/src/Storages/StorageMemory.h
@ -10,6 +10,7 @@
 #include <Storages/IStorage.h>
 #include <DataStreams/IBlockOutputStream.h>

+#include <Common/MultiVersion.h>

 namespace DB
 {
@ -27,7 +28,7 @@ friend struct ext::shared_ptr_helper<StorageMemory>;
 public:
    String getName() const override { return "Memory"; }

-    size_t getSize() const { return data.size(); }
+    size_t getSize() const { return data.get()->size(); }

    Pipe read(
        const Names & column_names,
@ -44,10 +45,12 @@ public:

    void drop() override;

+    void mutate(const MutationCommands & commands, const Context & context) override;
+
    void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override;

-    std::optional<UInt64> totalRows() const override;
-    std::optional<UInt64> totalBytes() const override;
+    std::optional<UInt64> totalRows(const Settings &) const override;
+    std::optional<UInt64> totalBytes(const Settings &) const override;

    /** Delays initialization of StorageMemory::read() until the first read is actually happen.
      * Usually, fore code like this:
@ -87,8 +90,8 @@ public:
    void delayReadForGlobalSubqueries() { delay_read_for_global_subqueries = true; }

 private:
-    /// The data itself. `list` - so that when inserted to the end, the existing iterators are not invalidated.
-    BlocksList data;
+    /// MultiVersion data storage, so that we can copy the list of blocks to readers.
+    MultiVersion<BlocksList> data;

    mutable std::mutex mutex;

--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@ -202,7 +202,7 @@ Pipe StorageMergeTree::read(
    return plan.convertToPipe();
 }

-std::optional<UInt64> StorageMergeTree::totalRows() const
+std::optional<UInt64> StorageMergeTree::totalRows(const Settings &) const
 {
    return getTotalActiveSizeInRows();
 }
@ -223,7 +223,7 @@ std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const Sele
    return res;
 }

-std::optional<UInt64> StorageMergeTree::totalBytes() const
+std::optional<UInt64> StorageMergeTree::totalBytes(const Settings &) const
 {
    return getTotalActiveSizeInBytes();
 }
--- a/src/Storages/StorageMergeTree.h
+++ b/src/Storages/StorageMergeTree.h
@ -56,9 +56,9 @@ public:
        size_t max_block_size,
        unsigned num_streams) override;

-    std::optional<UInt64> totalRows() const override;
+    std::optional<UInt64> totalRows(const Settings &) const override;
    std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo &, const Context &) const override;
-    std::optional<UInt64> totalBytes() const override;
+    std::optional<UInt64> totalBytes(const Settings &) const override;

    BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;

--- a/src/Storages/StorageNull.h
+++ b/src/Storages/StorageNull.h
@ -45,11 +45,11 @@ public:

    void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override;

-    std::optional<UInt64> totalRows() const override
+    std::optional<UInt64> totalRows(const Settings &) const override
    {
        return {0};
    }
-    std::optional<UInt64> totalBytes() const override
+    std::optional<UInt64> totalBytes(const Settings &) const override
    {
        return {0};
    }
--- a/src/Storages/StorageProxy.h
+++ b/src/Storages/StorageProxy.h
@ -148,8 +148,8 @@ public:
    bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); }
    Strings getDataPaths() const override { return getNested()->getDataPaths(); }
    StoragePolicyPtr getStoragePolicy() const override { return getNested()->getStoragePolicy(); }
-    std::optional<UInt64> totalRows() const override { return getNested()->totalRows(); }
-    std::optional<UInt64> totalBytes() const override { return getNested()->totalBytes(); }
+    std::optional<UInt64> totalRows(const Settings & settings) const override { return getNested()->totalRows(settings); }
+    std::optional<UInt64> totalBytes(const Settings & settings) const override { return getNested()->totalBytes(settings); }
    std::optional<UInt64> lifetimeRows() const override { return getNested()->lifetimeRows(); }
    std::optional<UInt64> lifetimeBytes() const override { return getNested()->lifetimeBytes(); }

--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@ -3742,27 +3742,37 @@ Pipe StorageReplicatedMergeTree::read(


 template <class Func>
-void StorageReplicatedMergeTree::foreachCommittedParts(const Func & func) const
+void StorageReplicatedMergeTree::foreachCommittedParts(Func && func, bool select_sequential_consistency) const
 {
-    auto max_added_blocks = getMaxAddedBlocks();
+    std::optional<ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock> max_added_blocks = {};
+
+    /**
+     * Synchronously go to ZooKeeper when select_sequential_consistency enabled
+     */
+    if (select_sequential_consistency)
+        max_added_blocks = getMaxAddedBlocks();
+
    auto lock = lockParts();
    for (const auto & part : getDataPartsStateRange(DataPartState::Committed))
    {
        if (part->isEmpty())
            continue;

-        auto blocks_iterator = max_added_blocks.find(part->info.partition_id);
-        if (blocks_iterator == max_added_blocks.end() || part->info.max_block > blocks_iterator->second)
-            continue;
+        if (max_added_blocks)
+        {
+            auto blocks_iterator = max_added_blocks->find(part->info.partition_id);
+            if (blocks_iterator == max_added_blocks->end() || part->info.max_block > blocks_iterator->second)
+                continue;
+        }

        func(part);
    }
 }

-std::optional<UInt64> StorageReplicatedMergeTree::totalRows() const
+std::optional<UInt64> StorageReplicatedMergeTree::totalRows(const Settings & settings) const
 {
    UInt64 res = 0;
-    foreachCommittedParts([&res](auto & part) { res += part->rows_count; });
+    foreachCommittedParts([&res](auto & part) { res += part->rows_count; }, settings.select_sequential_consistency);
    return res;
 }

@ -3777,14 +3787,14 @@ std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(
    {
        if (!partition_pruner.canBePruned(part))
            res += part->rows_count;
-    });
+    }, context.getSettingsRef().select_sequential_consistency);
    return res;
 }

-std::optional<UInt64> StorageReplicatedMergeTree::totalBytes() const
+std::optional<UInt64> StorageReplicatedMergeTree::totalBytes(const Settings & settings) const
 {
    UInt64 res = 0;
-    foreachCommittedParts([&res](auto & part) { res += part->getBytesOnDisk(); });
+    foreachCommittedParts([&res](auto & part) { res += part->getBytesOnDisk(); }, settings.select_sequential_consistency);
    return res;
 }

--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@ -107,9 +107,9 @@ public:
        size_t max_block_size,
        unsigned num_streams) override;

-    std::optional<UInt64> totalRows() const override;
+    std::optional<UInt64> totalRows(const Settings & settings) const override;
    std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const override;
-    std::optional<UInt64> totalBytes() const override;
+    std::optional<UInt64> totalBytes(const Settings & settings) const override;

    BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;

@ -326,7 +326,7 @@ private:
    const size_t replicated_fetches_pool_size;

    template <class Func>
-    void foreachCommittedParts(const Func & func) const;
+    void foreachCommittedParts(Func && func, bool select_sequential_consistency) const;

    /** Creates the minimum set of nodes in ZooKeeper and create first replica.
      * Returns true if was created, false if exists.
--- a/src/Storages/StorageSet.cpp
+++ b/src/Storages/StorageSet.cpp
@ -153,8 +153,8 @@ void StorageSet::insertBlock(const Block & block) { set->insertFromBlock(block);
 void StorageSet::finishInsert() { set->finishInsert(); }

 size_t StorageSet::getSize() const { return set->getTotalRowCount(); }
-std::optional<UInt64> StorageSet::totalRows() const { return set->getTotalRowCount(); }
-std::optional<UInt64> StorageSet::totalBytes() const { return set->getTotalByteCount(); }
+std::optional<UInt64> StorageSet::totalRows(const Settings &) const { return set->getTotalRowCount(); }
+std::optional<UInt64> StorageSet::totalBytes(const Settings &) const { return set->getTotalByteCount(); }

 void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &)
 {
--- a/src/Storages/StorageSet.h
+++ b/src/Storages/StorageSet.h
@ -73,8 +73,8 @@ public:

    void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override;

-    std::optional<UInt64> totalRows() const override;
-    std::optional<UInt64> totalBytes() const override;
+    std::optional<UInt64> totalRows(const Settings & settings) const override;
+    std::optional<UInt64> totalBytes(const Settings & settings) const override;

 private:
    SetPtr set;
--- a/src/Storages/System/StorageSystemTables.cpp
+++ b/src/Storages/System/StorageSystemTables.cpp
@ -429,7 +429,7 @@ protected:
                if (columns_mask[src_index++])
                {
                    assert(table != nullptr);
-                    auto total_rows = table->totalRows();
+                    auto total_rows = table->totalRows(context.getSettingsRef());
                    if (total_rows)
                        res_columns[res_index++]->insert(*total_rows);
                    else
@ -439,7 +439,7 @@ protected:
                if (columns_mask[src_index++])
                {
                    assert(table != nullptr);
-                    auto total_bytes = table->totalBytes();
+                    auto total_bytes = table->totalBytes(context.getSettingsRef());
                    if (total_bytes)
                        res_columns[res_index++]->insert(*total_bytes);
                    else
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@ -112,7 +112,6 @@ def get_db_engine(args):
    return ""   # Will use default engine

 def run_single_test(args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file):
-
    # print(client_options)

    if args.database:
@ -149,10 +148,12 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
        pattern = "{client} --send_logs_level={logs_level} --testmode --multiquery {options} < " + pattern

    command = pattern.format(**params)
-    #print(command)
+
+    # print(command)

    proc = Popen(command, shell=True, env=os.environ)
    start_time = datetime.now()
+
    while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
        sleep(0.01)

@ -317,6 +318,7 @@ def run_tests_array(all_tests_with_params):
                    stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'

                    proc, stdout, stderr, total_time = run_single_test(args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file)
+
                    if proc.returncode is None:
                        try:
                            proc.kill()
@ -347,7 +349,7 @@ def run_tests_array(all_tests_with_params):
                            if stderr:
                                print(stderr)

-                            # Stop on fatal errors like segmentation fault. They are send to client via logs.
+                            # Stop on fatal errors like segmentation fault. They are sent to client via logs.
                            if ' <Fatal> ' in stderr:
                                SERVER_DIED = True

@ -486,7 +488,7 @@ def collect_build_flags(client):
        elif b'-fsanitize=memory' in stdout:
            result.append(BuildFlags.MEMORY)
    else:
-        raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
+        raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))

    clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
    (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'")
@ -497,7 +499,7 @@ def collect_build_flags(client):
        elif b'RelWithDebInfo' in stdout or b'Release' in stdout:
            result.append(BuildFlags.RELEASE)
    else:
-        raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
+        raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))

    clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
    (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'")
@ -506,7 +508,7 @@ def collect_build_flags(client):
        if b'ON' in stdout or b'1' in stdout:
            result.append(BuildFlags.UNBUNDLED)
    else:
-        raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
+        raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))

    clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
    (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.settings WHERE name = 'default_database_engine'")
@ -515,7 +517,7 @@ def collect_build_flags(client):
        if b'Ordinary' in stdout:
            result.append(BuildFlags.DATABASE_ORDINARY)
    else:
-        raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
+        raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))

    clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
    (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'")
@ -544,8 +546,12 @@ def main(args):
        return stdout.startswith(b'1')

    if not check_server_started(args.client, args.server_check_retries):
-        raise Exception("clickhouse-server is not responding. Cannot execute 'SELECT 1' query.")
+        raise Exception(
+            "Server is not responding. Cannot execute 'SELECT 1' query. \
+            Note: if you are using unbundled mode, you also have to specify -c option.")
+
    build_flags = collect_build_flags(args.client)
+
    if args.use_skip_list:
        tests_to_skip_from_list = collect_tests_to_skip(args.skip_list_path, build_flags)
    else:
@ -790,8 +796,13 @@ if __name__ == '__main__':
    parser=ArgumentParser(description='ClickHouse functional tests')
    parser.add_argument('-q', '--queries', help='Path to queries dir')
    parser.add_argument('--tmp', help='Path to tmp dir')
-    parser.add_argument('-b', '--binary', default='clickhouse', help='Path to clickhouse binary or name of binary in PATH')
-    parser.add_argument('-c', '--client', help='Client program')
+
+    parser.add_argument('-b', '--binary', default='clickhouse',
+        help='Path to clickhouse (if bundled, clickhouse-server otherwise) binary or name of binary in PATH')
+
+    parser.add_argument('-c', '--client',
+        help='Path to clickhouse-client (if unbundled, useless otherwise) binary or name of binary in PATH')
+
    parser.add_argument('--extract_from_config', help='extract-from-config program')
    parser.add_argument('--configclient', help='Client config (if you use not default ports)')
    parser.add_argument('--configserver', default= '/etc/clickhouse-server/config.xml', help='Preprocessed server config')
@ -865,10 +876,14 @@ if __name__ == '__main__':
    if args.client is None:
        if find_binary(args.binary + '-client'):
            args.client = args.binary + '-client'
+
+            print("Using " + args.client + " as client program (expecting unbundled mode)")
        elif find_binary(args.binary):
            args.client = args.binary + ' client'
+
+            print("Using " + args.client + " as client program (expecting bundled mode)")
        else:
-            print("No 'clickhouse' binary found in PATH", file=sys.stderr)
+            print("No 'clickhouse' or 'clickhouse-client' client binary found", file=sys.stderr)
            parser.print_help()
            exit(1)

--- a/tests/fuzz/ast.dict
+++ b/tests/fuzz/ast.dict
@ -344,8 +344,6 @@
 "TABLE"
 "TABLES"
 "TEMPORARY"
-"timeSeriesGroupRateSum"
-"timeSeriesGroupSum"
 "TIMESTAMP"
 "TIMESTAMP_ADD"
 "TIMESTAMPADD"
--- a/tests/integration/test_reload_zookeeper/configs/zookeeper.xml
+++ b/tests/integration/test_reload_zookeeper/configs/zookeeper.xml
@ -13,7 +13,7 @@
            <host>zoo3</host>
            <port>2181</port>
        </node>
-        <session_timeout_ms>2000</session_timeout_ms>
+        <session_timeout_ms>20000</session_timeout_ms>
    </zookeeper>
 </yandex>
    
--- a/tests/integration/test_reload_zookeeper/test.py
+++ b/tests/integration/test_reload_zookeeper/test.py
@ -62,14 +62,16 @@ def test_reload_zookeeper(start_cluster):

    ## stop all zookeepers, table will be readonly
    cluster.stop_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
+    node.query("SELECT COUNT() FROM test_table")
    with pytest.raises(QueryRuntimeException):
-        node.query("SELECT COUNT() FROM test_table")
+        node.query("SELECT COUNT() FROM test_table", settings={"select_sequential_consistency" : 1})

    ## start zoo2, zoo3, table will be readonly too, because it only connect to zoo1
    cluster.start_zookeeper_nodes(["zoo2", "zoo3"])
    wait_zookeeper_node_to_start(["zoo2", "zoo3"])
+    node.query("SELECT COUNT() FROM test_table")
    with pytest.raises(QueryRuntimeException):
-        node.query("SELECT COUNT() FROM test_table")
+        node.query("SELECT COUNT() FROM test_table", settings={"select_sequential_consistency" : 1})

    ## set config to zoo2, server will be normal
    new_config = """
--- a/tests/performance/avg_weighted.xml
+++ b/tests/performance/avg_weighted.xml
@ -0,0 +1,38 @@
+<test>
+    <settings>
+        <allow_experimental_bigint_types>1</allow_experimental_bigint_types>
+        <max_threads>1</max_threads>
+        <max_insert_threads>8</max_insert_threads>
+    </settings>
+
+    <preconditions>
+        <table_exists>hits_100m_single</table_exists>
+    </preconditions>
+
+    <create_query>CREATE TABLE perf_avg(
+        num UInt64,
+        num_u Decimal256(75) DEFAULT toDecimal256(num / 400000, 75),
+        num_f Float64 DEFAULT num / 100
+        ) ENGINE = MergeTree() ORDER BY num
+    </create_query>
+
+    <fill_query>
+        INSERT INTO perf_avg(num)
+        SELECT toUInt64(UserID / (WatchID + 1)  * 1000000)
+        FROM hits_100m_single
+        LIMIT 50000000
+    </fill_query>
+
+    <query>SELECT avg(num)                  FROM perf_avg FORMAT Null</query>
+    <query>SELECT avg(2 * num)              FROM perf_avg FORMAT Null</query>
+    <query>SELECT avg(num_u)                FROM perf_avg FORMAT Null</query>
+    <query>SELECT avg(num_f)                FROM perf_avg FORMAT Null</query>
+    <query>SELECT avgWeighted(num_f, num)   FROM perf_avg FORMAT Null</query>
+    <query>SELECT avgWeighted(num_f, num_f) FROM perf_avg FORMAT Null</query>
+    <query>SELECT avgWeighted(num_f, num_u) FROM perf_avg FORMAT Null</query>
+    <query>SELECT avgWeighted(num_u, num_f) FROM perf_avg FORMAT Null</query>
+    <query>SELECT avgWeighted(num_u, num)   FROM perf_avg FORMAT Null</query>
+    <query>SELECT avgWeighted(num_u, num_u) FROM perf_avg FORMAT Null</query>
+
+    <drop_query>DROP TABLE IF EXISTS perf_avg</drop_query>
+</test>
--- a/tests/performance/date_time_short.xml
+++ b/tests/performance/date_time_short.xml
@ -13,7 +13,6 @@
               <value>toMonday</value>
               <value>toRelativeDayNum</value>
               <value>toYYYYMMDDhhmmss</value>
-               <value>toUnixTimestamp</value>
           </values>
        </substitution>
        <substitution>
@ -33,8 +32,15 @@
        </substitution>
    </substitutions>

+    <!-- date_transform -->
    <query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {date_transform}(t, '{time_zone}'))</query>
    <query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t))</query>
+
+    <!-- toUnixTimestamp() -->
+    <query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toUnixTimestamp(t, '{time_zone}'))</query>
+    <!-- toUnixTimestamp(Date()) is prohibited; wrap Date() with toUInt16() to work around this -->
+    <query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, toUnixTimestamp(toUInt16(t)))</query>
+
    <query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1))</query>
    <query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month))</query>
    <query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, date_trunc('month', t))</query>
--- a/tests/performance/ip_trie.xml
+++ b/tests/performance/ip_trie.xml
@ -0,0 +1,89 @@
+<test>
+    <create_query>
+        CREATE TABLE table_ip_trie
+        (
+            ip String,
+            ver UInt8,
+            val Float32
+        ) ENGINE = TinyLog
+    </create_query>
+
+    <create_query>
+        INSERT INTO table_ip_trie
+        SELECT
+            IPv4NumToString(ipv4) || '/' || toString(rand() % 17 + 16) as ip,
+            4 as ver,
+            val
+        FROM generateRandom('ipv4 UInt32, val Float32', 0, 30, 30)
+        LIMIT 200000
+    </create_query>
+
+    <create_query>
+        INSERT INTO table_ip_trie
+        SELECT
+            IPv6NumToString(ipv6) || '/' || toString(rand() % 65 + 64) as ip,
+            6 as ver,
+            val
+        FROM generateRandom('ipv6 FixedString(16), val Float32', 0, 30, 30)
+        LIMIT 2500000
+    </create_query>
+
+    <create_query>
+        CREATE DICTIONARY dict_ip_trie
+        (
+            ip String,
+            ver UInt8,
+            val Float32
+        )
+        PRIMARY KEY ip
+        SOURCE(CLICKHOUSE(DB 'default' TABLE 'table_ip_trie'))
+        LAYOUT(IP_TRIE())
+        LIFETIME(300)
+    </create_query>
+
+    <create_query>
+        CREATE TABLE dict_ip_trie_table
+        (
+            `ip` String,
+            `ver` UInt8,
+            `val` Float32
+        ) ENGINE = Dictionary(default.dict_ip_trie)
+    </create_query>
+
+    <create_query>
+        CREATE TABLE table_ip_from_dict (`ip` String, `ver` UInt8) ENGINE = TinyLog
+    </create_query>
+    <create_query>
+        INSERT INTO table_ip_from_dict
+        SELECT splitByChar('/', ip )[1] as ip, ver FROM dict_ip_trie_table
+    </create_query>
+
+    <query>
+        SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(rand32()))
+        FROM numbers(500000) FORMAT Null
+    </query>
+
+    <query>
+        SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(randomFixedString(16)))
+        FROM numbers(500000) FORMAT Null
+    </query>
+
+    <query>
+        SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(IPv6StringToNum(ip)))
+        FROM table_ip_from_dict
+        WHERE ver == 4
+        LIMIT 500000 FORMAT Null
+    </query>
+
+    <query>
+        SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(IPv6StringToNum(ip)))
+        FROM table_ip_from_dict
+        WHERE ver == 6
+        LIMIT 500000 FORMAT Null
+    </query>
+
+    <drop_query>DROP DICTIONARY IF EXISTS default.dict_ip_trie</drop_query>
+    <drop_query>DROP TABLE IF EXISTS table_ip_trie</drop_query>
+    <drop_query>DROP TABLE IF EXISTS dict_ip_trie_table</drop_query>
+    <drop_query>DROP TABLE IF EXISTS table_ip_from_dict</drop_query>
+</test>
--- a/tests/queries/0_stateless/00700_decimal_aggregates.reference
+++ b/tests/queries/0_stateless/00700_decimal_aggregates.reference
@ -5,9 +5,6 @@
 -1275.0000	-424.99999983	-255.00000000	-1275.0000	-424.99999983	-255.00000000
 101.0000	101.00000000	101.00000000	101.0000	101.00000000	101.00000000
 -101.0000	-101.00000000	-101.00000000	-101.0000	-101.00000000	-101.00000000
-0.0000	0.00000000	0.00000000
-25.5000	8.49999999	5.10000000
-25.5000	-8.49999999	-5.10000000
 (101,101,101)	(101,101,101)	(101,101,101)	(101,101,101)	(102,100,101)
 5	5	5
 10	10	10
--- a/tests/queries/0_stateless/00700_decimal_aggregates.sql
+++ b/tests/queries/0_stateless/00700_decimal_aggregates.sql
@ -20,10 +20,6 @@ SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOv
 SELECT sum(a+1), sum(b+1), sum(c+1), sumWithOverflow(a+1), sumWithOverflow(b+1), sumWithOverflow(c+1) FROM decimal;
 SELECT sum(a-1), sum(b-1), sum(c-1), sumWithOverflow(a-1), sumWithOverflow(b-1), sumWithOverflow(c-1) FROM decimal;

-SELECT avg(a), avg(b), avg(c) FROM decimal;
-SELECT avg(a), avg(b), avg(c) FROM decimal WHERE a > 0;
-SELECT avg(a), avg(b), avg(c) FROM decimal WHERE a < 0;
-
 SELECT (uniq(a), uniq(b), uniq(c)),
    (uniqCombined(a), uniqCombined(b), uniqCombined(c)),
    (uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)),
--- a/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference
+++ b/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference
@ -5,9 +5,6 @@
 0.0000	0.0000000	0.00000000	0.0000	0.0000000	0.00000000
 0.0000	0.0000000	0.00000000	0.0000	0.0000000	0.00000000
 0.0000	0.0000000	0.00000000	0.0000	0.0000000	0.00000000
-0.0000	0.0000000	0.00000000	Decimal(9, 4)	Decimal(18, 7)	Decimal(38, 8)
-0.0000	0.0000000	0.00000000	Decimal(9, 4)	Decimal(18, 7)	Decimal(38, 8)
-0.0000	0.0000000	0.00000000	Decimal(9, 4)	Decimal(18, 7)	Decimal(38, 8)
 (0,0,0)	(0,0,0)	(0,0,0)	(0,0,0)	(0,0,0)
 0	0	0
 0	0	0
--- a/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql
+++ b/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql
@ -16,10 +16,6 @@ SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOv
 SELECT sum(a+1), sum(b+1), sum(c+1), sumWithOverflow(a+1), sumWithOverflow(b+1), sumWithOverflow(c+1) FROM decimal;
 SELECT sum(a-1), sum(b-1), sum(c-1), sumWithOverflow(a-1), sumWithOverflow(b-1), sumWithOverflow(c-1) FROM decimal;

-SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal;
-SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal WHERE a > 0;
-SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal WHERE a < 0;
-
 SELECT (uniq(a), uniq(b), uniq(c)),
    (uniqCombined(a), uniqCombined(b), uniqCombined(c)),
    (uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)),
--- a/tests/queries/0_stateless/00910_aggregation_timeseriesgroupsum.reference
+++ b/tests/queries/0_stateless/00910_aggregation_timeseriesgroupsum.reference
@ -1,2 +0,0 @@
-[(2,0.2),(3,0.8999999999999999),(7,2.0999999999999996),(8,2.4),(12,3.5999999999999996),(17,5.1000000000000005),(18,5.4),(24,7.199999999999999),(25,2.5)]
-[(2,0),(3,0.09999999999999999),(7,0.3),(8,0.30000000000000004),(12,0.29999999999999993),(17,0.30000000000000004),(18,0.30000000000000004),(24,0.29999999999999993),(25,0.1)]
--- a/tests/queries/0_stateless/00910_aggregation_timeseriesgroupsum.sql
+++ b/tests/queries/0_stateless/00910_aggregation_timeseriesgroupsum.sql
@ -1,10 +0,0 @@
-drop table if exists tsgroupsum_test;
-
-create table tsgroupsum_test (uid UInt64, ts Int64, value Float64) engine=Memory;
-insert into tsgroupsum_test values (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5);
-insert into tsgroupsum_test values (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
-
-select timeSeriesGroupSum(uid, ts, value) from (select * from tsgroupsum_test order by ts asc);
-select timeSeriesGroupRateSum(uid, ts, value) from (select * from tsgroupsum_test order by ts asc);
-
-drop table tsgroupsum_test;
--- a/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.reference
+++ b/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.reference
@ -4,6 +4,6 @@
 [499500.00]
 [499500.00000]
 [499500.0000000000]
-1545081300	[('ed87e57c-9331-462a-80b4-9f0c005e88c8',0.4400)]
+1545081300	[('ed87e57c-9331-462a-80b4-9f0c005e88c8',0.44)]
 4341757	5657967	2018-11-01 16:47:46	txt	321.380000000000	315.080000000000	0.000000000000	2018-11-02 00:00:00
 4360430	5681495	2018-11-02 09:00:07	txt	274.350000000000	268.970000000000	0.000000000000	2018-11-02 00:00:00
--- a/tests/queries/0_stateless/00921_datetime64_compatibility.reference
+++ b/tests/queries/0_stateless/00921_datetime64_compatibility.reference
@ -53,7 +53,8 @@ Code: 43
 "UInt8",11
 ------------------------------------------
 SELECT toUnixTimestamp(N)
-"UInt32",18155
+
+Code: 44
 "UInt32",1568650811
 "UInt32",1568650811
 ------------------------------------------
--- a/tests/queries/0_stateless/01018_ddl_dictionaries_special.reference
+++ b/tests/queries/0_stateless/01018_ddl_dictionaries_special.reference
@ -8,9 +8,6 @@
 0.42
 0.46
 0
-***ip trie dict***
-17501
-NP
 ***hierarchy dict***
 Moscow
 [3,2,1,10000]
--- a/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql
+++ b/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql
@ -72,33 +72,6 @@ SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(1), toDateTime(
 SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-29 00:00:00'));
 SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-31 00:00:00'));

-SELECT '***ip trie dict***';
-
-CREATE TABLE database_for_dict.table_ip_trie
-(
-    prefix String,
-    asn UInt32,
-    cca2 String
-)
-engine = TinyLog;
-
-INSERT INTO database_for_dict.table_ip_trie VALUES ('202.79.32.0/20', 17501, 'NP'), ('2620:0:870::/48', 3856, 'US'), ('2a02:6b8:1::/48', 13238, 'RU'), ('2001:db8::/32', 65536, 'ZZ');
-
-
-CREATE DICTIONARY database_for_dict.dict_ip_trie
-(
-  prefix String,
-  asn UInt32,
-  cca2 String
-)
-PRIMARY KEY prefix
-SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie'))
-LAYOUT(IP_TRIE())
-LIFETIME(MIN 10 MAX 100);
-
-SELECT dictGetUInt32('database_for_dict.dict_ip_trie', 'asn', tuple(IPv4StringToNum('202.79.32.0')));
-SELECT dictGetString('database_for_dict.dict_ip_trie', 'cca2', tuple(IPv4StringToNum('202.79.32.0')));
-
 SELECT '***hierarchy dict***';

 CREATE TABLE database_for_dict.table_with_hierarchy
--- a/tests/queries/0_stateless/01018_empty_aggregation_filling.reference
+++ b/tests/queries/0_stateless/01018_empty_aggregation_filling.reference
@ -41,7 +41,7 @@ nan
 \N
 \N
 \N
-0.00
+0
 \N
 0
 \N
--- a/tests/queries/0_stateless/01018_ip_dictionary.reference
+++ b/tests/queries/0_stateless/01018_ip_dictionary.reference
@ -0,0 +1,425 @@
+***ipv4 trie dict***
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+***ipv4 trie dict mask***
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+***ipv4 trie dict pt2***
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+***ipv6 trie dict***
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+***ipv6 trie dict mask***
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
--- a/Show More
+++ b/Show More