mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge remote-tracking branch 'origin/master' into add_ttl_option_for_syslog
This commit is contained in:
commit
17e83cbb8d
@ -475,9 +475,6 @@ find_contrib_lib(cityhash)
|
||||
|
||||
find_contrib_lib(farmhash)
|
||||
|
||||
set (USE_INTERNAL_BTRIE_LIBRARY ON CACHE INTERNAL "")
|
||||
find_contrib_lib(btrie)
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
include (cmake/find/gtest.cmake)
|
||||
endif ()
|
||||
|
@ -1,44 +0,0 @@
|
||||
# - Try to find btrie headers and libraries.
|
||||
#
|
||||
# Usage of this module as follows:
|
||||
#
|
||||
# find_package(btrie)
|
||||
#
|
||||
# Variables used by this module, they can change the default behaviour and need
|
||||
# to be set before calling find_package:
|
||||
#
|
||||
# BTRIE_ROOT_DIR Set this variable to the root installation of
|
||||
# btrie if the module has problems finding
|
||||
# the proper installation path.
|
||||
#
|
||||
# Variables defined by this module:
|
||||
#
|
||||
# BTRIE_FOUND System has btrie libs/headers
|
||||
# BTRIE_LIBRARIES The btrie library/libraries
|
||||
# BTRIE_INCLUDE_DIR The location of btrie headers
|
||||
|
||||
find_path(BTRIE_ROOT_DIR
|
||||
NAMES include/btrie.h
|
||||
)
|
||||
|
||||
find_library(BTRIE_LIBRARIES
|
||||
NAMES btrie
|
||||
PATHS ${BTRIE_ROOT_DIR}/lib ${BTRIE_LIBRARIES_PATHS}
|
||||
)
|
||||
|
||||
find_path(BTRIE_INCLUDE_DIR
|
||||
NAMES btrie.h
|
||||
PATHS ${BTRIE_ROOT_DIR}/include ${BTRIE_INCLUDE_PATHS}
|
||||
)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(btrie DEFAULT_MSG
|
||||
BTRIE_LIBRARIES
|
||||
BTRIE_INCLUDE_DIR
|
||||
)
|
||||
|
||||
mark_as_advanced(
|
||||
BTRIE_ROOT_DIR
|
||||
BTRIE_LIBRARIES
|
||||
BTRIE_INCLUDE_DIR
|
||||
)
|
@ -1,3 +1,4 @@
|
||||
# Needed when using Apache Avro serialization format
|
||||
option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES})
|
||||
|
||||
if (NOT ENABLE_AVRO)
|
||||
|
@ -1,3 +1,5 @@
|
||||
# Needed when securely connecting to an external server, e.g.
|
||||
# clickhouse-client --host ... --secure
|
||||
option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
|
||||
|
||||
if(NOT ENABLE_SSL)
|
||||
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -66,10 +66,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
|
||||
add_subdirectory (libfarmhash)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_BTRIE_LIBRARY)
|
||||
add_subdirectory (libbtrie)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_ZLIB_LIBRARY)
|
||||
set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
|
||||
set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")
|
||||
|
@ -1,6 +0,0 @@
|
||||
add_library(btrie
|
||||
src/btrie.c
|
||||
include/btrie.h
|
||||
)
|
||||
|
||||
target_include_directories (btrie SYSTEM PUBLIC include)
|
@ -1,23 +0,0 @@
|
||||
Copyright (c) 2013, CobbLiu
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,160 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* In btrie, each leaf means one bit in ip tree.
|
||||
* Left means 0, and right means 1.
|
||||
*/
|
||||
|
||||
#define BTRIE_NULL (uintptr_t) -1
|
||||
|
||||
#if !defined(BTRIE_MAX_PAGES)
|
||||
/// 54 ip per page. 8 bytes memory per page when empty
|
||||
#define BTRIE_MAX_PAGES 1024 * 2048 /// 128m ips , ~16mb ram when empty
|
||||
// #define BTRIE_MAX_PAGES 1024 * 65535 /// 4g ips (whole ipv4), ~512mb ram when empty
|
||||
#endif
|
||||
|
||||
typedef struct btrie_node_s btrie_node_t;
|
||||
|
||||
struct btrie_node_s {
|
||||
btrie_node_t *right;
|
||||
btrie_node_t *left;
|
||||
btrie_node_t *parent;
|
||||
uintptr_t value;
|
||||
};
|
||||
|
||||
|
||||
typedef struct btrie_s {
|
||||
btrie_node_t *root;
|
||||
|
||||
btrie_node_t *free; /* free list of btrie */
|
||||
char *start;
|
||||
size_t size;
|
||||
|
||||
/*
|
||||
* memory pool.
|
||||
* memory management(esp free) will be so easy by using this facility.
|
||||
*/
|
||||
char *pools[BTRIE_MAX_PAGES];
|
||||
size_t len;
|
||||
} btrie_t;
|
||||
|
||||
|
||||
/**
|
||||
* Create an empty btrie
|
||||
*
|
||||
* @Return:
|
||||
* An ip radix_tree created.
|
||||
* NULL if creation failed.
|
||||
*/
|
||||
|
||||
btrie_t *btrie_create();
|
||||
|
||||
/**
|
||||
* Destroy the ip radix_tree
|
||||
*
|
||||
* @Return:
|
||||
* OK if deletion succeed.
|
||||
* ERROR if error occurs while deleting.
|
||||
*/
|
||||
int btrie_destroy(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Count the nodes in the radix tree.
|
||||
*/
|
||||
size_t btrie_count(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Return the allocated number of bytes.
|
||||
*/
|
||||
size_t btrie_allocated(btrie_t *tree);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv4 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value);
|
||||
|
||||
|
||||
/**
|
||||
* Delete an ipv4 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
|
||||
|
||||
|
||||
/**
|
||||
* Find an ipv4 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* Value if succeed.
|
||||
* NULL if failed.
|
||||
*/
|
||||
uintptr_t btrie_find(btrie_t *tree, uint32_t key);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv6 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value);
|
||||
|
||||
/**
|
||||
* Delete an ipv6 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
|
||||
|
||||
/**
|
||||
* Find an ipv6 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* Value if succeed.
|
||||
* NULL if failed.
|
||||
*/
|
||||
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
@ -1,460 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <btrie.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
|
||||
static btrie_node_t *
|
||||
btrie_alloc(btrie_t *tree)
|
||||
{
|
||||
btrie_node_t *p;
|
||||
|
||||
if (tree->free) {
|
||||
p = tree->free;
|
||||
tree->free = tree->free->right;
|
||||
return p;
|
||||
}
|
||||
|
||||
if (tree->size < sizeof(btrie_node_t)) {
|
||||
tree->start = (char *) calloc(sizeof(char), PAGE_SIZE);
|
||||
if (tree->start == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->pools[tree->len++] = tree->start;
|
||||
tree->size = PAGE_SIZE;
|
||||
}
|
||||
|
||||
p = (btrie_node_t *) tree->start;
|
||||
|
||||
tree->start += sizeof(btrie_node_t);
|
||||
tree->size -= sizeof(btrie_node_t);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
btrie_t *
|
||||
btrie_create()
|
||||
{
|
||||
btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
|
||||
if (tree == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->free = NULL;
|
||||
tree->start = NULL;
|
||||
tree->size = 0;
|
||||
memset(tree->pools, 0, sizeof(btrie_t *) * BTRIE_MAX_PAGES);
|
||||
tree->len = 0;
|
||||
|
||||
tree->root = btrie_alloc(tree);
|
||||
if (tree->root == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->root->right = NULL;
|
||||
tree->root->left = NULL;
|
||||
tree->root->parent = NULL;
|
||||
tree->root->value = BTRIE_NULL;
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
static size_t
|
||||
subtree_weight(btrie_node_t *node)
|
||||
{
|
||||
size_t weight = 1;
|
||||
if (node->left) {
|
||||
weight += subtree_weight(node->left);
|
||||
}
|
||||
if (node->right) {
|
||||
weight += subtree_weight(node->right);
|
||||
}
|
||||
return weight;
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_count(btrie_t *tree)
|
||||
{
|
||||
if (tree->root == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return subtree_weight(tree->root);
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_allocated(btrie_t *tree)
|
||||
{
|
||||
return tree->len * PAGE_SIZE;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
bit = 0x80000000;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask) {
|
||||
if (key & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask)) {
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find(btrie_t *tree, uint32_t key)
|
||||
{
|
||||
uint32_t bit;
|
||||
uintptr_t value;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint8_t bit;
|
||||
unsigned int i;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask[i]) {
|
||||
if (key[i] & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask[i]) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key[i] & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
|
||||
{
|
||||
uint8_t bit;
|
||||
unsigned int i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask[i])) {
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find_a6(btrie_t *tree, const uint8_t *key)
|
||||
{
|
||||
uint8_t bit;
|
||||
uintptr_t value;
|
||||
unsigned int i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
i++;
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_destroy(btrie_t *tree)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
||||
/* free memory pools */
|
||||
for (i = 0; i < tree->len; i++) {
|
||||
free(tree->pools[i]);
|
||||
}
|
||||
|
||||
free(tree);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,103 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <btrie.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
btrie_t *it;
|
||||
int ret;
|
||||
|
||||
uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
|
||||
uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
|
||||
uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};
|
||||
|
||||
it = btrie_create();
|
||||
if (it == NULL) {
|
||||
printf("create error!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
//add 101.45.69.50/16
|
||||
ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 1 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.69.50/16
|
||||
ret = btrie_insert(it, 170738994, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 2 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.79.50/16
|
||||
ret = btrie_insert(it, 170741554, 0xffff0000, 1);
|
||||
if (ret == 0) {
|
||||
printf("insert 3 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 102.45.79.50/24
|
||||
ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 4 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170741554);
|
||||
if (ret == 1) {
|
||||
printf("test case 1 passed\n");
|
||||
} else {
|
||||
printf("test case 1 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170786817);
|
||||
if (ret != 1) {
|
||||
printf("test case 2 passed\n");
|
||||
} else {
|
||||
printf("test case 2 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_delete(it, 1714245426, 0xffffff00);
|
||||
if (ret != 0) {
|
||||
printf("delete 1 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 1714245426);
|
||||
if (ret != 1) {
|
||||
printf("test case 3 passed\n");
|
||||
} else {
|
||||
printf("test case 3 error\n");
|
||||
}
|
||||
|
||||
//add dead:beef::/32
|
||||
ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find_a6(it, ip_v6);
|
||||
if (ret == 1) {
|
||||
printf("test case 4 passed\n");
|
||||
} else {
|
||||
printf("test case 4 error\n");
|
||||
}
|
||||
|
||||
// insert 4m ips
|
||||
for (size_t ip = 1; ip < 1024 * 1024 * 4; ++ip) {
|
||||
ret = btrie_insert(it, ip, 0xffffffff, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error (%d) (%zu) .\n", ret, ip);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
btrie_destroy(it);
|
||||
printf("test failed\n");
|
||||
return 1;
|
||||
}
|
96
debian/clickhouse-server.init
vendored
96
debian/clickhouse-server.init
vendored
@ -67,26 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
|
||||
fi
|
||||
|
||||
|
||||
is_running()
|
||||
{
|
||||
pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null
|
||||
}
|
||||
|
||||
|
||||
wait_for_done()
|
||||
{
|
||||
timeout=$1
|
||||
attempts=0
|
||||
while is_running; do
|
||||
attempts=$(($attempts + 1))
|
||||
if [ -n "$timeout" ] && [ $attempts -gt $timeout ]; then
|
||||
return 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
die()
|
||||
{
|
||||
echo $1 >&2
|
||||
@ -105,49 +85,7 @@ check_config()
|
||||
|
||||
initdb()
|
||||
{
|
||||
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
|
||||
if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then
|
||||
die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}";
|
||||
fi
|
||||
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
else
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR
|
||||
fi
|
||||
|
||||
if ! getent passwd ${CLICKHOUSE_USER} >/dev/null; then
|
||||
echo "Can't chown to non-existing user ${CLICKHOUSE_USER}"
|
||||
return
|
||||
fi
|
||||
if ! getent group ${CLICKHOUSE_GROUP} >/dev/null; then
|
||||
echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}"
|
||||
return
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then
|
||||
echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]"
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then
|
||||
if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") = "/" ]; then
|
||||
echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown."
|
||||
else
|
||||
if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then
|
||||
echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
|
||||
echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then
|
||||
echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/*
|
||||
echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
@ -171,17 +109,7 @@ restart()
|
||||
|
||||
forcestop()
|
||||
{
|
||||
local EXIT_STATUS
|
||||
EXIT_STATUS=0
|
||||
|
||||
echo -n "Stop forcefully $PROGRAM service: "
|
||||
|
||||
kill -KILL $(cat "$CLICKHOUSE_PIDFILE")
|
||||
|
||||
wait_for_done
|
||||
|
||||
echo "DONE"
|
||||
return $EXIT_STATUS
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
@ -261,16 +189,16 @@ main()
|
||||
service_or_func restart
|
||||
;;
|
||||
condstart)
|
||||
is_running || service_or_func start
|
||||
service_or_func start
|
||||
;;
|
||||
condstop)
|
||||
is_running && service_or_func stop
|
||||
service_or_func stop
|
||||
;;
|
||||
condrestart)
|
||||
is_running && service_or_func restart
|
||||
service_or_func restart
|
||||
;;
|
||||
condreload)
|
||||
is_running && service_or_func restart
|
||||
service_or_func restart
|
||||
;;
|
||||
initdb)
|
||||
initdb
|
||||
@ -293,17 +221,7 @@ main()
|
||||
|
||||
status()
|
||||
{
|
||||
if is_running; then
|
||||
echo "$PROGRAM service is running"
|
||||
exit 0
|
||||
else
|
||||
if is_cron_disabled; then
|
||||
echo "$PROGRAM service is stopped";
|
||||
else
|
||||
echo "$PROGRAM: process unexpectedly terminated"
|
||||
fi
|
||||
exit 3
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
|
@ -288,6 +288,7 @@ TESTS_TO_SKIP=(
|
||||
|
||||
# Require python libraries like scipy, pandas and numpy
|
||||
01322_ttest_scipy
|
||||
01561_mann_whitney_scipy
|
||||
|
||||
01545_system_errors
|
||||
# Checks system.errors
|
||||
|
@ -415,4 +415,4 @@ if not args.keep_created_tables and not args.use_existing_tables:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
|
||||
reportStageEnd('drop-2')
|
||||
reportStageEnd('drop-2')
|
||||
|
@ -13,9 +13,9 @@ cmake .. \
|
||||
-DENABLE_CLICKHOUSE_SERVER=ON \
|
||||
-DENABLE_CLICKHOUSE_CLIENT=ON \
|
||||
-DUSE_STATIC_LIBRARIES=OFF \
|
||||
-DCLICKHOUSE_SPLIT_BINARY=ON \
|
||||
-DSPLIT_SHARED_LIBRARIES=ON \
|
||||
-DENABLE_LIBRARIES=OFF \
|
||||
-DUSE_UNWIND=ON \
|
||||
-DENABLE_UTILS=OFF \
|
||||
-DENABLE_TESTS=OFF
|
||||
```
|
||||
|
@ -17,7 +17,6 @@ toc_title: Third-Party Libraries Used
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -4,4 +4,59 @@ toc_priority: 5
|
||||
|
||||
# avg {#agg_function-avg}
|
||||
|
||||
Calculates the average. Only works for numbers. The result is always Float64.
|
||||
Calculates the arithmetic mean.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
avgWeighted(x)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `x` — Values.
|
||||
|
||||
`x` must be
|
||||
[Integer](../../../sql-reference/data-types/int-uint.md),
|
||||
[floating-point](../../../sql-reference/data-types/float.md), or
|
||||
[Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `NaN` if the supplied parameter is empty.
|
||||
- Mean otherwise.
|
||||
|
||||
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avg(x) FROM values('x Int8', 0, 1, 2, 3, 4, 5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avg(x)─┐
|
||||
│ 2.5 │
|
||||
└────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE table test (t UInt8) ENGINE = Memory;
|
||||
SELECT avg(t) FROM test
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avg(x)─┐
|
||||
│ nan │
|
||||
└────────┘
|
||||
```
|
||||
|
@ -14,17 +14,21 @@ avgWeighted(x, weight)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
|
||||
- `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
|
||||
- `x` — Values.
|
||||
- `weight` — Weights of the values.
|
||||
|
||||
Type of `x` and `weight` must be the same.
|
||||
`x` and `weight` must both be
|
||||
[Integer](../../../sql-reference/data-types/int-uint.md),
|
||||
[floating-point](../../../sql-reference/data-types/float.md), or
|
||||
[Decimal](../../../sql-reference/data-types/decimal.md),
|
||||
but may have different types.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Weighted mean.
|
||||
- `NaN`. If all the weights are equal to 0.
|
||||
- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty.
|
||||
- Weighted mean otherwise.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -42,3 +46,54 @@ Result:
|
||||
│ 8 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avgWeighted(x, w)
|
||||
FROM values('x Int8, w Float64', (4, 1), (1, 0), (10, 2))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ 8 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avgWeighted(x, w)
|
||||
FROM values('x Int8, w Int8', (0, 0), (1, 0), (10, 0))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ nan │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE table test (t UInt8) ENGINE = Memory;
|
||||
SELECT avgWeighted(t) FROM test
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ nan │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
@ -19,7 +19,6 @@ toc_title: Bibliotecas de terceros utilizadas
|
||||
| Más información | [Licencia de 3 cláusulas BSD](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| H3 | [Licencia Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [Licencia de 3 cláusulas BSD](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [Licencia BSD de 2 cláusulas](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Licencia Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [Información adicional](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -21,7 +21,6 @@ toc_title: "\u06A9\u062A\u0627\u0628\u062E\u0627\u0646\u0647 \u0647\u0627\u06CC
|
||||
| googletest | [لیسانس 3 بند](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| اچ 3 | [نمایی مجوز 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [لیسانس 3 بند](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| لیبتری | [لیسانس 2 بند](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| شکنجه نوجوان | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| لیبیدوید | [مجوز زلب](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| نوشیدن شراب | [الجی پی ال2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -19,7 +19,6 @@ toc_title: "Biblioth\xE8ques Tierces Utilis\xE9es"
|
||||
| googletest | [Licence BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Licence Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [Licence BSD 3-Clause](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [Licence BSD 2-Clause](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Licence Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -20,7 +20,6 @@ toc_title: "\u30B5\u30FC\u30C9\u30D1\u30FC\u30C6\u30A3\u88FD\u30E9\u30A4\u30D6\u
|
||||
| googletest | [BSD3条項ライセンス](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apacheライセンス2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD3条項ライセンス](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD2条項ライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlibライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -18,7 +18,6 @@ toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -19,7 +19,6 @@ toc_title: "Kullan\u0131lan \xDC\xE7\xFCnc\xFC Taraf K\xFCt\xFCphaneleri"
|
||||
| googletest | [BSD 3-Clause Lisansı](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h33 | [Apache Lic 2.0ense 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause Lisansı](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2. 1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -11,7 +11,6 @@
|
||||
| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
|
||||
| googletest | [BSD3-条款许可](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| 超扫描 | [BSD3-条款许可](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD2-条款许可](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib许可证](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -43,13 +43,81 @@ else ()
|
||||
${ENABLE_CLICKHOUSE_ALL})
|
||||
endif ()
|
||||
|
||||
message(STATUS "ClickHouse modes:")
|
||||
|
||||
if (NOT ENABLE_CLICKHOUSE_SERVER)
|
||||
message(WARNING "ClickHouse server mode is not going to be built.")
|
||||
else()
|
||||
message(STATUS "Server mode: ON")
|
||||
endif()
|
||||
|
||||
if (NOT ENABLE_CLICKHOUSE_CLIENT)
|
||||
message(WARNING "ClickHouse client mode is not going to be built. You won't be able to connect to the server and run
|
||||
tests")
|
||||
else()
|
||||
message(STATUS "Client mode: ON")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_LOCAL)
|
||||
message(STATUS "Local mode: ON")
|
||||
else()
|
||||
message(STATUS "Local mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_BENCHMARK)
|
||||
message(STATUS "Benchmark mode: ON")
|
||||
else()
|
||||
message(STATUS "Benchmark mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
|
||||
message(STATUS "Extract from config mode: ON")
|
||||
else()
|
||||
message(STATUS "Extract from config mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_COMPRESSOR)
|
||||
message(STATUS "Compressor mode: ON")
|
||||
else()
|
||||
message(STATUS "Compressor mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_COPIER)
|
||||
message(STATUS "Copier mode: ON")
|
||||
else()
|
||||
message(STATUS "Copier mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_FORMAT)
|
||||
message(STATUS "Format mode: ON")
|
||||
else()
|
||||
message(STATUS "Format mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_OBFUSCATOR)
|
||||
message(STATUS "Obfuscator mode: ON")
|
||||
else()
|
||||
message(STATUS "Obfuscator mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
|
||||
message(STATUS "ODBC bridge mode: ON")
|
||||
else()
|
||||
message(STATUS "ODBC bridge mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_INSTALL)
|
||||
message(STATUS "ClickHouse install: ON")
|
||||
else()
|
||||
message(STATUS "ClickHouse install: OFF")
|
||||
endif()
|
||||
|
||||
if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES))
|
||||
set(CLICKHOUSE_ONE_SHARED ON)
|
||||
endif()
|
||||
|
||||
configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h)
|
||||
|
||||
|
||||
macro(clickhouse_target_link_split_lib target name)
|
||||
if(NOT CLICKHOUSE_ONE_SHARED)
|
||||
target_link_libraries(${target} PRIVATE clickhouse-${name}-lib)
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/MMapReadBufferFromFile.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -70,7 +71,7 @@ namespace po = boost::program_options;
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
||||
auto executeScript(const std::string & command, bool throw_on_error = false)
|
||||
static auto executeScript(const std::string & command, bool throw_on_error = false)
|
||||
{
|
||||
auto sh = ShellCommand::execute(command);
|
||||
WriteBufferFromFileDescriptor wb_stdout(STDOUT_FILENO);
|
||||
@ -87,7 +88,7 @@ auto executeScript(const std::string & command, bool throw_on_error = false)
|
||||
return sh->tryWait();
|
||||
}
|
||||
|
||||
bool ask(std::string question)
|
||||
static bool ask(std::string question)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
@ -104,6 +105,16 @@ bool ask(std::string question)
|
||||
}
|
||||
}
|
||||
|
||||
static bool filesEqual(std::string path1, std::string path2)
|
||||
{
|
||||
MMapReadBufferFromFile in1(path1, 0);
|
||||
MMapReadBufferFromFile in2(path2, 0);
|
||||
|
||||
/// memcmp is faster than hashing and comparing hashes
|
||||
return in1.buffer().size() == in2.buffer().size()
|
||||
&& 0 == memcmp(in1.buffer().begin(), in2.buffer().begin(), in1.buffer().size());
|
||||
}
|
||||
|
||||
|
||||
int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
{
|
||||
@ -143,57 +154,89 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary from {}, file doesn't exist",
|
||||
binary_self_path.string());
|
||||
|
||||
fs::path binary_self_canonical_path = fs::canonical(binary_self_path);
|
||||
|
||||
/// Copy binary to the destination directory.
|
||||
|
||||
/// TODO An option to link instead of copy - useful for developers.
|
||||
/// TODO Check if the binary is the same.
|
||||
|
||||
size_t binary_size = fs::file_size(binary_self_path);
|
||||
|
||||
fs::path prefix = fs::path(options["prefix"].as<std::string>());
|
||||
fs::path bin_dir = prefix / fs::path(options["binary-path"].as<std::string>());
|
||||
|
||||
size_t available_space = fs::space(bin_dir).available;
|
||||
if (available_space < binary_size)
|
||||
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
|
||||
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
|
||||
|
||||
fs::path main_bin_path = bin_dir / "clickhouse";
|
||||
fs::path main_bin_tmp_path = bin_dir / "clickhouse.new";
|
||||
fs::path main_bin_old_path = bin_dir / "clickhouse.old";
|
||||
|
||||
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
|
||||
size_t binary_size = fs::file_size(binary_self_path);
|
||||
|
||||
try
|
||||
bool old_binary_exists = fs::exists(main_bin_path);
|
||||
bool already_installed = false;
|
||||
|
||||
/// Check if the binary is the same file (already installed).
|
||||
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
|
||||
{
|
||||
ReadBufferFromFile in(binary_self_path.string());
|
||||
WriteBufferFromFile out(main_bin_tmp_path.string());
|
||||
copyData(in, out);
|
||||
out.sync();
|
||||
|
||||
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
|
||||
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
out.finalize();
|
||||
already_installed = true;
|
||||
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
|
||||
}
|
||||
catch (const Exception & e)
|
||||
/// Check if binary has the same content.
|
||||
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
|
||||
{
|
||||
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
|
||||
std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
|
||||
throw;
|
||||
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
|
||||
main_bin_path.string());
|
||||
|
||||
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
|
||||
{
|
||||
already_installed = true;
|
||||
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
|
||||
main_bin_path.string(), binary_self_canonical_path.string());
|
||||
}
|
||||
}
|
||||
|
||||
if (fs::exists(main_bin_path))
|
||||
if (already_installed)
|
||||
{
|
||||
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
|
||||
main_bin_path.string(), main_bin_old_path.string());
|
||||
|
||||
/// There is file exchange operation in Linux but it's not portable.
|
||||
fs::rename(main_bin_path, main_bin_old_path);
|
||||
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
|
||||
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t available_space = fs::space(bin_dir).available;
|
||||
if (available_space < binary_size)
|
||||
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
|
||||
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
|
||||
|
||||
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
|
||||
fs::rename(main_bin_tmp_path, main_bin_path);
|
||||
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
|
||||
|
||||
try
|
||||
{
|
||||
ReadBufferFromFile in(binary_self_path.string());
|
||||
WriteBufferFromFile out(main_bin_tmp_path.string());
|
||||
copyData(in, out);
|
||||
out.sync();
|
||||
|
||||
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
|
||||
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
out.finalize();
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
|
||||
std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
|
||||
throw;
|
||||
}
|
||||
|
||||
if (old_binary_exists)
|
||||
{
|
||||
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
|
||||
main_bin_path.string(), main_bin_old_path.string());
|
||||
|
||||
/// There is file exchange operation in Linux but it's not portable.
|
||||
fs::rename(main_bin_path, main_bin_old_path);
|
||||
}
|
||||
|
||||
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
|
||||
fs::rename(main_bin_tmp_path, main_bin_path);
|
||||
}
|
||||
|
||||
/// Create symlinks.
|
||||
|
||||
@ -401,8 +444,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));
|
||||
|
||||
if (!configuration->getString("users.default.password", "").empty()
|
||||
|| configuration->getString("users.default.password_sha256_hex", "").empty()
|
||||
|| configuration->getString("users.default.password_double_sha1_hex", "").empty())
|
||||
|| !configuration->getString("users.default.password_sha256_hex", "").empty()
|
||||
|| !configuration->getString("users.default.password_double_sha1_hex", "").empty())
|
||||
{
|
||||
has_password_for_default_user = true;
|
||||
}
|
||||
@ -576,7 +619,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
|
||||
" This is optional. Taskstats accounting will be disabled."
|
||||
" To enable taskstats accounting you may add the required capability later manually.\"",
|
||||
"/tmp/test_setcap.sh", main_bin_path.string());
|
||||
"/tmp/test_setcap.sh", fs::canonical(main_bin_path).string());
|
||||
fmt::print(" {}\n", command);
|
||||
executeScript(command);
|
||||
#endif
|
||||
@ -597,10 +640,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
}
|
||||
}
|
||||
|
||||
std::string maybe_sudo;
|
||||
if (getuid() != 0)
|
||||
maybe_sudo = "sudo ";
|
||||
|
||||
std::string maybe_password;
|
||||
if (has_password_for_default_user)
|
||||
maybe_password = " --password";
|
||||
@ -608,10 +647,19 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
fmt::print(
|
||||
"\nClickHouse has been successfully installed.\n"
|
||||
"\nStart clickhouse-server with:\n"
|
||||
" {}clickhouse start\n"
|
||||
" sudo clickhouse start\n"
|
||||
"\nStart clickhouse-client with:\n"
|
||||
" clickhouse-client{}\n\n",
|
||||
maybe_sudo, maybe_password);
|
||||
maybe_password);
|
||||
}
|
||||
catch (const fs::filesystem_error &)
|
||||
{
|
||||
std::cerr << getCurrentExceptionMessage(false) << '\n';
|
||||
|
||||
if (getuid() != 0)
|
||||
std::cerr << "\nRun with sudo.\n";
|
||||
|
||||
return getCurrentExceptionCode();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -783,17 +831,20 @@ namespace
|
||||
return pid;
|
||||
}
|
||||
|
||||
int stop(const fs::path & pid_file)
|
||||
int stop(const fs::path & pid_file, bool force)
|
||||
{
|
||||
UInt64 pid = isRunning(pid_file);
|
||||
|
||||
if (!pid)
|
||||
return 0;
|
||||
|
||||
if (0 == kill(pid, 15)) /// Terminate
|
||||
fmt::print("Sent termination signal.\n", pid);
|
||||
int signal = force ? SIGKILL : SIGTERM;
|
||||
const char * signal_name = force ? "kill" : "terminate";
|
||||
|
||||
if (0 == kill(pid, signal))
|
||||
fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid);
|
||||
else
|
||||
throwFromErrno("Cannot send termination signal", ErrorCodes::SYSTEM_ERROR);
|
||||
throwFromErrno(fmt::format("Cannot send {} signal", signal_name), ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
size_t try_num = 0;
|
||||
constexpr size_t num_tries = 60;
|
||||
@ -869,6 +920,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
|
||||
desc.add_options()
|
||||
("help,h", "produce help message")
|
||||
("pid-path", po::value<std::string>()->default_value("/var/run/clickhouse-server"), "directory for pid file")
|
||||
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
|
||||
;
|
||||
|
||||
po::variables_map options;
|
||||
@ -887,7 +939,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
|
||||
{
|
||||
fs::path pid_file = fs::path(options["pid-path"].as<std::string>()) / "clickhouse-server.pid";
|
||||
|
||||
return stop(pid_file);
|
||||
return stop(pid_file, options["force"].as<bool>());
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -940,6 +992,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
|
||||
("config-path", po::value<std::string>()->default_value("/etc/clickhouse-server"), "directory with configs")
|
||||
("pid-path", po::value<std::string>()->default_value("/var/run/clickhouse-server"), "directory for pid file")
|
||||
("user", po::value<std::string>()->default_value("clickhouse"), "clickhouse user")
|
||||
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
|
||||
;
|
||||
|
||||
po::variables_map options;
|
||||
@ -962,7 +1015,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
|
||||
fs::path config = fs::path(options["config-path"].as<std::string>()) / "config.xml";
|
||||
fs::path pid_file = fs::path(options["pid-path"].as<std::string>()) / "clickhouse-server.pid";
|
||||
|
||||
if (int res = stop(pid_file))
|
||||
if (int res = stop(pid_file, options["force"].as<bool>()))
|
||||
return res;
|
||||
return start(user, executable, config, pid_file);
|
||||
}
|
||||
|
@ -709,7 +709,7 @@
|
||||
|
||||
|
||||
<!-- Configuration of external dictionaries. See:
|
||||
https://clickhouse.yandex/docs/en/dicts/external_dicts/
|
||||
https://clickhouse.tech/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts
|
||||
-->
|
||||
<dictionaries_config>*_dictionary.xml</dictionaries_config>
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <memory>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionAvg.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
@ -13,43 +14,37 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct Avg
|
||||
bool allowType(const DataTypePtr& type) noexcept
|
||||
{
|
||||
using FieldType = std::conditional_t<IsDecimalNumber<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
NearestFieldType<T>>;
|
||||
// using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
|
||||
using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType, UInt64>>;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using AggregateFuncAvg = typename Avg<T>::Function;
|
||||
const WhichDataType t(type);
|
||||
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAvg(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
AggregateFunctionPtr res;
|
||||
DataTypePtr data_type = argument_types[0];
|
||||
if (isDecimal(data_type))
|
||||
res.reset(createWithDecimalType<AggregateFuncAvg>(*data_type, *data_type, argument_types));
|
||||
else
|
||||
res.reset(createWithNumericType<AggregateFuncAvg>(*data_type, argument_types));
|
||||
const DataTypePtr& data_type = argument_types[0];
|
||||
|
||||
if (!allowType(data_type))
|
||||
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
AggregateFunctionPtr res;
|
||||
|
||||
if (isDecimal(data_type))
|
||||
res.reset(createWithDecimalType<AggregateFunctionAvg>(
|
||||
*data_type, argument_types, getDecimalScale(*data_type)));
|
||||
else
|
||||
res.reset(createWithNumericType<AggregateFunctionAvg>(*data_type, argument_types));
|
||||
|
||||
if (!res)
|
||||
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,78 +1,102 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include "Core/DecimalFunctions.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
}
|
||||
template <class T>
|
||||
using DecimalOrVectorCol = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
|
||||
template <typename T, typename Denominator>
|
||||
struct AggregateFunctionAvgData
|
||||
{
|
||||
using NumeratorType = T;
|
||||
using DenominatorType = Denominator;
|
||||
template <class T> constexpr bool DecimalOrExtendedInt =
|
||||
IsDecimalNumber<T>
|
||||
|| std::is_same_v<T, Int128>
|
||||
|| std::is_same_v<T, Int256>
|
||||
|| std::is_same_v<T, UInt128>
|
||||
|| std::is_same_v<T, UInt256>;
|
||||
|
||||
T numerator{0};
|
||||
/**
|
||||
* Helper class to encapsulate values conversion for avg and avgWeighted.
|
||||
*/
|
||||
template <class Numerator, class Denominator>
|
||||
struct AvgFraction
|
||||
{
|
||||
Numerator numerator{0};
|
||||
Denominator denominator{0};
|
||||
|
||||
template <typename ResultT>
|
||||
ResultT NO_SANITIZE_UNDEFINED result() const
|
||||
/// Allow division by zero as sometimes we need to return NaN.
|
||||
/// Invoked only is either Numerator or Denominator are Decimal.
|
||||
Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale) const
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<ResultT>)
|
||||
if constexpr (std::numeric_limits<ResultT>::is_iec559)
|
||||
{
|
||||
if constexpr (is_big_int_v<Denominator>)
|
||||
return static_cast<ResultT>(numerator) / static_cast<ResultT>(denominator);
|
||||
else
|
||||
return static_cast<ResultT>(numerator) / denominator; /// allow division by zero
|
||||
}
|
||||
if constexpr (IsDecimalNumber<Numerator> && IsDecimalNumber<Denominator>)
|
||||
{
|
||||
// According to the docs, num(S1) / denom(S2) would have scale S1
|
||||
|
||||
if (denominator == static_cast<Denominator>(0))
|
||||
return static_cast<ResultT>(0);
|
||||
if constexpr (std::is_same_v<Numerator, Decimal256> && std::is_same_v<Denominator, Decimal128>)
|
||||
///Special case as Decimal256 / Decimal128 = compile error (as Decimal128 is not parametrized by a wide
|
||||
///int), but an __int128 instead
|
||||
return DecimalUtils::convertTo<Float64>(
|
||||
numerator / (denominator.template convertTo<Decimal256>()), num_scale);
|
||||
else
|
||||
return DecimalUtils::convertTo<Float64>(numerator / denominator, num_scale);
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, Decimal256>)
|
||||
return static_cast<ResultT>(numerator / static_cast<T>(denominator));
|
||||
/// Numerator is always casted to Float64 to divide correctly if the denominator is not Float64.
|
||||
Float64 num_converted;
|
||||
|
||||
if constexpr (IsDecimalNumber<Numerator>)
|
||||
num_converted = DecimalUtils::convertTo<Float64>(numerator, num_scale);
|
||||
else
|
||||
return static_cast<ResultT>(numerator / denominator);
|
||||
num_converted = static_cast<Float64>(numerator); /// all other types, including extended integral.
|
||||
|
||||
std::conditional_t<DecimalOrExtendedInt<Denominator>,
|
||||
Float64, Denominator> denom_converted;
|
||||
|
||||
if constexpr (IsDecimalNumber<Denominator>)
|
||||
denom_converted = DecimalUtils::convertTo<Float64>(denominator, denom_scale);
|
||||
else if constexpr (DecimalOrExtendedInt<Denominator>)
|
||||
/// no way to divide Float64 and extended integral type without an explicit cast.
|
||||
denom_converted = static_cast<Float64>(denominator);
|
||||
else
|
||||
denom_converted = denominator; /// can divide on float, no cast required.
|
||||
|
||||
return num_converted / denom_converted;
|
||||
}
|
||||
|
||||
Float64 NO_SANITIZE_UNDEFINED divide() const
|
||||
{
|
||||
if constexpr (DecimalOrExtendedInt<Denominator>) /// if extended int
|
||||
return static_cast<Float64>(numerator) / static_cast<Float64>(denominator);
|
||||
else
|
||||
return static_cast<Float64>(numerator) / denominator;
|
||||
}
|
||||
};
|
||||
|
||||
/// Calculates arithmetic mean of numbers.
|
||||
template <typename T, typename Data, typename Derived>
|
||||
class AggregateFunctionAvgBase : public IAggregateFunctionDataHelper<Data, Derived>
|
||||
|
||||
/**
|
||||
* @tparam Derived When deriving from this class, use the child class name as in CRTP, e.g.
|
||||
* class Self : Agg<char, bool, bool, Self>.
|
||||
*/
|
||||
template <class Numerator, class Denominator, class Derived>
|
||||
class AggregateFunctionAvgBase : public
|
||||
IAggregateFunctionDataHelper<AvgFraction<Numerator, Denominator>, Derived>
|
||||
{
|
||||
public:
|
||||
using ResultType = std::conditional_t<IsDecimalNumber<T>, T, Float64>;
|
||||
using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<T>, DataTypeNumber<Float64>>;
|
||||
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<Float64>>;
|
||||
using Fraction = AvgFraction<Numerator, Denominator>;
|
||||
using Base = IAggregateFunctionDataHelper<Fraction, Derived>;
|
||||
|
||||
/// ctor for native types
|
||||
AggregateFunctionAvgBase(const DataTypes & argument_types_) : IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(0) {}
|
||||
explicit AggregateFunctionAvgBase(const DataTypes & argument_types_,
|
||||
UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
|
||||
: Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {}
|
||||
|
||||
/// ctor for Decimals
|
||||
AggregateFunctionAvgBase(const IDataType & data_type, const DataTypes & argument_types_)
|
||||
: IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(getDecimalScale(data_type))
|
||||
{
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
if constexpr (IsDecimalNumber<T>)
|
||||
return std::make_shared<ResultDataType>(ResultDataType::maxPrecision(), scale);
|
||||
else
|
||||
return std::make_shared<ResultDataType>();
|
||||
}
|
||||
DataTypePtr getReturnType() const final { return std::make_shared<DataTypeNumber<Float64>>(); }
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
@ -84,7 +108,7 @@ public:
|
||||
{
|
||||
writeBinary(this->data(place).numerator, buf);
|
||||
|
||||
if constexpr (std::is_unsigned_v<typename Data::DenominatorType>)
|
||||
if constexpr (std::is_unsigned_v<Denominator>)
|
||||
writeVarUInt(this->data(place).denominator, buf);
|
||||
else /// Floating point denominator type can be used
|
||||
writeBinary(this->data(place).denominator, buf);
|
||||
@ -94,7 +118,7 @@ public:
|
||||
{
|
||||
readBinary(this->data(place).numerator, buf);
|
||||
|
||||
if constexpr (std::is_unsigned_v<typename Data::DenominatorType>)
|
||||
if constexpr (std::is_unsigned_v<Denominator>)
|
||||
readVarUInt(this->data(place).denominator, buf);
|
||||
else /// Floating point denominator type can be used
|
||||
readBinary(this->data(place).denominator, buf);
|
||||
@ -102,29 +126,34 @@ public:
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto & column = static_cast<ColVecResult &>(to);
|
||||
column.getData().push_back(this->data(place).template result<ResultType>());
|
||||
if constexpr (IsDecimalNumber<Numerator> || IsDecimalNumber<Denominator>)
|
||||
static_cast<ColumnVector<Float64> &>(to).getData().push_back(
|
||||
this->data(place).divideIfAnyDecimal(num_scale, denom_scale));
|
||||
else
|
||||
static_cast<ColumnVector<Float64> &>(to).getData().push_back(this->data(place).divide());
|
||||
}
|
||||
|
||||
protected:
|
||||
UInt32 scale;
|
||||
private:
|
||||
UInt32 num_scale;
|
||||
UInt32 denom_scale;
|
||||
};
|
||||
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>
|
||||
template <class T>
|
||||
using AvgFieldType = std::conditional_t<IsDecimalNumber<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
NearestFieldType<T>>;
|
||||
|
||||
template <class T>
|
||||
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>
|
||||
{
|
||||
public:
|
||||
using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>::AggregateFunctionAvgBase;
|
||||
using AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>::AggregateFunctionAvgBase;
|
||||
|
||||
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const final
|
||||
{
|
||||
const auto & column = static_cast<const ColVecType &>(*columns[0]);
|
||||
this->data(place).numerator += column.getData()[row_num];
|
||||
this->data(place).denominator += 1;
|
||||
this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
|
||||
++this->data(place).denominator;
|
||||
}
|
||||
|
||||
String getName() const override { return "avg"; }
|
||||
String getName() const final { return "avg"; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,3 +1,5 @@
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionAvgWeighted.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
@ -13,47 +15,91 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct AvgWeighted
|
||||
bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
|
||||
{
|
||||
using FieldType = std::conditional_t<IsDecimalNumber<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
NearestFieldType<T>>;
|
||||
// using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
|
||||
using Function = AggregateFunctionAvgWeighted<T, AggregateFunctionAvgData<FieldType, FieldType>>;
|
||||
};
|
||||
const WhichDataType l_dt(left), r_dt(right);
|
||||
|
||||
template <typename T>
|
||||
using AggregateFuncAvgWeighted = typename AvgWeighted<T>::Function;
|
||||
constexpr auto allow = [](WhichDataType t)
|
||||
{
|
||||
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
|
||||
};
|
||||
|
||||
return allow(l_dt) && allow(r_dt);
|
||||
}
|
||||
|
||||
#define AT_SWITCH(LINE) \
|
||||
switch (which.idx) \
|
||||
{ \
|
||||
LINE(Int8); LINE(Int16); LINE(Int32); LINE(Int64); LINE(Int128); LINE(Int256); \
|
||||
LINE(UInt8); LINE(UInt16); LINE(UInt32); LINE(UInt64); LINE(UInt128); LINE(UInt256); \
|
||||
LINE(Decimal32); LINE(Decimal64); LINE(Decimal128); LINE(Decimal256); \
|
||||
LINE(Float32); LINE(Float64); \
|
||||
default: return nullptr; \
|
||||
}
|
||||
|
||||
template <class First, class ... TArgs>
|
||||
static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
|
||||
{
|
||||
const WhichDataType which(second_type);
|
||||
|
||||
#define LINE(Type) \
|
||||
case TypeIndex::Type: return new AggregateFunctionAvgWeighted<First, Type>(std::forward<TArgs>(args)...)
|
||||
AT_SWITCH(LINE)
|
||||
#undef LINE
|
||||
}
|
||||
|
||||
// Not using helper functions because there are no templates for binary decimal/numeric function.
|
||||
template <class... TArgs>
|
||||
static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
|
||||
{
|
||||
const WhichDataType which(first_type);
|
||||
|
||||
#define LINE(Type) \
|
||||
case TypeIndex::Type: return create<Type, TArgs...>(second_type, std::forward<TArgs>(args)...)
|
||||
AT_SWITCH(LINE)
|
||||
#undef LINE
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertBinary(name, argument_types);
|
||||
|
||||
AggregateFunctionPtr res;
|
||||
const auto data_type = static_cast<const DataTypePtr>(argument_types[0]);
|
||||
const auto data_type_weight = static_cast<const DataTypePtr>(argument_types[1]);
|
||||
if (!data_type->equals(*data_type_weight))
|
||||
throw Exception("Different types " + data_type->getName() + " and " + data_type_weight->getName() + " of arguments for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
if (isDecimal(data_type))
|
||||
res.reset(createWithDecimalType<AggregateFuncAvgWeighted>(*data_type, *data_type, argument_types));
|
||||
|
||||
if (!allowTypes(data_type, data_type_weight))
|
||||
throw Exception(
|
||||
"Types " + data_type->getName() +
|
||||
" and " + data_type_weight->getName() +
|
||||
" are non-conforming as arguments for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
AggregateFunctionPtr ptr;
|
||||
|
||||
const bool left_decimal = isDecimal(data_type);
|
||||
const bool right_decimal = isDecimal(data_type_weight);
|
||||
|
||||
if (left_decimal && right_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight,
|
||||
argument_types,
|
||||
getDecimalScale(*data_type), getDecimalScale(*data_type_weight)));
|
||||
else if (left_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types,
|
||||
getDecimalScale(*data_type)));
|
||||
else if (right_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types,
|
||||
// numerator is not decimal, so its scale is 0
|
||||
0, getDecimalScale(*data_type_weight)));
|
||||
else
|
||||
res.reset(createWithNumericType<AggregateFuncAvgWeighted>(*data_type, argument_types));
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types));
|
||||
|
||||
if (!res)
|
||||
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
return res;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted, AggregateFunctionFactory::CaseSensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,26 +1,44 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <AggregateFunctions/AggregateFunctionAvg.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionAvgWeighted final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>
|
||||
template <class T>
|
||||
using AvgWeightedFieldType = std::conditional_t<IsDecimalNumber<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
std::conditional_t<DecimalOrExtendedInt<T>,
|
||||
Float64, // no way to do UInt128 * UInt128, better cast to Float64
|
||||
NearestFieldType<T>>>;
|
||||
|
||||
template <class T, class U>
|
||||
using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)),
|
||||
AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>;
|
||||
|
||||
template <class Value, class Weight>
|
||||
class AggregateFunctionAvgWeighted final :
|
||||
public AggregateFunctionAvgBase<
|
||||
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>
|
||||
{
|
||||
public:
|
||||
using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>::AggregateFunctionAvgBase;
|
||||
using Base = AggregateFunctionAvgBase<
|
||||
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
|
||||
using Base::Base;
|
||||
|
||||
using ValueT = MaxFieldType<Value, Weight>;
|
||||
|
||||
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
const auto & values = static_cast<const ColVecType &>(*columns[0]);
|
||||
const auto & weights = static_cast<const ColVecType &>(*columns[1]);
|
||||
const auto& weights = static_cast<const DecimalOrVectorCol<Weight> &>(*columns[1]);
|
||||
|
||||
this->data(place).numerator += static_cast<typename Data::NumeratorType>(values.getData()[row_num]) * weights.getData()[row_num];
|
||||
this->data(place).denominator += weights.getData()[row_num];
|
||||
this->data(place).numerator += static_cast<ValueT>(
|
||||
static_cast<const DecimalOrVectorCol<Value> &>(*columns[0]).getData()[row_num]) *
|
||||
static_cast<ValueT>(weights.getData()[row_num]);
|
||||
|
||||
this->data(place).denominator += static_cast<AvgWeightedFieldType<Weight>>(weights.getData()[row_num]);
|
||||
}
|
||||
|
||||
String getName() const override { return "avgWeighted"; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -21,7 +21,8 @@ class IDataType;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
using DataTypes = std::vector<DataTypePtr>;
|
||||
|
||||
/** Creator have arguments: name of aggregate function, types of arguments, values of parameters.
|
||||
/**
|
||||
* The invoker has arguments: name of aggregate function, types of arguments, values of parameters.
|
||||
* Parameters are for "parametric" aggregate functions.
|
||||
* For example, in quantileWeighted(0.9)(x, weight), 0.9 is "parameter" and x, weight are "arguments".
|
||||
*/
|
||||
@ -87,7 +88,6 @@ private:
|
||||
|
||||
std::optional<AggregateFunctionProperties> tryGetPropertiesImpl(const String & name) const;
|
||||
|
||||
private:
|
||||
using AggregateFunctions = std::unordered_map<String, Value>;
|
||||
|
||||
AggregateFunctions aggregate_functions;
|
||||
|
37
src/AggregateFunctions/AggregateFunctionMannWhitney.cpp
Normal file
37
src/AggregateFunctions/AggregateFunctionMannWhitney.cpp
Normal file
@ -0,0 +1,37 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionMannWhitney.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include "registerAggregateFunctions.h"
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionMannWhitneyUTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
|
||||
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
return std::make_shared<AggregateFunctionMannWhitney>(argument_types, parameters);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("mannWhitneyUTest", createAggregateFunctionMannWhitneyUTest);
|
||||
}
|
||||
|
||||
}
|
246
src/AggregateFunctions/AggregateFunctionMannWhitney.h
Normal file
246
src/AggregateFunctions/AggregateFunctionMannWhitney.h
Normal file
@ -0,0 +1,246 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <common/types.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <limits>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
#include <iostream>
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
struct MannWhitneyData : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
/*Since null hypothesis is "for randomly selected values X and Y from two populations,
|
||||
*the probability of X being greater than Y is equal to the probability of Y being greater than X".
|
||||
*Or "the distribution F of first sample equals to the distribution G of second sample".
|
||||
*Then alternative for this hypothesis (H1) is "two-sided"(F != G), "less"(F < G), "greater" (F > G). */
|
||||
enum class Alternative
|
||||
{
|
||||
TwoSided,
|
||||
Less,
|
||||
Greater
|
||||
};
|
||||
|
||||
/// The behaviour equals to the similar function from scipy.
|
||||
/// https://github.com/scipy/scipy/blob/ab9e9f17e0b7b2d618c4d4d8402cd4c0c200d6c0/scipy/stats/stats.py#L6978
|
||||
std::pair<Float64, Float64> getResult(Alternative alternative, bool continuity_correction)
|
||||
{
|
||||
ConcatenatedSamples both(this->x, this->y);
|
||||
RanksArray ranks;
|
||||
Float64 tie_correction;
|
||||
|
||||
/// Compute ranks according to both samples.
|
||||
std::tie(ranks, tie_correction) = computeRanksAndTieCorrection(both);
|
||||
|
||||
const Float64 n1 = this->size_x;
|
||||
const Float64 n2 = this->size_y;
|
||||
|
||||
Float64 r1 = 0;
|
||||
for (size_t i = 0; i < n1; ++i)
|
||||
r1 += ranks[i];
|
||||
|
||||
const Float64 u1 = n1 * n2 + (n1 * (n1 + 1.)) / 2. - r1;
|
||||
const Float64 u2 = n1 * n2 - u1;
|
||||
|
||||
/// The distribution of U-statistic under null hypothesis H0 is symmetric with respect to meanrank.
|
||||
const Float64 meanrank = n1 * n2 /2. + 0.5 * continuity_correction;
|
||||
const Float64 sd = std::sqrt(tie_correction * n1 * n2 * (n1 + n2 + 1) / 12.0);
|
||||
|
||||
Float64 u = 0;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
/// There is no difference which u_i to take as u, because z will be differ only in sign and we take std::abs() from it.
|
||||
u = std::max(u1, u2);
|
||||
else if (alternative == Alternative::Less)
|
||||
u = u1;
|
||||
else if (alternative == Alternative::Greater)
|
||||
u = u2;
|
||||
|
||||
Float64 z = (u - meanrank) / sd;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
z = std::abs(z);
|
||||
|
||||
/// In fact cdf is a probability function, so it is intergral of density from (-inf, z].
|
||||
/// But since standard normal distribution is symmetric, cdf(0) = 0.5 and we have to compute integral from [0, z].
|
||||
const Float64 cdf = integrateSimpson(0, z, [] (Float64 t) { return std::pow(M_E, -0.5 * t * t) / std::sqrt(2 * M_PI);});
|
||||
|
||||
Float64 p_value = 0;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
p_value = 1 - 2 * cdf;
|
||||
else
|
||||
p_value = 0.5 - cdf;
|
||||
|
||||
return {u2, p_value};
|
||||
}
|
||||
|
||||
private:
|
||||
using Sample = typename StatisticalSample<Float64, Float64>::SampleX;
|
||||
|
||||
/// We need to compute ranks according to all samples. Use this class to avoid extra copy and memory allocation.
|
||||
class ConcatenatedSamples
|
||||
{
|
||||
public:
|
||||
ConcatenatedSamples(const Sample & first_, const Sample & second_)
|
||||
: first(first_), second(second_) {}
|
||||
|
||||
const Float64 & operator[](size_t ind) const
|
||||
{
|
||||
if (ind < first.size())
|
||||
return first[ind];
|
||||
return second[ind % first.size()];
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return first.size() + second.size();
|
||||
}
|
||||
|
||||
private:
|
||||
const Sample & first;
|
||||
const Sample & second;
|
||||
};
|
||||
};
|
||||
|
||||
class AggregateFunctionMannWhitney final:
|
||||
public IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney>
|
||||
{
|
||||
private:
|
||||
using Alternative = typename MannWhitneyData::Alternative;
|
||||
Alternative alternative;
|
||||
bool continuity_correction{true};
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionMannWhitney(const DataTypes & arguments, const Array & params)
|
||||
:IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney> ({arguments}, {})
|
||||
{
|
||||
if (params.size() > 2)
|
||||
throw Exception("Aggregate function " + getName() + " require two parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (params.empty())
|
||||
{
|
||||
alternative = Alternative::TwoSided;
|
||||
return;
|
||||
}
|
||||
|
||||
if (params[0].getType() != Field::Types::String)
|
||||
throw Exception("Aggregate function " + getName() + " require require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
auto param = params[0].get<String>();
|
||||
if (param == "two-sided")
|
||||
alternative = Alternative::TwoSided;
|
||||
else if (param == "less")
|
||||
alternative = Alternative::Less;
|
||||
else if (param == "greater")
|
||||
alternative = Alternative::Greater;
|
||||
else
|
||||
throw Exception("Unknown parameter in aggregate function " + getName() +
|
||||
". It must be one of: 'two sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (params.size() != 2)
|
||||
return;
|
||||
|
||||
if (params[1].getType() != Field::Types::UInt64)
|
||||
throw Exception("Aggregate function " + getName() + " require require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return "mannWhitneyUTest";
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
DataTypes types
|
||||
{
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
};
|
||||
|
||||
Strings names
|
||||
{
|
||||
"u_statistic",
|
||||
"p_value"
|
||||
};
|
||||
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
Float64 value = columns[0]->getFloat64(row_num);
|
||||
UInt8 is_second = columns[1]->getUInt(row_num);
|
||||
|
||||
if (is_second)
|
||||
this->data(place).addY(value, arena);
|
||||
else
|
||||
this->data(place).addX(value, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & a = this->data(place);
|
||||
auto & b = this->data(rhs);
|
||||
|
||||
a.merge(b, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
if (!this->data(place).size_x || !this->data(place).size_y)
|
||||
throw Exception("Aggregate function " + getName() + " require both samples to be non empty", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction);
|
||||
|
||||
/// Because p-value is a probability.
|
||||
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||
|
||||
column_stat.getData().push_back(u_statistic);
|
||||
column_value.getData().push_back(p_value);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
};
|
@ -21,23 +21,10 @@ AggregateFunctionPtr createAggregateFunctionRankCorrelation(const std::string &
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
|
||||
AggregateFunctionPtr res;
|
||||
|
||||
if (isDecimal(argument_types[0]) || isDecimal(argument_types[1]))
|
||||
{
|
||||
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
else
|
||||
{
|
||||
res.reset(createWithTwoNumericTypes<AggregateFunctionRankCorrelation>(*argument_types[0], *argument_types[1], argument_types));
|
||||
}
|
||||
|
||||
if (!res)
|
||||
{
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
return res;
|
||||
return std::make_shared<AggregateFunctionRankCorrelation>(argument_types);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,73 +1,56 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <common/types.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <limits>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <template <typename> class Comparator>
|
||||
struct ComparePairFirst final
|
||||
|
||||
struct RankCorrelationData : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
template <typename X, typename Y>
|
||||
bool operator()(const std::pair<X, Y> & lhs, const std::pair<X, Y> & rhs) const
|
||||
Float64 getResult()
|
||||
{
|
||||
return Comparator<X>{}(lhs.first, rhs.first);
|
||||
RanksArray ranks_x;
|
||||
std::tie(ranks_x, std::ignore) = computeRanksAndTieCorrection(this->x);
|
||||
|
||||
RanksArray ranks_y;
|
||||
std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y);
|
||||
|
||||
/// In our case sizes of both samples are equal.
|
||||
const auto size = this->size_x;
|
||||
|
||||
/// Count d^2 sum
|
||||
Float64 answer = 0;
|
||||
for (size_t j = 0; j < size; ++j)
|
||||
answer += (ranks_x[j] - ranks_y[j]) * (ranks_x[j] - ranks_y[j]);
|
||||
|
||||
answer *= 6;
|
||||
answer /= size * (size * size - 1);
|
||||
answer = 1 - answer;
|
||||
return answer;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <template <typename> class Comparator>
|
||||
struct ComparePairSecond final
|
||||
{
|
||||
template <typename X, typename Y>
|
||||
bool operator()(const std::pair<X, Y> & lhs, const std::pair<X, Y> & rhs) const
|
||||
{
|
||||
return Comparator<Y>{}(lhs.second, rhs.second);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename X = Float64, typename Y = Float64>
|
||||
struct AggregateFunctionRankCorrelationData final
|
||||
{
|
||||
size_t size_x = 0;
|
||||
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(std::pair<X, Y>), 4096>;
|
||||
using Array = PODArray<std::pair<X, Y>, 32, Allocator>;
|
||||
|
||||
Array values;
|
||||
};
|
||||
|
||||
template <typename X, typename Y>
|
||||
class AggregateFunctionRankCorrelation :
|
||||
public IAggregateFunctionDataHelper<AggregateFunctionRankCorrelationData<X, Y>, AggregateFunctionRankCorrelation<X, Y>>
|
||||
public IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation>
|
||||
{
|
||||
using Data = AggregateFunctionRankCorrelationData<X, Y>;
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(std::pair<Float64, Float64>), 4096>;
|
||||
using Array = PODArray<std::pair<Float64, Float64>, 32, Allocator>;
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionRankCorrelation(const DataTypes & arguments)
|
||||
:IAggregateFunctionDataHelper<AggregateFunctionRankCorrelationData<X, Y>,AggregateFunctionRankCorrelation<X, Y>> ({arguments}, {})
|
||||
:IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation> ({arguments}, {})
|
||||
{}
|
||||
|
||||
String getName() const override
|
||||
@ -80,24 +63,12 @@ public:
|
||||
return std::make_shared<DataTypeNumber<Float64>>();
|
||||
}
|
||||
|
||||
void insert(Data & a, const std::pair<X, Y> & x, Arena * arena) const
|
||||
{
|
||||
++a.size_x;
|
||||
a.values.push_back(x, arena);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & a = this->data(place);
|
||||
|
||||
auto new_x = assert_cast<const ColumnVector<X> &>(*columns[0]).getData()[row_num];
|
||||
auto new_y = assert_cast<const ColumnVector<Y> &>(*columns[1]).getData()[row_num];
|
||||
|
||||
auto new_arg = std::make_pair(new_x, new_y);
|
||||
|
||||
a.size_x += 1;
|
||||
|
||||
a.values.push_back(new_arg, arena);
|
||||
Float64 new_x = columns[0]->getFloat64(row_num);
|
||||
Float64 new_y = columns[1]->getFloat64(row_num);
|
||||
this->data(place).addX(new_x, arena);
|
||||
this->data(place).addY(new_y, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
@ -105,116 +76,22 @@ public:
|
||||
auto & a = this->data(place);
|
||||
auto & b = this->data(rhs);
|
||||
|
||||
if (b.size_x)
|
||||
for (size_t i = 0; i < b.size_x; ++i)
|
||||
insert(a, b.values[i], arena);
|
||||
a.merge(b, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
const auto & value = this->data(place).values;
|
||||
size_t size = this->data(place).size_x;
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
auto & value = this->data(place).values;
|
||||
|
||||
value.resize(size, arena);
|
||||
buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & value = this->data(place).values;
|
||||
size_t size = this->data(place).size_x;
|
||||
|
||||
// create a copy of values not to format data
|
||||
PODArrayWithStackMemory<std::pair<Float64, Float64>, 32> tmp_values;
|
||||
tmp_values.resize(size);
|
||||
for (size_t j = 0; j < size; ++ j)
|
||||
tmp_values[j] = static_cast<std::pair<Float64, Float64>>(value[j]);
|
||||
|
||||
// sort x_values
|
||||
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairFirst<std::greater>{});
|
||||
|
||||
for (size_t j = 0; j < size;)
|
||||
{
|
||||
// replace x_values with their ranks
|
||||
size_t rank = j + 1;
|
||||
size_t same = 1;
|
||||
size_t cur_sum = rank;
|
||||
size_t cur_start = j;
|
||||
|
||||
while (j < size - 1)
|
||||
{
|
||||
if (tmp_values[j].first == tmp_values[j + 1].first)
|
||||
{
|
||||
// rank of (j + 1)th number
|
||||
rank += 1;
|
||||
++same;
|
||||
cur_sum += rank;
|
||||
++j;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
// insert rank is calculated as average of ranks of equal values
|
||||
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
|
||||
for (size_t i = cur_start; i <= j; ++i)
|
||||
tmp_values[i].first = insert_rank;
|
||||
++j;
|
||||
}
|
||||
|
||||
// sort y_values
|
||||
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairSecond<std::greater>{});
|
||||
|
||||
// replace y_values with their ranks
|
||||
for (size_t j = 0; j < size;)
|
||||
{
|
||||
// replace x_values with their ranks
|
||||
size_t rank = j + 1;
|
||||
size_t same = 1;
|
||||
size_t cur_sum = rank;
|
||||
size_t cur_start = j;
|
||||
|
||||
while (j < size - 1)
|
||||
{
|
||||
if (tmp_values[j].second == tmp_values[j + 1].second)
|
||||
{
|
||||
// rank of (j + 1)th number
|
||||
rank += 1;
|
||||
++same;
|
||||
cur_sum += rank;
|
||||
++j;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// insert rank is calculated as average of ranks of equal values
|
||||
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
|
||||
for (size_t i = cur_start; i <= j; ++i)
|
||||
tmp_values[i].second = insert_rank;
|
||||
++j;
|
||||
}
|
||||
|
||||
// count d^2 sum
|
||||
Float64 answer = static_cast<Float64>(0);
|
||||
for (size_t j = 0; j < size; ++ j)
|
||||
answer += (tmp_values[j].first - tmp_values[j].second) * (tmp_values[j].first - tmp_values[j].second);
|
||||
|
||||
answer *= 6;
|
||||
answer /= size * (size * size - 1);
|
||||
answer = 1 - answer;
|
||||
auto answer = this->data(place).getResult();
|
||||
|
||||
auto & column = static_cast<ColumnVector<Float64> &>(to);
|
||||
column.getData().push_back(answer);
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/Moments.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
@ -30,310 +31,6 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int DECIMAL_OVERFLOW;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculating univariate central moments
|
||||
Levels:
|
||||
level 2 (pop & samp): var, stddev
|
||||
level 3: skewness
|
||||
level 4: kurtosis
|
||||
References:
|
||||
https://en.wikipedia.org/wiki/Moment_(mathematics)
|
||||
https://en.wikipedia.org/wiki/Skewness
|
||||
https://en.wikipedia.org/wiki/Kurtosis
|
||||
*/
|
||||
template <typename T, size_t _level>
|
||||
struct VarMoments
|
||||
{
|
||||
T m[_level + 1]{};
|
||||
|
||||
void add(T x)
|
||||
{
|
||||
++m[0];
|
||||
m[1] += x;
|
||||
m[2] += x * x;
|
||||
if constexpr (_level >= 3) m[3] += x * x * x;
|
||||
if constexpr (_level >= 4) m[4] += x * x * x * x;
|
||||
}
|
||||
|
||||
void merge(const VarMoments & rhs)
|
||||
{
|
||||
m[0] += rhs.m[0];
|
||||
m[1] += rhs.m[1];
|
||||
m[2] += rhs.m[2];
|
||||
if constexpr (_level >= 3) m[3] += rhs.m[3];
|
||||
if constexpr (_level >= 4) m[4] += rhs.m[4];
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T getPopulation() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
|
||||
/// Due to numerical errors, the result can be slightly less than zero,
|
||||
/// but it should be impossible. Trim to zero.
|
||||
|
||||
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / m[0]);
|
||||
}
|
||||
|
||||
T getSample() const
|
||||
{
|
||||
if (m[0] <= 1)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / (m[0] - 1));
|
||||
}
|
||||
|
||||
T getMoment3() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
// to avoid accuracy problem
|
||||
if (m[0] == 1)
|
||||
return 0;
|
||||
return (m[3]
|
||||
- (3 * m[2]
|
||||
- 2 * m[1] * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) / m[0];
|
||||
}
|
||||
|
||||
T getMoment4() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
// to avoid accuracy problem
|
||||
if (m[0] == 1)
|
||||
return 0;
|
||||
return (m[4]
|
||||
- (4 * m[3]
|
||||
- (6 * m[2]
|
||||
- 3 * m[1] * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) / m[0];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t _level>
|
||||
class VarMomentsDecimal
|
||||
{
|
||||
public:
|
||||
using NativeType = typename T::NativeType;
|
||||
|
||||
void add(NativeType x)
|
||||
{
|
||||
++m0;
|
||||
getM(1) += x;
|
||||
|
||||
NativeType tmp;
|
||||
bool overflow = common::mulOverflow(x, x, tmp) || common::addOverflow(getM(2), tmp, getM(2));
|
||||
if constexpr (_level >= 3)
|
||||
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(3), tmp, getM(3));
|
||||
if constexpr (_level >= 4)
|
||||
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(4), tmp, getM(4));
|
||||
|
||||
if (overflow)
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
}
|
||||
|
||||
void merge(const VarMomentsDecimal & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
getM(1) += rhs.getM(1);
|
||||
|
||||
bool overflow = common::addOverflow(getM(2), rhs.getM(2), getM(2));
|
||||
if constexpr (_level >= 3)
|
||||
overflow = overflow || common::addOverflow(getM(3), rhs.getM(3), getM(3));
|
||||
if constexpr (_level >= 4)
|
||||
overflow = overflow || common::addOverflow(getM(4), rhs.getM(4), getM(4));
|
||||
|
||||
if (overflow)
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
|
||||
void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
|
||||
|
||||
Float64 getPopulation(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / m0), scale));
|
||||
}
|
||||
|
||||
Float64 getSample(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
if (m0 == 1)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / (m0 - 1)), scale));
|
||||
}
|
||||
|
||||
Float64 getMoment3(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(2 * getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(3 * getM(2), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(getM(3), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
|
||||
}
|
||||
|
||||
Float64 getMoment4(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(3 * getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(6 * getM(2), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(4 * getM(3), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(getM(4), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
|
||||
}
|
||||
|
||||
private:
|
||||
UInt64 m0{};
|
||||
NativeType m[_level]{};
|
||||
|
||||
NativeType & getM(size_t i) { return m[i - 1]; }
|
||||
const NativeType & getM(size_t i) const { return m[i - 1]; }
|
||||
};
|
||||
|
||||
/**
|
||||
Calculating multivariate central moments
|
||||
Levels:
|
||||
level 2 (pop & samp): covar
|
||||
References:
|
||||
https://en.wikipedia.org/wiki/Moment_(mathematics)
|
||||
*/
|
||||
template <typename T>
|
||||
struct CovarMoments
|
||||
{
|
||||
T m0{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T xy{};
|
||||
|
||||
void add(T x, T y)
|
||||
{
|
||||
++m0;
|
||||
x1 += x;
|
||||
y1 += y;
|
||||
xy += x * y;
|
||||
}
|
||||
|
||||
void merge(const CovarMoments & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
xy += rhs.xy;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED getPopulation() const
|
||||
{
|
||||
return (xy - x1 * y1 / m0) / m0;
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED getSample() const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
return (xy - x1 * y1 / m0) / (m0 - 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct CorrMoments
|
||||
{
|
||||
T m0{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T xy{};
|
||||
T x2{};
|
||||
T y2{};
|
||||
|
||||
void add(T x, T y)
|
||||
{
|
||||
++m0;
|
||||
x1 += x;
|
||||
y1 += y;
|
||||
xy += x * y;
|
||||
x2 += x * x;
|
||||
y2 += y * y;
|
||||
}
|
||||
|
||||
void merge(const CorrMoments & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
xy += rhs.xy;
|
||||
x2 += rhs.x2;
|
||||
y2 += rhs.y2;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get() const
|
||||
{
|
||||
return (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1 * x1) * (m0 * y2 - y1 * y1));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
enum class StatisticsFunctionKind
|
||||
{
|
||||
varPop, varSamp,
|
||||
|
77
src/AggregateFunctions/AggregateFunctionStudentTTest.cpp
Normal file
77
src/AggregateFunctions/AggregateFunctionStudentTTest.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionTTest.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Moments.h>
|
||||
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Student T-test applies to two samples of independent random variables
|
||||
* that have normal distributions with equal (but unknown) variances.
|
||||
* It allows to answer the question whether means of the distributions differ.
|
||||
*
|
||||
* If variances are not considered equal, Welch T-test should be used instead.
|
||||
*/
|
||||
struct StudentTTestData : public TTestMoments<Float64>
|
||||
{
|
||||
static constexpr auto name = "studentTTest";
|
||||
|
||||
std::pair<Float64, Float64> getResult() const
|
||||
{
|
||||
Float64 mean_x = x1 / nx;
|
||||
Float64 mean_y = y1 / ny;
|
||||
|
||||
/// To estimate the variance we first estimate two means.
|
||||
/// That's why the number of degrees of freedom is the total number of values of both samples minus 2.
|
||||
Float64 degrees_of_freedom = nx + ny - 2;
|
||||
|
||||
/// Calculate s^2
|
||||
/// The original formulae looks like
|
||||
/// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2}
|
||||
/// But we made some mathematical transformations not to store original sequences.
|
||||
/// Also we dropped sqrt, because later it will be squared later.
|
||||
|
||||
Float64 all_x = x2 + nx * mean_x * mean_x - 2 * mean_x * x1;
|
||||
Float64 all_y = y2 + ny * mean_y * mean_y - 2 * mean_y * y1;
|
||||
|
||||
Float64 s2 = (all_x + all_y) / degrees_of_freedom;
|
||||
Float64 std_err2 = s2 * (1. / nx + 1. / ny);
|
||||
|
||||
/// t-statistic
|
||||
Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2);
|
||||
|
||||
return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)};
|
||||
}
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
|
||||
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
return std::make_shared<AggregateFunctionTTest<StudentTTestData>>(argument_types);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest);
|
||||
}
|
||||
|
||||
}
|
154
src/AggregateFunctions/AggregateFunctionTTest.h
Normal file
154
src/AggregateFunctions/AggregateFunctionTTest.h
Normal file
@ -0,0 +1,154 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Core/Types.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <cmath>
|
||||
|
||||
|
||||
/// This function is used in implementations of different T-Tests.
|
||||
/// On Darwin it's unavailable in math.h but actually exists in the library (can be linked successfully).
|
||||
#if defined(OS_DARWIN)
|
||||
extern "C"
|
||||
{
|
||||
double lgamma_r(double x, int * signgamp);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ReadBuffer;
|
||||
class WriteBuffer;
|
||||
|
||||
/**
|
||||
* If you have a cumulative distribution function F, then calculating the p-value for given statistic T is simply 1−F(T)
|
||||
* In our case p-value is two-sided, so we multiply it by 2.
|
||||
* So cumulative distribution function F equals to
|
||||
* \[ F(t) = \int_{-\infty}^{t} f(u)du = 1 - \frac{1}{2} I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \]
|
||||
* where \[ x(t) = \frac{v}{t^2 + v} \]: https://en.wikipedia.org/wiki/Student%27s_t-distribution#Cumulative_distribution_function
|
||||
*
|
||||
* so our resulting \[ p-value = I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \].
|
||||
*
|
||||
* And I is regularized incomplete beta function: https://en.wikipedia.org/wiki/Beta_function#Incomplete_beta_function
|
||||
*
|
||||
* Keepenig in mind that \[ \mathrm {B} (x;a,b)=\int _{0}^{x}r^{a-1}\,(1-r)^{b-1}\,\mathrm {d} r.\! \]
|
||||
* and
|
||||
* \[ \mathrm {B} (x,y)={\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}}=\
|
||||
* \exp(\ln {\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}})=\exp((\ln(\Gamma (x))+\ln(\Gamma (y))-\ln(\Gamma (x+y))) \]
|
||||
*
|
||||
* p-value can be calculated in terms of gamma functions and integrals more simply:
|
||||
* \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}r^{{\frac {\nu }{2}}-1}\,(1-r)^{-0.5}\,\mathrm {d} r}\
|
||||
* {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \]
|
||||
*
|
||||
* which simplifies to:
|
||||
*
|
||||
* \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}{\frac {r^{{\frac {\nu }{2}}-1}}{\sqrt {1-r}}}\,\mathrm {d} r}\
|
||||
* {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \]
|
||||
*
|
||||
* Read here for details https://rosettacode.org/wiki/Welch%27s_t-test#
|
||||
*
|
||||
* Both WelchTTest and StudentTTest have t-statistric with Student distribution but with different degrees of freedom.
|
||||
* So the procedure of computing p-value is the same.
|
||||
*/
|
||||
static inline Float64 getPValue(Float64 degrees_of_freedom, Float64 t_stat2)
|
||||
{
|
||||
Float64 numerator = integrateSimpson(0, degrees_of_freedom / (t_stat2 + degrees_of_freedom),
|
||||
[degrees_of_freedom](double x) { return std::pow(x, degrees_of_freedom / 2 - 1) / std::sqrt(1 - x); });
|
||||
|
||||
int unused;
|
||||
Float64 denominator = std::exp(
|
||||
lgamma_r(degrees_of_freedom / 2, &unused)
|
||||
+ lgamma_r(0.5, &unused)
|
||||
- lgamma_r(degrees_of_freedom / 2 + 0.5, &unused));
|
||||
|
||||
return std::min(1.0, std::max(0.0, numerator / denominator));
|
||||
}
|
||||
|
||||
|
||||
/// Returns tuple of (t-statistic, p-value)
|
||||
/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
|
||||
template <typename Data>
|
||||
class AggregateFunctionTTest :
|
||||
public IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>
|
||||
{
|
||||
public:
|
||||
AggregateFunctionTTest(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>({arguments}, {})
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return Data::name;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
DataTypes types
|
||||
{
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
};
|
||||
|
||||
Strings names
|
||||
{
|
||||
"t_statistic",
|
||||
"p_value"
|
||||
};
|
||||
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
Float64 value = columns[0]->getFloat64(row_num);
|
||||
UInt8 is_second = columns[1]->getUInt(row_num);
|
||||
|
||||
if (is_second)
|
||||
this->data(place).addY(value);
|
||||
else
|
||||
this->data(place).addX(value);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
||||
{
|
||||
this->data(place).read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto [t_statistic, p_value] = this->data(place).getResult();
|
||||
|
||||
/// Because p-value is a probability.
|
||||
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||
|
||||
column_stat.getData().push_back(t_statistic);
|
||||
column_value.getData().push_back(p_value);
|
||||
}
|
||||
};
|
||||
|
||||
};
|
@ -1,35 +0,0 @@
|
||||
#include "AggregateFunctionTimeSeriesGroupSum.h"
|
||||
#include "AggregateFunctionFactory.h"
|
||||
#include "FactoryHelpers.h"
|
||||
#include "Helpers.h"
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
namespace
|
||||
{
|
||||
template <bool rate>
|
||||
AggregateFunctionPtr createAggregateFunctionTimeSeriesGroupSum(const std::string & name, const DataTypes & arguments, const Array & params)
|
||||
{
|
||||
assertNoParameters(name, params);
|
||||
|
||||
if (arguments.size() < 3)
|
||||
throw Exception("Not enough event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
return std::make_shared<AggregateFunctionTimeSeriesGroupSum<rate>>(arguments);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("timeSeriesGroupSum", createAggregateFunctionTimeSeriesGroupSum<false>);
|
||||
factory.registerFunction("timeSeriesGroupRateSum", createAggregateFunctionTimeSeriesGroupSum<true>);
|
||||
}
|
||||
|
||||
}
|
@ -1,291 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <ext/range.h>
|
||||
#include "IAggregateFunction.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
template <bool rate>
|
||||
struct AggregateFunctionTimeSeriesGroupSumData
|
||||
{
|
||||
using DataPoint = std::pair<Int64, Float64>;
|
||||
struct Points
|
||||
{
|
||||
using Dps = std::queue<DataPoint>;
|
||||
Dps dps;
|
||||
void add(Int64 t, Float64 v)
|
||||
{
|
||||
dps.push(std::make_pair(t, v));
|
||||
if (dps.size() > 2)
|
||||
dps.pop();
|
||||
}
|
||||
Float64 getval(Int64 t)
|
||||
{
|
||||
Int64 t1, t2;
|
||||
Float64 v1, v2;
|
||||
if (rate)
|
||||
{
|
||||
if (dps.size() < 2)
|
||||
return 0;
|
||||
t1 = dps.back().first;
|
||||
t2 = dps.front().first;
|
||||
v1 = dps.back().second;
|
||||
v2 = dps.front().second;
|
||||
return (v1 - v2) / Float64(t1 - t2);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dps.size() == 1 && t == dps.front().first)
|
||||
return dps.front().second;
|
||||
t1 = dps.back().first;
|
||||
t2 = dps.front().first;
|
||||
v1 = dps.back().second;
|
||||
v2 = dps.front().second;
|
||||
return v2 + ((v1 - v2) * Float64(t - t2)) / Float64(t1 - t2);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::map<UInt64, Points> Series;
|
||||
typedef PODArrayWithStackMemory<DataPoint, 128> AggSeries;
|
||||
Series ss;
|
||||
AggSeries result;
|
||||
|
||||
void add(UInt64 uid, Int64 t, Float64 v)
|
||||
{ //suppose t is coming asc
|
||||
typename Series::iterator it_ss;
|
||||
if (ss.count(uid) == 0)
|
||||
{ //time series not exist, insert new one
|
||||
Points tmp;
|
||||
tmp.add(t, v);
|
||||
ss.emplace(uid, tmp);
|
||||
it_ss = ss.find(uid);
|
||||
}
|
||||
else
|
||||
{
|
||||
it_ss = ss.find(uid);
|
||||
it_ss->second.add(t, v);
|
||||
}
|
||||
if (result.size() > 0 && t < result.back().first)
|
||||
throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc.", ErrorCodes::LOGICAL_ERROR};
|
||||
if (result.size() > 0 && t == result.back().first)
|
||||
{
|
||||
//do not add new point
|
||||
if (rate)
|
||||
result.back().second += it_ss->second.getval(t);
|
||||
else
|
||||
result.back().second += v;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rate)
|
||||
result.emplace_back(std::make_pair(t, it_ss->second.getval(t)));
|
||||
else
|
||||
result.emplace_back(std::make_pair(t, v));
|
||||
}
|
||||
ssize_t i = result.size() - 1;
|
||||
//reverse find out the index of timestamp that more than previous timestamp of t
|
||||
while (result[i].first > it_ss->second.dps.front().first && i >= 0)
|
||||
i--;
|
||||
|
||||
i++;
|
||||
while (i < ssize_t(result.size()) - 1)
|
||||
{
|
||||
result[i].second += it_ss->second.getval(result[i].first);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionTimeSeriesGroupSumData & other)
|
||||
{
|
||||
//if ts has overlap, then aggregate two series by interpolation;
|
||||
AggSeries tmp;
|
||||
tmp.reserve(other.result.size() + result.size());
|
||||
size_t i = 0, j = 0;
|
||||
Int64 t1, t2;
|
||||
Float64 v1, v2;
|
||||
while (i < result.size() && j < other.result.size())
|
||||
{
|
||||
if (result[i].first < other.result[j].first)
|
||||
{
|
||||
if (j == 0)
|
||||
{
|
||||
tmp.emplace_back(result[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
t1 = other.result[j].first;
|
||||
t2 = other.result[j - 1].first;
|
||||
v1 = other.result[j].second;
|
||||
v2 = other.result[j - 1].second;
|
||||
Float64 value = result[i].second + v2 + (v1 - v2) * (Float64(result[i].first - t2)) / Float64(t1 - t2);
|
||||
tmp.emplace_back(std::make_pair(result[i].first, value));
|
||||
}
|
||||
i++;
|
||||
}
|
||||
else if (result[i].first > other.result[j].first)
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
tmp.emplace_back(other.result[j]);
|
||||
}
|
||||
else
|
||||
{
|
||||
t1 = result[i].first;
|
||||
t2 = result[i - 1].first;
|
||||
v1 = result[i].second;
|
||||
v2 = result[i - 1].second;
|
||||
Float64 value = other.result[j].second + v2 + (v1 - v2) * (Float64(other.result[j].first - t2)) / Float64(t1 - t2);
|
||||
tmp.emplace_back(std::make_pair(other.result[j].first, value));
|
||||
}
|
||||
j++;
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp.emplace_back(std::make_pair(result[i].first, result[i].second + other.result[j].second));
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
while (i < result.size())
|
||||
{
|
||||
tmp.emplace_back(result[i]);
|
||||
i++;
|
||||
}
|
||||
while (j < other.result.size())
|
||||
{
|
||||
tmp.push_back(other.result[j]);
|
||||
j++;
|
||||
}
|
||||
swap(result, tmp);
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
size_t size = result.size();
|
||||
writeVarUInt(size, buf);
|
||||
if (size > 0)
|
||||
{
|
||||
buf.write(reinterpret_cast<const char *>(result.data()), size * sizeof(result[0]));
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
result.resize(size);
|
||||
if (size > 0)
|
||||
{
|
||||
buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
|
||||
}
|
||||
}
|
||||
};
|
||||
template <bool rate>
|
||||
class AggregateFunctionTimeSeriesGroupSum final
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionTimeSeriesGroupSumData<rate>, AggregateFunctionTimeSeriesGroupSum<rate>>
|
||||
{
|
||||
private:
|
||||
public:
|
||||
String getName() const override { return rate ? "timeSeriesGroupRateSum" : "timeSeriesGroupSum"; }
|
||||
|
||||
AggregateFunctionTimeSeriesGroupSum(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionTimeSeriesGroupSumData<rate>, AggregateFunctionTimeSeriesGroupSum<rate>>(arguments, {})
|
||||
{
|
||||
if (!WhichDataType(arguments[0].get()).isUInt64())
|
||||
throw Exception{"Illegal type " + arguments[0].get()->getName() + " of argument 1 of aggregate function " + getName()
|
||||
+ ", must be UInt64",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (!WhichDataType(arguments[1].get()).isInt64())
|
||||
throw Exception{"Illegal type " + arguments[1].get()->getName() + " of argument 2 of aggregate function " + getName()
|
||||
+ ", must be Int64",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (!WhichDataType(arguments[2].get()).isFloat64())
|
||||
throw Exception{"Illegal type " + arguments[2].get()->getName() + " of argument 3 of aggregate function " + getName()
|
||||
+ ", must be Float64",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
auto datatypes = std::vector<DataTypePtr>();
|
||||
datatypes.push_back(std::make_shared<DataTypeInt64>());
|
||||
datatypes.push_back(std::make_shared<DataTypeFloat64>());
|
||||
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(datatypes));
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
||||
{
|
||||
auto uid = assert_cast<const ColumnVector<UInt64> *>(columns[0])->getData()[row_num];
|
||||
auto ts = assert_cast<const ColumnVector<Int64> *>(columns[1])->getData()[row_num];
|
||||
auto val = assert_cast<const ColumnVector<Float64> *>(columns[2])->getData()[row_num];
|
||||
if (uid && ts && val)
|
||||
{
|
||||
this->data(place).add(uid, ts, val);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); }
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).serialize(buf); }
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); }
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & value = this->data(place).result;
|
||||
size_t size = value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
size_t old_size = offsets_to.back();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnInt64::Container & ts_to
|
||||
= assert_cast<ColumnInt64 &>(assert_cast<ColumnTuple &>(arr_to.getData()).getColumn(0)).getData();
|
||||
typename ColumnFloat64::Container & val_to
|
||||
= assert_cast<ColumnFloat64 &>(assert_cast<ColumnTuple &>(arr_to.getData()).getColumn(1)).getData();
|
||||
ts_to.reserve(old_size + size);
|
||||
val_to.reserve(old_size + size);
|
||||
size_t i = 0;
|
||||
while (i < this->data(place).result.size())
|
||||
{
|
||||
ts_to.push_back(this->data(place).result[i].first);
|
||||
val_to.push_back(this->data(place).result[i].second);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
};
|
||||
}
|
74
src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
Normal file
74
src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionTTest.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Moments.h>
|
||||
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct WelchTTestData : public TTestMoments<Float64>
|
||||
{
|
||||
static constexpr auto name = "welchTTest";
|
||||
|
||||
std::pair<Float64, Float64> getResult() const
|
||||
{
|
||||
Float64 mean_x = x1 / nx;
|
||||
Float64 mean_y = y1 / ny;
|
||||
|
||||
/// s_x^2, s_y^2
|
||||
|
||||
/// The original formulae looks like \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2}
|
||||
/// But we made some mathematical transformations not to store original sequences.
|
||||
/// Also we dropped sqrt, because later it will be squared later.
|
||||
|
||||
Float64 sx2 = (x2 + nx * mean_x * mean_x - 2 * mean_x * x1) / (nx - 1);
|
||||
Float64 sy2 = (y2 + ny * mean_y * mean_y - 2 * mean_y * y1) / (ny - 1);
|
||||
|
||||
/// t-statistic
|
||||
Float64 t_stat = (mean_x - mean_y) / sqrt(sx2 / nx + sy2 / ny);
|
||||
|
||||
/// degrees of freedom
|
||||
|
||||
Float64 numerator_sqrt = sx2 / nx + sy2 / ny;
|
||||
Float64 numerator = numerator_sqrt * numerator_sqrt;
|
||||
|
||||
Float64 denominator_x = sx2 * sx2 / (nx * nx * (nx - 1));
|
||||
Float64 denominator_y = sy2 * sy2 / (ny * ny * (ny - 1));
|
||||
|
||||
Float64 degrees_of_freedom = numerator / (denominator_x + denominator_y);
|
||||
|
||||
return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)};
|
||||
}
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
|
||||
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
return std::make_shared<AggregateFunctionTTest<WelchTTestData>>(argument_types);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest);
|
||||
}
|
||||
|
||||
}
|
@ -15,6 +15,7 @@
|
||||
M(Float32) \
|
||||
M(Float64)
|
||||
|
||||
// No UInt128 here because of the name conflict
|
||||
#define FOR_NUMERIC_TYPES(M) \
|
||||
M(UInt8) \
|
||||
M(UInt16) \
|
||||
|
361
src/AggregateFunctions/Moments.h
Normal file
361
src/AggregateFunctions/Moments.h
Normal file
@ -0,0 +1,361 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int DECIMAL_OVERFLOW;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculating univariate central moments
|
||||
Levels:
|
||||
level 2 (pop & samp): var, stddev
|
||||
level 3: skewness
|
||||
level 4: kurtosis
|
||||
References:
|
||||
https://en.wikipedia.org/wiki/Moment_(mathematics)
|
||||
https://en.wikipedia.org/wiki/Skewness
|
||||
https://en.wikipedia.org/wiki/Kurtosis
|
||||
*/
|
||||
template <typename T, size_t _level>
|
||||
struct VarMoments
|
||||
{
|
||||
T m[_level + 1]{};
|
||||
|
||||
void add(T x)
|
||||
{
|
||||
++m[0];
|
||||
m[1] += x;
|
||||
m[2] += x * x;
|
||||
if constexpr (_level >= 3) m[3] += x * x * x;
|
||||
if constexpr (_level >= 4) m[4] += x * x * x * x;
|
||||
}
|
||||
|
||||
void merge(const VarMoments & rhs)
|
||||
{
|
||||
m[0] += rhs.m[0];
|
||||
m[1] += rhs.m[1];
|
||||
m[2] += rhs.m[2];
|
||||
if constexpr (_level >= 3) m[3] += rhs.m[3];
|
||||
if constexpr (_level >= 4) m[4] += rhs.m[4];
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T getPopulation() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
|
||||
/// Due to numerical errors, the result can be slightly less than zero,
|
||||
/// but it should be impossible. Trim to zero.
|
||||
|
||||
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / m[0]);
|
||||
}
|
||||
|
||||
T getSample() const
|
||||
{
|
||||
if (m[0] <= 1)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / (m[0] - 1));
|
||||
}
|
||||
|
||||
T getMoment3() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
// to avoid accuracy problem
|
||||
if (m[0] == 1)
|
||||
return 0;
|
||||
/// \[ \frac{1}{m_0} (m_3 - (3 * m_2 - \frac{2 * {m_1}^2}{m_0}) * \frac{m_1}{m_0});\]
|
||||
return (m[3]
|
||||
- (3 * m[2]
|
||||
- 2 * m[1] * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) / m[0];
|
||||
}
|
||||
|
||||
T getMoment4() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
// to avoid accuracy problem
|
||||
if (m[0] == 1)
|
||||
return 0;
|
||||
/// \[ \frac{1}{m_0}(m_4 - (4 * m_3 - (6 * m_2 - \frac{3 * m_1^2}{m_0} ) \frac{m_1}{m_0})\frac{m_1}{m_0})\]
|
||||
return (m[4]
|
||||
- (4 * m[3]
|
||||
- (6 * m[2]
|
||||
- 3 * m[1] * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) / m[0];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t _level>
|
||||
class VarMomentsDecimal
|
||||
{
|
||||
public:
|
||||
using NativeType = typename T::NativeType;
|
||||
|
||||
void add(NativeType x)
|
||||
{
|
||||
++m0;
|
||||
getM(1) += x;
|
||||
|
||||
NativeType tmp;
|
||||
bool overflow = common::mulOverflow(x, x, tmp) || common::addOverflow(getM(2), tmp, getM(2));
|
||||
if constexpr (_level >= 3)
|
||||
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(3), tmp, getM(3));
|
||||
if constexpr (_level >= 4)
|
||||
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(4), tmp, getM(4));
|
||||
|
||||
if (overflow)
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
}
|
||||
|
||||
void merge(const VarMomentsDecimal & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
getM(1) += rhs.getM(1);
|
||||
|
||||
bool overflow = common::addOverflow(getM(2), rhs.getM(2), getM(2));
|
||||
if constexpr (_level >= 3)
|
||||
overflow = overflow || common::addOverflow(getM(3), rhs.getM(3), getM(3));
|
||||
if constexpr (_level >= 4)
|
||||
overflow = overflow || common::addOverflow(getM(4), rhs.getM(4), getM(4));
|
||||
|
||||
if (overflow)
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
|
||||
void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
|
||||
|
||||
Float64 getPopulation(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / m0), scale));
|
||||
}
|
||||
|
||||
Float64 getSample(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
if (m0 == 1)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / (m0 - 1)), scale));
|
||||
}
|
||||
|
||||
Float64 getMoment3(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(2 * getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(3 * getM(2), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(getM(3), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
|
||||
}
|
||||
|
||||
Float64 getMoment4(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(3 * getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(6 * getM(2), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(4 * getM(3), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(getM(4), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
|
||||
}
|
||||
|
||||
private:
|
||||
UInt64 m0{};
|
||||
NativeType m[_level]{};
|
||||
|
||||
NativeType & getM(size_t i) { return m[i - 1]; }
|
||||
const NativeType & getM(size_t i) const { return m[i - 1]; }
|
||||
};
|
||||
|
||||
/**
|
||||
Calculating multivariate central moments
|
||||
Levels:
|
||||
level 2 (pop & samp): covar
|
||||
References:
|
||||
https://en.wikipedia.org/wiki/Moment_(mathematics)
|
||||
*/
|
||||
template <typename T>
|
||||
struct CovarMoments
|
||||
{
|
||||
T m0{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T xy{};
|
||||
|
||||
void add(T x, T y)
|
||||
{
|
||||
++m0;
|
||||
x1 += x;
|
||||
y1 += y;
|
||||
xy += x * y;
|
||||
}
|
||||
|
||||
void merge(const CovarMoments & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
xy += rhs.xy;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED getPopulation() const
|
||||
{
|
||||
return (xy - x1 * y1 / m0) / m0;
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED getSample() const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
return (xy - x1 * y1 / m0) / (m0 - 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct CorrMoments
|
||||
{
|
||||
T m0{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T xy{};
|
||||
T x2{};
|
||||
T y2{};
|
||||
|
||||
void add(T x, T y)
|
||||
{
|
||||
++m0;
|
||||
x1 += x;
|
||||
y1 += y;
|
||||
xy += x * y;
|
||||
x2 += x * x;
|
||||
y2 += y * y;
|
||||
}
|
||||
|
||||
void merge(const CorrMoments & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
xy += rhs.xy;
|
||||
x2 += rhs.x2;
|
||||
y2 += rhs.y2;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get() const
|
||||
{
|
||||
return (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1 * x1) * (m0 * y2 - y1 * y1));
|
||||
}
|
||||
};
|
||||
|
||||
/// Data for calculation of Student and Welch T-Tests.
|
||||
template <typename T>
|
||||
struct TTestMoments
|
||||
{
|
||||
T nx{};
|
||||
T ny{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T x2{};
|
||||
T y2{};
|
||||
|
||||
void addX(T value)
|
||||
{
|
||||
++nx;
|
||||
x1 += value;
|
||||
x2 += value * value;
|
||||
}
|
||||
|
||||
void addY(T value)
|
||||
{
|
||||
++ny;
|
||||
y1 += value;
|
||||
y2 += value * value;
|
||||
}
|
||||
|
||||
void merge(const TTestMoments & rhs)
|
||||
{
|
||||
nx += rhs.nx;
|
||||
ny += rhs.ny;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
x2 += rhs.x2;
|
||||
y2 += rhs.y2;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -114,7 +114,7 @@ class QuantileTDigest
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
|
||||
using Transform = RadixSortFloatTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
using Allocator = RadixSortAllocator;
|
||||
|
||||
/// The function to get the key from an array element.
|
||||
static Key & extractKey(Element & elem) { return elem.mean; }
|
||||
|
114
src/AggregateFunctions/StatCommon.h
Normal file
114
src/AggregateFunctions/StatCommon.h
Normal file
@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename F>
|
||||
static Float64 integrateSimpson(Float64 a, Float64 b, F && func)
|
||||
{
|
||||
const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b) - std::round(a)));
|
||||
const long double h = (b - a) / iterations;
|
||||
Float64 sum_odds = 0.0;
|
||||
for (size_t i = 1; i < iterations; i += 2)
|
||||
sum_odds += func(a + i * h);
|
||||
Float64 sum_evens = 0.0;
|
||||
for (size_t i = 2; i < iterations; i += 2)
|
||||
sum_evens += func(a + i * h);
|
||||
return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3;
|
||||
}
|
||||
|
||||
/// Because ranks are adjusted, we have to store each of them in Float type.
|
||||
using RanksArray = std::vector<Float64>;
|
||||
|
||||
template <typename Values>
|
||||
std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & values)
|
||||
{
|
||||
const size_t size = values.size();
|
||||
/// Save initial positions, than sort indices according to the values.
|
||||
std::vector<size_t> indexes(size);
|
||||
std::iota(indexes.begin(), indexes.end(), 0);
|
||||
std::sort(indexes.begin(), indexes.end(),
|
||||
[&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
|
||||
|
||||
size_t left = 0;
|
||||
Float64 tie_numenator = 0;
|
||||
RanksArray out(size);
|
||||
while (left < size)
|
||||
{
|
||||
size_t right = left;
|
||||
while (right < size && values[indexes[left]] == values[indexes[right]])
|
||||
++right;
|
||||
auto adjusted = (left + right + 1.) / 2.;
|
||||
auto count_equal = right - left;
|
||||
tie_numenator += std::pow(count_equal, 3) - count_equal;
|
||||
for (size_t iter = left; iter < right; ++iter)
|
||||
out[indexes[iter]] = adjusted;
|
||||
left = right;
|
||||
}
|
||||
return {out, 1 - (tie_numenator / (std::pow(size, 3) - size))};
|
||||
}
|
||||
|
||||
|
||||
template <typename X, typename Y>
|
||||
struct StatisticalSample
|
||||
{
|
||||
using AllocatorXSample = MixedAlignedArenaAllocator<alignof(X), 4096>;
|
||||
using SampleX = PODArray<X, 32, AllocatorXSample>;
|
||||
|
||||
using AllocatorYSample = MixedAlignedArenaAllocator<alignof(Y), 4096>;
|
||||
using SampleY = PODArray<Y, 32, AllocatorYSample>;
|
||||
|
||||
SampleX x{};
|
||||
SampleY y{};
|
||||
size_t size_x{0};
|
||||
size_t size_y{0};
|
||||
|
||||
void addX(X value, Arena * arena)
|
||||
{
|
||||
++size_x;
|
||||
x.push_back(value, arena);
|
||||
}
|
||||
|
||||
void addY(Y value, Arena * arena)
|
||||
{
|
||||
++size_y;
|
||||
y.push_back(value, arena);
|
||||
}
|
||||
|
||||
void merge(const StatisticalSample & rhs, Arena * arena)
|
||||
{
|
||||
size_x += rhs.size_x;
|
||||
size_y += rhs.size_y;
|
||||
x.insert(rhs.x.begin(), rhs.x.end(), arena);
|
||||
y.insert(rhs.y.begin(), rhs.y.end(), arena);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writeVarUInt(size_x, buf);
|
||||
writeVarUInt(size_y, buf);
|
||||
buf.write(reinterpret_cast<const char *>(x.data()), size_x * sizeof(x[0]));
|
||||
buf.write(reinterpret_cast<const char *>(y.data()), size_y * sizeof(y[0]));
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf, Arena * arena)
|
||||
{
|
||||
readVarUInt(size_x, buf);
|
||||
readVarUInt(size_y, buf);
|
||||
x.resize(size_x, arena);
|
||||
y.resize(size_y, arena);
|
||||
buf.read(reinterpret_cast<char *>(x.data()), size_x * sizeof(x[0]));
|
||||
buf.read(reinterpret_cast<char *>(y.data()), size_y * sizeof(y[0]));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -32,16 +32,16 @@ void registerAggregateFunctionsBitmap(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionHistogram(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRetention(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionMLMethod(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionEntropy(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionSimpleLinearRegression(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionMoving(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionCategoricalIV(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionAggThrow(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
|
||||
|
||||
class AggregateFunctionCombinatorFactory;
|
||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
||||
@ -86,7 +86,6 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionsMaxIntersections(factory);
|
||||
registerAggregateFunctionHistogram(factory);
|
||||
registerAggregateFunctionRetention(factory);
|
||||
registerAggregateFunctionTimeSeriesGroupSum(factory);
|
||||
registerAggregateFunctionMLMethod(factory);
|
||||
registerAggregateFunctionEntropy(factory);
|
||||
registerAggregateFunctionSimpleLinearRegression(factory);
|
||||
@ -94,6 +93,9 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionCategoricalIV(factory);
|
||||
registerAggregateFunctionAggThrow(factory);
|
||||
registerAggregateFunctionRankCorrelation(factory);
|
||||
registerAggregateFunctionMannWhitney(factory);
|
||||
registerAggregateFunctionWelchTTest(factory);
|
||||
registerAggregateFunctionStudentTTest(factory);
|
||||
}
|
||||
|
||||
{
|
||||
|
27
src/AggregateFunctions/tests/gtest_ranks.cpp
Normal file
27
src/AggregateFunctions/tests/gtest_ranks.cpp
Normal file
@ -0,0 +1,27 @@
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <iostream>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
|
||||
TEST(Ranks, Simple)
|
||||
{
|
||||
using namespace DB;
|
||||
RanksArray sample = {310, 195, 480, 530, 155, 530, 245, 385, 450, 450, 465, 545, 170, 180, 125, 180, 230, 170, 75, 430, 480, 495, 295};
|
||||
|
||||
RanksArray ranks;
|
||||
Float64 t = 0;
|
||||
std::tie(ranks, t) = computeRanksAndTieCorrection(sample);
|
||||
|
||||
RanksArray expected{12.0, 8.0, 18.5, 21.5, 3.0, 21.5, 10.0, 13.0, 15.5, 15.5, 17.0, 23.0, 4.5, 6.5, 2.0, 6.5, 9.0, 4.5, 1.0, 14.0, 18.5, 20.0, 11.0};
|
||||
|
||||
ASSERT_EQ(ranks.size(), expected.size());
|
||||
|
||||
for (size_t i = 0; i < ranks.size(); ++i)
|
||||
ASSERT_DOUBLE_EQ(ranks[i], expected[i]);
|
||||
|
||||
ASSERT_DOUBLE_EQ(t, 0.9975296442687747);
|
||||
}
|
@ -29,6 +29,7 @@ SRCS(
|
||||
AggregateFunctionHistogram.cpp
|
||||
AggregateFunctionIf.cpp
|
||||
AggregateFunctionMLMethod.cpp
|
||||
AggregateFunctionMannWhitney.cpp
|
||||
AggregateFunctionMaxIntersections.cpp
|
||||
AggregateFunctionMerge.cpp
|
||||
AggregateFunctionMinMaxAny.cpp
|
||||
@ -43,13 +44,14 @@ SRCS(
|
||||
AggregateFunctionState.cpp
|
||||
AggregateFunctionStatistics.cpp
|
||||
AggregateFunctionStatisticsSimple.cpp
|
||||
AggregateFunctionStudentTTest.cpp
|
||||
AggregateFunctionSum.cpp
|
||||
AggregateFunctionSumMap.cpp
|
||||
AggregateFunctionTimeSeriesGroupSum.cpp
|
||||
AggregateFunctionTopK.cpp
|
||||
AggregateFunctionUniq.cpp
|
||||
AggregateFunctionUniqCombined.cpp
|
||||
AggregateFunctionUniqUpTo.cpp
|
||||
AggregateFunctionWelchTTest.cpp
|
||||
AggregateFunctionWindowFunnel.cpp
|
||||
UniqCombinedBiasData.cpp
|
||||
UniqVariadicHash.cpp
|
||||
|
@ -223,7 +223,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW
|
||||
Dictionaries/FlatDictionary.cpp
|
||||
Dictionaries/HashedDictionary.cpp
|
||||
Dictionaries/CacheDictionary.cpp
|
||||
Dictionaries/TrieDictionary.cpp
|
||||
Dictionaries/IPAddressDictionary.cpp
|
||||
Dictionaries/RangeHashedDictionary.cpp
|
||||
Dictionaries/ComplexKeyHashedDictionary.cpp
|
||||
Dictionaries/ComplexKeyCacheDictionary.cpp
|
||||
@ -305,7 +305,6 @@ endif()
|
||||
|
||||
dbms_target_link_libraries (
|
||||
PRIVATE
|
||||
${BTRIE_LIBRARIES}
|
||||
boost::filesystem
|
||||
boost::program_options
|
||||
clickhouse_common_config
|
||||
|
@ -82,7 +82,7 @@ public:
|
||||
|
||||
bool isNumeric() const override { return false; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
bool isFixedAndContiguous() const override { return true; }
|
||||
bool isFixedAndContiguous() const final { return true; }
|
||||
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
|
||||
|
||||
size_t size() const override { return data.size(); }
|
||||
|
@ -1,30 +1,35 @@
|
||||
#include "IPv6ToBinary.h"
|
||||
#include <Poco/Net/IPAddress.h>
|
||||
#include <Poco/ByteOrder.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
|
||||
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res)
|
||||
{
|
||||
std::array<char, 16> res;
|
||||
|
||||
if (Poco::Net::IPAddress::IPv6 == address.family())
|
||||
{
|
||||
memcpy(res.data(), address.addr(), 16);
|
||||
memcpy(res, address.addr(), 16);
|
||||
}
|
||||
else if (Poco::Net::IPAddress::IPv4 == address.family())
|
||||
{
|
||||
/// Convert to IPv6-mapped address.
|
||||
memset(res.data(), 0, 10);
|
||||
memset(res, 0, 10);
|
||||
res[10] = '\xFF';
|
||||
res[11] = '\xFF';
|
||||
memcpy(&res[12], address.addr(), 4);
|
||||
}
|
||||
else
|
||||
memset(res.data(), 0, 16);
|
||||
memset(res, 0, 16);
|
||||
}
|
||||
|
||||
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
|
||||
{
|
||||
std::array<char, 16> res;
|
||||
IPv6ToRawBinary(address, res.data());
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -1,11 +1,16 @@
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include <common/types.h>
|
||||
|
||||
namespace Poco { namespace Net { class IPAddress; }}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Convert IP address to raw binary with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
|
||||
/// Saves result into the first 16 bytes of `res`.
|
||||
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res);
|
||||
|
||||
/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
|
||||
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address);
|
||||
|
||||
|
@ -35,16 +35,16 @@
|
||||
|
||||
/** Used as a template parameter. See below.
|
||||
*/
|
||||
struct RadixSortMallocAllocator
|
||||
struct RadixSortAllocator
|
||||
{
|
||||
void * allocate(size_t size)
|
||||
{
|
||||
return malloc(size);
|
||||
return ::operator new(size);
|
||||
}
|
||||
|
||||
void deallocate(void * ptr, size_t /*size*/)
|
||||
void deallocate(void * ptr, size_t size)
|
||||
{
|
||||
return free(ptr);
|
||||
::operator delete(ptr, size);
|
||||
}
|
||||
};
|
||||
|
||||
@ -100,7 +100,7 @@ struct RadixSortFloatTraits
|
||||
/// An object with the functions allocate and deallocate.
|
||||
/// Can be used, for example, to allocate memory for a temporary array on the stack.
|
||||
/// To do this, the allocator itself is created on the stack.
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
using Allocator = RadixSortAllocator;
|
||||
|
||||
/// The function to get the key from an array element.
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
@ -139,7 +139,7 @@ struct RadixSortUIntTraits
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
|
||||
using Transform = RadixSortIdentityTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
using Allocator = RadixSortAllocator;
|
||||
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
static Result & extractResult(Element & elem) { return elem; }
|
||||
@ -173,7 +173,7 @@ struct RadixSortIntTraits
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
|
||||
using Transform = RadixSortSignedTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
using Allocator = RadixSortAllocator;
|
||||
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
static Result & extractResult(Element & elem) { return elem; }
|
||||
|
@ -11,7 +11,6 @@ add_library(clickhouse_dictionaries ${clickhouse_dictionaries_sources})
|
||||
|
||||
target_link_libraries(clickhouse_dictionaries
|
||||
PRIVATE
|
||||
${BTRIE_LIBRARIES}
|
||||
clickhouse_common_io
|
||||
dbms
|
||||
Poco::Data
|
||||
|
1190
src/Dictionaries/IPAddressDictionary.cpp
Normal file
1190
src/Dictionaries/IPAddressDictionary.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -7,6 +7,9 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Poco/Net/IPAddress.h>
|
||||
#include <common/StringRef.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <ext/range.h>
|
||||
@ -14,23 +17,18 @@
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
struct btrie_s;
|
||||
typedef struct btrie_s btrie_t;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class TrieDictionary final : public IDictionaryBase
|
||||
class IPAddressDictionary final : public IDictionaryBase
|
||||
{
|
||||
public:
|
||||
TrieDictionary(
|
||||
IPAddressDictionary(
|
||||
const StorageID & dict_id_,
|
||||
const DictionaryStructure & dict_struct_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
const DictionaryLifetime dict_lifetime_,
|
||||
bool require_nonempty_);
|
||||
|
||||
~TrieDictionary() override;
|
||||
|
||||
std::string getKeyDescription() const { return key_description; }
|
||||
|
||||
std::string getTypeName() const override { return "Trie"; }
|
||||
@ -47,7 +45,7 @@ public:
|
||||
|
||||
std::shared_ptr<const IExternalLoadable> clone() const override
|
||||
{
|
||||
return std::make_shared<TrieDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
|
||||
return std::make_shared<IPAddressDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
|
||||
}
|
||||
|
||||
const IDictionarySource * getSource() const override { return source_ptr.get(); }
|
||||
@ -150,9 +148,17 @@ public:
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
|
||||
template <typename Value>
|
||||
using ContainerType = std::vector<Value>;
|
||||
|
||||
using IPAddress = Poco::Net::IPAddress;
|
||||
|
||||
using IPv4Container = PODArray<UInt32>;
|
||||
using IPv6Container = PaddedPODArray<UInt8>;
|
||||
using IPMaskContainer = PODArray<UInt8>;
|
||||
using RowIdxConstIter = ContainerType<size_t>::const_iterator;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
@ -207,16 +213,18 @@ private:
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsByTwoKeyColumnsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void
|
||||
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
|
||||
template <typename T>
|
||||
bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
|
||||
void setAttributeValueImpl(Attribute & attribute, const T value);
|
||||
|
||||
bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
|
||||
void setAttributeValue(Attribute & attribute, const Field & value);
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
@ -224,6 +232,14 @@ private:
|
||||
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
Columns getKeyColumns() const;
|
||||
RowIdxConstIter ipNotFound() const;
|
||||
RowIdxConstIter tryLookupIPv4(UInt32 addr, uint8_t * buf) const;
|
||||
RowIdxConstIter tryLookupIPv6(const uint8_t * addr) const;
|
||||
|
||||
template <typename IPContainerType, typename IPValueType>
|
||||
RowIdxConstIter lookupIP(IPValueType target) const;
|
||||
|
||||
static const uint8_t * getIPv6FromOffset(const IPv6Container & ipv6_col, size_t i);
|
||||
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
@ -231,8 +247,22 @@ private:
|
||||
const bool require_nonempty;
|
||||
const std::string key_description{dict_struct.getKeyDescription()};
|
||||
|
||||
/// Contains sorted IP subnetworks. If some addresses equals, subnet with lower mask is placed first.
|
||||
std::variant<IPv4Container, IPv6Container> ip_column;
|
||||
|
||||
/// Prefix lengths corresponding to ip_column.
|
||||
IPMaskContainer mask_column;
|
||||
|
||||
/** Contains links to parent subnetworks in ip_column.
|
||||
* Array holds such ip_column's (and mask_column's) indices that
|
||||
* - if parent_subnet[i] < i, then ip_column[i] is subnetwork of ip_column[parent_subnet[i]],
|
||||
* - if parent_subnet[i] == i, then ip_column[i] doesn't belong to any other subnet.
|
||||
*/
|
||||
ContainerType<size_t> parent_subnet;
|
||||
|
||||
/// Contains corresponding indices in attributes array.
|
||||
ContainerType<size_t> row_idx;
|
||||
|
||||
btrie_t * trie = nullptr;
|
||||
std::map<std::string, size_t> attribute_index_by_name;
|
||||
std::vector<Attribute> attributes;
|
||||
|
@ -1,779 +0,0 @@
|
||||
#include "TrieDictionary.h"
|
||||
#include <stack>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/WriteIntText.h>
|
||||
#include <Poco/ByteOrder.h>
|
||||
#include <Poco/Net/IPAddress.h>
|
||||
#include <Common/formatIPv6.h>
|
||||
#include <common/itoa.h>
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wold-style-cast"
|
||||
#pragma clang diagnostic ignored "-Wnewline-eof"
|
||||
#endif
|
||||
|
||||
#include <btrie.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int DICTIONARY_IS_EMPTY;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
static void validateKeyTypes(const DataTypes & key_types)
|
||||
{
|
||||
if (key_types.size() != 1)
|
||||
throw Exception{"Expected a single IP address", ErrorCodes::TYPE_MISMATCH};
|
||||
|
||||
const auto & actual_type = key_types[0]->getName();
|
||||
|
||||
if (actual_type != "UInt32" && actual_type != "FixedString(16)")
|
||||
throw Exception{"Key does not match, expected either UInt32 or FixedString(16)", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
|
||||
|
||||
TrieDictionary::TrieDictionary(
|
||||
const StorageID & dict_id_,
|
||||
const DictionaryStructure & dict_struct_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
const DictionaryLifetime dict_lifetime_,
|
||||
bool require_nonempty_)
|
||||
: IDictionaryBase(dict_id_)
|
||||
, dict_struct(dict_struct_)
|
||||
, source_ptr{std::move(source_ptr_)}
|
||||
, dict_lifetime(dict_lifetime_)
|
||||
, require_nonempty(require_nonempty_)
|
||||
, logger(&Poco::Logger::get("TrieDictionary"))
|
||||
{
|
||||
createAttributes();
|
||||
trie = btrie_create();
|
||||
loadData();
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
TrieDictionary::~TrieDictionary()
|
||||
{
|
||||
btrie_destroy(trie);
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void TrieDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void TrieDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void TrieDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void TrieDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void TrieDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void TrieDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
void TrieDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<StringRef>(attribute, key_columns, out);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, key_columns, out);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void TrieDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
|
||||
ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
|
||||
void TrieDictionary::loadData()
|
||||
{
|
||||
auto stream = source_ptr->loadAll();
|
||||
stream->readPrefix();
|
||||
|
||||
/// created upfront to avoid excess allocations
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
|
||||
const auto attributes_size = attributes.size();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const auto rows = block.rows();
|
||||
element_count += rows;
|
||||
|
||||
const auto key_column_ptrs = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
|
||||
for (const auto row_idx : ext::range(0, rows))
|
||||
{
|
||||
/// calculate key once per row
|
||||
const auto key_column = key_column_ptrs.front();
|
||||
|
||||
for (const auto attribute_idx : ext::range(0, attributes_size))
|
||||
{
|
||||
const auto & attribute_column = *attribute_column_ptrs[attribute_idx];
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
setAttributeValue(attribute, key_column->getDataAt(row_idx), attribute_column[row_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
if (require_nonempty && 0 == element_count)
|
||||
throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void TrieDictionary::addAttributeSize(const Attribute & attribute)
|
||||
{
|
||||
const auto & vec = std::get<ContainerType<T>>(attribute.maps);
|
||||
bytes_allocated += sizeof(ContainerType<T>) + (vec.capacity() * sizeof(T));
|
||||
bucket_count = vec.size();
|
||||
}
|
||||
|
||||
void TrieDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bytes_allocated += btrie_allocated(trie);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void TrieDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
|
||||
attribute.maps.emplace<ContainerType<T>>();
|
||||
}
|
||||
|
||||
TrieDictionary::Attribute TrieDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.maps.emplace<ContainerType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void TrieDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
{
|
||||
auto & vec = std::get<ContainerType<AttributeType>>(attribute.maps);
|
||||
|
||||
const auto first_column = key_columns.front();
|
||||
const auto rows = first_column->size();
|
||||
if (first_column->isNumeric())
|
||||
{
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
auto addr = Int32(first_column->get64(i));
|
||||
uintptr_t slot = btrie_find(trie, addr);
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic warning "-Wold-style-cast"
|
||||
set_value(i, slot != BTRIE_NULL ? static_cast<OutputType>(vec[slot]) : get_default(i));
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
auto addr = first_column->getDataAt(i);
|
||||
if (addr.size != 16)
|
||||
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const uint8_t *>(addr.data));
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic warning "-Wold-style-cast"
|
||||
set_value(i, slot != BTRIE_NULL ? static_cast<OutputType>(vec[slot]) : get_default(i));
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
bool TrieDictionary::setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value)
|
||||
{
|
||||
// Insert value into appropriate vector type
|
||||
auto & vec = std::get<ContainerType<T>>(attribute.maps);
|
||||
size_t row = vec.size();
|
||||
vec.push_back(value);
|
||||
|
||||
// Parse IP address and subnet length from string (e.g. 2a02:6b8::3/64)
|
||||
Poco::Net::IPAddress addr, mask;
|
||||
std::string addr_str(key.toString());
|
||||
size_t pos = addr_str.find('/');
|
||||
if (pos != std::string::npos)
|
||||
{
|
||||
addr = Poco::Net::IPAddress(addr_str.substr(0, pos));
|
||||
mask = Poco::Net::IPAddress(std::stoi(addr_str.substr(pos + 1), nullptr, 10), addr.family());
|
||||
}
|
||||
else
|
||||
{
|
||||
addr = Poco::Net::IPAddress(addr_str);
|
||||
mask = Poco::Net::IPAddress(addr.length() * 8, addr.family());
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we might overwrite the same key with the same slot as each key can map to multiple attributes.
|
||||
* However, all columns have equal number of rows so it is okay to store only row number for each key
|
||||
* instead of building a trie for each column. This comes at the cost of additional lookup in attribute
|
||||
* vector on lookup time to return cell from row + column. The reason for this is to save space,
|
||||
* and build only single trie instead of trie for each column.
|
||||
*/
|
||||
if (addr.family() == Poco::Net::IPAddress::IPv4)
|
||||
{
|
||||
UInt32 addr_v4 = Poco::ByteOrder::toNetwork(*reinterpret_cast<const UInt32 *>(addr.addr()));
|
||||
UInt32 mask_v4 = Poco::ByteOrder::toNetwork(*reinterpret_cast<const UInt32 *>(mask.addr()));
|
||||
return btrie_insert(trie, addr_v4, mask_v4, row) == 0;
|
||||
}
|
||||
|
||||
const uint8_t * addr_v6 = reinterpret_cast<const uint8_t *>(addr.addr());
|
||||
const uint8_t * mask_v6 = reinterpret_cast<const uint8_t *>(mask.addr());
|
||||
return btrie_insert_a6(trie, addr_v6, mask_v6, row) == 0;
|
||||
}
|
||||
|
||||
bool TrieDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
setAttributeValueImpl<StringRef>(attribute, key, StringRef{string_in_arena, string.size()});
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
const TrieDictionary::Attribute & TrieDictionary::getAttribute(const std::string & attribute_name) const
|
||||
{
|
||||
const auto it = attribute_index_by_name.find(attribute_name);
|
||||
if (it == std::end(attribute_index_by_name))
|
||||
throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void TrieDictionary::has(const Attribute &, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto first_column = key_columns.front();
|
||||
const auto rows = first_column->size();
|
||||
if (first_column->isNumeric())
|
||||
{
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
auto addr = Int32(first_column->get64(i));
|
||||
uintptr_t slot = btrie_find(trie, addr);
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic warning "-Wold-style-cast"
|
||||
out[i] = (slot != BTRIE_NULL);
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
auto addr = first_column->getDataAt(i);
|
||||
if (unlikely(addr.size != 16))
|
||||
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const uint8_t *>(addr.data));
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic warning "-Wold-style-cast"
|
||||
out[i] = (slot != BTRIE_NULL);
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename Getter, typename KeyType>
|
||||
static void trieTraverse(const btrie_t * trie, Getter && getter)
|
||||
{
|
||||
KeyType key = 0;
|
||||
const KeyType high_bit = ~((~key) >> 1);
|
||||
|
||||
btrie_node_t * node;
|
||||
node = trie->root;
|
||||
|
||||
std::stack<btrie_node_t *> stack;
|
||||
while (node)
|
||||
{
|
||||
stack.push(node);
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
auto get_bit = [&high_bit](size_t size) { return size ? (high_bit >> (size - 1)) : 0; };
|
||||
|
||||
while (!stack.empty())
|
||||
{
|
||||
node = stack.top();
|
||||
stack.pop();
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic warning "-Wold-style-cast"
|
||||
if (node && node->value != BTRIE_NULL)
|
||||
#pragma GCC diagnostic pop
|
||||
getter(key, stack.size());
|
||||
|
||||
if (node && node->right)
|
||||
{
|
||||
stack.push(nullptr);
|
||||
key |= get_bit(stack.size());
|
||||
stack.push(node->right);
|
||||
while (stack.top()->left)
|
||||
stack.push(stack.top()->left);
|
||||
}
|
||||
else
|
||||
key &= ~get_bit(stack.size());
|
||||
}
|
||||
}
|
||||
|
||||
Columns TrieDictionary::getKeyColumns() const
|
||||
{
|
||||
auto ip_column = ColumnFixedString::create(IPV6_BINARY_LENGTH);
|
||||
auto mask_column = ColumnVector<UInt8>::create();
|
||||
|
||||
#if defined(__SIZEOF_INT128__)
|
||||
auto getter = [&ip_column, &mask_column](__uint128_t ip, size_t mask)
|
||||
{
|
||||
Poco::UInt64 * ip_array = reinterpret_cast<Poco::UInt64 *>(&ip); // Poco:: for old poco + macos
|
||||
ip_array[0] = Poco::ByteOrder::fromNetwork(ip_array[0]);
|
||||
ip_array[1] = Poco::ByteOrder::fromNetwork(ip_array[1]);
|
||||
std::swap(ip_array[0], ip_array[1]);
|
||||
ip_column->insertData(reinterpret_cast<const char *>(ip_array), IPV6_BINARY_LENGTH);
|
||||
mask_column->insertValue(static_cast<UInt8>(mask));
|
||||
};
|
||||
|
||||
trieTraverse<decltype(getter), __uint128_t>(trie, std::move(getter));
|
||||
#else
|
||||
throw Exception("TrieDictionary::getKeyColumns is not implemented for 32bit arch", ErrorCodes::NOT_IMPLEMENTED);
|
||||
#endif
|
||||
return {std::move(ip_column), std::move(mask_column)};
|
||||
}
|
||||
|
||||
BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<TrieDictionary, UInt64>;
|
||||
|
||||
auto get_keys = [](const Columns & columns, const std::vector<DictionaryAttribute> & dict_attributes)
|
||||
{
|
||||
const auto & attr = dict_attributes.front();
|
||||
return ColumnsWithTypeAndName(
|
||||
{ColumnWithTypeAndName(columns.front(), std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH), attr.name)});
|
||||
};
|
||||
auto get_view = [](const Columns & columns, const std::vector<DictionaryAttribute> & dict_attributes)
|
||||
{
|
||||
auto column = ColumnString::create();
|
||||
const auto & ip_column = assert_cast<const ColumnFixedString &>(*columns.front());
|
||||
const auto & mask_column = assert_cast<const ColumnVector<UInt8> &>(*columns.back());
|
||||
char buffer[48];
|
||||
for (size_t row : ext::range(0, ip_column.size()))
|
||||
{
|
||||
UInt8 mask = mask_column.getElement(row);
|
||||
char * ptr = buffer;
|
||||
formatIPv6(reinterpret_cast<const unsigned char *>(ip_column.getDataAt(row).data), ptr);
|
||||
*(ptr - 1) = '/';
|
||||
ptr = itoa(mask, ptr);
|
||||
column->insertData(buffer, ptr - buffer);
|
||||
}
|
||||
return ColumnsWithTypeAndName{
|
||||
ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), dict_attributes.front().name)};
|
||||
};
|
||||
return std::make_shared<BlockInputStreamType>(
|
||||
shared_from_this(), max_block_size, getKeyColumns(), column_names, std::move(get_keys), std::move(get_view));
|
||||
}
|
||||
|
||||
|
||||
void registerDictionaryTrie(DictionaryFactory & factory)
|
||||
{
|
||||
auto create_layout = [=](const std::string &,
|
||||
const DictionaryStructure & dict_struct,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
DictionarySourcePtr source_ptr) -> DictionaryPtr
|
||||
{
|
||||
if (!dict_struct.key)
|
||||
throw Exception{"'key' is required for dictionary of layout 'ip_trie'", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
||||
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
||||
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
||||
// This is specialised trie for storing IPv4 and IPv6 prefixes.
|
||||
return std::make_unique<TrieDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
||||
};
|
||||
factory.registerLayout("ip_trie", create_layout, true);
|
||||
}
|
||||
|
||||
}
|
@ -27,9 +27,7 @@ void registerDictionaries()
|
||||
registerDictionaryComplexKeyHashed(factory);
|
||||
registerDictionaryComplexKeyCache(factory);
|
||||
registerDictionaryComplexKeyDirect(factory);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
registerDictionaryTrie(factory);
|
||||
#endif
|
||||
registerDictionaryFlat(factory);
|
||||
registerDictionaryHashed(factory);
|
||||
registerDictionaryCache(factory);
|
||||
|
@ -53,6 +53,7 @@ SRCS(
|
||||
FlatDictionary.cpp
|
||||
HTTPDictionarySource.cpp
|
||||
HashedDictionary.cpp
|
||||
IPAddressDictionary.cpp
|
||||
LibraryDictionarySource.cpp
|
||||
LibraryDictionarySourceExternal.cpp
|
||||
MongoDBDictionarySource.cpp
|
||||
|
@ -91,6 +91,9 @@ inline UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column)
|
||||
return field.get<UInt32>();
|
||||
}
|
||||
|
||||
/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
|
||||
struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };
|
||||
|
||||
|
||||
/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment.
|
||||
* (Date is represented internally as number of days from some day; DateTime - as unix timestamp)
|
||||
@ -111,6 +114,13 @@ struct ConvertImpl
|
||||
using ColVecFrom = typename FromDataType::ColumnType;
|
||||
using ColVecTo = typename ToDataType::ColumnType;
|
||||
|
||||
if (std::is_same_v<Name, NameToUnixTimestamp>)
|
||||
{
|
||||
if (isDate(named_from.type))
|
||||
throw Exception("Illegal column " + named_from.column->getName() + " of first argument of function " + Name::name,
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
if constexpr ((IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
|
||||
&& !(std::is_same_v<DataTypeDateTime64, FromDataType> || std::is_same_v<DataTypeDateTime64, ToDataType>))
|
||||
{
|
||||
@ -923,9 +933,6 @@ struct ConvertImplGenericFromString
|
||||
};
|
||||
|
||||
|
||||
/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
|
||||
struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };
|
||||
|
||||
template <>
|
||||
struct ConvertImpl<DataTypeString, DataTypeUInt32, NameToUnixTimestamp>
|
||||
: ConvertImpl<DataTypeString, DataTypeDateTime, NameToUnixTimestamp> {};
|
||||
|
@ -37,7 +37,7 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
#include <Dictionaries/ComplexKeyDirectDictionary.h>
|
||||
#include <Dictionaries/RangeHashedDictionary.h>
|
||||
#include <Dictionaries/TrieDictionary.h>
|
||||
#include <Dictionaries/IPAddressDictionary.h>
|
||||
#include <Dictionaries/PolygonDictionaryImplementations.h>
|
||||
#include <Dictionaries/DirectDictionary.h>
|
||||
|
||||
@ -192,7 +192,7 @@ private:
|
||||
|| (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
|
||||
#endif
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
|| (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
|
||||
#endif
|
||||
|| (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
|
||||
@ -346,7 +346,7 @@ private:
|
||||
|| (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
|
||||
#endif
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
|| (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
|
||||
#endif
|
||||
|| (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
|
||||
@ -524,7 +524,7 @@ private:
|
||||
|| (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
|
||||
#endif
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
|| (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
|
||||
#endif
|
||||
|| (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
|
||||
@ -861,7 +861,7 @@ private:
|
||||
|| (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
|
||||
#endif
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
|| (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
|
||||
#endif
|
||||
|| (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
|
||||
@ -1116,7 +1116,7 @@ private:
|
||||
|| (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
|
||||
#endif
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
|| (res = executeDispatchComplex<TrieDictionary>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
|
||||
#endif
|
||||
|| (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
|
||||
|| (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
|
||||
|
@ -1177,7 +1177,7 @@ void InterpreterSelectQuery::executeFetchColumns(
|
||||
const auto & func = desc.function;
|
||||
std::optional<UInt64> num_rows{};
|
||||
if (!query.prewhere() && !query.where())
|
||||
num_rows = storage->totalRows();
|
||||
num_rows = storage->totalRows(settings);
|
||||
else // It's possible to optimize count() given only partition predicates
|
||||
{
|
||||
SelectQueryInfo temp_query_info;
|
||||
|
@ -463,7 +463,7 @@ public:
|
||||
/// - For total_rows column in system.tables
|
||||
///
|
||||
/// Does takes underlying Storage (if any) into account.
|
||||
virtual std::optional<UInt64> totalRows() const { return {}; }
|
||||
virtual std::optional<UInt64> totalRows(const Settings &) const { return {}; }
|
||||
|
||||
/// Same as above but also take partition predicate into account.
|
||||
virtual std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo &, const Context &) const { return {}; }
|
||||
@ -481,7 +481,7 @@ public:
|
||||
/// Memory part should be estimated as a resident memory size.
|
||||
/// In particular, alloctedBytes() is preferable over bytes()
|
||||
/// when considering in-memory blocks.
|
||||
virtual std::optional<UInt64> totalBytes() const { return {}; }
|
||||
virtual std::optional<UInt64> totalBytes(const Settings &) const { return {}; }
|
||||
|
||||
/// Number of rows INSERTed since server start.
|
||||
///
|
||||
|
@ -36,34 +36,31 @@ double IBackgroundJobExecutor::getSleepRandomAdd()
|
||||
return std::uniform_real_distribution<double>(0, sleep_settings.task_sleep_seconds_when_no_work_random_part)(rng);
|
||||
}
|
||||
|
||||
void IBackgroundJobExecutor::scheduleTask(bool job_done, bool with_backoff)
|
||||
void IBackgroundJobExecutor::runTaskWithoutDelay()
|
||||
{
|
||||
if (job_done)
|
||||
{
|
||||
no_work_done_count = 0;
|
||||
/// We have background jobs, schedule task as soon as possible
|
||||
scheduling_task->schedule();
|
||||
no_work_done_count = 0;
|
||||
/// We have background jobs, schedule task as soon as possible
|
||||
scheduling_task->schedule();
|
||||
}
|
||||
|
||||
void IBackgroundJobExecutor::scheduleTask(bool with_backoff)
|
||||
{
|
||||
size_t next_time_to_execute;
|
||||
if (with_backoff)
|
||||
{
|
||||
auto no_work_done_times = no_work_done_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
next_time_to_execute = 1000 * (std::min(
|
||||
sleep_settings.task_sleep_seconds_when_no_work_max,
|
||||
sleep_settings.thread_sleep_seconds_if_nothing_to_do * std::pow(sleep_settings.task_sleep_seconds_when_no_work_multiplier, no_work_done_times))
|
||||
+ getSleepRandomAdd());
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t next_time_to_execute;
|
||||
if (with_backoff)
|
||||
{
|
||||
auto no_work_done_times = no_work_done_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
next_time_to_execute = 1000 * (std::min(
|
||||
sleep_settings.task_sleep_seconds_when_no_work_max,
|
||||
sleep_settings.thread_sleep_seconds_if_nothing_to_do * std::pow(sleep_settings.task_sleep_seconds_when_no_work_multiplier, no_work_done_times))
|
||||
+ getSleepRandomAdd());
|
||||
}
|
||||
else
|
||||
{
|
||||
next_time_to_execute = 1000 * sleep_settings.thread_sleep_seconds_if_nothing_to_do;
|
||||
}
|
||||
|
||||
scheduling_task->scheduleAfter(next_time_to_execute, false);
|
||||
next_time_to_execute = 1000 * sleep_settings.thread_sleep_seconds_if_nothing_to_do;
|
||||
}
|
||||
|
||||
scheduling_task->scheduleAfter(next_time_to_execute, false);
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -105,42 +102,42 @@ try
|
||||
/// Job done, decrement metric and reset no_work counter
|
||||
CurrentMetrics::values[pool_config.tasks_metric]--;
|
||||
/// Job done, new empty space in pool, schedule background task
|
||||
scheduleTask(true);
|
||||
runTaskWithoutDelay();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
CurrentMetrics::values[pool_config.tasks_metric]--;
|
||||
scheduleTask(false);
|
||||
scheduleTask(/* with_backoff = */ true);
|
||||
}
|
||||
});
|
||||
/// We've scheduled task in the background pool and when it will finish we will be triggered again. But this task can be
|
||||
/// extremely long and we may have a lot of other small tasks to do, so we schedule ourselves here.
|
||||
scheduleTask(true);
|
||||
runTaskWithoutDelay();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// With our Pool settings scheduleOrThrowOnError shouldn't throw exceptions, but for safety catch added here
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
CurrentMetrics::values[pool_config.tasks_metric]--;
|
||||
scheduleTask(false);
|
||||
scheduleTask(/* with_backoff = */ true);
|
||||
}
|
||||
}
|
||||
else /// Pool is full and we have some work to do
|
||||
{
|
||||
scheduleTask(false, /* with_backoff = */ false);
|
||||
scheduleTask(/* with_backoff = */ false);
|
||||
}
|
||||
}
|
||||
else /// Nothing to do, no jobs
|
||||
{
|
||||
scheduleTask(false);
|
||||
scheduleTask(/* with_backoff = */ true);
|
||||
}
|
||||
|
||||
}
|
||||
catch (...) /// Exception while we looking for a task, reschedule
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
scheduleTask(false);
|
||||
scheduleTask(/* with_backoff = */ true);
|
||||
}
|
||||
|
||||
void IBackgroundJobExecutor::start()
|
||||
|
@ -117,7 +117,9 @@ private:
|
||||
/// Function that executes in background scheduling pool
|
||||
void jobExecutingTask();
|
||||
/// Recalculate timeouts when we have to check for a new job
|
||||
void scheduleTask(bool job_done, bool with_backoff=false);
|
||||
void scheduleTask(bool with_backoff);
|
||||
/// Run background task as fast as possible and reset errors counter
|
||||
void runTaskWithoutDelay();
|
||||
/// Return random add for sleep in case of error
|
||||
double getSleepRandomAdd();
|
||||
};
|
||||
|
@ -867,13 +867,13 @@ void StorageBuffer::checkAlterIsPossible(const AlterCommands & commands, const S
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<UInt64> StorageBuffer::totalRows() const
|
||||
std::optional<UInt64> StorageBuffer::totalRows(const Settings & settings) const
|
||||
{
|
||||
std::optional<UInt64> underlying_rows;
|
||||
auto underlying = DatabaseCatalog::instance().tryGetTable(destination_id, global_context);
|
||||
|
||||
if (underlying)
|
||||
underlying_rows = underlying->totalRows();
|
||||
underlying_rows = underlying->totalRows(settings);
|
||||
if (!underlying_rows)
|
||||
return underlying_rows;
|
||||
|
||||
@ -886,7 +886,7 @@ std::optional<UInt64> StorageBuffer::totalRows() const
|
||||
return rows + *underlying_rows;
|
||||
}
|
||||
|
||||
std::optional<UInt64> StorageBuffer::totalBytes() const
|
||||
std::optional<UInt64> StorageBuffer::totalBytes(const Settings & /*settings*/) const
|
||||
{
|
||||
UInt64 bytes = 0;
|
||||
for (const auto & buffer : buffers)
|
||||
|
@ -109,8 +109,8 @@ public:
|
||||
/// The structure of the subordinate table is not checked and does not change.
|
||||
void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override;
|
||||
|
||||
std::optional<UInt64> totalRows() const override;
|
||||
std::optional<UInt64> totalBytes() const override;
|
||||
std::optional<UInt64> totalRows(const Settings & settings) const override;
|
||||
std::optional<UInt64> totalBytes(const Settings & settings) const override;
|
||||
|
||||
std::optional<UInt64> lifetimeRows() const override { return writes.rows; }
|
||||
std::optional<UInt64> lifetimeBytes() const override { return writes.bytes; }
|
||||
|
@ -102,8 +102,8 @@ HashJoinPtr StorageJoin::getJoin(std::shared_ptr<TableJoin> analyzed_join) const
|
||||
void StorageJoin::insertBlock(const Block & block) { join->addJoinedBlock(block, true); }
|
||||
|
||||
size_t StorageJoin::getSize() const { return join->getTotalRowCount(); }
|
||||
std::optional<UInt64> StorageJoin::totalRows() const { return join->getTotalRowCount(); }
|
||||
std::optional<UInt64> StorageJoin::totalBytes() const { return join->getTotalByteCount(); }
|
||||
std::optional<UInt64> StorageJoin::totalRows(const Settings &) const { return join->getTotalRowCount(); }
|
||||
std::optional<UInt64> StorageJoin::totalBytes(const Settings &) const { return join->getTotalByteCount(); }
|
||||
|
||||
|
||||
void registerStorageJoin(StorageFactory & factory)
|
||||
|
@ -46,8 +46,8 @@ public:
|
||||
size_t max_block_size,
|
||||
unsigned num_streams) override;
|
||||
|
||||
std::optional<UInt64> totalRows() const override;
|
||||
std::optional<UInt64> totalBytes() const override;
|
||||
std::optional<UInt64> totalRows(const Settings & settings) const override;
|
||||
std::optional<UInt64> totalBytes(const Settings & settings) const override;
|
||||
|
||||
private:
|
||||
Block sample_block;
|
||||
|
@ -1,9 +1,11 @@
|
||||
#include <cassert>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
|
||||
#include <Storages/StorageMemory.h>
|
||||
#include <Interpreters/MutationsInterpreter.h>
|
||||
#include <Storages/StorageFactory.h>
|
||||
#include <Storages/StorageMemory.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Processors/Sources/SourceWithProgress.h>
|
||||
@ -21,7 +23,7 @@ namespace ErrorCodes
|
||||
|
||||
class MemorySource : public SourceWithProgress
|
||||
{
|
||||
using InitializerFunc = std::function<void(BlocksList::const_iterator &, size_t &)>;
|
||||
using InitializerFunc = std::function<void(BlocksList::const_iterator &, size_t &, std::shared_ptr<const BlocksList> &)>;
|
||||
public:
|
||||
/// Blocks are stored in std::list which may be appended in another thread.
|
||||
/// We use pointer to the beginning of the list and its current size.
|
||||
@ -34,11 +36,13 @@ public:
|
||||
size_t num_blocks_,
|
||||
const StorageMemory & storage,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
InitializerFunc initializer_func_ = [](BlocksList::const_iterator &, size_t &) {})
|
||||
std::shared_ptr<const BlocksList> data_,
|
||||
InitializerFunc initializer_func_ = [](BlocksList::const_iterator &, size_t &, std::shared_ptr<const BlocksList> &) {})
|
||||
: SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID()))
|
||||
, column_names(std::move(column_names_))
|
||||
, current_it(first_)
|
||||
, num_blocks(num_blocks_)
|
||||
, data(data_)
|
||||
, initializer_func(std::move(initializer_func_))
|
||||
{
|
||||
}
|
||||
@ -50,7 +54,7 @@ protected:
|
||||
{
|
||||
if (!postponed_init_done)
|
||||
{
|
||||
initializer_func(current_it, num_blocks);
|
||||
initializer_func(current_it, num_blocks, data);
|
||||
postponed_init_done = true;
|
||||
}
|
||||
|
||||
@ -77,6 +81,7 @@ private:
|
||||
size_t num_blocks;
|
||||
size_t current_block_idx = 0;
|
||||
|
||||
std::shared_ptr<const BlocksList> data;
|
||||
bool postponed_init_done = false;
|
||||
InitializerFunc initializer_func;
|
||||
};
|
||||
@ -102,7 +107,9 @@ public:
|
||||
metadata_snapshot->check(block, true);
|
||||
{
|
||||
std::lock_guard lock(storage.mutex);
|
||||
storage.data.push_back(block);
|
||||
auto new_data = std::make_unique<BlocksList>(*(storage.data.get()));
|
||||
new_data->push_back(block);
|
||||
storage.data.set(std::move(new_data));
|
||||
|
||||
storage.total_size_bytes.fetch_add(size_bytes_diff, std::memory_order_relaxed);
|
||||
storage.total_size_rows.fetch_add(size_rows_diff, std::memory_order_relaxed);
|
||||
@ -116,7 +123,7 @@ private:
|
||||
|
||||
|
||||
StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_)
|
||||
: IStorage(table_id_)
|
||||
: IStorage(table_id_), data(std::make_unique<const BlocksList>())
|
||||
{
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
storage_metadata.setColumns(std::move(columns_description_));
|
||||
@ -146,30 +153,31 @@ Pipe StorageMemory::read(
|
||||
/// set for IN or hash table for JOIN, which can't be done concurrently.
|
||||
/// Since no other manipulation with data is done, multiple sources shouldn't give any profit.
|
||||
|
||||
return Pipe(
|
||||
std::make_shared<MemorySource>(
|
||||
column_names, data.end(), 0, *this, metadata_snapshot,
|
||||
/// This hack is needed for global subqueries.
|
||||
/// It allows to set up this Source for read AFTER Storage::read() has been called and just before actual reading
|
||||
[this](BlocksList::const_iterator & current_it, size_t & num_blocks)
|
||||
{
|
||||
std::lock_guard guard(mutex);
|
||||
current_it = data.begin();
|
||||
num_blocks = data.size();
|
||||
}
|
||||
));
|
||||
return Pipe(std::make_shared<MemorySource>(
|
||||
column_names,
|
||||
data.get()->end(),
|
||||
0,
|
||||
*this,
|
||||
metadata_snapshot,
|
||||
data.get(),
|
||||
[this](BlocksList::const_iterator & current_it, size_t & num_blocks, std::shared_ptr<const BlocksList> & current_data)
|
||||
{
|
||||
current_data = data.get();
|
||||
current_it = current_data->begin();
|
||||
num_blocks = current_data->size();
|
||||
}));
|
||||
}
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
auto current_data = data.get();
|
||||
|
||||
size_t size = data.size();
|
||||
size_t size = current_data->size();
|
||||
|
||||
if (num_streams > size)
|
||||
num_streams = size;
|
||||
|
||||
Pipes pipes;
|
||||
|
||||
BlocksList::const_iterator it = data.begin();
|
||||
BlocksList::const_iterator it = current_data->begin();
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t stream = 0; stream < num_streams; ++stream)
|
||||
@ -179,7 +187,7 @@ Pipe StorageMemory::read(
|
||||
|
||||
assert(num_blocks > 0);
|
||||
|
||||
pipes.emplace_back(std::make_shared<MemorySource>(column_names, it, num_blocks, *this, metadata_snapshot));
|
||||
pipes.emplace_back(std::make_shared<MemorySource>(column_names, it, num_blocks, *this, metadata_snapshot, current_data));
|
||||
|
||||
while (offset < next_offset)
|
||||
{
|
||||
@ -200,37 +208,99 @@ BlockOutputStreamPtr StorageMemory::write(const ASTPtr & /*query*/, const Storag
|
||||
|
||||
void StorageMemory::drop()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
data.clear();
|
||||
data.set(std::make_unique<BlocksList>());
|
||||
total_size_bytes.store(0, std::memory_order_relaxed);
|
||||
total_size_rows.store(0, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void updateBlockData(Block & old_block, const Block & new_block)
|
||||
{
|
||||
for (const auto & it : new_block)
|
||||
{
|
||||
auto col_name = it.name;
|
||||
auto & col_with_type_name = old_block.getByName(col_name);
|
||||
col_with_type_name.column = it.column;
|
||||
}
|
||||
}
|
||||
|
||||
void StorageMemory::mutate(const MutationCommands & commands, const Context & context)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
auto storage = getStorageID();
|
||||
auto storage_ptr = DatabaseCatalog::instance().getTable(storage, context);
|
||||
auto interpreter = std::make_unique<MutationsInterpreter>(storage_ptr, metadata_snapshot, commands, context, true);
|
||||
auto in = interpreter->execute();
|
||||
|
||||
in->readPrefix();
|
||||
BlocksList out;
|
||||
Block block;
|
||||
while ((block = in->read()))
|
||||
{
|
||||
out.push_back(block);
|
||||
}
|
||||
in->readSuffix();
|
||||
|
||||
std::unique_ptr<BlocksList> new_data;
|
||||
|
||||
// all column affected
|
||||
if (interpreter->isAffectingAllColumns())
|
||||
{
|
||||
new_data = std::make_unique<BlocksList>(out);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// just some of the column affected, we need update it with new column
|
||||
new_data = std::make_unique<BlocksList>(*(data.get()));
|
||||
auto data_it = new_data->begin();
|
||||
auto out_it = out.begin();
|
||||
|
||||
while (data_it != new_data->end())
|
||||
{
|
||||
/// Mutation does not change the number of blocks
|
||||
assert(out_it != out.end());
|
||||
|
||||
updateBlockData(*data_it, *out_it);
|
||||
++data_it;
|
||||
++out_it;
|
||||
}
|
||||
|
||||
assert(out_it == out.end());
|
||||
}
|
||||
|
||||
size_t rows = 0;
|
||||
size_t bytes = 0;
|
||||
for (const auto & buffer : *new_data)
|
||||
{
|
||||
rows += buffer.rows();
|
||||
bytes += buffer.bytes();
|
||||
}
|
||||
total_size_bytes.store(rows, std::memory_order_relaxed);
|
||||
total_size_rows.store(bytes, std::memory_order_relaxed);
|
||||
data.set(std::move(new_data));
|
||||
}
|
||||
|
||||
|
||||
void StorageMemory::truncate(
|
||||
const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
data.clear();
|
||||
data.set(std::make_unique<BlocksList>());
|
||||
total_size_bytes.store(0, std::memory_order_relaxed);
|
||||
total_size_rows.store(0, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
std::optional<UInt64> StorageMemory::totalRows() const
|
||||
std::optional<UInt64> StorageMemory::totalRows(const Settings &) const
|
||||
{
|
||||
/// All modifications of these counters are done under mutex which automatically guarantees synchronization/consistency
|
||||
/// When run concurrently we are fine with any value: "before" or "after"
|
||||
return total_size_rows.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
std::optional<UInt64> StorageMemory::totalBytes() const
|
||||
std::optional<UInt64> StorageMemory::totalBytes(const Settings &) const
|
||||
{
|
||||
return total_size_bytes.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
void registerStorageMemory(StorageFactory & factory)
|
||||
{
|
||||
factory.registerStorage("Memory", [](const StorageFactory::Arguments & args)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Storages/IStorage.h>
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
|
||||
#include <Common/MultiVersion.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -27,7 +28,7 @@ friend struct ext::shared_ptr_helper<StorageMemory>;
|
||||
public:
|
||||
String getName() const override { return "Memory"; }
|
||||
|
||||
size_t getSize() const { return data.size(); }
|
||||
size_t getSize() const { return data.get()->size(); }
|
||||
|
||||
Pipe read(
|
||||
const Names & column_names,
|
||||
@ -44,10 +45,12 @@ public:
|
||||
|
||||
void drop() override;
|
||||
|
||||
void mutate(const MutationCommands & commands, const Context & context) override;
|
||||
|
||||
void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override;
|
||||
|
||||
std::optional<UInt64> totalRows() const override;
|
||||
std::optional<UInt64> totalBytes() const override;
|
||||
std::optional<UInt64> totalRows(const Settings &) const override;
|
||||
std::optional<UInt64> totalBytes(const Settings &) const override;
|
||||
|
||||
/** Delays initialization of StorageMemory::read() until the first read is actually happen.
|
||||
* Usually, fore code like this:
|
||||
@ -87,8 +90,8 @@ public:
|
||||
void delayReadForGlobalSubqueries() { delay_read_for_global_subqueries = true; }
|
||||
|
||||
private:
|
||||
/// The data itself. `list` - so that when inserted to the end, the existing iterators are not invalidated.
|
||||
BlocksList data;
|
||||
/// MultiVersion data storage, so that we can copy the list of blocks to readers.
|
||||
MultiVersion<BlocksList> data;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
|
||||
|
@ -202,7 +202,7 @@ Pipe StorageMergeTree::read(
|
||||
return plan.convertToPipe();
|
||||
}
|
||||
|
||||
std::optional<UInt64> StorageMergeTree::totalRows() const
|
||||
std::optional<UInt64> StorageMergeTree::totalRows(const Settings &) const
|
||||
{
|
||||
return getTotalActiveSizeInRows();
|
||||
}
|
||||
@ -223,7 +223,7 @@ std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const Sele
|
||||
return res;
|
||||
}
|
||||
|
||||
std::optional<UInt64> StorageMergeTree::totalBytes() const
|
||||
std::optional<UInt64> StorageMergeTree::totalBytes(const Settings &) const
|
||||
{
|
||||
return getTotalActiveSizeInBytes();
|
||||
}
|
||||
|
@ -56,9 +56,9 @@ public:
|
||||
size_t max_block_size,
|
||||
unsigned num_streams) override;
|
||||
|
||||
std::optional<UInt64> totalRows() const override;
|
||||
std::optional<UInt64> totalRows(const Settings &) const override;
|
||||
std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo &, const Context &) const override;
|
||||
std::optional<UInt64> totalBytes() const override;
|
||||
std::optional<UInt64> totalBytes(const Settings &) const override;
|
||||
|
||||
BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;
|
||||
|
||||
|
@ -45,11 +45,11 @@ public:
|
||||
|
||||
void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override;
|
||||
|
||||
std::optional<UInt64> totalRows() const override
|
||||
std::optional<UInt64> totalRows(const Settings &) const override
|
||||
{
|
||||
return {0};
|
||||
}
|
||||
std::optional<UInt64> totalBytes() const override
|
||||
std::optional<UInt64> totalBytes(const Settings &) const override
|
||||
{
|
||||
return {0};
|
||||
}
|
||||
|
@ -148,8 +148,8 @@ public:
|
||||
bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); }
|
||||
Strings getDataPaths() const override { return getNested()->getDataPaths(); }
|
||||
StoragePolicyPtr getStoragePolicy() const override { return getNested()->getStoragePolicy(); }
|
||||
std::optional<UInt64> totalRows() const override { return getNested()->totalRows(); }
|
||||
std::optional<UInt64> totalBytes() const override { return getNested()->totalBytes(); }
|
||||
std::optional<UInt64> totalRows(const Settings & settings) const override { return getNested()->totalRows(settings); }
|
||||
std::optional<UInt64> totalBytes(const Settings & settings) const override { return getNested()->totalBytes(settings); }
|
||||
std::optional<UInt64> lifetimeRows() const override { return getNested()->lifetimeRows(); }
|
||||
std::optional<UInt64> lifetimeBytes() const override { return getNested()->lifetimeBytes(); }
|
||||
|
||||
|
@ -3742,27 +3742,37 @@ Pipe StorageReplicatedMergeTree::read(
|
||||
|
||||
|
||||
template <class Func>
|
||||
void StorageReplicatedMergeTree::foreachCommittedParts(const Func & func) const
|
||||
void StorageReplicatedMergeTree::foreachCommittedParts(Func && func, bool select_sequential_consistency) const
|
||||
{
|
||||
auto max_added_blocks = getMaxAddedBlocks();
|
||||
std::optional<ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock> max_added_blocks = {};
|
||||
|
||||
/**
|
||||
* Synchronously go to ZooKeeper when select_sequential_consistency enabled
|
||||
*/
|
||||
if (select_sequential_consistency)
|
||||
max_added_blocks = getMaxAddedBlocks();
|
||||
|
||||
auto lock = lockParts();
|
||||
for (const auto & part : getDataPartsStateRange(DataPartState::Committed))
|
||||
{
|
||||
if (part->isEmpty())
|
||||
continue;
|
||||
|
||||
auto blocks_iterator = max_added_blocks.find(part->info.partition_id);
|
||||
if (blocks_iterator == max_added_blocks.end() || part->info.max_block > blocks_iterator->second)
|
||||
continue;
|
||||
if (max_added_blocks)
|
||||
{
|
||||
auto blocks_iterator = max_added_blocks->find(part->info.partition_id);
|
||||
if (blocks_iterator == max_added_blocks->end() || part->info.max_block > blocks_iterator->second)
|
||||
continue;
|
||||
}
|
||||
|
||||
func(part);
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<UInt64> StorageReplicatedMergeTree::totalRows() const
|
||||
std::optional<UInt64> StorageReplicatedMergeTree::totalRows(const Settings & settings) const
|
||||
{
|
||||
UInt64 res = 0;
|
||||
foreachCommittedParts([&res](auto & part) { res += part->rows_count; });
|
||||
foreachCommittedParts([&res](auto & part) { res += part->rows_count; }, settings.select_sequential_consistency);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -3777,14 +3787,14 @@ std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(
|
||||
{
|
||||
if (!partition_pruner.canBePruned(part))
|
||||
res += part->rows_count;
|
||||
});
|
||||
}, context.getSettingsRef().select_sequential_consistency);
|
||||
return res;
|
||||
}
|
||||
|
||||
std::optional<UInt64> StorageReplicatedMergeTree::totalBytes() const
|
||||
std::optional<UInt64> StorageReplicatedMergeTree::totalBytes(const Settings & settings) const
|
||||
{
|
||||
UInt64 res = 0;
|
||||
foreachCommittedParts([&res](auto & part) { res += part->getBytesOnDisk(); });
|
||||
foreachCommittedParts([&res](auto & part) { res += part->getBytesOnDisk(); }, settings.select_sequential_consistency);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -107,9 +107,9 @@ public:
|
||||
size_t max_block_size,
|
||||
unsigned num_streams) override;
|
||||
|
||||
std::optional<UInt64> totalRows() const override;
|
||||
std::optional<UInt64> totalRows(const Settings & settings) const override;
|
||||
std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const override;
|
||||
std::optional<UInt64> totalBytes() const override;
|
||||
std::optional<UInt64> totalBytes(const Settings & settings) const override;
|
||||
|
||||
BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;
|
||||
|
||||
@ -326,7 +326,7 @@ private:
|
||||
const size_t replicated_fetches_pool_size;
|
||||
|
||||
template <class Func>
|
||||
void foreachCommittedParts(const Func & func) const;
|
||||
void foreachCommittedParts(Func && func, bool select_sequential_consistency) const;
|
||||
|
||||
/** Creates the minimum set of nodes in ZooKeeper and create first replica.
|
||||
* Returns true if was created, false if exists.
|
||||
|
@ -153,8 +153,8 @@ void StorageSet::insertBlock(const Block & block) { set->insertFromBlock(block);
|
||||
void StorageSet::finishInsert() { set->finishInsert(); }
|
||||
|
||||
size_t StorageSet::getSize() const { return set->getTotalRowCount(); }
|
||||
std::optional<UInt64> StorageSet::totalRows() const { return set->getTotalRowCount(); }
|
||||
std::optional<UInt64> StorageSet::totalBytes() const { return set->getTotalByteCount(); }
|
||||
std::optional<UInt64> StorageSet::totalRows(const Settings &) const { return set->getTotalRowCount(); }
|
||||
std::optional<UInt64> StorageSet::totalBytes(const Settings &) const { return set->getTotalByteCount(); }
|
||||
|
||||
void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &)
|
||||
{
|
||||
|
@ -73,8 +73,8 @@ public:
|
||||
|
||||
void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override;
|
||||
|
||||
std::optional<UInt64> totalRows() const override;
|
||||
std::optional<UInt64> totalBytes() const override;
|
||||
std::optional<UInt64> totalRows(const Settings & settings) const override;
|
||||
std::optional<UInt64> totalBytes(const Settings & settings) const override;
|
||||
|
||||
private:
|
||||
SetPtr set;
|
||||
|
@ -429,7 +429,7 @@ protected:
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
assert(table != nullptr);
|
||||
auto total_rows = table->totalRows();
|
||||
auto total_rows = table->totalRows(context.getSettingsRef());
|
||||
if (total_rows)
|
||||
res_columns[res_index++]->insert(*total_rows);
|
||||
else
|
||||
@ -439,7 +439,7 @@ protected:
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
assert(table != nullptr);
|
||||
auto total_bytes = table->totalBytes();
|
||||
auto total_bytes = table->totalBytes(context.getSettingsRef());
|
||||
if (total_bytes)
|
||||
res_columns[res_index++]->insert(*total_bytes);
|
||||
else
|
||||
|
@ -112,7 +112,6 @@ def get_db_engine(args):
|
||||
return "" # Will use default engine
|
||||
|
||||
def run_single_test(args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file):
|
||||
|
||||
# print(client_options)
|
||||
|
||||
if args.database:
|
||||
@ -149,10 +148,12 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
|
||||
pattern = "{client} --send_logs_level={logs_level} --testmode --multiquery {options} < " + pattern
|
||||
|
||||
command = pattern.format(**params)
|
||||
#print(command)
|
||||
|
||||
# print(command)
|
||||
|
||||
proc = Popen(command, shell=True, env=os.environ)
|
||||
start_time = datetime.now()
|
||||
|
||||
while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
|
||||
sleep(0.01)
|
||||
|
||||
@ -317,6 +318,7 @@ def run_tests_array(all_tests_with_params):
|
||||
stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'
|
||||
|
||||
proc, stdout, stderr, total_time = run_single_test(args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file)
|
||||
|
||||
if proc.returncode is None:
|
||||
try:
|
||||
proc.kill()
|
||||
@ -347,7 +349,7 @@ def run_tests_array(all_tests_with_params):
|
||||
if stderr:
|
||||
print(stderr)
|
||||
|
||||
# Stop on fatal errors like segmentation fault. They are send to client via logs.
|
||||
# Stop on fatal errors like segmentation fault. They are sent to client via logs.
|
||||
if ' <Fatal> ' in stderr:
|
||||
SERVER_DIED = True
|
||||
|
||||
@ -486,7 +488,7 @@ def collect_build_flags(client):
|
||||
elif b'-fsanitize=memory' in stdout:
|
||||
result.append(BuildFlags.MEMORY)
|
||||
else:
|
||||
raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
|
||||
raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
|
||||
|
||||
clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
|
||||
(stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'")
|
||||
@ -497,7 +499,7 @@ def collect_build_flags(client):
|
||||
elif b'RelWithDebInfo' in stdout or b'Release' in stdout:
|
||||
result.append(BuildFlags.RELEASE)
|
||||
else:
|
||||
raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
|
||||
raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
|
||||
|
||||
clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
|
||||
(stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'")
|
||||
@ -506,7 +508,7 @@ def collect_build_flags(client):
|
||||
if b'ON' in stdout or b'1' in stdout:
|
||||
result.append(BuildFlags.UNBUNDLED)
|
||||
else:
|
||||
raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
|
||||
raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
|
||||
|
||||
clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
|
||||
(stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.settings WHERE name = 'default_database_engine'")
|
||||
@ -515,7 +517,7 @@ def collect_build_flags(client):
|
||||
if b'Ordinary' in stdout:
|
||||
result.append(BuildFlags.DATABASE_ORDINARY)
|
||||
else:
|
||||
raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
|
||||
raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
|
||||
|
||||
clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
|
||||
(stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'")
|
||||
@ -544,8 +546,12 @@ def main(args):
|
||||
return stdout.startswith(b'1')
|
||||
|
||||
if not check_server_started(args.client, args.server_check_retries):
|
||||
raise Exception("clickhouse-server is not responding. Cannot execute 'SELECT 1' query.")
|
||||
raise Exception(
|
||||
"Server is not responding. Cannot execute 'SELECT 1' query. \
|
||||
Note: if you are using unbundled mode, you also have to specify -c option.")
|
||||
|
||||
build_flags = collect_build_flags(args.client)
|
||||
|
||||
if args.use_skip_list:
|
||||
tests_to_skip_from_list = collect_tests_to_skip(args.skip_list_path, build_flags)
|
||||
else:
|
||||
@ -790,8 +796,13 @@ if __name__ == '__main__':
|
||||
parser=ArgumentParser(description='ClickHouse functional tests')
|
||||
parser.add_argument('-q', '--queries', help='Path to queries dir')
|
||||
parser.add_argument('--tmp', help='Path to tmp dir')
|
||||
parser.add_argument('-b', '--binary', default='clickhouse', help='Path to clickhouse binary or name of binary in PATH')
|
||||
parser.add_argument('-c', '--client', help='Client program')
|
||||
|
||||
parser.add_argument('-b', '--binary', default='clickhouse',
|
||||
help='Path to clickhouse (if bundled, clickhouse-server otherwise) binary or name of binary in PATH')
|
||||
|
||||
parser.add_argument('-c', '--client',
|
||||
help='Path to clickhouse-client (if unbundled, useless otherwise) binary of name of binary in PATH')
|
||||
|
||||
parser.add_argument('--extract_from_config', help='extract-from-config program')
|
||||
parser.add_argument('--configclient', help='Client config (if you use not default ports)')
|
||||
parser.add_argument('--configserver', default= '/etc/clickhouse-server/config.xml', help='Preprocessed server config')
|
||||
@ -865,10 +876,14 @@ if __name__ == '__main__':
|
||||
if args.client is None:
|
||||
if find_binary(args.binary + '-client'):
|
||||
args.client = args.binary + '-client'
|
||||
|
||||
print("Using " + args.client + " as client program (expecting unbundled mode)")
|
||||
elif find_binary(args.binary):
|
||||
args.client = args.binary + ' client'
|
||||
|
||||
print("Using " + args.client + " as client program (expecting bundled mode)")
|
||||
else:
|
||||
print("No 'clickhouse' binary found in PATH", file=sys.stderr)
|
||||
print("No 'clickhouse' or 'clickhouse-client' client binary found", file=sys.stderr)
|
||||
parser.print_help()
|
||||
exit(1)
|
||||
|
||||
|
@ -344,8 +344,6 @@
|
||||
"TABLE"
|
||||
"TABLES"
|
||||
"TEMPORARY"
|
||||
"timeSeriesGroupRateSum"
|
||||
"timeSeriesGroupSum"
|
||||
"TIMESTAMP"
|
||||
"TIMESTAMP_ADD"
|
||||
"TIMESTAMPADD"
|
||||
|
@ -13,7 +13,7 @@
|
||||
<host>zoo3</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<session_timeout_ms>2000</session_timeout_ms>
|
||||
<session_timeout_ms>20000</session_timeout_ms>
|
||||
</zookeeper>
|
||||
</yandex>
|
||||
|
@ -62,14 +62,16 @@ def test_reload_zookeeper(start_cluster):
|
||||
|
||||
## stop all zookeepers, table will be readonly
|
||||
cluster.stop_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
|
||||
node.query("SELECT COUNT() FROM test_table")
|
||||
with pytest.raises(QueryRuntimeException):
|
||||
node.query("SELECT COUNT() FROM test_table")
|
||||
node.query("SELECT COUNT() FROM test_table", settings={"select_sequential_consistency" : 1})
|
||||
|
||||
## start zoo2, zoo3, table will be readonly too, because it only connect to zoo1
|
||||
cluster.start_zookeeper_nodes(["zoo2", "zoo3"])
|
||||
wait_zookeeper_node_to_start(["zoo2", "zoo3"])
|
||||
node.query("SELECT COUNT() FROM test_table")
|
||||
with pytest.raises(QueryRuntimeException):
|
||||
node.query("SELECT COUNT() FROM test_table")
|
||||
node.query("SELECT COUNT() FROM test_table", settings={"select_sequential_consistency" : 1})
|
||||
|
||||
## set config to zoo2, server will be normal
|
||||
new_config = """
|
||||
|
38
tests/performance/avg_weighted.xml
Normal file
38
tests/performance/avg_weighted.xml
Normal file
@ -0,0 +1,38 @@
|
||||
<test>
|
||||
<settings>
|
||||
<allow_experimental_bigint_types>1</allow_experimental_bigint_types>
|
||||
<max_threads>1</max_threads>
|
||||
<max_insert_threads>8</max_insert_threads>
|
||||
</settings>
|
||||
|
||||
<preconditions>
|
||||
<table_exists>hits_100m_single</table_exists>
|
||||
</preconditions>
|
||||
|
||||
<create_query>CREATE TABLE perf_avg(
|
||||
num UInt64,
|
||||
num_u Decimal256(75) DEFAULT toDecimal256(num / 400000, 75),
|
||||
num_f Float64 DEFAULT num / 100
|
||||
) ENGINE = MergeTree() ORDER BY num
|
||||
</create_query>
|
||||
|
||||
<fill_query>
|
||||
INSERT INTO perf_avg(num)
|
||||
SELECT toUInt64(UserID / (WatchID + 1) * 1000000)
|
||||
FROM hits_100m_single
|
||||
LIMIT 50000000
|
||||
</fill_query>
|
||||
|
||||
<query>SELECT avg(num) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avg(2 * num) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avg(num_u) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avg(num_f) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avgWeighted(num_f, num) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avgWeighted(num_f, num_f) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avgWeighted(num_f, num_u) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avgWeighted(num_u, num_f) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avgWeighted(num_u, num) FROM perf_avg FORMAT Null</query>
|
||||
<query>SELECT avgWeighted(num_u, num_u) FROM perf_avg FORMAT Null</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS perf_avg</drop_query>
|
||||
</test>
|
@ -13,7 +13,6 @@
|
||||
<value>toMonday</value>
|
||||
<value>toRelativeDayNum</value>
|
||||
<value>toYYYYMMDDhhmmss</value>
|
||||
<value>toUnixTimestamp</value>
|
||||
</values>
|
||||
</substitution>
|
||||
<substitution>
|
||||
@ -33,8 +32,15 @@
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<!-- date_transform -->
|
||||
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {date_transform}(t, '{time_zone}'))</query>
|
||||
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t))</query>
|
||||
|
||||
<!-- toUnixTimestamp() -->
|
||||
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toUnixTimestamp(t, '{time_zone}'))</query>
|
||||
<!-- toUnixTimestamp(Date()) is prohibit, wrap Date() with toUInt16() to overcome -->
|
||||
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, toUnixTimestamp(toUInt16(t)))</query>
|
||||
|
||||
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1))</query>
|
||||
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month))</query>
|
||||
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, date_trunc('month', t))</query>
|
||||
|
89
tests/performance/ip_trie.xml
Normal file
89
tests/performance/ip_trie.xml
Normal file
@ -0,0 +1,89 @@
|
||||
<test>
|
||||
<create_query>
|
||||
CREATE TABLE table_ip_trie
|
||||
(
|
||||
ip String,
|
||||
ver UInt8,
|
||||
val Float32
|
||||
) ENGINE = TinyLog
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
INSERT INTO table_ip_trie
|
||||
SELECT
|
||||
IPv4NumToString(ipv4) || '/' || toString(rand() % 17 + 16) as ip,
|
||||
4 as ver,
|
||||
val
|
||||
FROM generateRandom('ipv4 UInt32, val Float32', 0, 30, 30)
|
||||
LIMIT 200000
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
INSERT INTO table_ip_trie
|
||||
SELECT
|
||||
IPv6NumToString(ipv6) || '/' || toString(rand() % 65 + 64) as ip,
|
||||
6 as ver,
|
||||
val
|
||||
FROM generateRandom('ipv6 FixedString(16), val Float32', 0, 30, 30)
|
||||
LIMIT 2500000
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE DICTIONARY dict_ip_trie
|
||||
(
|
||||
ip String,
|
||||
ver UInt8,
|
||||
val Float32
|
||||
)
|
||||
PRIMARY KEY ip
|
||||
SOURCE(CLICKHOUSE(DB 'default' TABLE 'table_ip_trie'))
|
||||
LAYOUT(IP_TRIE())
|
||||
LIFETIME(300)
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE dict_ip_trie_table
|
||||
(
|
||||
`ip` String,
|
||||
`ver` UInt8,
|
||||
`val` Float32
|
||||
) ENGINE = Dictionary(default.dict_ip_trie)
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE table_ip_from_dict (`ip` String, `ver` UInt8) ENGINE = TinyLog
|
||||
</create_query>
|
||||
<create_query>
|
||||
INSERT INTO table_ip_from_dict
|
||||
SELECT splitByChar('/', ip )[1] as ip, ver FROM dict_ip_trie_table
|
||||
</create_query>
|
||||
|
||||
<query>
|
||||
SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(rand32()))
|
||||
FROM numbers(500000) FORMAT Null
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(randomFixedString(16)))
|
||||
FROM numbers(500000) FORMAT Null
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(IPv6StringToNum(ip)))
|
||||
FROM table_ip_from_dict
|
||||
WHERE ver == 4
|
||||
LIMIT 500000 FORMAT Null
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(IPv6StringToNum(ip)))
|
||||
FROM table_ip_from_dict
|
||||
WHERE ver == 6
|
||||
LIMIT 500000 FORMAT Null
|
||||
</query>
|
||||
|
||||
<drop_query>DROP DICTIONARY IF EXISTS default.dict_ip_trie</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS table_ip_trie</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS dict_ip_trie_table</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS table_ip_from_dict</drop_query>
|
||||
</test>
|
@ -5,9 +5,6 @@
|
||||
-1275.0000 -424.99999983 -255.00000000 -1275.0000 -424.99999983 -255.00000000
|
||||
101.0000 101.00000000 101.00000000 101.0000 101.00000000 101.00000000
|
||||
-101.0000 -101.00000000 -101.00000000 -101.0000 -101.00000000 -101.00000000
|
||||
0.0000 0.00000000 0.00000000
|
||||
25.5000 8.49999999 5.10000000
|
||||
-25.5000 -8.49999999 -5.10000000
|
||||
(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101)
|
||||
5 5 5
|
||||
10 10 10
|
||||
|
@ -20,10 +20,6 @@ SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOv
|
||||
SELECT sum(a+1), sum(b+1), sum(c+1), sumWithOverflow(a+1), sumWithOverflow(b+1), sumWithOverflow(c+1) FROM decimal;
|
||||
SELECT sum(a-1), sum(b-1), sum(c-1), sumWithOverflow(a-1), sumWithOverflow(b-1), sumWithOverflow(c-1) FROM decimal;
|
||||
|
||||
SELECT avg(a), avg(b), avg(c) FROM decimal;
|
||||
SELECT avg(a), avg(b), avg(c) FROM decimal WHERE a > 0;
|
||||
SELECT avg(a), avg(b), avg(c) FROM decimal WHERE a < 0;
|
||||
|
||||
SELECT (uniq(a), uniq(b), uniq(c)),
|
||||
(uniqCombined(a), uniqCombined(b), uniqCombined(c)),
|
||||
(uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)),
|
||||
|
@ -5,9 +5,6 @@
|
||||
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
|
||||
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
|
||||
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
|
||||
0.0000 0.0000000 0.00000000 Decimal(9, 4) Decimal(18, 7) Decimal(38, 8)
|
||||
0.0000 0.0000000 0.00000000 Decimal(9, 4) Decimal(18, 7) Decimal(38, 8)
|
||||
0.0000 0.0000000 0.00000000 Decimal(9, 4) Decimal(18, 7) Decimal(38, 8)
|
||||
(0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0)
|
||||
0 0 0
|
||||
0 0 0
|
||||
|
@ -16,10 +16,6 @@ SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOv
|
||||
SELECT sum(a+1), sum(b+1), sum(c+1), sumWithOverflow(a+1), sumWithOverflow(b+1), sumWithOverflow(c+1) FROM decimal;
|
||||
SELECT sum(a-1), sum(b-1), sum(c-1), sumWithOverflow(a-1), sumWithOverflow(b-1), sumWithOverflow(c-1) FROM decimal;
|
||||
|
||||
SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal;
|
||||
SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal WHERE a > 0;
|
||||
SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal WHERE a < 0;
|
||||
|
||||
SELECT (uniq(a), uniq(b), uniq(c)),
|
||||
(uniqCombined(a), uniqCombined(b), uniqCombined(c)),
|
||||
(uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)),
|
||||
|
@ -1,2 +0,0 @@
|
||||
[(2,0.2),(3,0.8999999999999999),(7,2.0999999999999996),(8,2.4),(12,3.5999999999999996),(17,5.1000000000000005),(18,5.4),(24,7.199999999999999),(25,2.5)]
|
||||
[(2,0),(3,0.09999999999999999),(7,0.3),(8,0.30000000000000004),(12,0.29999999999999993),(17,0.30000000000000004),(18,0.30000000000000004),(24,0.29999999999999993),(25,0.1)]
|
@ -1,10 +0,0 @@
|
||||
drop table if exists tsgroupsum_test;
|
||||
|
||||
create table tsgroupsum_test (uid UInt64, ts Int64, value Float64) engine=Memory;
|
||||
insert into tsgroupsum_test values (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5);
|
||||
insert into tsgroupsum_test values (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
|
||||
|
||||
select timeSeriesGroupSum(uid, ts, value) from (select * from tsgroupsum_test order by ts asc);
|
||||
select timeSeriesGroupRateSum(uid, ts, value) from (select * from tsgroupsum_test order by ts asc);
|
||||
|
||||
drop table tsgroupsum_test;
|
@ -4,6 +4,6 @@
|
||||
[499500.00]
|
||||
[499500.00000]
|
||||
[499500.0000000000]
|
||||
1545081300 [('ed87e57c-9331-462a-80b4-9f0c005e88c8',0.4400)]
|
||||
1545081300 [('ed87e57c-9331-462a-80b4-9f0c005e88c8',0.44)]
|
||||
4341757 5657967 2018-11-01 16:47:46 txt 321.380000000000 315.080000000000 0.000000000000 2018-11-02 00:00:00
|
||||
4360430 5681495 2018-11-02 09:00:07 txt 274.350000000000 268.970000000000 0.000000000000 2018-11-02 00:00:00
|
||||
|
@ -53,7 +53,8 @@ Code: 43
|
||||
"UInt8",11
|
||||
------------------------------------------
|
||||
SELECT toUnixTimestamp(N)
|
||||
"UInt32",18155
|
||||
|
||||
Code: 44
|
||||
"UInt32",1568650811
|
||||
"UInt32",1568650811
|
||||
------------------------------------------
|
||||
|
@ -8,9 +8,6 @@
|
||||
0.42
|
||||
0.46
|
||||
0
|
||||
***ip trie dict***
|
||||
17501
|
||||
NP
|
||||
***hierarchy dict***
|
||||
Moscow
|
||||
[3,2,1,10000]
|
||||
|
@ -72,33 +72,6 @@ SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(1), toDateTime(
|
||||
SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-29 00:00:00'));
|
||||
SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-31 00:00:00'));
|
||||
|
||||
SELECT '***ip trie dict***';
|
||||
|
||||
CREATE TABLE database_for_dict.table_ip_trie
|
||||
(
|
||||
prefix String,
|
||||
asn UInt32,
|
||||
cca2 String
|
||||
)
|
||||
engine = TinyLog;
|
||||
|
||||
INSERT INTO database_for_dict.table_ip_trie VALUES ('202.79.32.0/20', 17501, 'NP'), ('2620:0:870::/48', 3856, 'US'), ('2a02:6b8:1::/48', 13238, 'RU'), ('2001:db8::/32', 65536, 'ZZ');
|
||||
|
||||
|
||||
CREATE DICTIONARY database_for_dict.dict_ip_trie
|
||||
(
|
||||
prefix String,
|
||||
asn UInt32,
|
||||
cca2 String
|
||||
)
|
||||
PRIMARY KEY prefix
|
||||
SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie'))
|
||||
LAYOUT(IP_TRIE())
|
||||
LIFETIME(MIN 10 MAX 100);
|
||||
|
||||
SELECT dictGetUInt32('database_for_dict.dict_ip_trie', 'asn', tuple(IPv4StringToNum('202.79.32.0')));
|
||||
SELECT dictGetString('database_for_dict.dict_ip_trie', 'cca2', tuple(IPv4StringToNum('202.79.32.0')));
|
||||
|
||||
SELECT '***hierarchy dict***';
|
||||
|
||||
CREATE TABLE database_for_dict.table_with_hierarchy
|
||||
|
@ -41,7 +41,7 @@ nan
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
0.00
|
||||
0
|
||||
\N
|
||||
0
|
||||
\N
|
||||
|
425
tests/queries/0_stateless/01018_ip_dictionary.reference
Normal file
425
tests/queries/0_stateless/01018_ip_dictionary.reference
Normal file
@ -0,0 +1,425 @@
|
||||
***ipv4 trie dict***
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
***ipv4 trie dict mask***
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
***ipv4 trie dict pt2***
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
***ipv6 trie dict***
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
***ipv6 trie dict mask***
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user