mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-23 10:10:50 +00:00
merged with master
This commit is contained in:
commit
df087a2e98
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
[submodule "doc/presentations"]
|
||||
path = doc/presentations
|
||||
url = https://github.com/yandex/clickhouse-presentations.git
|
@ -4,7 +4,7 @@ macro(add_glob cur_list)
|
||||
endmacro()
|
||||
|
||||
macro(add_headers_and_sources prefix common_path)
|
||||
add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h ${common_path}/*.inl)
|
||||
add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
|
||||
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.h)
|
||||
endmacro()
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
set (LTDL_PATHS "/usr/local/opt/libtool/lib")
|
||||
find_library (LTDL_LIB ltdl PATHSS ${LTDL_PATHS})
|
||||
find_library (LTDL_LIB ltdl PATHS ${LTDL_PATHS})
|
||||
message (STATUS "Using ltdl: ${LTDL_LIB}")
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -31,6 +31,7 @@ endif ()
|
||||
add_subdirectory (libcityhash)
|
||||
add_subdirectory (libfarmhash)
|
||||
add_subdirectory (libmetrohash)
|
||||
add_subdirectory (libbtrie)
|
||||
|
||||
if (USE_INTERNAL_ZLIB_LIBRARY)
|
||||
add_subdirectory (libzlib-ng)
|
||||
|
6
contrib/libbtrie/CMakeLists.txt
Normal file
6
contrib/libbtrie/CMakeLists.txt
Normal file
@ -0,0 +1,6 @@
|
||||
include_directories (BEFORE include)
|
||||
|
||||
add_library (btrie
|
||||
src/btrie.c
|
||||
include/btrie.h
|
||||
)
|
23
contrib/libbtrie/LICENSE
Normal file
23
contrib/libbtrie/LICENSE
Normal file
@ -0,0 +1,23 @@
|
||||
Copyright (c) 2013, CobbLiu
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
155
contrib/libbtrie/include/btrie.h
Normal file
155
contrib/libbtrie/include/btrie.h
Normal file
@ -0,0 +1,155 @@
|
||||
#pragma once
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* In btrie, each leaf means one bit in ip tree.
|
||||
* Left means 0, and right means 1.
|
||||
*/
|
||||
|
||||
/* Sentinel stored in a node's `value` field to mean "no value here".
 * Fully parenthesized so the cast stays intact inside any expression
 * (e.g. `sizeof BTRIE_NULL`, `x - BTRIE_NULL`). */
#define BTRIE_NULL ((uintptr_t) -1)
/* Capacity of btrie_t::pools[], i.e. the maximum number of pages a tree may
 * allocate. Parenthesized so `x / MAX_PAGES` and `x % MAX_PAGES` are correct. */
#define MAX_PAGES (1024 * 16)
|
||||
|
||||
typedef struct btrie_node_s btrie_node_t;
|
||||
|
||||
/* One node of the binary trie; each level below the root consumes one key bit. */
struct btrie_node_s {
|
||||
btrie_node_t *right; /* child followed when the key bit is 1; also links nodes on the tree's free list */
|
||||
btrie_node_t *left; /* child followed when the key bit is 0 */
|
||||
btrie_node_t *parent; /* node one level up; NULL for the root */
|
||||
uintptr_t value; /* user value, or BTRIE_NULL when no prefix ends at this node */
|
||||
};
|
||||
|
||||
|
||||
/* A trie together with the page-based allocator its nodes come from. */
typedef struct btrie_s {
|
||||
btrie_node_t *root; /* top of the trie; allocated by btrie_create() */
|
||||
|
||||
btrie_node_t *free; /* free list of btrie: nodes released by delete, chained through `right` */
|
||||
char *start; /* next unused byte in the page currently being carved into nodes */
|
||||
size_t size; /* bytes still unused in that page */
|
||||
|
||||
/*
 * Memory pool: every page ever allocated for this tree is recorded here,
 * so destroying the tree only has to free each recorded page.
 */
|
||||
char *pools[MAX_PAGES]; /* allocated pages, in allocation order */
|
||||
size_t len; /* number of entries of pools[] in use */
|
||||
} btrie_t;
|
||||
|
||||
|
||||
/**
|
||||
* Create an empty btrie
|
||||
*
|
||||
* @Return:
|
||||
* An ip radix_tree created.
|
||||
* NULL if creation failed.
|
||||
*/
|
||||
|
||||
btrie_t *btrie_create();
|
||||
|
||||
/**
|
||||
* Destroy the ip radix_tree
|
||||
*
|
||||
* @Return:
|
||||
* 0 on success.
|
||||
* The implementation has no failure path and always returns 0.
|
||||
*/
|
||||
int btrie_destroy(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Count the nodes in the radix tree.
|
||||
*/
|
||||
size_t btrie_count(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Return the allocated number of bytes.
|
||||
*/
|
||||
size_t btrie_allocated(btrie_t *tree);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv4 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* 0 on success.
|
||||
* -1 on failure (the prefix already has a value, or a node could not be allocated).
|
||||
*/
|
||||
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value);
|
||||
|
||||
|
||||
/**
|
||||
* Delete an ipv4 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* 0 on success.
|
||||
* -1 on failure (the prefix is not stored in the tree).
|
||||
*/
|
||||
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
|
||||
|
||||
|
||||
/**
|
||||
* Find an ipv4 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* The value of the longest stored prefix that matches the key.
|
||||
* BTRIE_NULL if no stored prefix matches.
|
||||
*/
|
||||
uintptr_t btrie_find(btrie_t *tree, uint32_t key);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv6 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* 0 on success.
|
||||
* -1 on failure (the prefix already has a value, or a node could not be allocated).
|
||||
*/
|
||||
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value);
|
||||
|
||||
/**
|
||||
* Delete an ipv6 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* 0 on success.
|
||||
* -1 on failure (the prefix is not stored in the tree).
|
||||
*/
|
||||
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
|
||||
|
||||
/**
|
||||
* Find an ipv6 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* The value of the longest stored prefix that matches the key.
|
||||
* BTRIE_NULL if no stored prefix matches.
|
||||
*/
|
||||
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
460
contrib/libbtrie/src/btrie.c
Normal file
460
contrib/libbtrie/src/btrie.c
Normal file
@ -0,0 +1,460 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <btrie.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
|
||||
static btrie_node_t *
|
||||
btrie_alloc(btrie_t *tree)
|
||||
{
|
||||
btrie_node_t *p;
|
||||
|
||||
if (tree->free) {
|
||||
p = tree->free;
|
||||
tree->free = tree->free->right;
|
||||
return p;
|
||||
}
|
||||
|
||||
if (tree->size < sizeof(btrie_node_t)) {
|
||||
tree->start = (char *) calloc(sizeof(char), PAGE_SIZE);
|
||||
if (tree->start == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->pools[tree->len++] = tree->start;
|
||||
tree->size = PAGE_SIZE;
|
||||
}
|
||||
|
||||
p = (btrie_node_t *) tree->start;
|
||||
|
||||
tree->start += sizeof(btrie_node_t);
|
||||
tree->size -= sizeof(btrie_node_t);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
btrie_t *
|
||||
btrie_create()
|
||||
{
|
||||
btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
|
||||
if (tree == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->free = NULL;
|
||||
tree->start = NULL;
|
||||
tree->size = 0;
|
||||
memset(tree->pools, 0, sizeof(btrie_t *) * MAX_PAGES);
|
||||
tree->len = 0;
|
||||
|
||||
tree->root = btrie_alloc(tree);
|
||||
if (tree->root == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->root->right = NULL;
|
||||
tree->root->left = NULL;
|
||||
tree->root->parent = NULL;
|
||||
tree->root->value = BTRIE_NULL;
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
static size_t
|
||||
subtree_weight(btrie_node_t *node)
|
||||
{
|
||||
size_t weight = 1;
|
||||
if (node->left) {
|
||||
weight += subtree_weight(node->left);
|
||||
}
|
||||
if (node->right) {
|
||||
weight += subtree_weight(node->right);
|
||||
}
|
||||
return weight;
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_count(btrie_t *tree)
|
||||
{
|
||||
if (tree->root == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return subtree_weight(tree->root);
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_allocated(btrie_t *tree)
|
||||
{
|
||||
return tree->len * PAGE_SIZE;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
bit = 0x80000000;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask) {
|
||||
if (key & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask)) {
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find(btrie_t *tree, uint32_t key)
|
||||
{
|
||||
uint32_t bit;
|
||||
uintptr_t value;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint8_t bit;
|
||||
uint i;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask[i]) {
|
||||
if (key[i] & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask[i]) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key[i] & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
|
||||
{
|
||||
uint8_t bit;
|
||||
uint i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask[i])) {
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find_a6(btrie_t *tree, const uint8_t *key)
|
||||
{
|
||||
uint8_t bit;
|
||||
uintptr_t value;
|
||||
uint i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
i++;
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_destroy(btrie_t *tree)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
||||
/* free memory pools */
|
||||
for (i = 0; i < tree->len; i++) {
|
||||
free(tree->pools[i]);
|
||||
}
|
||||
|
||||
free(tree);
|
||||
|
||||
return 0;
|
||||
}
|
94
contrib/libbtrie/test/test_btrie.c
Normal file
94
contrib/libbtrie/test/test_btrie.c
Normal file
@ -0,0 +1,94 @@
|
||||
#include <stdio.h>
|
||||
#include <btrie.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
btrie_t *it;
|
||||
int ret;
|
||||
|
||||
uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
|
||||
uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
|
||||
uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};
|
||||
|
||||
it = btrie_create();
|
||||
if (it == NULL) {
|
||||
printf("create error!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
//add 101.45.69.50/16
|
||||
ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 1 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.69.50/16
|
||||
ret = btrie_insert(it, 170738994, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 2 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.79.50/16
|
||||
ret = btrie_insert(it, 170741554, 0xffff0000, 1);
|
||||
if (ret == 0) {
|
||||
printf("insert 3 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 102.45.79.50/24
|
||||
ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 4 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170741554);
|
||||
if (ret == 1) {
|
||||
printf("test case 1 passed\n");
|
||||
} else {
|
||||
printf("test case 1 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170786817);
|
||||
if (ret != 1) {
|
||||
printf("test case 2 passed\n");
|
||||
} else {
|
||||
printf("test case 2 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_delete(it, 1714245426, 0xffffff00);
|
||||
if (ret != 0) {
|
||||
printf("delete 1 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 1714245426);
|
||||
if (ret != 1) {
|
||||
printf("test case 3 passed\n");
|
||||
} else {
|
||||
printf("test case 3 error\n");
|
||||
}
|
||||
|
||||
//add dead:beef::/32
|
||||
ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find_a6(it, ip_v6);
|
||||
if (ret == 1) {
|
||||
printf("test case 4 passed\n");
|
||||
} else {
|
||||
printf("test case 4 error\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
btrie_destroy(it);
|
||||
printf("test failed\n");
|
||||
return 1;
|
||||
}
|
@ -5,4 +5,6 @@ add_library (lz4
|
||||
src/lz4hc.c
|
||||
|
||||
include/lz4/lz4.h
|
||||
include/lz4/lz4hc.h)
|
||||
include/lz4/lz4hc.h
|
||||
include/lz4/lz4opt.h)
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
LZ4 - Fast LZ compression algorithm
|
||||
Header File
|
||||
Copyright (C) 2011-2015, Yann Collet.
|
||||
* LZ4 - Fast LZ compression algorithm
|
||||
* Header File
|
||||
* Copyright (C) 2011-2016, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
@ -29,34 +29,79 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 source repository : https://github.com/Cyan4973/lz4
|
||||
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
||||
- LZ4 homepage : http://www.lz4.org
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef LZ4_H_2983827168210
|
||||
#define LZ4_H_2983827168210
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* lz4.h provides block compression functions, and gives full buffer control to programmer.
|
||||
* If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
|
||||
* and can let the library handle its own memory, please use lz4frame.h instead.
|
||||
/* --- Dependency --- */
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
/**
|
||||
Introduction
|
||||
|
||||
LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core,
|
||||
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
|
||||
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
|
||||
|
||||
The LZ4 compression library provides in-memory compression and decompression functions.
|
||||
Compression can be done in:
|
||||
- a single step (described as Simple Functions)
|
||||
- a single step, reusing a context (described in Advanced Functions)
|
||||
- unbounded multiple steps (described as Streaming compression)
|
||||
|
||||
lz4.h provides block compression functions. It gives full buffer control to user.
|
||||
Decompressing an lz4-compressed block also requires metadata (such as compressed size).
|
||||
Each application is free to encode such metadata in whichever way it wants.
|
||||
|
||||
An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
|
||||
take care of encoding standard metadata alongside LZ4-compressed blocks.
|
||||
If your application requires interoperability, it's recommended to use it.
|
||||
A library is provided to take care of it, see lz4frame.h.
|
||||
*/
|
||||
|
||||
/**************************************
|
||||
* Version
|
||||
**************************************/
|
||||
/*^***************************************************************
|
||||
* Export parameters
|
||||
*****************************************************************/
|
||||
/*
|
||||
* LZ4_DLL_EXPORT :
|
||||
* Enable exporting of functions when building a Windows DLL
|
||||
*/
|
||||
#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
|
||||
# define LZ4LIB_API __declspec(dllexport)
|
||||
#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
|
||||
# define LZ4LIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
#else
|
||||
# define LZ4LIB_API
|
||||
#endif
|
||||
|
||||
|
||||
/*========== Version =========== */
|
||||
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
|
||||
#define LZ4_VERSION_MINOR 7 /* for new (non-breaking) interface capabilities */
|
||||
#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
|
||||
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
|
||||
int LZ4_versionNumber (void);
|
||||
#define LZ4_VERSION_RELEASE 5 /* for tweaks, bug-fixes, or development */
|
||||
|
||||
/**************************************
|
||||
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
|
||||
|
||||
#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
|
||||
#define LZ4_QUOTE(str) #str
|
||||
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
|
||||
#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
|
||||
|
||||
LZ4LIB_API int LZ4_versionNumber (void);
|
||||
LZ4LIB_API const char* LZ4_versionString (void);
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Tuning parameter
|
||||
**************************************/
|
||||
/*
|
||||
/*!
|
||||
* LZ4_MEMORY_USAGE :
|
||||
* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
|
||||
* Increasing memory usage improves compression ratio
|
||||
@ -66,15 +111,10 @@ int LZ4_versionNumber (void);
|
||||
#define LZ4_MEMORY_USAGE 14
|
||||
|
||||
|
||||
/**************************************
|
||||
/*-************************************
|
||||
* Simple Functions
|
||||
**************************************/
|
||||
|
||||
int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
|
||||
int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
|
||||
|
||||
/*
|
||||
LZ4_compress_default() :
|
||||
/*! LZ4_compress_default() :
|
||||
Compresses 'sourceSize' bytes from buffer 'source'
|
||||
into already allocated 'dest' buffer of size 'maxDestSize'.
|
||||
Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
|
||||
@ -86,9 +126,10 @@ LZ4_compress_default() :
|
||||
sourceSize : Max supported value is LZ4_MAX_INPUT_VALUE
|
||||
maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
|
||||
return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
|
||||
or 0 if compression fails
|
||||
or 0 if compression fails */
|
||||
LZ4LIB_API int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
|
||||
|
||||
LZ4_decompress_safe() :
|
||||
/*! LZ4_decompress_safe() :
|
||||
compressedSize : is the precise full size of the compressed block.
|
||||
maxDecompressedSize : is the size of destination buffer, which must be already allocated.
|
||||
return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
|
||||
@ -97,15 +138,16 @@ LZ4_decompress_safe() :
|
||||
This function is protected against buffer overflow exploits, including malicious data packets.
|
||||
It never writes outside output buffer, nor reads outside input buffer.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
|
||||
|
||||
|
||||
/**************************************
|
||||
/*-************************************
|
||||
* Advanced Functions
|
||||
**************************************/
|
||||
#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
|
||||
#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
|
||||
|
||||
/*
|
||||
/*!
|
||||
LZ4_compressBound() :
|
||||
Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
|
||||
This function is primarily useful for memory allocation purposes (destination buffer size).
|
||||
@ -115,9 +157,9 @@ LZ4_compressBound() :
|
||||
return : maximum output size in a "worst case" scenario
|
||||
or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
|
||||
*/
|
||||
int LZ4_compressBound(int inputSize);
|
||||
LZ4LIB_API int LZ4_compressBound(int inputSize);
|
||||
|
||||
/*
|
||||
/*!
|
||||
LZ4_compress_fast() :
|
||||
Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
|
||||
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
|
||||
@ -125,21 +167,21 @@ LZ4_compress_fast() :
|
||||
An acceleration value of "1" is the same as regular LZ4_compress_default()
|
||||
Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
|
||||
*/
|
||||
int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
|
||||
LZ4LIB_API int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
|
||||
|
||||
|
||||
/*
|
||||
/*!
|
||||
LZ4_compress_fast_extState() :
|
||||
Same compression function, just using an externally allocated memory space to store compression state.
|
||||
Use LZ4_sizeofState() to know how much memory must be allocated,
|
||||
and allocate it on 8-bytes boundaries (using malloc() typically).
|
||||
Then, provide it as 'void* state' to compression function.
|
||||
*/
|
||||
int LZ4_sizeofState(void);
|
||||
int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
|
||||
LZ4LIB_API int LZ4_sizeofState(void);
|
||||
LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
|
||||
|
||||
|
||||
/*
|
||||
/*!
|
||||
LZ4_compress_destSize() :
|
||||
Reverse the logic, by compressing as much data as possible from 'source' buffer
|
||||
into already allocated buffer 'dest' of size 'targetDestSize'.
|
||||
@ -150,10 +192,10 @@ LZ4_compress_destSize() :
|
||||
return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
|
||||
or 0 if compression fails
|
||||
*/
|
||||
int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
|
||||
LZ4LIB_API int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
|
||||
|
||||
|
||||
/*
|
||||
/*!
|
||||
LZ4_decompress_fast() :
|
||||
originalSize : is the original and therefore uncompressed size
|
||||
return : the number of bytes read from the source buffer (in other words, the compressed size)
|
||||
@ -164,9 +206,9 @@ LZ4_decompress_fast() :
|
||||
However, it does not provide any protection against intentionally modified data stream (malicious input).
|
||||
Use this function in trusted environment only (data to decode comes from a trusted source).
|
||||
*/
|
||||
int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
|
||||
LZ4LIB_API int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
|
||||
|
||||
/*
|
||||
/*!
|
||||
LZ4_decompress_safe_partial() :
|
||||
This function decompress a compressed block of size 'compressedSize' at position 'source'
|
||||
into destination buffer 'dest' of size 'maxDecompressedSize'.
|
||||
@ -178,98 +220,73 @@ LZ4_decompress_safe_partial() :
|
||||
If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
|
||||
*/
|
||||
int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
|
||||
LZ4LIB_API int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
|
||||
|
||||
|
||||
/***********************************************
|
||||
/*-*********************************************
|
||||
* Streaming Compression Functions
|
||||
***********************************************/
|
||||
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
|
||||
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(long long))
|
||||
/*
|
||||
* LZ4_stream_t
|
||||
* information structure to track an LZ4 stream.
|
||||
* important : init this structure content before first use !
|
||||
* note : only allocated directly the structure if you are statically linking LZ4
|
||||
* If you are using liblz4 as a DLL, please use below construction methods instead.
|
||||
*/
|
||||
typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
|
||||
typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
|
||||
|
||||
/*
|
||||
* LZ4_resetStream
|
||||
* Use this function to init an allocated LZ4_stream_t structure
|
||||
/*! LZ4_createStream() and LZ4_freeStream() :
|
||||
* LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
|
||||
* LZ4_freeStream() releases its memory.
|
||||
*/
|
||||
void LZ4_resetStream (LZ4_stream_t* streamPtr);
|
||||
LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
|
||||
LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
|
||||
|
||||
/*
|
||||
* LZ4_createStream will allocate and initialize an LZ4_stream_t structure
|
||||
* LZ4_freeStream releases its memory.
|
||||
* In the context of a DLL (liblz4), please use these methods rather than the static struct.
|
||||
* They are more future proof, in case of a change of LZ4_stream_t size.
|
||||
/*! LZ4_resetStream() :
|
||||
* An LZ4_stream_t structure can be allocated once and re-used multiple times.
|
||||
* Use this function to init an allocated `LZ4_stream_t` structure and start a new compression.
|
||||
*/
|
||||
LZ4_stream_t* LZ4_createStream(void);
|
||||
int LZ4_freeStream (LZ4_stream_t* streamPtr);
|
||||
LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
|
||||
|
||||
/*
|
||||
* LZ4_loadDict
|
||||
/*! LZ4_loadDict() :
|
||||
* Use this function to load a static dictionary into LZ4_stream.
|
||||
* Any previous data will be forgotten, only 'dictionary' will remain in memory.
|
||||
* Loading a size of 0 is allowed.
|
||||
* Return : dictionary size, in bytes (necessarily <= 64 KB)
|
||||
*/
|
||||
int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
|
||||
LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
|
||||
|
||||
/*
|
||||
* LZ4_compress_fast_continue
|
||||
/*! LZ4_compress_fast_continue() :
|
||||
* Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
|
||||
* Important : Previous data blocks are assumed to still be present and unmodified !
|
||||
* 'dst' buffer must be already allocated.
|
||||
* If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
|
||||
* If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
|
||||
*/
|
||||
int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
|
||||
LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
|
||||
|
||||
/*
|
||||
* LZ4_saveDict
|
||||
* If previously compressed data block is not guaranteed to remain available at its memory location
|
||||
* save it into a safer place (char* safeBuffer)
|
||||
/*! LZ4_saveDict() :
|
||||
* If previously compressed data block is not guaranteed to remain available at its memory location,
|
||||
* save it into a safer place (char* safeBuffer).
|
||||
* Note : you don't need to call LZ4_loadDict() afterwards,
|
||||
* dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
|
||||
* Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
|
||||
* dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue().
|
||||
* Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
|
||||
*/
|
||||
int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
|
||||
LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
|
||||
|
||||
|
||||
/************************************************
|
||||
/*-**********************************************
|
||||
* Streaming Decompression Functions
|
||||
* Bufferless synchronous API
|
||||
************************************************/
|
||||
typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* incomplete type (defined later) */
|
||||
|
||||
#define LZ4_STREAMDECODESIZE_U64 4
|
||||
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
|
||||
typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
|
||||
/*
|
||||
* LZ4_streamDecode_t
|
||||
* information structure to track an LZ4 stream.
|
||||
* init this structure content using LZ4_setStreamDecode or memset() before first use !
|
||||
*
|
||||
* In the context of a DLL (liblz4) please prefer usage of construction methods below.
|
||||
* They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
|
||||
* LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
|
||||
* LZ4_freeStreamDecode releases its memory.
|
||||
*/
|
||||
LZ4_streamDecode_t* LZ4_createStreamDecode(void);
|
||||
int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
|
||||
/* creation / destruction of streaming decompression tracking structure */
|
||||
LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
|
||||
LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
|
||||
|
||||
/*
|
||||
* LZ4_setStreamDecode
|
||||
/*! LZ4_setStreamDecode() :
|
||||
* Use this function to instruct where to find the dictionary.
|
||||
* Setting a size of 0 is allowed (same effect as reset).
|
||||
* Return : 1 if OK, 0 if error
|
||||
* @return : 1 if OK, 0 if error
|
||||
*/
|
||||
int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
|
||||
LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
|
||||
|
||||
/*
|
||||
*_continue() :
|
||||
/*!
|
||||
LZ4_decompress_*_continue() :
|
||||
These decoding functions allow decompression of multiple blocks in "streaming" mode.
|
||||
Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
|
||||
In the case of ring buffers, decoding buffer must be either :
|
||||
@ -285,35 +302,120 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
|
||||
Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
|
||||
and indicate where it is saved using LZ4_setStreamDecode()
|
||||
*/
|
||||
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
|
||||
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
|
||||
LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
|
||||
LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
|
||||
|
||||
|
||||
/*
|
||||
Advanced decoding functions :
|
||||
*_usingDict() :
|
||||
These decoding functions work the same as
|
||||
a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
|
||||
They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
|
||||
/*! LZ4_decompress_*_usingDict() :
|
||||
* These decoding functions work the same as
|
||||
* a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
|
||||
* They are stand-alone, and don't need an LZ4_streamDecode_t structure.
|
||||
*/
|
||||
int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
|
||||
int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
|
||||
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
|
||||
LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
|
||||
|
||||
|
||||
/*^**********************************************
|
||||
* !!!!!! STATIC LINKING ONLY !!!!!!
|
||||
***********************************************/
|
||||
/*-************************************
|
||||
* Private definitions
|
||||
**************************************
|
||||
* Do not use these definitions.
|
||||
* They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
|
||||
* Using these definitions will expose code to API and/or ABI break in future versions of the library.
|
||||
**************************************/
|
||||
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
|
||||
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
|
||||
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
|
||||
|
||||
/**************************************
|
||||
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct {
|
||||
uint32_t hashTable[LZ4_HASH_SIZE_U32];
|
||||
uint32_t currentOffset;
|
||||
uint32_t initCheck;
|
||||
const uint8_t* dictionary;
|
||||
uint8_t* bufferStart; /* obsolete, used for slideInputBuffer */
|
||||
uint32_t dictSize;
|
||||
} LZ4_stream_t_internal;
|
||||
|
||||
typedef struct {
|
||||
const uint8_t* externalDict;
|
||||
size_t extDictSize;
|
||||
const uint8_t* prefixEnd;
|
||||
size_t prefixSize;
|
||||
} LZ4_streamDecode_t_internal;
|
||||
|
||||
#else
|
||||
|
||||
typedef struct {
|
||||
unsigned int hashTable[LZ4_HASH_SIZE_U32];
|
||||
unsigned int currentOffset;
|
||||
unsigned int initCheck;
|
||||
const unsigned char* dictionary;
|
||||
unsigned char* bufferStart; /* obsolete, used for slideInputBuffer */
|
||||
unsigned int dictSize;
|
||||
} LZ4_stream_t_internal;
|
||||
|
||||
typedef struct {
|
||||
const unsigned char* externalDict;
|
||||
size_t extDictSize;
|
||||
const unsigned char* prefixEnd;
|
||||
size_t prefixSize;
|
||||
} LZ4_streamDecode_t_internal;
|
||||
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* LZ4_stream_t :
|
||||
* information structure to track an LZ4 stream.
|
||||
* init this structure before first use.
|
||||
* note : only use in association with static linking !
|
||||
* this definition is not API/ABI safe,
|
||||
* and may change in a future version !
|
||||
*/
|
||||
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
|
||||
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
|
||||
union LZ4_stream_u {
|
||||
unsigned long long table[LZ4_STREAMSIZE_U64];
|
||||
LZ4_stream_t_internal internal_donotuse;
|
||||
} ; /* previously typedef'd to LZ4_stream_t */
|
||||
|
||||
|
||||
/*!
|
||||
* LZ4_streamDecode_t :
|
||||
* information structure to track an LZ4 stream during decompression.
|
||||
* init this structure using LZ4_setStreamDecode (or memset()) before first use
|
||||
* note : only use in association with static linking !
|
||||
* this definition is not API/ABI safe,
|
||||
* and may change in a future version !
|
||||
*/
|
||||
#define LZ4_STREAMDECODESIZE_U64 4
|
||||
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
|
||||
union LZ4_streamDecode_u {
|
||||
unsigned long long table[LZ4_STREAMDECODESIZE_U64];
|
||||
LZ4_streamDecode_t_internal internal_donotuse;
|
||||
} ; /* previously typedef'd to LZ4_streamDecode_t */
|
||||
|
||||
|
||||
/*=************************************
|
||||
* Obsolete Functions
|
||||
**************************************/
|
||||
/* Deprecate Warnings */
|
||||
/* Should these warnings messages be a problem,
|
||||
/* Deprecation warnings */
|
||||
/* Should these warnings be a problem,
|
||||
it is generally possible to disable them,
|
||||
with -Wno-deprecated-declarations for gcc
|
||||
or _CRT_SECURE_NO_WARNINGS in Visual for example.
|
||||
You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
|
||||
#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
|
||||
# define LZ4_DEPRECATE_WARNING_DEFBLOCK
|
||||
typically with -Wno-deprecated-declarations for gcc
|
||||
or _CRT_SECURE_NO_WARNINGS in Visual.
|
||||
Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
|
||||
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
|
||||
# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
|
||||
#else
|
||||
# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
# if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
|
||||
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
||||
# define LZ4_DEPRECATED(message) [[deprecated(message)]]
|
||||
# elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
# elif (LZ4_GCC_VERSION >= 301)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
|
||||
@ -323,20 +425,19 @@ int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalS
|
||||
# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
|
||||
# define LZ4_DEPRECATED(message)
|
||||
# endif
|
||||
#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */
|
||||
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
|
||||
|
||||
/* Obsolete compression functions */
|
||||
/* These functions are planned to start generate warnings by r131 approximately */
|
||||
int LZ4_compress (const char* source, char* dest, int sourceSize);
|
||||
int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
|
||||
int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
|
||||
int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
|
||||
int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress (const char* source, char* dest, int sourceSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
|
||||
/* Obsolete decompression functions */
|
||||
/* These function names are completely deprecated and must no longer be used.
|
||||
They are only provided here for compatibility with older programs.
|
||||
They are only provided in lz4.c for compatibility with older programs.
|
||||
- LZ4_uncompress is the same as LZ4_decompress_fast
|
||||
- LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
|
||||
These function prototypes are now disabled; uncomment them only if you really need them.
|
||||
@ -358,3 +459,5 @@ LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LZ4_H_2983827168210 */
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
LZ4 HC - High Compression Mode of LZ4
|
||||
Header File
|
||||
Copyright (C) 2011-2015, Yann Collet.
|
||||
Copyright (C) 2011-2016, Yann Collet.
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -28,107 +28,92 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 source repository : https://github.com/Cyan4973/lz4
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#ifndef LZ4_HC_H_19834876238432
|
||||
#define LZ4_HC_H_19834876238432
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*****************************
|
||||
* Includes
|
||||
*****************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
/* --- Dependency --- */
|
||||
/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
|
||||
#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
|
||||
|
||||
|
||||
/**************************************
|
||||
/* --- Useful constants --- */
|
||||
#define LZ4HC_CLEVEL_MIN 3
|
||||
#define LZ4HC_CLEVEL_DEFAULT 9
|
||||
#define LZ4HC_CLEVEL_OPT_MIN 11
|
||||
#define LZ4HC_CLEVEL_MAX 12
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Block Compression
|
||||
**************************************/
|
||||
int LZ4_compress_HC (const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
||||
/*
|
||||
LZ4_compress_HC :
|
||||
Destination buffer 'dst' must be already allocated.
|
||||
Compression completion is guaranteed if 'dst' buffer is sized to handle worst circumstances (data not compressible)
|
||||
Worst size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
|
||||
srcSize : Max supported value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
|
||||
compressionLevel : Recommended values are between 4 and 9, although any value between 0 and 16 will work.
|
||||
0 means "use default value" (see lz4hc.c).
|
||||
Values >16 behave the same as 16.
|
||||
return : the number of bytes written into buffer 'dst'
|
||||
or 0 if compression fails.
|
||||
/*! LZ4_compress_HC() :
|
||||
* Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
|
||||
* `dst` must be already allocated.
|
||||
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
|
||||
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
|
||||
* `compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_CLEVEL_MAX will work.
|
||||
* Values >LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
||||
* @return : the number of bytes written into 'dst'
|
||||
* or 0 if compression fails.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
|
||||
|
||||
|
||||
/* Note :
|
||||
Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
|
||||
* Decompression functions are provided within "lz4.h" (BSD license)
|
||||
*/
|
||||
|
||||
|
||||
int LZ4_sizeofStateHC(void);
|
||||
int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
||||
/*
|
||||
LZ4_compress_HC_extStateHC() :
|
||||
Use this function if you prefer to manually allocate memory for compression tables.
|
||||
To know how much memory must be allocated for the compression tables, use :
|
||||
int LZ4_sizeofStateHC();
|
||||
|
||||
Allocated memory must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
|
||||
|
||||
The allocated memory can then be provided to the compression functions using 'void* state' parameter.
|
||||
LZ4_compress_HC_extStateHC() is equivalent to previously described function.
|
||||
It just uses externally allocated memory for stateHC.
|
||||
/*! LZ4_compress_HC_extStateHC() :
|
||||
* Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
|
||||
* `state` size is provided by LZ4_sizeofStateHC().
|
||||
* Memory segment must be aligned on 8-byte boundaries (which a normal malloc() will do properly).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
||||
LZ4LIB_API int LZ4_sizeofStateHC(void);
|
||||
|
||||
|
||||
/**************************************
|
||||
/*-************************************
|
||||
* Streaming Compression
|
||||
* Bufferless synchronous API
|
||||
**************************************/
|
||||
#define LZ4_STREAMHCSIZE 262192
|
||||
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
|
||||
typedef struct { size_t table[LZ4_STREAMHCSIZE_SIZET]; } LZ4_streamHC_t;
|
||||
/*
|
||||
LZ4_streamHC_t
|
||||
This structure allows static allocation of LZ4 HC streaming state.
|
||||
State must then be initialized using LZ4_resetStreamHC() before first use.
|
||||
typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
|
||||
|
||||
Static allocation should only be used in combination with static linking.
|
||||
If you want to use LZ4 as a DLL, please use construction functions below, which are future-proof.
|
||||
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
|
||||
* These functions create and release memory for LZ4 HC streaming state.
|
||||
* Newly created states are automatically initialized.
|
||||
* Existing states can be re-used several times, using LZ4_resetStreamHC().
|
||||
* These methods are API and ABI stable, they can be used in combination with a DLL.
|
||||
*/
|
||||
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
|
||||
LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
|
||||
|
||||
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
|
||||
LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
|
||||
|
||||
LZ4_streamHC_t* LZ4_createStreamHC(void);
|
||||
int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
|
||||
/*
|
||||
These functions create and release memory for LZ4 HC streaming state.
|
||||
Newly created states are already initialized.
|
||||
Existing state space can be re-used anytime using LZ4_resetStreamHC().
|
||||
If you use LZ4 as a DLL, use these functions instead of static structure allocation,
|
||||
to avoid size mismatch between different versions.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
|
||||
|
||||
void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
|
||||
int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
|
||||
|
||||
int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
|
||||
|
||||
int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
|
||||
LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
|
||||
|
||||
/*
|
||||
These functions compress data in successive blocks of any size, using previous blocks as dictionary.
|
||||
One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
|
||||
There is an exception for ring buffers, which can be smaller 64 KB.
|
||||
Such case is automatically detected and correctly handled by LZ4_compress_HC_continue().
|
||||
There is an exception for ring buffers, which can be smaller than 64 KB.
|
||||
Ring buffers scenario is automatically detected and handled by LZ4_compress_HC_continue().
|
||||
|
||||
Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
|
||||
A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
|
||||
|
||||
Then, use LZ4_compress_HC_continue() to compress each successive block.
|
||||
It works like LZ4_compress_HC(), but uses previous memory blocks as dictionary to improve compression.
|
||||
Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
|
||||
As a reminder, size 'dst' buffer to handle worst cases, using LZ4_compressBound(), to ensure success of compression operation.
|
||||
'dst' buffer should be sized to handle worst case scenarios, using LZ4_compressBound(), to ensure operation success.
|
||||
|
||||
If, for any reason, previous data blocks can't be preserved unmodified in memory during next compression block,
|
||||
you must save it to a safer memory space, using LZ4_saveDictHC().
|
||||
@ -136,45 +121,97 @@ int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSi
|
||||
*/
|
||||
|
||||
|
||||
/*-******************************************
|
||||
* !!!!! STATIC LINKING ONLY !!!!!
|
||||
*******************************************/
|
||||
|
||||
/**************************************
|
||||
/*-*************************************
|
||||
* PRIVATE DEFINITIONS :
|
||||
* Do not use these definitions.
|
||||
* They are exposed to allow static allocation of `LZ4_streamHC_t`.
|
||||
* Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
|
||||
**************************************/
|
||||
#define LZ4HC_DICTIONARY_LOGSIZE 17
|
||||
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
|
||||
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
|
||||
|
||||
#define LZ4HC_HASH_LOG 15
|
||||
#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
|
||||
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
|
||||
|
||||
|
||||
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t hashTable[LZ4HC_HASHTABLESIZE];
|
||||
uint16_t chainTable[LZ4HC_MAXD];
|
||||
const uint8_t* end; /* next block here to continue on current prefix */
|
||||
const uint8_t* base; /* All index relative to this position */
|
||||
const uint8_t* dictBase; /* alternate base for extDict */
|
||||
uint8_t* inputBuffer; /* deprecated */
|
||||
uint32_t dictLimit; /* below that point, need extDict */
|
||||
uint32_t lowLimit; /* below that point, no more dict */
|
||||
uint32_t nextToUpdate; /* index from which to continue dictionary update */
|
||||
uint32_t searchNum; /* only for optimal parser */
|
||||
uint32_t compressionLevel;
|
||||
} LZ4HC_CCtx_internal;
|
||||
|
||||
#else
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int hashTable[LZ4HC_HASHTABLESIZE];
|
||||
unsigned short chainTable[LZ4HC_MAXD];
|
||||
const unsigned char* end; /* next block here to continue on current prefix */
|
||||
const unsigned char* base; /* All index relative to this position */
|
||||
const unsigned char* dictBase; /* alternate base for extDict */
|
||||
unsigned char* inputBuffer; /* deprecated */
|
||||
unsigned int dictLimit; /* below that point, need extDict */
|
||||
unsigned int lowLimit; /* below that point, no more dict */
|
||||
unsigned int nextToUpdate; /* index from which to continue dictionary update */
|
||||
unsigned int searchNum; /* only for optimal parser */
|
||||
unsigned int compressionLevel;
|
||||
} LZ4HC_CCtx_internal;
|
||||
|
||||
#endif
|
||||
|
||||
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 393272 : 4*32768 + 2*131072 + 56 */
|
||||
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
|
||||
union LZ4_streamHC_u {
|
||||
size_t table[LZ4_STREAMHCSIZE_SIZET];
|
||||
LZ4HC_CCtx_internal internal_donotuse;
|
||||
}; /* previously typedef'd to LZ4_streamHC_t */
|
||||
/*
|
||||
LZ4_streamHC_t :
|
||||
This structure allows static allocation of LZ4 HC streaming state.
|
||||
State must be initialized using LZ4_resetStreamHC() before first use.
|
||||
|
||||
Static allocation shall only be used in combination with static linking.
|
||||
When invoking LZ4 from a DLL, use create/free functions instead, which are API and ABI stable.
|
||||
*/
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Deprecated Functions
|
||||
**************************************/
|
||||
/* Deprecate Warnings */
|
||||
/* Should these warnings messages be a problem,
|
||||
it is generally possible to disable them,
|
||||
with -Wno-deprecated-declarations for gcc
|
||||
or _CRT_SECURE_NO_WARNINGS in Visual for example.
|
||||
You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
|
||||
#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
|
||||
# define LZ4_DEPRECATE_WARNING_DEFBLOCK
|
||||
# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
# if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
# elif (LZ4_GCC_VERSION >= 301)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
|
||||
# elif defined(_MSC_VER)
|
||||
# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
|
||||
# else
|
||||
# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
|
||||
# define LZ4_DEPRECATED(message)
|
||||
# endif
|
||||
#endif // LZ4_DEPRECATE_WARNING_DEFBLOCK
|
||||
/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
|
||||
|
||||
/* compression functions */
|
||||
/* these functions are planned to trigger warning messages by r131 approximately */
|
||||
int LZ4_compressHC (const char* source, char* dest, int inputSize);
|
||||
int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
|
||||
int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
||||
int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
|
||||
int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
|
||||
int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
||||
int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
|
||||
int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
/* deprecated compression functions */
|
||||
/* these functions will trigger warning messages in future releases */
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC (const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
|
||||
/* Streaming functions following the older model; should no longer be used */
|
||||
/* Deprecated Streaming functions using older model; should no longer be used */
|
||||
LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
|
||||
LZ4_DEPRECATED("use LZ4_saveDictHC() instead") char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
|
||||
LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") int LZ4_freeHC (void* LZ4HC_Data);
|
||||
@ -187,3 +224,5 @@ LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStreamStateHC(
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LZ4_HC_H_19834876238432 */
|
||||
|
361
contrib/liblz4/include/lz4/lz4opt.h
Normal file
361
contrib/liblz4/include/lz4/lz4opt.h
Normal file
@ -0,0 +1,361 @@
|
||||
/*
|
||||
lz4opt.h - Optimal Mode of LZ4
|
||||
Copyright (C) 2015-2017, Przemyslaw Skibinski <inikep@gmail.com>
|
||||
Note : this file is intended to be included within lz4hc.c
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
||||
*/
|
||||
|
||||
#define LZ4_OPT_NUM (1<<12)
|
||||
|
||||
|
||||
/* One match candidate reported by the match finder. */
typedef struct {
    int off;   /* distance from the searched position back to the match */
    int len;   /* number of matching bytes */
} LZ4HC_match_t;
|
||||
|
||||
/* Per-position record of the optimal parser :
 * describes the cheapest known step arriving at a position. */
typedef struct {
    int price;    /* accumulated cost, in bytes, to reach this position */
    int off;      /* match offset of the arriving step (0 for a literal step) */
    int mlen;     /* length of the arriving step ; 1 denotes a literal */
    int litlen;   /* literal count associated with the arriving step */
} LZ4HC_optimal_t;
|
||||
|
||||
|
||||
/* price in bytes */
|
||||
FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
|
||||
{
|
||||
size_t price = litlen;
|
||||
if (litlen >= (size_t)RUN_MASK)
|
||||
price += 1 + (litlen-RUN_MASK)/255;
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
/* requires mlen >= MINMATCH */
|
||||
FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
|
||||
{
|
||||
size_t price = 2 + 1; /* 16-bit offset + token */
|
||||
|
||||
price += LZ4HC_literalsPrice(litlen);
|
||||
|
||||
if (mlen >= (size_t)(ML_MASK+MINMATCH))
|
||||
price+= 1 + (mlen-(ML_MASK+MINMATCH))/255;
|
||||
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Binary Tree search
|
||||
***************************************/
|
||||
FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches (
|
||||
LZ4HC_CCtx_internal* ctx,
|
||||
const BYTE* const ip,
|
||||
const BYTE* const iHighLimit,
|
||||
size_t best_mlen,
|
||||
LZ4HC_match_t* matches,
|
||||
int* matchNum)
|
||||
{
|
||||
U16* const chainTable = ctx->chainTable;
|
||||
U32* const HashTable = ctx->hashTable;
|
||||
const BYTE* const base = ctx->base;
|
||||
const U32 dictLimit = ctx->dictLimit;
|
||||
const U32 current = (U32)(ip - base);
|
||||
const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
|
||||
const BYTE* const dictBase = ctx->dictBase;
|
||||
const BYTE* match;
|
||||
int nbAttempts = ctx->searchNum;
|
||||
int mnum = 0;
|
||||
U16 *ptr0, *ptr1, delta0, delta1;
|
||||
U32 matchIndex;
|
||||
size_t matchLength = 0;
|
||||
U32* HashPos;
|
||||
|
||||
if (ip + MINMATCH > iHighLimit) return 1;
|
||||
|
||||
/* HC4 match finder */
|
||||
HashPos = &HashTable[LZ4HC_hashPtr(ip)];
|
||||
matchIndex = *HashPos;
|
||||
*HashPos = current;
|
||||
|
||||
ptr0 = &DELTANEXTMAXD(current*2+1);
|
||||
ptr1 = &DELTANEXTMAXD(current*2);
|
||||
delta0 = delta1 = (U16)(current - matchIndex);
|
||||
|
||||
while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
|
||||
nbAttempts--;
|
||||
if (matchIndex >= dictLimit) {
|
||||
match = base + matchIndex;
|
||||
matchLength = LZ4_count(ip, match, iHighLimit);
|
||||
} else {
|
||||
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
||||
match = dictBase + matchIndex;
|
||||
if (vLimit > iHighLimit) vLimit = iHighLimit;
|
||||
matchLength = LZ4_count(ip, match, vLimit);
|
||||
if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
|
||||
matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
|
||||
}
|
||||
|
||||
if (matchLength > best_mlen) {
|
||||
best_mlen = matchLength;
|
||||
if (matches) {
|
||||
if (matchIndex >= dictLimit)
|
||||
matches[mnum].off = (int)(ip - match);
|
||||
else
|
||||
matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
|
||||
matches[mnum].len = (int)matchLength;
|
||||
mnum++;
|
||||
}
|
||||
if (best_mlen > LZ4_OPT_NUM) break;
|
||||
}
|
||||
|
||||
if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */
|
||||
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
|
||||
|
||||
if (*(ip+matchLength) < *(match+matchLength)) {
|
||||
*ptr0 = delta0;
|
||||
ptr0 = &DELTANEXTMAXD(matchIndex*2);
|
||||
if (*ptr0 == (U16)-1) break;
|
||||
delta0 = *ptr0;
|
||||
delta1 += delta0;
|
||||
matchIndex -= delta0;
|
||||
} else {
|
||||
*ptr1 = delta1;
|
||||
ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
|
||||
if (*ptr1 == (U16)-1) break;
|
||||
delta1 = *ptr1;
|
||||
delta0 += delta1;
|
||||
matchIndex -= delta1;
|
||||
}
|
||||
}
|
||||
|
||||
*ptr0 = (U16)-1;
|
||||
*ptr1 = (U16)-1;
|
||||
if (matchNum) *matchNum = mnum;
|
||||
/* if (best_mlen > 8) return best_mlen-8; */
|
||||
if (!matchNum) return 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* LZ4HC_updateBinTree() :
 * Inserts into the tree every position from ctx->nextToUpdate up to `ip`
 * (excluded), without collecting matches.
 * Note : ctx->nextToUpdate itself is not modified here. */
FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
{
    const BYTE* const base = ctx->base;
    const U32 target = (U32)(ip - base);
    U32 idx = ctx->nextToUpdate;
    while(idx < target)
        idx += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, base+idx, iHighLimit, 8, NULL, NULL);
}
|
||||
|
||||
|
||||
/** Tree updater, providing best match */
|
||||
FORCE_INLINE int LZ4HC_BinTree_GetAllMatches (
|
||||
LZ4HC_CCtx_internal* ctx,
|
||||
const BYTE* const ip, const BYTE* const iHighLimit,
|
||||
size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
|
||||
{
|
||||
int mnum = 0;
|
||||
if (ip < ctx->base + ctx->nextToUpdate) return 0; /* skipped area */
|
||||
if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit);
|
||||
best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum);
|
||||
ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen);
|
||||
return mnum;
|
||||
}
|
||||
|
||||
|
||||
/* SET_PRICE() :
 * Stores into opt[pos] the step (ml, offset, ll) reaching `pos` at price `cost`.
 * Positions between the current last_pos and pos first receive the
 * placeholder price 1<<30, and last_pos is advanced up to pos.
 * Requires `opt` and `last_pos` to be in scope where the macro is used.
 * `pos` is evaluated several times : pass an expression without side effects.
 * Wrapped in do { } while (0) so that the macro behaves as a single statement. */
#define SET_PRICE(pos, ml, offset, ll, cost)                                      \
do {                                                                              \
    while (last_pos < (pos)) { opt[last_pos+1].price = 1<<30; last_pos++; }       \
    opt[(pos)].mlen = (int)(ml);                                                  \
    opt[(pos)].off = (int)(offset);                                               \
    opt[(pos)].litlen = (int)(ll);                                                \
    opt[(pos)].price = (int)(cost);                                               \
} while (0)
|
||||
|
||||
|
||||
static int LZ4HC_compress_optimal (
|
||||
LZ4HC_CCtx_internal* ctx,
|
||||
const char* const source,
|
||||
char* dest,
|
||||
int inputSize,
|
||||
int maxOutputSize,
|
||||
limitedOutput_directive limit,
|
||||
size_t sufficient_len,
|
||||
const int fullUpdate
|
||||
)
|
||||
{
|
||||
LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1]; /* this uses a bit too much stack memory to my taste ... */
|
||||
LZ4HC_match_t matches[LZ4_OPT_NUM + 1];
|
||||
|
||||
const BYTE* ip = (const BYTE*) source;
|
||||
const BYTE* anchor = ip;
|
||||
const BYTE* const iend = ip + inputSize;
|
||||
const BYTE* const mflimit = iend - MFLIMIT;
|
||||
const BYTE* const matchlimit = (iend - LASTLITERALS);
|
||||
BYTE* op = (BYTE*) dest;
|
||||
BYTE* const oend = op + maxOutputSize;
|
||||
|
||||
/* init */
|
||||
if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
|
||||
ctx->end += inputSize;
|
||||
ip++;
|
||||
|
||||
/* Main Loop */
|
||||
while (ip < mflimit) {
|
||||
size_t const llen = ip - anchor;
|
||||
size_t last_pos = 0;
|
||||
size_t match_num, cur, best_mlen, best_off;
|
||||
memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */
|
||||
|
||||
match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate);
|
||||
if (!match_num) { ip++; continue; }
|
||||
|
||||
if ((size_t)matches[match_num-1].len > sufficient_len) {
|
||||
/* good enough solution : immediate encoding */
|
||||
best_mlen = matches[match_num-1].len;
|
||||
best_off = matches[match_num-1].off;
|
||||
cur = 0;
|
||||
last_pos = 1;
|
||||
goto encode;
|
||||
}
|
||||
|
||||
/* set prices using matches at position = 0 */
|
||||
{ size_t matchNb;
|
||||
for (matchNb = 0; matchNb < match_num; matchNb++) {
|
||||
size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
|
||||
best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
|
||||
for ( ; mlen <= best_mlen ; mlen++) {
|
||||
size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen);
|
||||
SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_pos and opt[pos] */
|
||||
} } }
|
||||
|
||||
if (last_pos < MINMATCH) { ip++; continue; } /* note : on clang at least, this test improves performance */
|
||||
|
||||
/* check further positions */
|
||||
opt[0].mlen = opt[1].mlen = 1;
|
||||
for (cur = 1; cur <= last_pos; cur++) {
|
||||
const BYTE* const curPtr = ip + cur;
|
||||
|
||||
/* establish baseline price if cur is literal */
|
||||
{ size_t price, litlen;
|
||||
if (opt[cur-1].mlen == 1) {
|
||||
/* no match at previous position */
|
||||
litlen = opt[cur-1].litlen + 1;
|
||||
if (cur > litlen) {
|
||||
price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen);
|
||||
} else {
|
||||
price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen);
|
||||
}
|
||||
} else {
|
||||
litlen = 1;
|
||||
price = opt[cur - 1].price + LZ4HC_literalsPrice(1);
|
||||
}
|
||||
|
||||
if (price < (size_t)opt[cur].price)
|
||||
SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_pos */
|
||||
}
|
||||
|
||||
if (cur == last_pos || curPtr >= mflimit) break;
|
||||
|
||||
match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate);
|
||||
if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) {
|
||||
/* immediate encoding */
|
||||
best_mlen = matches[match_num-1].len;
|
||||
best_off = matches[match_num-1].off;
|
||||
last_pos = cur + 1;
|
||||
goto encode;
|
||||
}
|
||||
|
||||
/* set prices using matches at position = cur */
|
||||
{ size_t matchNb;
|
||||
for (matchNb = 0; matchNb < match_num; matchNb++) {
|
||||
size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
|
||||
best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ?
|
||||
(size_t)matches[matchNb].len : LZ4_OPT_NUM - cur;
|
||||
|
||||
for ( ; ml <= best_mlen ; ml++) {
|
||||
size_t ll, price;
|
||||
if (opt[cur].mlen == 1) {
|
||||
ll = opt[cur].litlen;
|
||||
if (cur > ll)
|
||||
price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml);
|
||||
else
|
||||
price = LZ4HC_sequencePrice(llen + ll, ml) - LZ4HC_literalsPrice(llen);
|
||||
} else {
|
||||
ll = 0;
|
||||
price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
|
||||
}
|
||||
|
||||
if (cur + ml > last_pos || price < (size_t)opt[cur + ml].price) {
|
||||
SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price);
|
||||
} } } }
|
||||
} /* for (cur = 1; cur <= last_pos; cur++) */
|
||||
|
||||
best_mlen = opt[last_pos].mlen;
|
||||
best_off = opt[last_pos].off;
|
||||
cur = last_pos - best_mlen;
|
||||
|
||||
encode: /* cur, last_pos, best_mlen, best_off must be set */
|
||||
opt[0].mlen = 1;
|
||||
while (1) { /* from end to beginning */
|
||||
size_t const ml = opt[cur].mlen;
|
||||
int const offset = opt[cur].off;
|
||||
opt[cur].mlen = (int)best_mlen;
|
||||
opt[cur].off = (int)best_off;
|
||||
best_mlen = ml;
|
||||
best_off = offset;
|
||||
if (ml > cur) break; /* can this happen ? */
|
||||
cur -= ml;
|
||||
}
|
||||
|
||||
/* encode all recorded sequences */
|
||||
cur = 0;
|
||||
while (cur < last_pos) {
|
||||
int const ml = opt[cur].mlen;
|
||||
int const offset = opt[cur].off;
|
||||
if (ml == 1) { ip++; cur++; continue; }
|
||||
cur += ml;
|
||||
if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0;
|
||||
}
|
||||
} /* while (ip < mflimit) */
|
||||
|
||||
/* Encode Last Literals */
|
||||
{ int lastRun = (int)(iend - anchor);
|
||||
if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
|
||||
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
|
||||
else *op++ = (BYTE)(lastRun<<ML_BITS);
|
||||
memcpy(op, anchor, iend - anchor);
|
||||
op += iend-anchor;
|
||||
}
|
||||
|
||||
/* End */
|
||||
return (int) ((char*)op-dest);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
/*
|
||||
LZ4 HC - High Compression Mode of LZ4
|
||||
Copyright (C) 2011-2015, Yann Collet.
|
||||
Copyright (C) 2011-2016, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
@ -28,27 +28,36 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 source repository : https://github.com/Cyan4973/lz4
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
||||
*/
|
||||
|
||||
/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Tuning Parameter
|
||||
**************************************/
|
||||
static const int LZ4HC_compressionLevel_default = 9;
|
||||
***************************************/
|
||||
|
||||
/*!
|
||||
* HEAPMODE :
|
||||
* Select how default compression function will allocate workplace memory,
|
||||
* in stack (0:fastest), or in heap (1:requires malloc()).
|
||||
* Since workplace is rather large, heap mode is recommended.
|
||||
*/
|
||||
#ifndef LZ4HC_HEAPMODE
|
||||
# define LZ4HC_HEAPMODE 1
|
||||
#endif
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Includes
|
||||
**************************************/
|
||||
#include <lz4/lz4hc.h>
|
||||
* Dependency
|
||||
***************************************/
|
||||
#include "lz4hc.h"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Local Compiler Options
|
||||
**************************************/
|
||||
***************************************/
|
||||
#if defined(__GNUC__)
|
||||
# pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#endif
|
||||
@ -60,50 +69,22 @@ static const int LZ4HC_compressionLevel_default = 9;
|
||||
|
||||
/* *************************************
|
||||
* Common LZ4 definition
|
||||
**************************************/
|
||||
***************************************/
|
||||
#define LZ4_COMMONDEFS_ONLY
|
||||
#include "lz4.c"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Local Constants
|
||||
**************************************/
|
||||
#define DICTIONARY_LOGSIZE 16
|
||||
#define MAXD (1<<DICTIONARY_LOGSIZE)
|
||||
#define MAXD_MASK (MAXD - 1)
|
||||
|
||||
#define HASH_LOG (DICTIONARY_LOGSIZE-1)
|
||||
#define HASHTABLESIZE (1 << HASH_LOG)
|
||||
#define HASH_MASK (HASHTABLESIZE - 1)
|
||||
|
||||
***************************************/
|
||||
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
|
||||
|
||||
static const int g_maxCompressionLevel = 16;
|
||||
|
||||
|
||||
/**************************************
|
||||
* Local Types
|
||||
**************************************/
|
||||
typedef struct
|
||||
{
|
||||
U32 hashTable[HASHTABLESIZE];
|
||||
U16 chainTable[MAXD];
|
||||
const BYTE* end; /* next block here to continue on current prefix */
|
||||
const BYTE* base; /* All index relative to this position */
|
||||
const BYTE* dictBase; /* alternate base for extDict */
|
||||
BYTE* inputBuffer; /* deprecated */
|
||||
U32 dictLimit; /* below that point, need extDict */
|
||||
U32 lowLimit; /* below that point, no more dict */
|
||||
U32 nextToUpdate; /* index from which to continue dictionary update */
|
||||
U32 compressionLevel;
|
||||
} LZ4HC_Data_Structure;
|
||||
|
||||
|
||||
/**************************************
|
||||
* Local Macros
|
||||
**************************************/
|
||||
#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
|
||||
//#define DELTANEXTU16(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */
|
||||
#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
|
||||
#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */
|
||||
#define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */
|
||||
|
||||
static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
|
||||
@ -113,7 +94,7 @@ static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)
|
||||
/**************************************
|
||||
* HC Compression
|
||||
**************************************/
|
||||
static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start)
|
||||
static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
|
||||
{
|
||||
MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
|
||||
MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
|
||||
@ -127,21 +108,20 @@ static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start)
|
||||
|
||||
|
||||
/* Update chains up to ip (excluded) */
|
||||
FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
|
||||
FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
|
||||
{
|
||||
U16* chainTable = hc4->chainTable;
|
||||
U32* HashTable = hc4->hashTable;
|
||||
U16* const chainTable = hc4->chainTable;
|
||||
U32* const hashTable = hc4->hashTable;
|
||||
const BYTE* const base = hc4->base;
|
||||
const U32 target = (U32)(ip - base);
|
||||
U32 const target = (U32)(ip - base);
|
||||
U32 idx = hc4->nextToUpdate;
|
||||
|
||||
while(idx < target)
|
||||
{
|
||||
U32 h = LZ4HC_hashPtr(base+idx);
|
||||
size_t delta = idx - HashTable[h];
|
||||
while (idx < target) {
|
||||
U32 const h = LZ4HC_hashPtr(base+idx);
|
||||
size_t delta = idx - hashTable[h];
|
||||
if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
|
||||
DELTANEXTU16(idx) = (U16)delta;
|
||||
HashTable[h] = idx;
|
||||
hashTable[h] = idx;
|
||||
idx++;
|
||||
}
|
||||
|
||||
@ -149,7 +129,7 @@ FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* Index table will be updated */
|
||||
FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* hc4, /* Index table will be updated */
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
const BYTE** matchpos,
|
||||
const int maxNbAttempts)
|
||||
@ -161,7 +141,6 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* I
|
||||
const U32 dictLimit = hc4->dictLimit;
|
||||
const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
|
||||
U32 matchIndex;
|
||||
const BYTE* match;
|
||||
int nbAttempts=maxNbAttempts;
|
||||
size_t ml=0;
|
||||
|
||||
@ -169,24 +148,19 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* I
|
||||
LZ4HC_Insert(hc4, ip);
|
||||
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
|
||||
|
||||
while ((matchIndex>=lowLimit) && (nbAttempts))
|
||||
{
|
||||
while ((matchIndex>=lowLimit) && (nbAttempts)) {
|
||||
nbAttempts--;
|
||||
if (matchIndex >= dictLimit)
|
||||
{
|
||||
match = base + matchIndex;
|
||||
if (matchIndex >= dictLimit) {
|
||||
const BYTE* const match = base + matchIndex;
|
||||
if (*(match+ml) == *(ip+ml)
|
||||
&& (LZ4_read32(match) == LZ4_read32(ip)))
|
||||
{
|
||||
size_t mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
|
||||
size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
|
||||
if (mlt > ml) { ml = mlt; *matchpos = match; }
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
match = dictBase + matchIndex;
|
||||
if (LZ4_read32(match) == LZ4_read32(ip))
|
||||
{
|
||||
} else {
|
||||
const BYTE* const match = dictBase + matchIndex;
|
||||
if (LZ4_read32(match) == LZ4_read32(ip)) {
|
||||
size_t mlt;
|
||||
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
||||
if (vLimit > iLimit) vLimit = iLimit;
|
||||
@ -204,7 +178,7 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* I
|
||||
|
||||
|
||||
FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
|
||||
LZ4HC_Data_Structure* hc4,
|
||||
LZ4HC_CCtx_internal* hc4,
|
||||
const BYTE* const ip,
|
||||
const BYTE* const iLowLimit,
|
||||
const BYTE* const iHighLimit,
|
||||
@ -229,15 +203,12 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
|
||||
LZ4HC_Insert(hc4, ip);
|
||||
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
|
||||
|
||||
while ((matchIndex>=lowLimit) && (nbAttempts))
|
||||
{
|
||||
while ((matchIndex>=lowLimit) && (nbAttempts)) {
|
||||
nbAttempts--;
|
||||
if (matchIndex >= dictLimit)
|
||||
{
|
||||
if (matchIndex >= dictLimit) {
|
||||
const BYTE* matchPtr = base + matchIndex;
|
||||
if (*(iLowLimit + longest) == *(matchPtr - delta + longest))
|
||||
if (LZ4_read32(matchPtr) == LZ4_read32(ip))
|
||||
{
|
||||
if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
|
||||
if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
|
||||
int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
|
||||
int back = 0;
|
||||
|
||||
@ -248,19 +219,16 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
|
||||
|
||||
mlt -= back;
|
||||
|
||||
if (mlt > longest)
|
||||
{
|
||||
if (mlt > longest) {
|
||||
longest = (int)mlt;
|
||||
*matchpos = matchPtr+back;
|
||||
*startpos = ip+back;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const BYTE* matchPtr = dictBase + matchIndex;
|
||||
if (LZ4_read32(matchPtr) == LZ4_read32(ip))
|
||||
{
|
||||
} else {
|
||||
const BYTE* const matchPtr = dictBase + matchIndex;
|
||||
if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
|
||||
size_t mlt;
|
||||
int back=0;
|
||||
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
||||
@ -320,8 +288,15 @@ FORCE_INLINE int LZ4HC_encodeSequence (
|
||||
/* Encode MatchLength */
|
||||
length = (int)(matchLength-MINMATCH);
|
||||
if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
|
||||
if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
|
||||
else *token += (BYTE)(length);
|
||||
if (length>=(int)ML_MASK) {
|
||||
*token += ML_MASK;
|
||||
length -= ML_MASK;
|
||||
for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; }
|
||||
if (length > 254) { length-=255; *(*op)++ = 255; }
|
||||
*(*op)++ = (BYTE)length;
|
||||
} else {
|
||||
*token += (BYTE)(length);
|
||||
}
|
||||
|
||||
/* Prepare next loop */
|
||||
*ip += matchLength;
|
||||
@ -330,18 +305,18 @@ FORCE_INLINE int LZ4HC_encodeSequence (
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "lz4opt.h"
|
||||
|
||||
static int LZ4HC_compress_generic (
|
||||
void* ctxvoid,
|
||||
const char* source,
|
||||
char* dest,
|
||||
int inputSize,
|
||||
int maxOutputSize,
|
||||
int compressionLevel,
|
||||
static int LZ4HC_compress_hashChain (
|
||||
LZ4HC_CCtx_internal* const ctx,
|
||||
const char* const source,
|
||||
char* const dest,
|
||||
int const inputSize,
|
||||
int const maxOutputSize,
|
||||
unsigned maxNbAttempts,
|
||||
limitedOutput_directive limit
|
||||
)
|
||||
{
|
||||
LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
|
||||
const BYTE* ip = (const BYTE*) source;
|
||||
const BYTE* anchor = ip;
|
||||
const BYTE* const iend = ip + inputSize;
|
||||
@ -351,7 +326,6 @@ static int LZ4HC_compress_generic (
|
||||
BYTE* op = (BYTE*) dest;
|
||||
BYTE* const oend = op + maxOutputSize;
|
||||
|
||||
unsigned maxNbAttempts;
|
||||
int ml, ml2, ml3, ml0;
|
||||
const BYTE* ref = NULL;
|
||||
const BYTE* start2 = NULL;
|
||||
@ -361,18 +335,13 @@ static int LZ4HC_compress_generic (
|
||||
const BYTE* start0;
|
||||
const BYTE* ref0;
|
||||
|
||||
|
||||
/* init */
|
||||
if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel;
|
||||
if (compressionLevel < 1) compressionLevel = LZ4HC_compressionLevel_default;
|
||||
maxNbAttempts = 1 << (compressionLevel-1);
|
||||
ctx->end += inputSize;
|
||||
|
||||
ip++;
|
||||
|
||||
/* Main Loop */
|
||||
while (ip < mflimit)
|
||||
{
|
||||
while (ip < mflimit) {
|
||||
ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
|
||||
if (!ml) { ip++; continue; }
|
||||
|
||||
@ -383,19 +352,16 @@ static int LZ4HC_compress_generic (
|
||||
|
||||
_Search2:
|
||||
if (ip+ml < mflimit)
|
||||
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
|
||||
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
|
||||
else ml2 = ml;
|
||||
|
||||
if (ml2 == ml) /* No better match */
|
||||
{
|
||||
if (ml2 == ml) { /* No better match */
|
||||
if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (start0 < ip)
|
||||
{
|
||||
if (start2 < ip + ml0) /* empirical */
|
||||
{
|
||||
if (start0 < ip) {
|
||||
if (start2 < ip + ml0) { /* empirical */
|
||||
ip = start0;
|
||||
ref = ref0;
|
||||
ml = ml0;
|
||||
@ -403,8 +369,7 @@ _Search2:
|
||||
}
|
||||
|
||||
/* Here, start0==ip */
|
||||
if ((start2 - ip) < 3) /* First Match too small : removed */
|
||||
{
|
||||
if ((start2 - ip) < 3) { /* First Match too small : removed */
|
||||
ml = ml2;
|
||||
ip = start2;
|
||||
ref =ref2;
|
||||
@ -417,15 +382,13 @@ _Search3:
|
||||
* ml2 > ml1, and
|
||||
* ip1+3 <= ip2 (usually < ip1+ml1)
|
||||
*/
|
||||
if ((start2 - ip) < OPTIMAL_ML)
|
||||
{
|
||||
if ((start2 - ip) < OPTIMAL_ML) {
|
||||
int correction;
|
||||
int new_ml = ml;
|
||||
if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
|
||||
if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
|
||||
correction = new_ml - (int)(start2 - ip);
|
||||
if (correction > 0)
|
||||
{
|
||||
if (correction > 0) {
|
||||
start2 += correction;
|
||||
ref2 += correction;
|
||||
ml2 -= correction;
|
||||
@ -437,8 +400,7 @@ _Search3:
|
||||
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
|
||||
else ml3 = ml2;
|
||||
|
||||
if (ml3 == ml2) /* No better match : 2 sequences to encode */
|
||||
{
|
||||
if (ml3 == ml2) { /* No better match : 2 sequences to encode */
|
||||
/* ip & ref are known; Now for ml */
|
||||
if (start2 < ip+ml) ml = (int)(start2 - ip);
|
||||
/* Now, encode 2 sequences */
|
||||
@ -448,18 +410,14 @@ _Search3:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
|
||||
{
|
||||
if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
|
||||
{
|
||||
if (start2 < ip+ml)
|
||||
{
|
||||
if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */
|
||||
if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
|
||||
if (start2 < ip+ml) {
|
||||
int correction = (int)(ip+ml - start2);
|
||||
start2 += correction;
|
||||
ref2 += correction;
|
||||
ml2 -= correction;
|
||||
if (ml2 < MINMATCH)
|
||||
{
|
||||
if (ml2 < MINMATCH) {
|
||||
start2 = start3;
|
||||
ref2 = ref3;
|
||||
ml2 = ml3;
|
||||
@ -487,23 +445,18 @@ _Search3:
|
||||
* OK, now we have 3 ascending matches; let's write at least the first one
|
||||
* ip & ref are known; Now for ml
|
||||
*/
|
||||
if (start2 < ip+ml)
|
||||
{
|
||||
if ((start2 - ip) < (int)ML_MASK)
|
||||
{
|
||||
if (start2 < ip+ml) {
|
||||
if ((start2 - ip) < (int)ML_MASK) {
|
||||
int correction;
|
||||
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
|
||||
if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
|
||||
correction = ml - (int)(start2 - ip);
|
||||
if (correction > 0)
|
||||
{
|
||||
if (correction > 0) {
|
||||
start2 += correction;
|
||||
ref2 += correction;
|
||||
ml2 -= correction;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
ml = (int)(start2 - ip);
|
||||
}
|
||||
}
|
||||
@ -521,8 +474,7 @@ _Search3:
|
||||
}
|
||||
|
||||
/* Encode Last Literals */
|
||||
{
|
||||
int lastRun = (int)(iend - anchor);
|
||||
{ int lastRun = (int)(iend - anchor);
|
||||
if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
|
||||
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
|
||||
else *op++ = (BYTE)(lastRun<<ML_BITS);
|
||||
@ -534,23 +486,64 @@ _Search3:
|
||||
return (int) (((char*)op)-dest);
|
||||
}
|
||||
|
||||
/* LZ4HC_getSearchNum() :
 * Returns the search budget associated with a compression level ; callers
 * store it into the context's searchNum field.
 *   level 11        : 128
 *   level 12 and up : 1<<10 (levels above 12 are compressed as level 12,
 *                     so they must receive the same, non-zero, budget)
 *   other levels    : 0 (unused) */
static int LZ4HC_getSearchNum(int compressionLevel)
{
    if (compressionLevel >= 12) return 1<<10;
    if (compressionLevel == 11) return 128;
    return 0;   /* unused */
}
|
||||
|
||||
int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); }
|
||||
static int LZ4HC_compress_generic (
|
||||
LZ4HC_CCtx_internal* const ctx,
|
||||
const char* const source,
|
||||
char* const dest,
|
||||
int const inputSize,
|
||||
int const maxOutputSize,
|
||||
int compressionLevel,
|
||||
limitedOutput_directive limit
|
||||
)
|
||||
{
|
||||
if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT;
|
||||
if (compressionLevel > 9) {
|
||||
switch (compressionLevel) {
|
||||
case 10: return LZ4HC_compress_hashChain(ctx, source, dest, inputSize, maxOutputSize, 1 << (16-1), limit);
|
||||
case 11: ctx->searchNum = LZ4HC_getSearchNum(compressionLevel); return LZ4HC_compress_optimal(ctx, source, dest, inputSize, maxOutputSize, limit, 128, 0);
|
||||
default:
|
||||
case 12: ctx->searchNum = LZ4HC_getSearchNum(compressionLevel); return LZ4HC_compress_optimal(ctx, source, dest, inputSize, maxOutputSize, limit, LZ4_OPT_NUM, 1);
|
||||
}
|
||||
}
|
||||
return LZ4HC_compress_hashChain(ctx, source, dest, inputSize, maxOutputSize, 1 << (compressionLevel-1), limit);
|
||||
}
|
||||
|
||||
|
||||
/* LZ4_sizeofStateHC() :
 * @return : sizeof(LZ4_streamHC_t), as an int. */
int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }
|
||||
|
||||
int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
|
||||
{
|
||||
LZ4HC_CCtx_internal* ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
|
||||
if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */
|
||||
LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)src);
|
||||
LZ4HC_init (ctx, (const BYTE*)src);
|
||||
if (maxDstSize < LZ4_compressBound(srcSize))
|
||||
return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
|
||||
return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
|
||||
else
|
||||
return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, noLimit);
|
||||
return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, noLimit);
|
||||
}
|
||||
|
||||
int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
|
||||
{
|
||||
LZ4HC_Data_Structure state;
|
||||
return LZ4_compress_HC_extStateHC(&state, src, dst, srcSize, maxDstSize, compressionLevel);
|
||||
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
|
||||
LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t));
|
||||
#else
|
||||
LZ4_streamHC_t state;
|
||||
LZ4_streamHC_t* const statePtr = &state;
|
||||
#endif
|
||||
int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel);
|
||||
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
|
||||
free(statePtr);
|
||||
#endif
|
||||
return cSize;
|
||||
}
|
||||
|
||||
|
||||
@ -566,32 +559,38 @@ int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_st
|
||||
/* initialization */
|
||||
void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
|
||||
{
|
||||
LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= sizeof(LZ4_streamHC_t)); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
|
||||
((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL;
|
||||
((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel;
|
||||
LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
|
||||
LZ4_streamHCPtr->internal_donotuse.base = NULL;
|
||||
LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned)compressionLevel;
|
||||
LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel);
|
||||
}
|
||||
|
||||
int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
|
||||
{
|
||||
LZ4HC_Data_Structure* ctxPtr = (LZ4HC_Data_Structure*) LZ4_streamHCPtr;
|
||||
if (dictSize > 64 KB)
|
||||
{
|
||||
LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
|
||||
if (dictSize > 64 KB) {
|
||||
dictionary += dictSize - 64 KB;
|
||||
dictSize = 64 KB;
|
||||
}
|
||||
LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
|
||||
if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3));
|
||||
ctxPtr->end = (const BYTE*)dictionary + dictSize;
|
||||
if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
|
||||
LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
|
||||
else
|
||||
if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
|
||||
return dictSize;
|
||||
}
|
||||
|
||||
|
||||
/* compression */
|
||||
|
||||
static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock)
|
||||
static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
|
||||
{
|
||||
if (ctxPtr->end >= ctxPtr->base + 4)
|
||||
LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
|
||||
if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
|
||||
LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
|
||||
else
|
||||
if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
|
||||
|
||||
/* Only one memory segment for extDict, so any previous extDict is lost at this stage */
|
||||
ctxPtr->lowLimit = ctxPtr->dictLimit;
|
||||
ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
|
||||
@ -601,34 +600,29 @@ static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newB
|
||||
ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
|
||||
}
|
||||
|
||||
static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr,
|
||||
static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
|
||||
const char* source, char* dest,
|
||||
int inputSize, int maxOutputSize, limitedOutput_directive limit)
|
||||
{
|
||||
LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
|
||||
/* auto-init if forgotten */
|
||||
if (ctxPtr->base == NULL)
|
||||
LZ4HC_init (ctxPtr, (const BYTE*) source);
|
||||
if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) source);
|
||||
|
||||
/* Check overflow */
|
||||
if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB)
|
||||
{
|
||||
if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
|
||||
size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
|
||||
if (dictSize > 64 KB) dictSize = 64 KB;
|
||||
|
||||
LZ4_loadDictHC((LZ4_streamHC_t*)ctxPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
|
||||
LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
|
||||
}
|
||||
|
||||
/* Check if blocks follow each other */
|
||||
if ((const BYTE*)source != ctxPtr->end)
|
||||
LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
|
||||
if ((const BYTE*)source != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
|
||||
|
||||
/* Check overlapping input/dictionary space */
|
||||
{
|
||||
const BYTE* sourceEnd = (const BYTE*) source + inputSize;
|
||||
const BYTE* dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
|
||||
const BYTE* dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
|
||||
if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd))
|
||||
{
|
||||
{ const BYTE* sourceEnd = (const BYTE*) source + inputSize;
|
||||
const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
|
||||
const BYTE* const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
|
||||
if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd)) {
|
||||
if (sourceEnd > dictEnd) sourceEnd = dictEnd;
|
||||
ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
|
||||
if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
|
||||
@ -641,9 +635,9 @@ static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr,
|
||||
int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
|
||||
{
|
||||
if (maxOutputSize < LZ4_compressBound(inputSize))
|
||||
return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
|
||||
return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
|
||||
else
|
||||
return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit);
|
||||
return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit);
|
||||
}
|
||||
|
||||
|
||||
@ -651,14 +645,13 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* sourc
|
||||
|
||||
int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
|
||||
{
|
||||
LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*)LZ4_streamHCPtr;
|
||||
int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
|
||||
LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
|
||||
int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
|
||||
if (dictSize > 64 KB) dictSize = 64 KB;
|
||||
if (dictSize < 4) dictSize = 0;
|
||||
if (dictSize > prefixSize) dictSize = prefixSize;
|
||||
memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
|
||||
{
|
||||
U32 endIndex = (U32)(streamPtr->end - streamPtr->base);
|
||||
{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
|
||||
streamPtr->end = (const BYTE*)safeBuffer + dictSize;
|
||||
streamPtr->base = streamPtr->end - endIndex;
|
||||
streamPtr->dictLimit = endIndex - dictSize;
|
||||
@ -672,8 +665,8 @@ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictS
|
||||
/***********************************
|
||||
* Deprecated Functions
|
||||
***********************************/
|
||||
/* These functions currently generate deprecation warnings */
|
||||
/* Deprecated compression functions */
|
||||
/* These functions are planned to start generate warnings by r131 approximately */
|
||||
int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
|
||||
int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
|
||||
int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
|
||||
@ -687,45 +680,41 @@ int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src,
|
||||
|
||||
|
||||
/* Deprecated streaming functions */
|
||||
/* These functions currently generate deprecation warnings */
|
||||
int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
|
||||
|
||||
int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
|
||||
{
|
||||
LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
|
||||
if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */
|
||||
LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
|
||||
((LZ4HC_Data_Structure*)state)->inputBuffer = (BYTE*)inputBuffer;
|
||||
LZ4HC_init(ctx, (const BYTE*)inputBuffer);
|
||||
ctx->inputBuffer = (BYTE*)inputBuffer;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void* LZ4_createHC (char* inputBuffer)
|
||||
{
|
||||
void* hc4 = ALLOCATOR(1, sizeof(LZ4HC_Data_Structure));
|
||||
LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOCATOR(1, sizeof(LZ4_streamHC_t));
|
||||
if (hc4 == NULL) return NULL; /* not enough memory */
|
||||
LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
|
||||
((LZ4HC_Data_Structure*)hc4)->inputBuffer = (BYTE*)inputBuffer;
|
||||
LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
|
||||
hc4->internal_donotuse.inputBuffer = (BYTE*)inputBuffer;
|
||||
return hc4;
|
||||
}
|
||||
|
||||
int LZ4_freeHC (void* LZ4HC_Data)
|
||||
{
|
||||
FREEMEM(LZ4HC_Data);
|
||||
return (0);
|
||||
}
|
||||
int LZ4_freeHC (void* LZ4HC_Data) { FREEMEM(LZ4HC_Data); return 0; }
|
||||
|
||||
int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
|
||||
{
|
||||
return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit);
|
||||
return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, source, dest, inputSize, 0, compressionLevel, noLimit);
|
||||
}
|
||||
|
||||
int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
|
||||
{
|
||||
return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
|
||||
return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
|
||||
}
|
||||
|
||||
char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
|
||||
{
|
||||
LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
|
||||
int dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
|
||||
LZ4HC_CCtx_internal* const hc4 = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
|
||||
int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
|
||||
return (char*)(hc4->inputBuffer + dictSize);
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ public:
|
||||
ENC_BASE64 = 0x01, /// Base64-encoded output
|
||||
ENC_BINHEX = 0x02, /// BinHex-encoded output
|
||||
ENC_BASE64_NO_LF = 0x81, /// Base64-encoded output, no linefeeds
|
||||
ENC_BINHEX_NO_LF = 0x82, /// BinHex-encoded output, no linefeeds
|
||||
ENC_BINHEX_NO_LF = 0x82 /// BinHex-encoded output, no linefeeds
|
||||
|
||||
};
|
||||
|
||||
|
@ -22,7 +22,6 @@
|
||||
#define Crypto_Crypto_INCLUDED
|
||||
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#if defined(__APPLE__)
|
||||
// OS X 10.7 deprecates some OpenSSL functions
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
@ -116,6 +115,5 @@ void Crypto_API uninitializeCrypto();
|
||||
|
||||
} } // namespace Poco::Crypto
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#endif // Crypto_Crypto_INCLUDED
|
||||
|
@ -61,7 +61,7 @@ protected:
|
||||
|
||||
private:
|
||||
std::string _name;
|
||||
EVP_MD_CTX* _ctx;
|
||||
EVP_MD_CTX* _pContext;
|
||||
Poco::DigestEngine::Digest _digest;
|
||||
OpenSSLInitializer _openSSLInitializer;
|
||||
};
|
||||
|
@ -130,6 +130,14 @@ public:
|
||||
/// Returns true if verification against the issuer certificate
|
||||
/// was successful, false otherwise.
|
||||
|
||||
bool equals(const X509Certificate& otherCertificate) const;
|
||||
/// Checks whether the certificate is equal to
|
||||
/// the other certificate, by comparing the hashes
|
||||
/// of both certificates.
|
||||
///
|
||||
/// Returns true if both certificates are identical,
|
||||
/// otherwise false.
|
||||
|
||||
const X509* certificate() const;
|
||||
/// Returns the underlying OpenSSL certificate.
|
||||
|
||||
|
@ -77,7 +77,11 @@ namespace
|
||||
|
||||
private:
|
||||
const EVP_CIPHER* _pCipher;
|
||||
EVP_CIPHER_CTX _ctx;
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
|
||||
EVP_CIPHER_CTX* _pContext;
|
||||
#else
|
||||
EVP_CIPHER_CTX _context;
|
||||
#endif
|
||||
ByteVec _key;
|
||||
ByteVec _iv;
|
||||
};
|
||||
@ -92,30 +96,52 @@ namespace
|
||||
_key(key),
|
||||
_iv(iv)
|
||||
{
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
|
||||
_pContext = EVP_CIPHER_CTX_new();
|
||||
EVP_CipherInit(
|
||||
&_ctx,
|
||||
_pContext,
|
||||
_pCipher,
|
||||
&_key[0],
|
||||
_iv.empty() ? 0 : &_iv[0],
|
||||
(dir == DIR_ENCRYPT) ? 1 : 0);
|
||||
#else
|
||||
EVP_CipherInit(
|
||||
&_context,
|
||||
_pCipher,
|
||||
&_key[0],
|
||||
_iv.empty() ? 0 : &_iv[0],
|
||||
(dir == DIR_ENCRYPT) ? 1 : 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/// Releases the cipher context.
CryptoTransformImpl::~CryptoTransformImpl()
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
	// The context was allocated with EVP_CIPHER_CTX_new() in the constructor,
	// so it has to be freed, not merely cleaned up; EVP_CIPHER_CTX_free()
	// clears the context and releases its memory.
	EVP_CIPHER_CTX_free(_pContext);
#else
	// The context is a member object here; only its internal state is released.
	EVP_CIPHER_CTX_cleanup(&_context);
#endif
}
|
||||
|
||||
|
||||
std::size_t CryptoTransformImpl::blockSize() const
|
||||
{
|
||||
return EVP_CIPHER_CTX_block_size(&_ctx);
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
|
||||
return EVP_CIPHER_CTX_block_size(_pContext);
|
||||
#else
|
||||
return EVP_CIPHER_CTX_block_size(&_context);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
int CryptoTransformImpl::setPadding(int padding)
|
||||
{
|
||||
return EVP_CIPHER_CTX_set_padding(&_ctx, padding);
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
|
||||
return EVP_CIPHER_CTX_block_size(_pContext);
|
||||
#else
|
||||
return EVP_CIPHER_CTX_set_padding(&_context, padding);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -125,16 +151,24 @@ namespace
|
||||
unsigned char* output,
|
||||
std::streamsize outputLength)
|
||||
{
|
||||
poco_assert (outputLength >= std::streamsize(inputLength + blockSize() - 1));
|
||||
poco_assert (outputLength >= (inputLength + blockSize() - 1));
|
||||
|
||||
int outLen = static_cast<int>(outputLength);
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
|
||||
int rc = EVP_CipherUpdate(
|
||||
&_ctx,
|
||||
_pContext,
|
||||
output,
|
||||
&outLen,
|
||||
input,
|
||||
static_cast<int>(inputLength));
|
||||
|
||||
#else
|
||||
int rc = EVP_CipherUpdate(
|
||||
&_context,
|
||||
output,
|
||||
&outLen,
|
||||
input,
|
||||
static_cast<int>(inputLength));
|
||||
#endif
|
||||
if (rc == 0)
|
||||
throwError();
|
||||
|
||||
@ -146,14 +180,18 @@ namespace
|
||||
unsigned char* output,
|
||||
std::streamsize length)
|
||||
{
|
||||
poco_assert (length >= (std::streamsize)blockSize());
|
||||
poco_assert (length >= blockSize());
|
||||
|
||||
int len = static_cast<int>(length);
|
||||
|
||||
// Use the '_ex' version that does not perform implicit cleanup since we
|
||||
// will call EVP_CIPHER_CTX_cleanup() from the dtor as there is no
|
||||
// guarantee that finalize() will be called if an error occurred.
|
||||
int rc = EVP_CipherFinal_ex(&_ctx, output, &len);
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
|
||||
int rc = EVP_CipherFinal_ex(_pContext, output, &len);
|
||||
#else
|
||||
int rc = EVP_CipherFinal_ex(&_context, output, &len);
|
||||
#endif
|
||||
|
||||
if (rc == 0)
|
||||
throwError();
|
||||
|
@ -130,7 +130,7 @@ void CipherKeyImpl::getRandomBytes(ByteVec& vec, std::size_t count)
|
||||
vec.clear();
|
||||
vec.reserve(count);
|
||||
|
||||
for (std::size_t i = 0; i < count; ++i)
|
||||
for (int i = 0; i < count; ++i)
|
||||
vec.push_back(static_cast<unsigned char>(random.get()));
|
||||
}
|
||||
|
||||
|
@ -43,7 +43,7 @@ CryptoStreamBuf::CryptoStreamBuf(std::istream& istr, CryptoTransform* pTransform
|
||||
_buffer(static_cast<std::size_t>(bufferSize))
|
||||
{
|
||||
poco_check_ptr (pTransform);
|
||||
poco_assert ((size_t)bufferSize > 2 * pTransform->blockSize());
|
||||
poco_assert (bufferSize > 2 * pTransform->blockSize());
|
||||
}
|
||||
|
||||
|
||||
@ -56,7 +56,7 @@ CryptoStreamBuf::CryptoStreamBuf(std::ostream& ostr, CryptoTransform* pTransform
|
||||
_buffer(static_cast<std::size_t>(bufferSize))
|
||||
{
|
||||
poco_check_ptr (pTransform);
|
||||
poco_assert ((size_t)bufferSize > 2 * pTransform->blockSize());
|
||||
poco_assert (bufferSize > 2 * pTransform->blockSize());
|
||||
}
|
||||
|
||||
|
||||
@ -159,7 +159,7 @@ int CryptoStreamBuf::writeToDevice(const char* buffer, std::streamsize length)
|
||||
std::size_t maxChunkSize = _buffer.size()/2;
|
||||
std::size_t count = 0;
|
||||
|
||||
while (count < (size_t)length)
|
||||
while (count < length)
|
||||
{
|
||||
// Truncate chunk size so that the maximum output fits into _buffer.
|
||||
std::size_t n = static_cast<std::size_t>(length) - count;
|
||||
|
@ -23,46 +23,51 @@ namespace Crypto {
|
||||
|
||||
|
||||
/// Creates a digest engine for the OpenSSL digest algorithm called `name`.
///
/// Throws a Poco::NotFoundException if OpenSSL does not know the algorithm.
DigestEngine::DigestEngine(const std::string& name):
	_name(name),
	_pContext(EVP_MD_CTX_create())
{
	const EVP_MD* md = EVP_get_digestbyname(_name.c_str());
	if (!md)
	{
		// The destructor does not run when the constructor throws, so the
		// context created in the initializer list must be released here.
		EVP_MD_CTX_destroy(_pContext);
		throw Poco::NotFoundException(_name);
	}
	EVP_DigestInit_ex(_pContext, md, NULL);
}
|
||||
|
||||
|
||||
DigestEngine::~DigestEngine()
|
||||
{
|
||||
EVP_MD_CTX_destroy(_ctx);
|
||||
EVP_MD_CTX_destroy(_pContext);
|
||||
}
|
||||
|
||||
int DigestEngine::nid() const
|
||||
{
|
||||
return EVP_MD_nid(_ctx->digest);
|
||||
return EVP_MD_nid(EVP_MD_CTX_md(_pContext));
|
||||
}
|
||||
|
||||
std::size_t DigestEngine::digestLength() const
|
||||
{
|
||||
return EVP_MD_CTX_size(_ctx);
|
||||
return EVP_MD_CTX_size(_pContext);
|
||||
}
|
||||
|
||||
|
||||
void DigestEngine::reset()
|
||||
{
|
||||
EVP_MD_CTX_cleanup(_ctx);
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
|
||||
EVP_MD_CTX_free(_pContext);
|
||||
_pContext = EVP_MD_CTX_create();
|
||||
#else
|
||||
EVP_MD_CTX_cleanup(_pContext);
|
||||
#endif
|
||||
const EVP_MD* md = EVP_get_digestbyname(_name.c_str());
|
||||
if (!md) throw Poco::NotFoundException(_name);
|
||||
EVP_DigestInit_ex(_ctx, md, NULL);
|
||||
EVP_DigestInit_ex(_pContext, md, NULL);
|
||||
}
|
||||
|
||||
|
||||
const Poco::DigestEngine::Digest& DigestEngine::digest()
|
||||
{
|
||||
_digest.clear();
|
||||
unsigned len = EVP_MD_CTX_size(_ctx);
|
||||
unsigned len = EVP_MD_CTX_size(_pContext);
|
||||
_digest.resize(len);
|
||||
EVP_DigestFinal_ex(_ctx, &_digest[0], &len);
|
||||
EVP_DigestFinal_ex(_pContext, &_digest[0], &len);
|
||||
reset();
|
||||
return _digest;
|
||||
}
|
||||
@ -70,7 +75,7 @@ const Poco::DigestEngine::Digest& DigestEngine::digest()
|
||||
|
||||
void DigestEngine::updateImpl(const void* data, std::size_t length)
|
||||
{
|
||||
EVP_DigestUpdate(_ctx, data, length);
|
||||
EVP_DigestUpdate(_pContext, data, length);
|
||||
}
|
||||
|
||||
|
||||
|
@ -175,8 +175,8 @@ namespace
|
||||
|
||||
std::streamsize RSAEncryptImpl::finalize(unsigned char* output, std::streamsize length)
|
||||
{
|
||||
poco_assert ((size_t)length >= blockSize());
|
||||
poco_assert ((size_t)_pos <= maxDataSize());
|
||||
poco_assert (length >= blockSize());
|
||||
poco_assert (_pos <= maxDataSize());
|
||||
int rc = 0;
|
||||
if (_pos > 0)
|
||||
{
|
||||
@ -280,7 +280,7 @@ namespace
|
||||
|
||||
std::streamsize RSADecryptImpl::finalize(unsigned char* output, std::streamsize length)
|
||||
{
|
||||
poco_assert ((size_t)length >= blockSize());
|
||||
poco_assert (length >= blockSize());
|
||||
int rc = 0;
|
||||
if (_pos > 0)
|
||||
{
|
||||
|
@ -207,19 +207,43 @@ int RSAKeyImpl::size() const
|
||||
|
||||
/// Returns the RSA modulus (n) converted to a byte vector.
RSAKeyImpl::ByteVec RSAKeyImpl::modulus() const
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
	// OpenSSL >= 1.1.0: read the value through the RSA_get0_key() accessor.
	// Only n is needed, so the other two outputs are not requested.
	const BIGNUM* pModulus = 0;
	RSA_get0_key(_pRSA, &pModulus, 0, 0);
	return convertToByteVec(pModulus);
#else
	// Older OpenSSL: read the structure member directly.
	return convertToByteVec(_pRSA->n);
#endif
}
|
||||
|
||||
|
||||
/// Returns the RSA public (encryption) exponent (e) converted to a byte vector.
RSAKeyImpl::ByteVec RSAKeyImpl::encryptionExponent() const
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
	// OpenSSL >= 1.1.0: read the value through the RSA_get0_key() accessor.
	// Only e is needed, so the other two outputs are not requested.
	const BIGNUM* pExponent = 0;
	RSA_get0_key(_pRSA, 0, &pExponent, 0);
	return convertToByteVec(pExponent);
#else
	// Older OpenSSL: read the structure member directly.
	return convertToByteVec(_pRSA->e);
#endif
}
|
||||
|
||||
|
||||
/// Returns the RSA private (decryption) exponent (d) converted to a byte vector.
RSAKeyImpl::ByteVec RSAKeyImpl::decryptionExponent() const
{
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
	// OpenSSL >= 1.1.0: read the value through the RSA_get0_key() accessor.
	// Only d is needed, so the other two outputs are not requested.
	const BIGNUM* pExponent = 0;
	RSA_get0_key(_pRSA, 0, 0, &pExponent);
	return convertToByteVec(pExponent);
#else
	// Older OpenSSL: read the structure member directly.
	return convertToByteVec(_pRSA->d);
#endif
}
|
||||
|
||||
|
||||
|
@ -59,7 +59,11 @@ X509Certificate::X509Certificate(X509* pCert, bool shared):
|
||||
|
||||
if (shared)
|
||||
{
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
|
||||
X509_up_ref(_pCert);
|
||||
#else
|
||||
_pCert->references++;
|
||||
#endif
|
||||
}
|
||||
|
||||
init();
|
||||
@ -205,10 +209,10 @@ std::string X509Certificate::issuerName(NID nid) const
|
||||
if (X509_NAME* issuer = X509_get_issuer_name(_pCert))
|
||||
{
|
||||
char buffer[NAME_BUFFER_SIZE];
|
||||
X509_NAME_get_text_by_NID(issuer, nid, buffer, sizeof(buffer));
|
||||
if (X509_NAME_get_text_by_NID(issuer, nid, buffer, sizeof(buffer)) >= 0)
|
||||
return std::string(buffer);
|
||||
}
|
||||
else return std::string();
|
||||
return std::string();
|
||||
}
|
||||
|
||||
|
||||
@ -217,10 +221,10 @@ std::string X509Certificate::subjectName(NID nid) const
|
||||
if (X509_NAME* subj = X509_get_subject_name(_pCert))
|
||||
{
|
||||
char buffer[NAME_BUFFER_SIZE];
|
||||
X509_NAME_get_text_by_NID(subj, nid, buffer, sizeof(buffer));
|
||||
if (X509_NAME_get_text_by_NID(subj, nid, buffer, sizeof(buffer)) >= 0)
|
||||
return std::string(buffer);
|
||||
}
|
||||
else return std::string();
|
||||
return std::string();
|
||||
}
|
||||
|
||||
|
||||
@ -280,4 +284,12 @@ bool X509Certificate::issuedBy(const X509Certificate& issuerCertificate) const
|
||||
}
|
||||
|
||||
|
||||
bool X509Certificate::equals(const X509Certificate& otherCertificate) const
|
||||
{
|
||||
X509* pCert = const_cast<X509*>(_pCert);
|
||||
X509* pOtherCert = const_cast<X509*>(otherCertificate.certificate());
|
||||
return X509_cmp(pCert, pOtherCert) == 0;
|
||||
}
|
||||
|
||||
|
||||
} } // namespace Poco::Crypto
|
||||
|
@ -246,6 +246,11 @@ void CryptoTest::testCertificate()
|
||||
|
||||
// fails with recent OpenSSL versions:
|
||||
// assert (cert.issuedBy(cert));
|
||||
|
||||
std::istringstream otherCertStream(APPINF_PEM);
|
||||
X509Certificate otherCert(otherCertStream);
|
||||
|
||||
assert (cert.equals(otherCert));
|
||||
}
|
||||
|
||||
|
||||
|
@ -21,6 +21,7 @@ include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libdivide)
|
||||
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include)
|
||||
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash)
|
||||
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)
|
||||
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libbtrie/include)
|
||||
include_directories (${ClickHouse_SOURCE_DIR}/libs/libdaemon/include)
|
||||
include_directories (${ClickHouse_BINARY_DIR}/dbms/src)
|
||||
|
||||
@ -44,7 +45,6 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
|
||||
add_headers_and_sources(dbms src/TableFunctions)
|
||||
add_headers_and_sources(dbms src/Parsers)
|
||||
add_headers_and_sources(dbms src/Analyzers)
|
||||
add_headers_and_sources(dbms src/AggregateFunctions)
|
||||
add_headers_and_sources(dbms src/Core)
|
||||
add_headers_and_sources(dbms src/DataStreams)
|
||||
add_headers_and_sources(dbms src/DataTypes)
|
||||
@ -70,6 +70,33 @@ list (APPEND dbms_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
|
||||
|
||||
list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp src/Functions/DataTypeTraits.cpp)
|
||||
list (APPEND dbms_headers src/Functions/IFunction.h src/Functions/FunctionFactory.h src/Functions/DataTypeTraits.h)
|
||||
# Aggregate function sources appended to dbms_sources explicitly.
# Each file is listed exactly once; repeated entries are redundant.
list (APPEND dbms_sources
    src/AggregateFunctions/AggregateFunctionFactory.cpp
    src/AggregateFunctions/AggregateFunctionState.cpp
    src/AggregateFunctions/AggregateFunctionArray.cpp
    src/AggregateFunctions/AggregateFunctionNull.cpp
    src/AggregateFunctions/AggregateFunctionForEach.cpp
    src/AggregateFunctions/AggregateFunctionIf.cpp
    src/AggregateFunctions/AggregateFunctionMerge.cpp
    src/AggregateFunctions/AggregateFunctionCount.cpp
)
|
||||
|
||||
# Aggregate function headers appended to dbms_headers explicitly.
# Each file is listed exactly once; repeated entries are redundant.
list (APPEND dbms_headers
    src/AggregateFunctions/IAggregateFunction.h
    src/AggregateFunctions/AggregateFunctionFactory.h
    src/AggregateFunctions/AggregateFunctionState.h
    src/AggregateFunctions/AggregateFunctionArray.h
    src/AggregateFunctions/AggregateFunctionNull.h
    src/AggregateFunctions/AggregateFunctionForEach.h
    src/AggregateFunctions/AggregateFunctionIf.h
    src/AggregateFunctions/AggregateFunctionMerge.h
    src/AggregateFunctions/AggregateFunctionCount.h
)
|
||||
|
||||
|
||||
list(REMOVE_ITEM dbms_sources
|
||||
src/Client/Client.cpp
|
||||
@ -127,6 +154,7 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
src/Dictionaries/FlatDictionary.cpp
|
||||
src/Dictionaries/HashedDictionary.cpp
|
||||
src/Dictionaries/CacheDictionary.cpp
|
||||
src/Dictionaries/TrieDictionary.cpp
|
||||
src/Dictionaries/RangeHashedDictionary.cpp
|
||||
src/Dictionaries/ComplexKeyHashedDictionary.cpp
|
||||
src/Dictionaries/ComplexKeyCacheDictionary.cpp
|
||||
@ -159,6 +187,7 @@ target_link_libraries (dbms
|
||||
${OPENSSL_CRYPTO_LIBRARY}
|
||||
${Boost_SYSTEM_LIBRARY}
|
||||
${Poco_Data_LIBRARY}
|
||||
btrie
|
||||
)
|
||||
|
||||
if (Poco_DataODBC_FOUND)
|
||||
|
@ -1,6 +1,6 @@
|
||||
#This strings autochanged from release_lib.sh :
|
||||
set(VERSION_DESCRIBE v1.1.54227-testing)
|
||||
set(VERSION_REVISION 54227)
|
||||
set(VERSION_DESCRIBE v1.1.54234-testing)
|
||||
set(VERSION_REVISION 54234)
|
||||
#===end of autochange
|
||||
|
||||
set (VERSION_MAJOR 1)
|
||||
|
@ -30,24 +30,6 @@ std::string trimRight(const std::string & in, const char * suffix)
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionCount(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileTiming(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsStatistics(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionSum(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionDebug(AggregateFunctionFactory & factory);
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionArray(AggregateFunctionPtr & nested);
|
||||
AggregateFunctionPtr createAggregateFunctionForEach(AggregateFunctionPtr & nested);
|
||||
AggregateFunctionPtr createAggregateFunctionIf(AggregateFunctionPtr & nested);
|
||||
@ -60,23 +42,6 @@ AggregateFunctionPtr createAggregateFunctionCountNotNull(const DataTypes & argum
|
||||
|
||||
AggregateFunctionFactory::AggregateFunctionFactory()
|
||||
{
|
||||
registerAggregateFunctionAvg(*this);
|
||||
registerAggregateFunctionCount(*this);
|
||||
registerAggregateFunctionGroupArray(*this);
|
||||
registerAggregateFunctionGroupUniqArray(*this);
|
||||
registerAggregateFunctionsQuantile(*this);
|
||||
registerAggregateFunctionsQuantileExact(*this);
|
||||
registerAggregateFunctionsQuantileExactWeighted(*this);
|
||||
registerAggregateFunctionsQuantileDeterministic(*this);
|
||||
registerAggregateFunctionsQuantileTiming(*this);
|
||||
registerAggregateFunctionsQuantileTDigest(*this);
|
||||
registerAggregateFunctionsSequenceMatch(*this);
|
||||
registerAggregateFunctionsMinMaxAny(*this);
|
||||
registerAggregateFunctionsStatistics(*this);
|
||||
registerAggregateFunctionSum(*this);
|
||||
registerAggregateFunctionsUniq(*this);
|
||||
registerAggregateFunctionUniqUpTo(*this);
|
||||
registerAggregateFunctionDebug(*this);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <unordered_map>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <common/singleton.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -14,7 +15,7 @@ using DataTypes = std::vector<DataTypePtr>;
|
||||
|
||||
/** Creates an aggregate function by name.
|
||||
*/
|
||||
class AggregateFunctionFactory final
|
||||
class AggregateFunctionFactory final : public Singleton<AggregateFunctionFactory>
|
||||
{
|
||||
friend class StorageSystemFunctions;
|
||||
|
||||
|
@ -11,7 +11,7 @@ namespace
|
||||
AggregateFunctionPtr createAggregateFunctionGroupArray(const std::string & name, const DataTypes & argument_types)
|
||||
{
|
||||
/// Exactly one argument is accepted; the message must state the same count as the check.
if (argument_types.size() != 1)
    throw Exception("Incorrect number of arguments for aggregate function " + name + ", should be 1",
        ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionGroupArrayNumeric>(*argument_types[0]));
|
||||
|
@ -100,7 +100,7 @@ public:
|
||||
|
||||
|
||||
|
||||
/// General case (ineffective). NOTE You can also implement a special case for strings.
|
||||
/// General case (inefficient). NOTE You can also implement a special case for strings.
|
||||
struct AggregateFunctionGroupArrayDataGeneric
|
||||
{
|
||||
Array value; /// TODO Add MemoryTracker
|
||||
|
@ -0,0 +1,27 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Creator for groupArrayInsertAt: requires exactly two argument types and
/// returns the generic implementation; throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH otherwise.
AggregateFunctionPtr createAggregateFunctionGroupArrayInsertAt(const std::string & name, const DataTypes & argument_types)
{
    const bool has_two_arguments = (argument_types.size() == 2);

    if (!has_two_arguments)
    {
        throw Exception("Incorrect number of arguments for aggregate function " + name + ", should be 2",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
    }

    return std::make_shared<AggregateFunctionGroupArrayInsertAtGeneric>();
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers the creator function in `factory` under the name "groupArrayInsertAt".
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory & factory)
{
    const auto creator = createAggregateFunctionGroupArrayInsertAt;
    factory.registerFunction("groupArrayInsertAt", creator);
}
|
||||
|
||||
}
|
@ -0,0 +1,210 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
|
||||
#include <Core/FieldVisitors.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
|
||||
#include <AggregateFunctions/IBinaryAggregateFunction.h>
|
||||
|
||||
#define AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Error codes used by this header. They are only declared here; the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int TOO_LARGE_ARRAY_SIZE;
    extern const int CANNOT_CONVERT_TYPE;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    /// Used by setParameters() below; declared explicitly so the header does not depend on
    /// another include happening to declare it first.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
|
||||
|
||||
|
||||
/** Aggregate function that takes two arguments: value and position,
  *  and as a result builds an array with the values located at the corresponding positions.
  *
  * If more than one value was inserted to a single position, any one of them (the first one in case of a single thread) is stored.
  * If no value was inserted to some position, then the default value will be substituted.
  *
  * The aggregate function also accepts optional parameters:
  * - default value to substitute;
  * - length to resize result arrays (if you want to have results of the same length for all aggregation keys);
  *
  * If you want to pass the length, the default value should also be given.
  */
|
||||
|
||||
|
||||
/// Generic case (inefficient).
|
||||
struct AggregateFunctionGroupArrayInsertAtDataGeneric
|
||||
{
|
||||
Array value; /// TODO Add MemoryTracker
|
||||
};
|
||||
|
||||
|
||||
class AggregateFunctionGroupArrayInsertAtGeneric final
|
||||
: public IBinaryAggregateFunction<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>
|
||||
{
|
||||
private:
|
||||
DataTypePtr type;
|
||||
Field default_value;
|
||||
size_t length_to_resize = 0; /// zero means - do not do resizing.
|
||||
|
||||
public:
|
||||
String getName() const override { return "groupArrayInsertAt"; }
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(type);
|
||||
}
|
||||
|
||||
void setArgumentsImpl(const DataTypes & arguments)
|
||||
{
|
||||
if (!arguments.at(1)->behavesAsNumber()) /// TODO filter out floating point types.
|
||||
throw Exception("Second argument of aggregate function " + getName() + " must be integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
type = arguments.front();
|
||||
|
||||
if (default_value.isNull())
|
||||
default_value = type->getDefault();
|
||||
else
|
||||
{
|
||||
Field converted = convertFieldToType(default_value, *type);
|
||||
if (converted.isNull())
|
||||
throw Exception("Cannot convert parameter of aggregate function " + getName() + " (" + applyVisitor(FieldVisitorToString(), default_value) + ")"
|
||||
" to type " + type->getName() + " to be used as default value in array", ErrorCodes::CANNOT_CONVERT_TYPE);
|
||||
|
||||
default_value = converted;
|
||||
}
|
||||
}
|
||||
|
||||
void setParameters(const Array & params) override
|
||||
{
|
||||
if (params.empty())
|
||||
return;
|
||||
|
||||
if (params.size() > 2)
|
||||
throw Exception("Aggregate function " + getName() + " requires at most two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
default_value = params[0];
|
||||
|
||||
if (params.size() == 2)
|
||||
{
|
||||
length_to_resize = applyVisitor(FieldVisitorConvertToNumber<size_t>(), params[1]);
|
||||
}
|
||||
}
|
||||
|
||||
void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_position, size_t row_num, Arena *) const
|
||||
{
|
||||
/// TODO Do positions need to be 1-based for this function?
|
||||
size_t position = column_position.get64(row_num);
|
||||
|
||||
/// If position is larger than size to which array will be cutted - simply ignore value.
|
||||
if (length_to_resize && position >= length_to_resize)
|
||||
return;
|
||||
|
||||
if (position >= AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
|
||||
throw Exception("Too large array size: position argument (" + toString(position) + ")"
|
||||
" is greater or equals to limit (" + toString(AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE) + ")",
|
||||
ErrorCodes::TOO_LARGE_ARRAY_SIZE);
|
||||
|
||||
Array & arr = data(place).value;
|
||||
|
||||
if (arr.size() <= position)
|
||||
arr.resize(position + 1);
|
||||
else if (!arr[position].isNull())
|
||||
return; /// Element was already inserted to the specified position.
|
||||
|
||||
column_value.get(row_num, arr[position]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
Array & arr_lhs = data(place).value;
|
||||
const Array & arr_rhs = data(rhs).value;
|
||||
|
||||
if (arr_lhs.size() < arr_rhs.size())
|
||||
arr_lhs.resize(arr_rhs.size());
|
||||
|
||||
for (size_t i = 0, size = arr_rhs.size(); i < size; ++i)
|
||||
if (arr_lhs[i].isNull() && !arr_rhs[i].isNull())
|
||||
arr_lhs[i] = arr_rhs[i];
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
const Array & arr = data(place).value;
|
||||
size_t size = arr.size();
|
||||
writeVarUInt(size, buf);
|
||||
|
||||
for (const Field & elem : arr)
|
||||
{
|
||||
if (elem.isNull())
|
||||
{
|
||||
writeBinary(UInt8(1), buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
writeBinary(UInt8(0), buf);
|
||||
type->serializeBinary(elem, buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (size > AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE)
|
||||
throw Exception("Too large array size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
|
||||
|
||||
Array & arr = data(place).value;
|
||||
|
||||
arr.resize(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
UInt8 is_null = 0;
|
||||
readBinary(is_null, buf);
|
||||
if (!is_null)
|
||||
type->deserializeBinary(arr[i], buf);
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
||||
{
|
||||
ColumnArray & to_array = static_cast<ColumnArray &>(to);
|
||||
IColumn & to_data = to_array.getData();
|
||||
ColumnArray::Offsets_t & to_offsets = to_array.getOffsets();
|
||||
|
||||
const Array & arr = data(place).value;
|
||||
|
||||
for (const Field & elem : arr)
|
||||
{
|
||||
if (!elem.isNull())
|
||||
to_data.insert(elem);
|
||||
else
|
||||
to_data.insert(default_value);
|
||||
}
|
||||
|
||||
size_t result_array_size = length_to_resize ? length_to_resize : arr.size();
|
||||
|
||||
/// Pad array if need.
|
||||
for (size_t i = arr.size(); i < result_array_size; ++i)
|
||||
to_data.insert(default_value);
|
||||
|
||||
to_offsets.push_back((to_offsets.empty() ? 0 : to_offsets.back()) + result_array_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_INSERT_AT_MAX_SIZE
|
||||
|
||||
}
|
@ -34,6 +34,11 @@ public:
|
||||
return nested_func->getReturnType();
|
||||
}
|
||||
|
||||
/// Exposes the nested aggregate function held in `nested_func_owner`.
AggregateFunctionPtr getNestedFunction() const { return nested_func_owner; }
|
||||
|
||||
void setArguments(const DataTypes & arguments) override
|
||||
{
|
||||
if (arguments.size() != 1)
|
||||
|
@ -286,7 +286,7 @@ private:
|
||||
ParserString dot_p(".");
|
||||
ParserNumber number_p;
|
||||
|
||||
auto pos = pattern.data();
|
||||
const char * pos = pattern.data();
|
||||
const auto begin = pos;
|
||||
const auto end = pos + pattern.size();
|
||||
|
||||
|
@ -1,8 +1,34 @@
|
||||
#include <AggregateFunctions/AggregateFunctionState.h>
|
||||
#include <AggregateFunctions/AggregateFunctionMerge.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/** Result type of the -State combinator.
  *
  * In the general case it is a DataTypeAggregateFunction built from the nested function,
  * its arguments and parameters. When the function is a -Merge combinator (i.e. -MergeState),
  * the single argument, which must itself be of AggregateFunction type, is returned as is.
  */
DataTypePtr AggregateFunctionState::getReturnType() const
{
    auto state_type = std::make_shared<DataTypeAggregateFunction>(nested_func_owner, arguments, params);

    const auto * merge_function = typeid_cast<const AggregateFunctionMerge *>(state_type->getFunction().get());
    if (!merge_function)
        return state_type;

    /// Special case: it is -MergeState combinator
    if (arguments.size() != 1)
        throw Exception("Combinator -MergeState expects only one argument", ErrorCodes::BAD_ARGUMENTS);

    const auto * argument_state_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
    if (!argument_state_type)
        throw Exception("Combinator -MergeState expects argument with AggregateFunction type", ErrorCodes::BAD_ARGUMENTS);

    return arguments[0];
}
|
||||
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionState(AggregateFunctionPtr & nested)
|
||||
{
|
||||
return std::make_shared<AggregateFunctionState>(nested);
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||
@ -30,10 +31,7 @@ public:
|
||||
return nested_func->getName() + "State";
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
return std::make_shared<DataTypeAggregateFunction>(nested_func_owner, arguments, params);
|
||||
}
|
||||
DataTypePtr getReturnType() const override;
|
||||
|
||||
void setArguments(const DataTypes & arguments_) override
|
||||
{
|
||||
|
70
dbms/src/AggregateFunctions/AggregateFunctionTopK.cpp
Normal file
70
dbms/src/AggregateFunctions/AggregateFunctionTopK.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionTopK.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Substitute return type for Date and DateTime
|
||||
class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::FieldType>
|
||||
{
|
||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()); }
|
||||
};
|
||||
|
||||
/// topK over DateTime: reuses the numeric implementation for the underlying field type
/// and only substitutes the return type with Array(DateTime).
class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateTime::FieldType>
{
    DataTypePtr getReturnType() const override
    {
        DataTypePtr element_type = std::make_shared<DataTypeDateTime>();
        return std::make_shared<DataTypeArray>(element_type);
    }
};
|
||||
|
||||
|
||||
/// Chooses the topK implementation for non-numeric argument types.
/// Never returns nullptr: anything not matched explicitly gets AggregateFunctionTopKGeneric<false>.
/// The caller takes ownership of the returned object.
static IAggregateFunction * createWithExtraTypes(const IDataType & argument_type)
{
    if (typeid_cast<const DataTypeDate *>(&argument_type))
        return new AggregateFunctionTopKDate;

    if (typeid_cast<const DataTypeDateTime *>(&argument_type))
        return new AggregateFunctionTopKDateTime;

    /// Check that we can use plain version of AggregateFunctionTopKGeneric
    const bool is_string_like = typeid_cast<const DataTypeString*>(&argument_type)
        || typeid_cast<const DataTypeFixedString*>(&argument_type);

    if (is_string_like)
        return new AggregateFunctionTopKGeneric<true>;

    if (auto * array_type = typeid_cast<const DataTypeArray *>(&argument_type))
    {
        auto nested_type = array_type->getNestedType();
        const bool nested_is_plain = nested_type->isNumeric() || typeid_cast<DataTypeFixedString *>(nested_type.get());

        if (nested_is_plain)
            return new AggregateFunctionTopKGeneric<true>;
    }

    return new AggregateFunctionTopKGeneric<false>;
}
|
||||
|
||||
/// Factory callback for topK: requires exactly one argument, tries the numeric
/// implementations first and then falls back to createWithExtraTypes().
AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const DataTypes & argument_types)
{
    if (argument_types.size() != 1)
        throw Exception("Incorrect number of arguments for aggregate function " + name,
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    const IDataType & argument_type = *argument_types[0];

    AggregateFunctionPtr function(createWithNumericType<AggregateFunctionTopK>(argument_type));

    if (!function)
        function = AggregateFunctionPtr(createWithExtraTypes(argument_type));

    if (!function)
        throw Exception("Illegal type " + argument_types[0]->getName() +
            " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    return function;
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers the creator above in the factory under the name "topK".
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
{
    const char * const function_name = "topK";
    factory.registerFunction(function_name, createAggregateFunctionTopK);
}
|
||||
|
||||
}
|
261
dbms/src/AggregateFunctions/AggregateFunctionTopK.h
Normal file
261
dbms/src/AggregateFunctions/AggregateFunctionTopK.h
Normal file
@ -0,0 +1,261 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/SpaceSaving.h>
|
||||
|
||||
#include <Core/FieldVisitors.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArray.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
// Allow NxK more space before calculating top K to increase accuracy
|
||||
#define TOP_K_DEFAULT 10
|
||||
#define TOP_K_LOAD_FACTOR 3
|
||||
#define TOP_K_MAX_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionTopKData
|
||||
{
|
||||
using Set = SpaceSaving
|
||||
<
|
||||
T,
|
||||
T,
|
||||
HashCRC32<T>,
|
||||
HashTableGrower<4>,
|
||||
HashTableAllocatorWithStackMemory<sizeof(T) * (1 << 4)>
|
||||
>;
|
||||
Set value;
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
class AggregateFunctionTopK
|
||||
: public IUnaryAggregateFunction<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T>>
|
||||
{
|
||||
private:
|
||||
using State = AggregateFunctionTopKData<T>;
|
||||
size_t threshold = TOP_K_DEFAULT;
|
||||
size_t reserved = TOP_K_LOAD_FACTOR * threshold;
|
||||
|
||||
public:
|
||||
String getName() const override { return "topK"; }
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>());
|
||||
}
|
||||
|
||||
void setArgument(const DataTypePtr & argument)
|
||||
{
|
||||
}
|
||||
|
||||
void setParameters(const Array & params) override
|
||||
{
|
||||
if (params.size() != 1)
|
||||
throw Exception("Aggregate function " + getName() + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
std::size_t k = applyVisitor(FieldVisitorConvertToNumber<size_t>(), params[0]);
|
||||
|
||||
if (k > TOP_K_MAX_SIZE)
|
||||
throw Exception("Too large parameter for aggregate function " + getName() + ". Maximum: " + toString(TOP_K_MAX_SIZE),
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
threshold = k;
|
||||
reserved = TOP_K_LOAD_FACTOR * k;
|
||||
}
|
||||
|
||||
void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
if (set.capacity() != reserved)
|
||||
set.resize(reserved);
|
||||
set.insert(static_cast<const ColumnVector<T> &>(column).getData()[row_num]);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
this->data(place).value.merge(this->data(rhs).value);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
this->data(place).value.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
set.resize(reserved);
|
||||
set.read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
||||
{
|
||||
ColumnArray & arr_to = static_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets();
|
||||
|
||||
const typename State::Set & set = this->data(place).value;
|
||||
auto resultVec = set.topK(threshold);
|
||||
size_t size = resultVec.size();
|
||||
|
||||
offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + size);
|
||||
|
||||
typename ColumnVector<T>::Container_t & data_to = static_cast<ColumnVector<T> &>(arr_to.getData()).getData();
|
||||
size_t old_size = data_to.size();
|
||||
data_to.resize(old_size + size);
|
||||
|
||||
size_t i = 0;
|
||||
for (auto it = resultVec.begin(); it != resultVec.end(); ++it, ++i)
|
||||
data_to[old_size + i] = it->key;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Generic implementation, it uses serialized representation as object descriptor.
|
||||
struct AggregateFunctionTopKGenericData
|
||||
{
|
||||
using Set = SpaceSaving
|
||||
<
|
||||
std::string,
|
||||
StringRef,
|
||||
StringRefHash,
|
||||
HashTableGrower<4>,
|
||||
HashTableAllocatorWithStackMemory<sizeof(StringRef) * (1 << 4)>
|
||||
>;
|
||||
|
||||
Set value;
|
||||
};
|
||||
|
||||
/** Template parameter with true value should be used for columns that store their elements in memory continuously.
|
||||
* For such columns topK() can be implemented more efficently (especially for small numeric arrays).
|
||||
*/
|
||||
template <bool is_plain_column = false>
|
||||
class AggregateFunctionTopKGeneric : public IUnaryAggregateFunction<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column>>
|
||||
{
|
||||
private:
|
||||
using State = AggregateFunctionTopKGenericData;
|
||||
DataTypePtr input_data_type;
|
||||
size_t threshold = TOP_K_DEFAULT;
|
||||
size_t reserved = TOP_K_LOAD_FACTOR * threshold;
|
||||
|
||||
static void deserializeAndInsert(StringRef str, IColumn & data_to);
|
||||
|
||||
public:
|
||||
String getName() const override { return "topK"; }
|
||||
|
||||
void setArgument(const DataTypePtr & argument)
|
||||
{
|
||||
input_data_type = argument;
|
||||
}
|
||||
|
||||
void setParameters(const Array & params) override
|
||||
{
|
||||
if (params.size() != 1)
|
||||
throw Exception("Aggregate function " + getName() + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
size_t k = applyVisitor(FieldVisitorConvertToNumber<size_t>(), params[0]);
|
||||
|
||||
if (k > TOP_K_MAX_SIZE)
|
||||
throw Exception("Too large parameter for aggregate function " + getName() + ". Maximum: " + toString(TOP_K_MAX_SIZE),
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
threshold = k;
|
||||
reserved = TOP_K_LOAD_FACTOR * k;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(input_data_type->clone());
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
this->data(place).value.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
set.resize(reserved);
|
||||
|
||||
size_t count = 0;
|
||||
readVarUInt(count, buf);
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
std::string key_string;
|
||||
readStringBinary(key_string, buf);
|
||||
UInt64 count, error;
|
||||
readVarUInt(count, buf);
|
||||
readVarUInt(error, buf);
|
||||
set.insert(key_string, count, error);
|
||||
}
|
||||
}
|
||||
|
||||
void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena * arena) const
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
if (set.capacity() != reserved) {
|
||||
set.resize(reserved);
|
||||
}
|
||||
|
||||
StringRef str_serialized = column.getDataAt(row_num);
|
||||
set.insert(str_serialized.toString());
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
this->data(place).value.merge(this->data(rhs).value);
|
||||
}
|
||||
|
||||
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
||||
{
|
||||
ColumnArray & arr_to = static_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets();
|
||||
IColumn & data_to = arr_to.getData();
|
||||
|
||||
auto resultVec = this->data(place).value.topK(threshold);
|
||||
offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + resultVec.size());
|
||||
|
||||
for (auto & elem : resultVec)
|
||||
{
|
||||
deserializeAndInsert(elem.key, data_to);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
inline void AggregateFunctionTopKGeneric<false>::deserializeAndInsert(StringRef str, IColumn & data_to)
|
||||
{
|
||||
data_to.deserializeAndInsertFromArena(str.data);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void AggregateFunctionTopKGeneric<true>::deserializeAndInsert(StringRef str, IColumn & data_to)
|
||||
{
|
||||
data_to.insertData(str.data, str.size);
|
||||
}
|
||||
|
||||
|
||||
#undef TOP_K_DEFAULT
|
||||
#undef TOP_K_MAX_SIZE
|
||||
#undef TOP_K_LOAD_FACTOR
|
||||
|
||||
}
|
@ -86,12 +86,11 @@ struct AggregateFunctionUniqExactData
|
||||
using Key = T;
|
||||
|
||||
/// When creating, the hash table must be small.
|
||||
typedef HashSet<
|
||||
using Set = HashSet<
|
||||
Key,
|
||||
HashCRC32<Key>,
|
||||
HashTableGrower<4>,
|
||||
HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 4)>
|
||||
> Set;
|
||||
HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 4)>>;
|
||||
|
||||
Set set;
|
||||
|
||||
@ -105,12 +104,11 @@ struct AggregateFunctionUniqExactData<String>
|
||||
using Key = UInt128;
|
||||
|
||||
/// When creating, the hash table must be small.
|
||||
typedef HashSet<
|
||||
using Set = HashSet<
|
||||
Key,
|
||||
UInt128TrivialHash,
|
||||
HashTableGrower<3>,
|
||||
HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 3)>
|
||||
> Set;
|
||||
HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 3)>>;
|
||||
|
||||
Set set;
|
||||
|
||||
|
27
dbms/src/AggregateFunctions/CMakeLists.txt
Normal file
27
dbms/src/AggregateFunctions/CMakeLists.txt
Normal file
@ -0,0 +1,27 @@
|
||||
# Collect the *.h / *.cpp files of this directory into
# clickhouse_aggregate_functions_headers / clickhouse_aggregate_functions_sources.
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(clickhouse_aggregate_functions .)

# Sources excluded from this library's source list.
list(REMOVE_ITEM clickhouse_aggregate_functions_sources
    AggregateFunctionFactory.cpp
    AggregateFunctionState.cpp
    AggregateFunctionArray.cpp
    AggregateFunctionNull.cpp
    AggregateFunctionForEach.cpp
    AggregateFunctionIf.cpp
    AggregateFunctionMerge.cpp
    AggregateFunctionCount.cpp
)

# The matching headers are excluded from the header list as well.
list(REMOVE_ITEM clickhouse_aggregate_functions_headers
    AggregateFunction.h
    AggregateFunctionFactory.h
    AggregateFunctionState.h
    AggregateFunctionArray.h
    AggregateFunctionNull.h
    AggregateFunctionForEach.h
    AggregateFunctionIf.h
    AggregateFunctionMerge.h
    AggregateFunctionCount.h
)

add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_sources})
|
@ -19,8 +19,7 @@ public:
|
||||
if (arguments.size() != 2)
|
||||
throw Exception{
|
||||
"Passed " + toString(arguments.size()) + " arguments to binary aggregate function " + this->getName(),
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH
|
||||
};
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
getDerived().setArgumentsImpl(arguments);
|
||||
}
|
||||
|
54
dbms/src/AggregateFunctions/registerAggregateFunctions.cpp
Normal file
54
dbms/src/AggregateFunctions/registerAggregateFunctions.cpp
Normal file
@ -0,0 +1,54 @@
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionCount(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileTiming(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsStatistics(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionSum(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionDebug(AggregateFunctionFactory & factory);
|
||||
|
||||
|
||||
void registerAggregateFunctions()
|
||||
{
|
||||
auto & factory = AggregateFunctionFactory::instance();
|
||||
|
||||
registerAggregateFunctionAvg(factory);
|
||||
registerAggregateFunctionCount(factory);
|
||||
registerAggregateFunctionGroupArray(factory);
|
||||
registerAggregateFunctionGroupUniqArray(factory);
|
||||
registerAggregateFunctionGroupArrayInsertAt(factory);
|
||||
registerAggregateFunctionsQuantile(factory);
|
||||
registerAggregateFunctionsQuantileExact(factory);
|
||||
registerAggregateFunctionsQuantileExactWeighted(factory);
|
||||
registerAggregateFunctionsQuantileDeterministic(factory);
|
||||
registerAggregateFunctionsQuantileTiming(factory);
|
||||
registerAggregateFunctionsQuantileTDigest(factory);
|
||||
registerAggregateFunctionsSequenceMatch(factory);
|
||||
registerAggregateFunctionsMinMaxAny(factory);
|
||||
registerAggregateFunctionsStatistics(factory);
|
||||
registerAggregateFunctionSum(factory);
|
||||
registerAggregateFunctionsUniq(factory);
|
||||
registerAggregateFunctionUniqUpTo(factory);
|
||||
registerAggregateFunctionTopK(factory);
|
||||
registerAggregateFunctionDebug(factory);
|
||||
}
|
||||
|
||||
}
|
8
dbms/src/AggregateFunctions/registerAggregateFunctions.h
Normal file
8
dbms/src/AggregateFunctions/registerAggregateFunctions.h
Normal file
@ -0,0 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void registerAggregateFunctions();
|
||||
|
||||
}
|
@ -165,7 +165,7 @@ void processFunction(const String & column_name, ASTPtr & ast, TypeAndConstantIn
|
||||
}
|
||||
|
||||
/// Aggregate function.
|
||||
if (AggregateFunctionPtr aggregate_function_ptr = context.getAggregateFunctionFactory().tryGet(function->name, argument_types))
|
||||
if (AggregateFunctionPtr aggregate_function_ptr = AggregateFunctionFactory::instance().tryGet(function->name, argument_types))
|
||||
{
|
||||
/// NOTE Not considering aggregate function parameters in type inference. It could become needed in future.
|
||||
/// Note that aggregate function could never be constant expression.
|
||||
|
@ -2,22 +2,22 @@ add_executable(collect_aliases collect_aliases.cpp)
|
||||
target_link_libraries(collect_aliases dbms)
|
||||
|
||||
add_executable(collect_tables collect_tables.cpp)
|
||||
target_link_libraries(collect_tables dbms storages_system)
|
||||
target_link_libraries(collect_tables dbms clickhouse_storages_system)
|
||||
|
||||
add_executable(analyze_columns analyze_columns.cpp)
|
||||
target_link_libraries(analyze_columns dbms storages_system)
|
||||
target_link_libraries(analyze_columns dbms clickhouse_storages_system)
|
||||
|
||||
add_executable(type_and_constant_inference type_and_constant_inference.cpp)
|
||||
target_link_libraries(type_and_constant_inference storages_system clickhouse_functions dbms)
|
||||
target_link_libraries(type_and_constant_inference clickhouse_storages_system clickhouse_functions dbms)
|
||||
|
||||
add_executable(analyze_result_of_query analyze_result_of_query.cpp)
|
||||
target_link_libraries(analyze_result_of_query dbms storages_system)
|
||||
target_link_libraries(analyze_result_of_query dbms clickhouse_storages_system)
|
||||
|
||||
add_executable(translate_positional_arguments translate_positional_arguments.cpp)
|
||||
target_link_libraries(translate_positional_arguments dbms)
|
||||
|
||||
add_executable(optimize_group_order_limit_by optimize_group_order_limit_by.cpp)
|
||||
target_link_libraries(optimize_group_order_limit_by dbms storages_system)
|
||||
target_link_libraries(optimize_group_order_limit_by dbms clickhouse_storages_system)
|
||||
|
||||
add_executable(analyze_lambdas analyze_lambdas.cpp)
|
||||
target_link_libraries(analyze_lambdas dbms)
|
||||
|
@ -1,5 +1,5 @@
|
||||
add_library (clickhouse-client Client.cpp)
|
||||
target_link_libraries (clickhouse-client dbms ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY})
|
||||
target_link_libraries (clickhouse-client dbms clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY})
|
||||
install (FILES config.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client COMPONENT clickhouse-client)
|
||||
|
||||
add_library (clickhouse-benchmark Benchmark.cpp)
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include <Common/NetException.h>
|
||||
#include <common/readline_use.h>
|
||||
#include <Functions/registerFunctions.h>
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
|
||||
|
||||
/// http://en.wikipedia.org/wiki/ANSI_escape_code
|
||||
@ -191,6 +192,7 @@ private:
|
||||
#undef EXTRACT_LIMIT
|
||||
|
||||
registerFunctions();
|
||||
registerAggregateFunctions();
|
||||
}
|
||||
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
/** Небольшие обёртки для асинхронного ввода-вывода.
|
||||
/** Small wrappers for asynchronous I/O.
|
||||
*/
|
||||
|
||||
|
||||
|
@ -22,15 +22,15 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/** Многие современные аллокаторы (например, tcmalloc) не умеют делать mremap для realloc,
|
||||
* даже в случае достаточно больших кусков памяти.
|
||||
* Хотя это позволяет увеличить производительность и уменьшить потребление памяти во время realloc-а.
|
||||
* Чтобы это исправить, делаем mremap самостоятельно, если кусок памяти достаточно большой.
|
||||
* Порог (64 МБ) выбран достаточно большим, так как изменение адресного пространства
|
||||
* довольно сильно тормозит, особенно в случае наличия большого количества потоков.
|
||||
* Рассчитываем, что набор операций mmap/что-то сделать/mremap может выполняться всего лишь около 1000 раз в секунду.
|
||||
/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
|
||||
* even in case of large enough chunks of memory.
|
||||
* Although this allows you to increase performance and reduce memory consumption during realloc.
|
||||
* To fix this, we do mremap manually if the chunk of memory is large enough.
|
||||
* The threshold (64 MB) is chosen quite large, since changing the address space is
|
||||
* very slow, especially in the case of a large number of threads.
|
||||
* We expect that the sequence of operations mmap/do something/mremap can only be performed about 1000 times per second.
|
||||
*
|
||||
* PS. Также это требуется, потому что tcmalloc не может выделить кусок памяти больше 16 GB.
|
||||
* PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
|
||||
*/
|
||||
static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20);
|
||||
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
|
||||
|
@ -3,13 +3,13 @@
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/** Отвечает за выделение/освобождение памяти. Используется, например, в PODArray, Arena.
|
||||
* Также используется в хэш-таблицах.
|
||||
* Интерфейс отличается от std::allocator
|
||||
* - наличием метода realloc, который для больших кусков памяти использует mremap;
|
||||
* - передачей размера в метод free;
|
||||
* - наличием аргумента alignment;
|
||||
* - возможностью зануления памяти (используется в хэш-таблицах);
|
||||
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
|
||||
* Also used in hash tables.
|
||||
* The interface is different from std::allocator
|
||||
* - the presence of the method realloc, which for large chunks of memory uses mremap;
|
||||
* - passing the size into the `free` method;
|
||||
* - the presence of the `alignment` argument;
|
||||
* - the possibility of zeroing memory (used in hash tables);
|
||||
*/
|
||||
template <bool clear_memory_>
|
||||
class Allocator
|
||||
@ -38,9 +38,9 @@ protected:
|
||||
};
|
||||
|
||||
|
||||
/** При использовании AllocatorWithStackMemory, размещённом на стеке,
|
||||
* GCC 4.9 ошибочно делает предположение, что мы можем вызывать free от указателя на стек.
|
||||
* На самом деле, комбинация условий внутри AllocatorWithStackMemory этого не допускает.
|
||||
/** When using AllocatorWithStackMemory, located on the stack,
|
||||
* GCC 4.9 mistakenly assumes that we can call `free` on a pointer to the stack.
|
||||
* In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
|
||||
*/
|
||||
#if !__clang__
|
||||
#pragma GCC diagnostic push
|
||||
|
@ -8,40 +8,40 @@ namespace DB
|
||||
{
|
||||
|
||||
|
||||
/** В отличие от Arena, позволяет освобождать (для последующего повторного использования)
|
||||
* выделенные ранее (не обязательно только что) куски памяти.
|
||||
* Для этого, запрашиваемый размер округляется вверх до степени двух
|
||||
* (или до 8, если меньше; или используется выделение памяти вне Arena, если размер больше 65536).
|
||||
* При освобождении памяти, для каждого размера (всего 14 вариантов: 8, 16... 65536),
|
||||
* поддерживается односвязный список свободных блоков.
|
||||
* При аллокации, мы берём голову списка свободных блоков,
|
||||
* либо, если список пуст - выделяем новый блок, используя Arena.
|
||||
/** Unlike Arena, allows you to release (for later re-use)
|
||||
* previously allocated (not necessarily just recently) chunks of memory.
|
||||
* For this, the requested size is rounded up to a power of two
|
||||
* (or up to 8, if less, or using memory allocation outside Arena if the size is greater than 65536).
|
||||
* When freeing memory, for each size (14 options in all: 8, 16 ... 65536),
|
||||
* a singly-linked list of free blocks is maintained.
|
||||
* When allocating, we take the head of the list of free blocks,
|
||||
* or, if the list is empty - allocate a new block using Arena.
|
||||
*/
|
||||
class ArenaWithFreeLists : private Allocator<false>, private boost::noncopyable
|
||||
{
|
||||
private:
|
||||
/// Если блок свободен, то в его начале хранится указатель на следующий свободный блок, либо nullptr, если свободных блоков больше нет.
|
||||
/// Если блок используется, то в нём хранятся какие-то данные.
|
||||
/// If the block is free, then the pointer to the next free block is stored at its beginning, or nullptr, if there are no more free blocks.
|
||||
/// If the block is used, then some data is stored in it.
|
||||
union Block
|
||||
{
|
||||
Block * next;
|
||||
char data[0];
|
||||
};
|
||||
|
||||
/// Максимальный размер куска памяти, который выделяется с помощью Arena. Иначе используем Allocator напрямую.
|
||||
/// The maximum size of a piece of memory that is allocated with Arena. Otherwise, we use Allocator directly.
|
||||
static constexpr size_t max_fixed_block_size = 65536;
|
||||
|
||||
/// Получить индекс в массиве freelist-ов для заданного размера.
|
||||
/// Get the index in the freelist array for the specified size.
|
||||
static size_t findFreeListIndex(const size_t size)
|
||||
{
|
||||
return size <= 8 ? 2 : bitScanReverse(size - 1);
|
||||
}
|
||||
|
||||
/// Для выделения блоков не слишком большого размера используется Arena.
|
||||
/// Arena is used to allocate blocks that are not too large.
|
||||
Arena pool;
|
||||
|
||||
/// Списки свободных блоков. Каждый элемент указывает на голову соответствующего списка, либо равен nullptr.
|
||||
/// Первые два элемента не используются, а предназначены для упрощения арифметики.
|
||||
/// Lists of free blocks. Each element points to the head of the corresponding list, or is nullptr.
|
||||
/// The first two elements are not used, but are intended to simplify arithmetic.
|
||||
Block * free_lists[16] {};
|
||||
|
||||
public:
|
||||
@ -60,10 +60,10 @@ public:
|
||||
/// find list of required size
|
||||
const auto list_idx = findFreeListIndex(size);
|
||||
|
||||
/// Если есть свободный блок.
|
||||
/// If there is a free block.
|
||||
if (auto & free_block_ptr = free_lists[list_idx])
|
||||
{
|
||||
/// Возьмём его. И поменяем голову списка на следующий элемент списка.
|
||||
/// Let's take it. And change the head of the list to the next item in the list.
|
||||
const auto res = free_block_ptr->data;
|
||||
free_block_ptr = free_block_ptr->next;
|
||||
return res;
|
||||
@ -81,14 +81,14 @@ public:
|
||||
/// find list of required size
|
||||
const auto list_idx = findFreeListIndex(size);
|
||||
|
||||
/// Вставим освобождённый блок в голову списка.
|
||||
/// Insert the released block into the head of the list.
|
||||
auto & free_block_ptr = free_lists[list_idx];
|
||||
const auto old_head = free_block_ptr;
|
||||
free_block_ptr = reinterpret_cast<Block *>(ptr);
|
||||
free_block_ptr->next = old_head;
|
||||
}
|
||||
|
||||
/// Размер выделенного пула в байтах
|
||||
/// Size of the allocated pool in bytes
|
||||
size_t size() const
|
||||
{
|
||||
return pool.size();
|
||||
|
@ -8,30 +8,30 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Массив (почти) неизменяемого размера:
|
||||
* размер задаётся в конструкторе;
|
||||
* метод resize приводит к удалению старых данных и нужен лишь для того,
|
||||
* чтобы можно было сначала создать пустой объект, используя конструктор по-умолчанию,
|
||||
* а потом уже определиться с размером.
|
||||
/** An array of (almost) unchangeable size:
|
||||
* the size is specified in the constructor;
|
||||
* the `resize` method removes old data, and is needed only
|
||||
* so that you can first create an empty object using the default constructor,
|
||||
* and then decide on the size.
|
||||
*
|
||||
* Есть возможность не инициализировать элементы по-умолчанию, а создавать их inplace.
|
||||
* Деструкторы элементов вызываются автоматически.
|
||||
* There is a possibility to not initialize elements by default, but create them inplace.
|
||||
* Member destructors are called automatically.
|
||||
*
|
||||
* sizeof равен размеру одного указателя.
|
||||
* `sizeof` is equal to the size of one pointer.
|
||||
*
|
||||
* Не exception-safe.
|
||||
* Копирование не поддерживается. Перемещение опустошает исходный объект.
|
||||
* То есть, использовать этот массив во многих случаях неудобно.
|
||||
* Not exception-safe.
|
||||
* Copying is not supported. Moving empties the original object.
|
||||
* That is, it is inconvenient to use this array in many cases.
|
||||
*
|
||||
* Предназначен для ситуаций, в которых создаётся много массивов одинакового небольшого размера,
|
||||
* но при этом размер не известен во время компиляции.
|
||||
* Также даёт существенное преимущество в случаях, когда важно, чтобы sizeof был минимальным.
|
||||
* Например, если массивы кладутся в open-addressing хэш-таблицу с inplace хранением значений (как HashMap)
|
||||
* Designed for situations in which many arrays of the same small size are created,
|
||||
* but the size is not known at compile time.
|
||||
* Also gives a significant advantage in cases where it is important that `sizeof` is minimal.
|
||||
* For example, if arrays are put in an open-addressing hash table with inplace storage of values (like HashMap)
|
||||
*
|
||||
* В этом случае, по сравнению с std::vector:
|
||||
* - для массивов размером в 1 элемент - преимущество примерно в 2 раза;
|
||||
* - для массивов размером в 5 элементов - преимущество примерно в 1.5 раза
|
||||
* (в качестве T использовались DB::Field, содержащие UInt64 и String);
|
||||
* In this case, compared to std::vector:
|
||||
* - for arrays of 1 element size - an advantage of about 2 times;
|
||||
* - for arrays of 5 elements - an advantage of about 1.5 times
|
||||
* (DB::Field, containing UInt64 and String, used as T);
|
||||
*/
|
||||
|
||||
const size_t empty_auto_array_helper = 0;
|
||||
@ -42,7 +42,7 @@ template <typename T>
|
||||
class AutoArray
|
||||
{
|
||||
public:
|
||||
/// Для отложенного создания.
|
||||
/// For deferred creation.
|
||||
AutoArray()
|
||||
{
|
||||
setEmpty();
|
||||
@ -53,16 +53,16 @@ public:
|
||||
init(size_, false);
|
||||
}
|
||||
|
||||
/** Не будут вызваны конструкторы по-умолчанию для элементов.
|
||||
* В этом случае, вы должны вставить все элементы с помощью функции place и placement new,
|
||||
* так как для них потом будут вызваны деструкторы.
|
||||
/** The default constructors for elements will not be called.
|
||||
* In this case, you must insert all elements using the `place` function and placement new,
|
||||
* since destructors are then called for them.
|
||||
*/
|
||||
AutoArray(size_t size_, const DontInitElemsTag & tag)
|
||||
{
|
||||
init(size_, true);
|
||||
}
|
||||
|
||||
/** Инициализирует все элементы копирующим конструктором с параметром value.
|
||||
/** Initializes all elements with a copy constructor with the `value` parameter.
|
||||
*/
|
||||
AutoArray(size_t size_, const T & value)
|
||||
{
|
||||
@ -74,7 +74,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/** resize удаляет все существующие элементы.
|
||||
/** `resize` removes all existing items.
|
||||
*/
|
||||
void resize(size_t size_, bool dont_init_elems = false)
|
||||
{
|
||||
@ -82,7 +82,7 @@ public:
|
||||
init(size_, dont_init_elems);
|
||||
}
|
||||
|
||||
/** Премещение.
|
||||
/** Move operations.
|
||||
*/
|
||||
AutoArray(AutoArray && src)
|
||||
{
|
||||
@ -125,10 +125,10 @@ public:
|
||||
setEmpty();
|
||||
}
|
||||
|
||||
/** Можно читать и модифицировать элементы с помощью оператора []
|
||||
* только если элементы были инициализированы
|
||||
* (то есть, в конструктор не был передан DontInitElemsTag,
|
||||
* или вы их инициализировали с помощью place и placement new).
|
||||
/** You can read and modify elements using the [] operator
|
||||
* only if items were initialized
|
||||
* (that is, DontInitElemsTag was not passed to the constructor,
|
||||
* or you initialized them using `place` and `placement new`).
|
||||
*/
|
||||
T & operator[](size_t i)
|
||||
{
|
||||
@ -140,9 +140,9 @@ public:
|
||||
return elem(i);
|
||||
}
|
||||
|
||||
/** Получить кусок памяти, в котором должен быть расположен элемент.
|
||||
* Функция предназначена, чтобы инициализировать элемент,
|
||||
* который ещё не был инициализирован:
|
||||
/** Get the piece of memory in which the element should be located.
|
||||
* The function is intended to initialize an element,
|
||||
* which has not yet been initialized:
|
||||
* new (arr.place(i)) T(args);
|
||||
*/
|
||||
char * place(size_t i)
|
||||
|
@ -23,9 +23,9 @@ static inline ContainerType max(const ContainerType & lhs, const ContainerType &
|
||||
|
||||
}
|
||||
|
||||
/** Для маленького количества ключей - массив фиксированного размера "на стеке".
|
||||
* Для среднего - выделяется HashSet.
|
||||
* Для большого - выделяется HyperLogLog.
|
||||
/** For a small number of keys - an array of fixed size "on the stack".
|
||||
* For a medium number - a HashSet is allocated.
|
||||
* For a large number - a HyperLogLog is allocated.
|
||||
*/
|
||||
template
|
||||
<
|
||||
@ -146,7 +146,7 @@ public:
|
||||
getContainer<Large>().merge(rhs.getContainer<Large>());
|
||||
}
|
||||
|
||||
/// Можно вызывать только для пустого объекта.
|
||||
/// You can only call for an empty object.
|
||||
void read(DB::ReadBuffer & in)
|
||||
{
|
||||
UInt8 v;
|
||||
@ -171,8 +171,8 @@ public:
|
||||
{
|
||||
auto container_type = getContainerType();
|
||||
|
||||
/// Если readAndMerge вызывается с пустым состоянием, просто десериализуем
|
||||
/// состояние задано в качестве параметра.
|
||||
/// If readAndMerge is called with an empty state, just deserialize
|
||||
/// the state given as a parameter.
|
||||
if ((container_type == details::ContainerType::SMALL) && small.empty())
|
||||
{
|
||||
read(in);
|
||||
|
@ -15,11 +15,11 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/** Компактный массив для хранения данных, размер content_width, в битах, которых составляет
|
||||
* меньше одного байта. Вместо того, чтобы хранить каждое значение в отдельный
|
||||
* байт, что приводит к растрате 37.5% пространства для content_width=5, CompactArray хранит
|
||||
* смежные content_width-битные значения в массиве байтов, т.е. фактически CompactArray
|
||||
* симулирует массив content_width-битных значений.
|
||||
/** Compact array for storing values whose size, `content_width` bits, is
|
||||
* less than one byte. Instead of storing each value in a separate
|
||||
* byte, which wastes 37.5% of the space for content_width = 5, CompactArray stores
|
||||
* adjacent `content_width`-bit values in a byte array, i.e. CompactArray effectively
|
||||
* simulates an array of `content_width`-bit values.
|
||||
*/
|
||||
template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
|
||||
class __attribute__ ((packed)) CompactArray final
|
||||
@ -76,12 +76,12 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
/// число байт в битсете
|
||||
/// number of bytes in bitset
|
||||
static constexpr size_t BITSET_SIZE = (static_cast<size_t>(bucket_count) * content_width + 7) / 8;
|
||||
UInt8 bitset[BITSET_SIZE] = { 0 };
|
||||
};
|
||||
|
||||
/** Класс для последовательного чтения ячеек из компактного массива на диске.
|
||||
/** A class for sequentially reading cells from a compact array on a disk.
|
||||
*/
|
||||
template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
|
||||
class CompactArray<BucketIndex, content_width, bucket_count>::Reader final
|
||||
@ -135,7 +135,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Вернуть текущий номер ячейки и соответствующее содержание.
|
||||
/** Return the current cell number and the corresponding content.
|
||||
*/
|
||||
inline std::pair<BucketIndex, UInt8> get() const
|
||||
{
|
||||
@ -150,26 +150,26 @@ public:
|
||||
|
||||
private:
|
||||
ReadBuffer & in;
|
||||
/// Физическое расположение текущей ячейки.
|
||||
/// The physical location of the current cell.
|
||||
Locus locus;
|
||||
/// Текущая позиция в файле в виде номера ячейки.
|
||||
/// The current position in the file as a cell number.
|
||||
BucketIndex current_bucket_index = 0;
|
||||
/// Количество прочитанных байтов.
|
||||
/// The number of bytes read.
|
||||
size_t read_count = 0;
|
||||
/// Содержание в текущей позиции.
|
||||
/// The content in the current position.
|
||||
UInt8 value_l;
|
||||
UInt8 value_r;
|
||||
///
|
||||
bool is_eof = false;
|
||||
/// Влезает ли ячейка полностью в один байт?
|
||||
/// Does the cell fully fit into one byte?
|
||||
bool fits_in_byte;
|
||||
};
|
||||
|
||||
/** Структура Locus содержит необходимую информацию, чтобы найти для каждой ячейки
|
||||
* соответствующие байт и смещение, в битах, от начала ячейки. Поскольку в общем
|
||||
* случае размер одного байта не делится на размер одной ячейки, возможны случаи,
|
||||
* когда одна ячейка перекрывает два байта. Поэтому структура Locus содержит две
|
||||
* пары (индекс, смещение).
|
||||
/** The `Locus` structure contains the necessary information to find for each cell
|
||||
* the corresponding byte and offset, in bits, from the beginning of the cell. Since in general
|
||||
* case the size of one byte is not divisible by the size of one cell, there may be cases
|
||||
* when one cell overlaps two bytes. Therefore, the `Locus` structure contains two
|
||||
* pairs (index, offset).
|
||||
*/
|
||||
template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
|
||||
class CompactArray<BucketIndex, content_width, bucket_count>::Locus final
|
||||
@ -190,13 +190,13 @@ public:
|
||||
{
|
||||
if ((index_l == index_r) || (index_l == (BITSET_SIZE - 1)))
|
||||
{
|
||||
/// Ячейка полностью влезает в один байт.
|
||||
/// The cell completely fits into one byte.
|
||||
*content_l &= ~(((1 << content_width) - 1) << offset_l);
|
||||
*content_l |= content << offset_l;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Ячейка перекрывает два байта.
|
||||
/// The cell overlaps two bytes.
|
||||
size_t left = 8 - offset_l;
|
||||
|
||||
*content_l &= ~(((1 << left) - 1) << offset_l);
|
||||
@ -230,13 +230,13 @@ private:
|
||||
|
||||
UInt8 ALWAYS_INLINE read(UInt8 value_l) const
|
||||
{
|
||||
/// Ячейка полностью влезает в один байт.
|
||||
/// The cell completely fits into one byte.
|
||||
return (value_l >> offset_l) & ((1 << content_width) - 1);
|
||||
}
|
||||
|
||||
UInt8 ALWAYS_INLINE read(UInt8 value_l, UInt8 value_r) const
|
||||
{
|
||||
/// Ячейка перекрывает два байта.
|
||||
/// The cell overlaps two bytes.
|
||||
return ((value_l >> offset_l) & ((1 << (8 - offset_l)) - 1))
|
||||
| ((value_r & ((1 << offset_r) - 1)) << (8 - offset_l));
|
||||
}
|
||||
@ -250,7 +250,7 @@ private:
|
||||
UInt8 * content_l;
|
||||
UInt8 * content_r;
|
||||
|
||||
/// Проверки
|
||||
/// Checks
|
||||
static_assert((content_width > 0) && (content_width < 8), "Invalid parameter value");
|
||||
static_assert(bucket_count <= (std::numeric_limits<size_t>::max() / content_width), "Invalid parameter value");
|
||||
};
|
||||
|
@ -38,9 +38,9 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
/** Очень простая thread-safe очередь ограниченной длины.
|
||||
* Если пытаться вынуть элемент из пустой очереди, то поток блокируется, пока очередь не станет непустой.
|
||||
* Если пытаться вставить элемент в переполненную очередь, то поток блокируется, пока в очереди не появится элемент.
|
||||
/** A very simple thread-safe queue of limited length.
|
||||
* If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty.
|
||||
* If you try to push an element into an overflowed queue, the thread is blocked until space appears in the queue.
|
||||
*/
|
||||
template <typename T>
|
||||
class ConcurrentBoundedQueue
|
||||
|
@ -22,24 +22,24 @@
|
||||
#define SMALL_READ_WRITE_BUFFER_SIZE 16
|
||||
|
||||
|
||||
/** Хранит в файле число.
|
||||
* Предназначен для редких вызовов (не рассчитан на производительность).
|
||||
/** Stores a number in the file.
|
||||
* Designed for rare calls (not designed for performance).
|
||||
*/
|
||||
class CounterInFile
|
||||
{
|
||||
public:
|
||||
/// path - имя файла, включая путь
|
||||
/// path - the name of the file, including the path
|
||||
CounterInFile(const std::string & path_) : path(path_) {}
|
||||
|
||||
/** Добавить delta к числу в файле и вернуть новое значение.
|
||||
* Если параметр create_if_need не установлен в true, то
|
||||
* в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
|
||||
/** Add `delta` to the number in the file and return the new value.
|
||||
* If the `create_if_need` parameter is not set to true, then
|
||||
* the file should already have a number written (if not - create the file manually with zero).
|
||||
*
|
||||
* Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
|
||||
* (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
|
||||
* To protect against race conditions between different processes, file locks are used.
|
||||
* (But when the file is first created, a race condition is possible, so it's better to create the file in advance.)
|
||||
*
|
||||
* locked_callback вызывается при заблокированном файле со счетчиком. В него передается новое значение.
|
||||
* locked_callback можно использовать, чтобы делать что-нибудь атомарно с увеличением счетчика (например, переименовывать файлы).
|
||||
* `locked_callback` is called when the counter file is locked. A new value is passed to it.
|
||||
* `locked_callback` can be used to do something atomically with incrementing the counter (for example, renaming files).
|
||||
*/
|
||||
template <typename Callback>
|
||||
Int64 add(Int64 delta, Callback && locked_callback, bool create_if_need = false)
|
||||
@ -74,7 +74,7 @@ public:
|
||||
}
|
||||
catch (const DB::Exception & e)
|
||||
{
|
||||
/// Более понятное сообщение об ошибке.
|
||||
/// A more understandable error message.
|
||||
if (e.code() == DB::ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == DB::ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
|
||||
throw DB::Exception("File " + path + " is empty. You must fill it manually with appropriate value.", e.code());
|
||||
else
|
||||
@ -118,13 +118,13 @@ public:
|
||||
return path;
|
||||
}
|
||||
|
||||
/// Изменить путь к файлу.
|
||||
/// Change the path to the file.
|
||||
void setPath(std::string path_)
|
||||
{
|
||||
path = path_;
|
||||
}
|
||||
|
||||
// Не thread-safe и не синхронизирован между процессами.
|
||||
// Not thread-safe and not synchronized between processes.
|
||||
void fixIfBroken(UInt64 value)
|
||||
{
|
||||
bool file_exists = Poco::File(path).exists();
|
||||
|
@ -35,7 +35,7 @@ public:
|
||||
DB::Exception * clone() const override { return new DB::Exception(*this); }
|
||||
void rethrow() const override { throw *this; }
|
||||
|
||||
/// Дописать к существующему сообщению что-нибудь ещё.
|
||||
/// Add something to the existing message.
|
||||
void addMessage(const std::string & arg) { extendedMessage(arg); }
|
||||
|
||||
const StackTrace & getStackTrace() const { return trace; }
|
||||
@ -45,7 +45,7 @@ private:
|
||||
};
|
||||
|
||||
|
||||
/// Содержит дополнительный член saved_errno. См. функцию throwFromErrno.
|
||||
/// Contains an additional member `saved_errno`. See the throwFromErrno function.
|
||||
class ErrnoException : public Exception
|
||||
{
|
||||
public:
|
||||
@ -73,8 +73,8 @@ using Exceptions = std::vector<std::exception_ptr>;
|
||||
void throwFromErrno(const std::string & s, int code = 0, int the_errno = errno);
|
||||
|
||||
|
||||
/** Попробовать записать исключение в лог (и забыть про него).
|
||||
* Можно использовать в деструкторах в блоке catch (...).
|
||||
/** Try to write an exception to the log (and forget about it).
|
||||
* Can be used in destructors in the catch-all block.
|
||||
*/
|
||||
void tryLogCurrentException(const char * log_name, const std::string & start_of_message = "");
|
||||
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = "");
|
||||
|
@ -25,16 +25,16 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/// Базовый класс содержащий основную информацию о внешней таблице и
|
||||
/// основные функции для извлечения этой информации из текстовых полей.
|
||||
/// The base class containing the basic information about external table and
|
||||
/// basic functions for extracting this information from text fields.
|
||||
class BaseExternalTable
|
||||
{
|
||||
public:
|
||||
std::string file; /// Файл с данными или '-' если stdin
|
||||
std::string name; /// Имя таблицы
|
||||
std::string format; /// Название формата хранения данных
|
||||
std::string file; /// File with data or '-' if stdin
|
||||
std::string name; /// The name of the table
|
||||
std::string format; /// Name of the data storage format
|
||||
|
||||
/// Описание структуры таблицы: (имя столбца, имя типа данных)
|
||||
/// Description of the table structure: (column name, data type name)
|
||||
std::vector<std::pair<std::string, std::string> > structure;
|
||||
|
||||
std::unique_ptr<ReadBuffer> read_buffer;
|
||||
@ -42,10 +42,10 @@ public:
|
||||
|
||||
virtual ~BaseExternalTable() {};
|
||||
|
||||
/// Инициализировать read_buffer в зависимости от источника данных. По умолчанию не делает ничего.
|
||||
/// Initialize read_buffer, depending on the data source. By default, does nothing.
|
||||
virtual void initReadBuffer() {};
|
||||
|
||||
/// Инициализировать sample_block по структуре таблицы сохраненной в structure
|
||||
/// Initialize sample_block according to the structure of the table stored in the `structure`
|
||||
virtual void initSampleBlock(const Context & context)
|
||||
{
|
||||
const DataTypeFactory & data_type_factory = DataTypeFactory::instance();
|
||||
@ -60,7 +60,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Получить данные таблицы - пару (поток с содержимым таблицы, имя таблицы)
|
||||
/// Get the table data - a pair (a stream with the contents of the table, the name of the table)
|
||||
virtual ExternalTableData getData(const Context & context)
|
||||
{
|
||||
initReadBuffer();
|
||||
@ -71,7 +71,7 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Очистить всю накопленную информацию
|
||||
/// Clear all accumulated information
|
||||
void clean()
|
||||
{
|
||||
name = "";
|
||||
@ -82,7 +82,7 @@ protected:
|
||||
read_buffer.reset();
|
||||
}
|
||||
|
||||
/// Функция для отладочного вывода информации
|
||||
/// Function for debugging information output
|
||||
void write()
|
||||
{
|
||||
std::cerr << "file " << file << std::endl;
|
||||
@ -100,7 +100,7 @@ protected:
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Построить вектор structure по текстовому полю structure
|
||||
/// Construct the `structure` vector from the text field `structure`
|
||||
virtual void parseStructureFromStructureField(const std::string & argument)
|
||||
{
|
||||
std::vector<std::string> vals = split(argument, " ,");
|
||||
@ -112,7 +112,7 @@ protected:
|
||||
structure.emplace_back(vals[i], vals[i + 1]);
|
||||
}
|
||||
|
||||
/// Построить вектор structure по текстовому полю types
|
||||
/// Construct the `structure` vector from the text field `types`
|
||||
virtual void parseStructureFromTypesField(const std::string & argument)
|
||||
{
|
||||
std::vector<std::string> vals = split(argument, " ,");
|
||||
@ -123,7 +123,7 @@ protected:
|
||||
};
|
||||
|
||||
|
||||
/// Парсинг внешей таблицы, используемый в tcp клиенте.
|
||||
/// Parsing of external table used in the tcp client.
|
||||
class ExternalTable : public BaseExternalTable
|
||||
{
|
||||
public:
|
||||
@ -135,7 +135,7 @@ public:
|
||||
read_buffer = std::make_unique<ReadBufferFromFile>(file);
|
||||
}
|
||||
|
||||
/// Извлечение параметров из variables_map, которая строится по командной строке клиента
|
||||
/// Extract parameters from variables_map, which is built on the client command line
|
||||
ExternalTable(const boost::program_options::variables_map & external_options)
|
||||
{
|
||||
if (external_options.count("file"))
|
||||
@ -162,9 +162,9 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/// Парсинг внешей таблицы, используемый при отправке таблиц через http
|
||||
/// Функция handlePart будет вызываться для каждой переданной таблицы,
|
||||
/// поэтому так же необходимо вызывать clean в конце handlePart.
|
||||
/// Parsing of external table used when sending tables via http
|
||||
/// The `handlePart` function will be called for each table passed,
|
||||
/// so it's also necessary to call `clean` at the end of the `handlePart`.
|
||||
class ExternalTablesHandler : public Poco::Net::PartHandler, BaseExternalTable
|
||||
{
|
||||
public:
|
||||
@ -174,15 +174,15 @@ public:
|
||||
|
||||
void handlePart(const Poco::Net::MessageHeader & header, std::istream & stream)
|
||||
{
|
||||
/// Буфер инициализируется здесь, а не в виртуальной функции initReadBuffer
|
||||
/// The buffer is initialized here, not in the virtual function initReadBuffer
|
||||
read_buffer = std::make_unique<ReadBufferFromIStream>(stream);
|
||||
|
||||
/// Извлекаем коллекцию параметров из MessageHeader
|
||||
/// Retrieve a collection of parameters from MessageHeader
|
||||
Poco::Net::NameValueCollection content;
|
||||
std::string label;
|
||||
Poco::Net::MessageHeader::splitParameters(header.get("Content-Disposition"), label, content);
|
||||
|
||||
/// Получаем параметры
|
||||
/// Get parameters
|
||||
name = content.get("name", "_data");
|
||||
format = params.get(name + "_format", "TabSeparated");
|
||||
|
||||
@ -195,13 +195,13 @@ public:
|
||||
|
||||
ExternalTableData data = getData(context);
|
||||
|
||||
/// Создаем таблицу
|
||||
/// Create table
|
||||
NamesAndTypesListPtr columns = std::make_shared<NamesAndTypesList>(sample_block.getColumnsList());
|
||||
StoragePtr storage = StorageMemory::create(data.second, columns);
|
||||
context.addExternalTable(data.second, storage);
|
||||
BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef());
|
||||
|
||||
/// Записываем данные
|
||||
/// Write data
|
||||
data.first->readPrefix();
|
||||
output->writePrefix();
|
||||
while(Block block = data.first->read())
|
||||
@ -210,7 +210,7 @@ public:
|
||||
output->writeSuffix();
|
||||
|
||||
names.push_back(name);
|
||||
/// Подготавливаемся к приему следующего файла, для этого очищаем всю полученную информацию
|
||||
/// Prepare to receive the next file: to do this, clear all the accumulated information
|
||||
clean();
|
||||
}
|
||||
|
||||
|
@ -136,7 +136,7 @@ void FileChecker::load(Map & map) const
|
||||
ReadBufferFromFile in(files_info_path);
|
||||
WriteBufferFromString out(content);
|
||||
|
||||
/// The JSON library does not support whitespace. We delete them. Ineffective.
|
||||
/// The JSON library does not support whitespace. We delete them. Inefficient.
|
||||
while (!in.eof())
|
||||
{
|
||||
char c;
|
||||
|
@ -8,11 +8,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// хранит размеры всех столбцов, и может проверять не побились ли столбцы
|
||||
/// stores the sizes of all columns, and can check whether the columns are corrupted
|
||||
class FileChecker
|
||||
{
|
||||
private:
|
||||
/// Имя файла -> размер.
|
||||
/// File name -> size.
|
||||
using Map = std::map<std::string, size_t>;
|
||||
|
||||
public:
|
||||
@ -23,7 +23,7 @@ public:
|
||||
void update(const Poco::File & file);
|
||||
void update(const Files::const_iterator & begin, const Files::const_iterator & end);
|
||||
|
||||
/// Проверяем файлы, параметры которых указаны в sizes.json
|
||||
/// Check the files whose parameters are specified in sizes.json
|
||||
bool check() const;
|
||||
|
||||
private:
|
||||
@ -35,7 +35,7 @@ private:
|
||||
std::string files_info_path;
|
||||
std::string tmp_files_info_path;
|
||||
|
||||
/// Данные из файла читаются лениво.
|
||||
/// The data from the file is read lazily.
|
||||
Map map;
|
||||
bool initialized = false;
|
||||
|
||||
|
@ -4,12 +4,12 @@
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
|
||||
|
||||
/** Хеш-таблица, позволяющая очищать таблицу за O(1).
|
||||
* Еще более простая, чем HashSet: Key и Mapped должны быть POD-типами.
|
||||
/** A hash table that allows you to clear the table in O(1).
|
||||
* Even simpler than HashSet: Key and Mapped must be POD-types.
|
||||
*
|
||||
* Вместо этого класса можно было бы просто использовать в HashSet в качестве ключа пару <версия, ключ>,
|
||||
* но тогда таблица накапливала бы все ключи, которые в нее когда-либо складывали, и неоправданно росла.
|
||||
* Этот класс идет на шаг дальше и считает ключи со старой версией пустыми местами в хеш-таблице.
|
||||
* Instead of this class, you could just use the pair (version, key) in the HashSet as the key
|
||||
* but then the table would accumulate all the keys that were ever stored in it, and would grow unreasonably.
|
||||
* This class goes a step further and considers keys with an old version to be empty slots in the hash table.
|
||||
*/
|
||||
|
||||
|
||||
@ -17,11 +17,11 @@ struct ClearableHashSetState
|
||||
{
|
||||
UInt32 version = 1;
|
||||
|
||||
/// Сериализация, в бинарном и текстовом виде.
|
||||
/// Serialization, in binary and text form.
|
||||
void write(DB::WriteBuffer & wb) const { DB::writeBinary(version, wb); }
|
||||
void writeText(DB::WriteBuffer & wb) const { DB::writeText(version, wb); }
|
||||
|
||||
/// Десериализация, в бинарном и текстовом виде.
|
||||
/// Deserialization, in binary and text form.
|
||||
void read(DB::ReadBuffer & rb) { DB::readBinary(version, rb); }
|
||||
void readText(DB::ReadBuffer & rb) { DB::readText(version, rb); }
|
||||
};
|
||||
@ -38,10 +38,10 @@ struct ClearableHashTableCell : public BaseCell
|
||||
bool isZero(const State & state) const { return version != state.version; }
|
||||
static bool isZero(const Key & key, const State & state) { return false; }
|
||||
|
||||
/// Установить значение ключа в ноль.
|
||||
/// Set the key value to zero.
|
||||
void setZero() { version = 0; }
|
||||
|
||||
/// Нужно ли хранить нулевой ключ отдельно (то есть, могут ли в хэш-таблицу вставить нулевой ключ).
|
||||
/// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
|
||||
static constexpr bool need_zero_value_storage = false;
|
||||
|
||||
ClearableHashTableCell() {}
|
||||
|
@ -3,12 +3,19 @@
|
||||
#include <Core/Types.h>
|
||||
|
||||
|
||||
/** Хэш функции, которые лучше чем тривиальная функция std::hash.
|
||||
* (при агрегации по идентификатору посетителя, прирост производительности более чем в 5 раз)
|
||||
/** Hash functions that are better than the trivial function std::hash.
|
||||
*
|
||||
* Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
|
||||
* This is because of following reasons:
|
||||
* - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
|
||||
* - in typical implementation of standard library, hash function for integers is trivial and just uses lower bits;
|
||||
* - traffic is non-uniformly distributed across a day;
|
||||
* - we are using open-addressing linear probing hash tables that are most critical to hash function quality,
|
||||
* and trivial hash function gives disastrous results.
|
||||
*/
|
||||
|
||||
/** Взято из MurmurHash.
|
||||
* Быстрее, чем intHash32 при вставке в хэш-таблицу UInt64 -> UInt64, где ключ - идентификатор посетителя.
|
||||
/** Taken from MurmurHash. This is Murmur finalizer.
|
||||
* Faster than intHash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
|
||||
*/
|
||||
inline DB::UInt64 intHash64(DB::UInt64 x)
|
||||
{
|
||||
@ -21,21 +28,22 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
|
||||
return x;
|
||||
}
|
||||
|
||||
/** CRC32C является не очень качественной в роли хэш функции,
|
||||
* согласно avalanche и bit independence тестам, а также малым количеством бит,
|
||||
* но может вести себя хорошо при использовании в хэш-таблицах,
|
||||
* за счёт высокой скорости (latency 3 + 1 такт, througput 1 такт).
|
||||
* Работает только при поддержке SSE 4.2.
|
||||
* Используется asm вместо интринсика, чтобы не обязательно было собирать весь проект с -msse4.
|
||||
/** CRC32C is not very high-quality as a hash function,
|
||||
* according to avalanche and bit independence tests (see SMHasher software), as well as a small number of bits,
|
||||
* but can behave well when used in hash tables,
|
||||
* due to high speed (latency 3 + 1 clock cycle, throughput 1 clock cycle).
|
||||
* Works only with SSE 4.2 support.
|
||||
*/
|
||||
#if __SSE4_2__
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
inline DB::UInt64 intHashCRC32(DB::UInt64 x)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
DB::UInt64 crc = -1ULL;
|
||||
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x));
|
||||
return crc;
|
||||
#if __SSE4_2__
|
||||
return _mm_crc32_u64(-1ULL, x);
|
||||
#else
|
||||
/// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
|
||||
/// On other platforms we do not have CRC32. NOTE This can be confusing.
|
||||
return intHash64(x);
|
||||
#endif
|
||||
}
|
||||
@ -117,7 +125,7 @@ DEFINE_HASH(DB::Float64)
|
||||
#undef DEFINE_HASH
|
||||
|
||||
|
||||
/// Разумно использовать для UInt8, UInt16 при достаточном размере хэш-таблицы.
|
||||
/// It is reasonable to use for UInt8, UInt16 with sufficient hash table size.
|
||||
struct TrivialHash
|
||||
{
|
||||
template <typename T>
|
||||
@ -128,17 +136,22 @@ struct TrivialHash
|
||||
};
|
||||
|
||||
|
||||
/** Сравнительно неплохая некриптографическая хэш функция из UInt64 в UInt32.
|
||||
* Но хуже (и по качеству и по скорости), чем просто срезка intHash64.
|
||||
* Взята отсюда: http://www.concentric.net/~ttwang/tech/inthash.htm
|
||||
/** A relatively good non-cryptographic hash function from UInt64 to UInt32.
|
||||
* But worse (both in quality and speed) than just cutting intHash64.
|
||||
* Taken from here: http://www.concentric.net/~ttwang/tech/inthash.htm
|
||||
*
|
||||
* Немного изменена по сравнению с функцией по ссылке: сдвиги вправо случайно заменены на цикличесвие сдвиги вправо.
|
||||
* Это изменение никак не повлияло на результаты тестов smhasher.
|
||||
* Slightly changed compared to the function by link: shifts to the right are accidentally replaced by a cyclic shift to the right.
|
||||
* This change did not affect the smhasher test results.
|
||||
*
|
||||
* Рекомендуется для разных задач использовать разные salt.
|
||||
* А то был случай, что в БД значения сортировались по хэшу (для некачественного псевдослучайного разбрасывания),
|
||||
* а в другом месте, в агрегатной функции, в хэш таблице использовался такой же хэш,
|
||||
* в результате чего, эта агрегатная функция чудовищно тормозила из-за коллизий.
|
||||
* It is recommended to use different salt for different tasks.
|
||||
* There was a case when in the database values were sorted by hash (for low-quality pseudo-random spread),
|
||||
* and in another place, in the aggregate function, the same hash was used in the hash table,
|
||||
* as a result, this aggregate function was monstrously slowed due to collisions.
|
||||
*
|
||||
* NOTE Salting is far from perfect, because it commutes with first steps of calculation.
|
||||
*
|
||||
* NOTE As mentioned, this function is slower than intHash64.
|
||||
* But occasionally, it is faster, when written in a loop and loop is vectorized.
|
||||
*/
|
||||
template <DB::UInt64 salt>
|
||||
inline DB::UInt32 intHash32(DB::UInt64 key)
|
||||
@ -156,7 +169,7 @@ inline DB::UInt32 intHash32(DB::UInt64 key)
|
||||
}
|
||||
|
||||
|
||||
/// Для контейнеров.
|
||||
/// For containers.
|
||||
template <typename T, DB::UInt64 salt = 0>
|
||||
struct IntHash32
|
||||
{
|
||||
|
@ -13,7 +13,7 @@
|
||||
|
||||
struct NoInitTag {};
|
||||
|
||||
/// Пара, которая не инициализирует элементы, если не нужно.
|
||||
/// A pair that does not initialize the elements, if not needed.
|
||||
template <typename First, typename Second>
|
||||
struct PairNoInit
|
||||
{
|
||||
@ -60,18 +60,18 @@ struct HashMapCell
|
||||
bool isZero(const State & state) const { return isZero(value.first, state); }
|
||||
static bool isZero(const Key & key, const State & state) { return ZeroTraits::check(key); }
|
||||
|
||||
/// Установить значение ключа в ноль.
|
||||
/// Set the key value to zero.
|
||||
void setZero() { ZeroTraits::set(value.first); }
|
||||
|
||||
/// Нужно ли хранить нулевой ключ отдельно (то есть, могут ли в хэш-таблицу вставить нулевой ключ).
|
||||
/// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
|
||||
static constexpr bool need_zero_value_storage = true;
|
||||
|
||||
/// Является ли ячейка удалённой.
|
||||
/// Whether the cell was deleted.
|
||||
bool isDeleted() const { return false; }
|
||||
|
||||
void setMapped(const value_type & value_) { value.second = value_.second; }
|
||||
|
||||
/// Сериализация, в бинарном и текстовом виде.
|
||||
/// Serialization, in binary and text form.
|
||||
void write(DB::WriteBuffer & wb) const
|
||||
{
|
||||
DB::writeBinary(value.first, wb);
|
||||
@ -85,7 +85,7 @@ struct HashMapCell
|
||||
DB::writeDoubleQuoted(value.second, wb);
|
||||
}
|
||||
|
||||
/// Десериализация, в бинарном и текстовом виде.
|
||||
/// Deserialization, in binary and text form.
|
||||
void read(DB::ReadBuffer & rb)
|
||||
{
|
||||
DB::readBinary(value.first, rb);
|
||||
@ -141,19 +141,19 @@ public:
|
||||
bool inserted;
|
||||
this->emplace(x, it, inserted);
|
||||
|
||||
/** Может показаться, что инициализация не обязательна для POD-типов (или __has_trivial_constructor),
|
||||
* так как кусок памяти для хэш-таблицы изначально инициализирован нулями.
|
||||
* Но, на самом деле, пустая ячейка может быть не инициализирована нулями в следующих случаях:
|
||||
* - ZeroValueStorage (в нём зануляется только ключ);
|
||||
* - после ресайза и переноса части ячеек в новую половину хэш-таблицы, у старых ячеек, тоже зануляется только ключ.
|
||||
/** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor),
|
||||
* since the hash table memory is initially initialized with zeros.
|
||||
* But, in fact, an empty cell may not be initialized with zeros in the following cases:
|
||||
* - ZeroValueStorage (it only zeros the key);
|
||||
* - after resizing and moving a part of the cells to the new half of the hash table, only the key is zeroed in the old cells as well.
|
||||
*
|
||||
* По производительности, разницы почти всегда нет, за счёт того, что it->second как правило присваивается сразу
|
||||
* после вызова operator[], и так как operator[] инлайнится, компилятор убирает лишнюю инициализацию.
|
||||
* On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately
|
||||
* after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization.
|
||||
*
|
||||
* Иногда из-за инициализации, производительность даже растёт. Это происходит в коде вида ++map[key].
|
||||
* Когда мы делаем инициализацию, то для новых ячеек, достаточно сразу сделать store 1.
|
||||
* А если бы мы не делали инициализацию, то не смотря на то, что в ячейке был ноль,
|
||||
* компилятор не может об этом догадаться, и генерирует код load, increment, store.
|
||||
* Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`.
|
||||
* When we do the initialization, for new cells, it's enough to make `store 1` right away.
|
||||
* And if we did not initialize, then even though there was zero in the cell,
|
||||
* the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
|
||||
*/
|
||||
if (inserted)
|
||||
new(&it->second) mapped_type();
|
||||
|
@ -44,27 +44,27 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/** Состояние хэш-таблицы, которое влияет на свойства её ячеек.
|
||||
* Используется в качестве параметра шаблона.
|
||||
* Например, существует реализация мгновенно-очищаемой хэш-таблицы - ClearableHashMap.
|
||||
* Для неё, в каждой ячейке хранится номер версии, и в самой хэш-таблице - текущая версия.
|
||||
* При очистке, просто увеличивается текущая версия; все ячейки с несовпадающей версией считаются пустыми.
|
||||
* Другой пример: для приближённого рассчёта количества уникальных посетителей, есть хэш-таблица UniquesHashSet.
|
||||
* В ней имеется понятие "степень". При каждом переполнении, ячейки с ключами, не делящимися на соответствующую степень двух, удаляются.
|
||||
/** The state of the hash table that affects the properties of its cells.
|
||||
* Used as a template parameter.
|
||||
* For example, there is an implementation of an instantly clearable hash table - ClearableHashMap.
|
||||
* For it, each cell holds the version number, and in the hash table itself is the current version.
|
||||
* When clearing, the current version simply increases; All cells with a mismatching version are considered empty.
|
||||
* Another example: for an approximate calculation of the number of unique visitors, there is a hash table UniquesHashSet.
|
||||
* It has the concept of "degree". At each overflow, cells with keys that are not divisible by the corresponding power of two are deleted.
|
||||
*/
|
||||
struct HashTableNoState
|
||||
{
|
||||
/// Сериализация, в бинарном и текстовом виде.
|
||||
/// Serialization, in binary and text form.
|
||||
void write(DB::WriteBuffer & wb) const {}
|
||||
void writeText(DB::WriteBuffer & wb) const {}
|
||||
|
||||
/// Десериализация, в бинарном и текстовом виде.
|
||||
/// Deserialization, in binary and text form.
|
||||
void read(DB::ReadBuffer & rb) {}
|
||||
void readText(DB::ReadBuffer & rb) {}
|
||||
};
|
||||
|
||||
|
||||
/// Эти функции могут быть перегружены для пользовательских типов.
|
||||
/// These functions can be overloaded for custom types.
|
||||
namespace ZeroTraits
|
||||
{
|
||||
|
||||
@ -77,11 +77,11 @@ void set(T & x) { x = 0; }
|
||||
};
|
||||
|
||||
|
||||
/** Compile-time интерфейс ячейки хэш-таблицы.
|
||||
* Разные ячейки используются для реализации разных хэш-таблиц.
|
||||
* Ячейка должна содержать ключ.
|
||||
* Также может содержать значение и произвольные дополнительные данные
|
||||
* (пример: запомненное значение хэш-функции; номер версии для ClearableHashMap).
|
||||
/** Compile-time interface for cell of the hash table.
|
||||
* Different cell types are used to implement different hash tables.
|
||||
* The cell must contain a key.
|
||||
* It can also contain a value and arbitrary additional data
|
||||
* (example: the stored hash value; version number for ClearableHashMap).
|
||||
*/
|
||||
template <typename Key, typename Hash, typename TState = HashTableNoState>
|
||||
struct HashTableCell
|
||||
@ -93,89 +93,89 @@ struct HashTableCell
|
||||
|
||||
HashTableCell() {}
|
||||
|
||||
/// Создать ячейку с заданным ключём / ключём и значением.
|
||||
/// Create a cell with the given key / key and value.
|
||||
HashTableCell(const Key & key_, const State & state) : key(key_) {}
|
||||
/// HashTableCell(const value_type & value_, const State & state) : key(value_) {}
|
||||
|
||||
/// Получить то, что будет value_type контейнера.
|
||||
/// Get what the value_type of the container will be.
|
||||
value_type & getValue() { return key; }
|
||||
const value_type & getValue() const { return key; }
|
||||
|
||||
/// Получить ключ.
|
||||
/// Get the key.
|
||||
static Key & getKey(value_type & value) { return value; }
|
||||
static const Key & getKey(const value_type & value) { return value; }
|
||||
|
||||
/// Равны ли ключи у ячеек.
|
||||
/// Are the keys at the cells equal?
|
||||
bool keyEquals(const Key & key_) const { return key == key_; }
|
||||
bool keyEquals(const Key & key_, size_t hash_) const { return key == key_; }
|
||||
|
||||
/// Если ячейка умеет запоминать в себе значение хэш-функции, то запомнить его.
|
||||
/// If the cell can remember the value of the hash function, then remember it.
|
||||
void setHash(size_t hash_value) {}
|
||||
|
||||
/// Если ячейка умеет запоминать в себе значение хэш-функции, то вернуть запомненное значение.
|
||||
/// Оно должно быть хотя бы один раз вычислено до этого.
|
||||
/// Если запоминание значения хэш-функции не предусмотрено, то просто вычислить хэш.
|
||||
/// If the cell can store the hash value in itself, then return the stored value.
|
||||
/// It must have been calculated at least once before.
|
||||
/// If storing the hash value is not provided, then just compute the hash.
|
||||
size_t getHash(const Hash & hash) const { return hash(key); }
|
||||
|
||||
/// Является ли ключ нулевым. В основном буфере, ячейки с нулевым ключём, считаются пустыми.
|
||||
/// Если нулевые ключи могут быть вставлены в таблицу, то ячейка для нулевого ключа хранится отдельно, не в основном буфере.
|
||||
/// Нулевые ключи должны быть такими, что занулённый кусок памяти представляет собой нулевой ключ.
|
||||
/// Whether the key is zero. In the main buffer, cells with a zero key are considered empty.
|
||||
/// If zero keys can be inserted into the table, then the cell for the zero key is stored separately, not in the main buffer.
|
||||
/// Zero keys must be such that a zeroed piece of memory represents a zero key.
|
||||
bool isZero(const State & state) const { return isZero(key, state); }
|
||||
static bool isZero(const Key & key, const State & state) { return ZeroTraits::check(key); }
|
||||
|
||||
/// Установить значение ключа в ноль.
|
||||
/// Set the key value to zero.
|
||||
void setZero() { ZeroTraits::set(key); }
|
||||
|
||||
/// Нужно ли хранить нулевой ключ отдельно (то есть, могут ли в хэш-таблицу вставить нулевой ключ).
|
||||
/// Does the hash table need to store the zero key separately (that is, can a zero key be inserted into the hash table).
|
||||
static constexpr bool need_zero_value_storage = true;
|
||||
|
||||
/// Является ли ячейка удалённой.
|
||||
/// Whether the cell is deleted.
|
||||
bool isDeleted() const { return false; }
|
||||
|
||||
/// Установить отображаемое значение, если есть (для HashMap), в соответствующиее из value.
|
||||
/// Set the mapped value, if any (for HashMap), to the corresponding `value`.
|
||||
void setMapped(const value_type & value) {}
|
||||
|
||||
/// Сериализация, в бинарном и текстовом виде.
|
||||
/// Serialization, in binary and text form.
|
||||
void write(DB::WriteBuffer & wb) const { DB::writeBinary(key, wb); }
|
||||
void writeText(DB::WriteBuffer & wb) const { DB::writeDoubleQuoted(key, wb); }
|
||||
|
||||
/// Десериализация, в бинарном и текстовом виде.
|
||||
/// Deserialization, in binary and text form.
|
||||
void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
|
||||
/// Text deserialization of the key. Mirrors writeText, which emits the key double-quoted,
/// so the key has to be parsed with the matching read helper (not a write helper).
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
|
||||
};
|
||||
|
||||
|
||||
/** Определяет размер хэш-таблицы, а также когда и во сколько раз её надо ресайзить.
|
||||
/** Determines the size of the hash table, and when and how much it should be resized.
|
||||
*/
|
||||
template <size_t initial_size_degree = 8>
|
||||
struct HashTableGrower
|
||||
{
|
||||
/// Состояние этой структуры достаточно, чтобы получить размер буфера хэш-таблицы.
|
||||
/// The state of this structure is enough to get the buffer size of the hash table.
|
||||
|
||||
UInt8 size_degree = initial_size_degree;
|
||||
|
||||
/// Размер хэш-таблицы в ячейках.
|
||||
/// The size of the hash table in the cells.
|
||||
/// Number of cells in the buffer: 2^size_degree.
/// The shift is done on a 64-bit literal: `1 << size_degree` would be evaluated as int and overflow for degrees >= 31.
size_t bufSize() const { return 1ULL << size_degree; }
|
||||
|
||||
/// Fill threshold used by overflow(): half of the buffer size, i.e. 2^(size_degree - 1).
/// The shift is done on a 64-bit literal to avoid int overflow for large degrees.
size_t maxFill() const { return 1ULL << (size_degree - 1); }
|
||||
size_t mask() const { return bufSize() - 1; }
|
||||
|
||||
/// Из значения хэш-функции получить номер ячейки в хэш-таблице.
|
||||
/// From the hash value, get the cell number in the hash table.
|
||||
size_t place(size_t x) const { return x & mask(); }
|
||||
|
||||
/// Следующая ячейка в цепочке разрешения коллизий.
|
||||
/// The next cell in the collision resolution chain.
|
||||
size_t next(size_t pos) const { ++pos; return pos & mask(); }
|
||||
|
||||
/// Является ли хэш-таблица достаточно заполненной. Нужно увеличить размер хэш-таблицы, или удалить из неё что-нибудь ненужное.
|
||||
/// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it.
|
||||
bool overflow(size_t elems) const { return elems > maxFill(); }
|
||||
|
||||
/// Увеличить размер хэш-таблицы.
|
||||
/// Increase the size of the hash table.
|
||||
void increaseSize()
|
||||
{
|
||||
size_degree += size_degree >= 23 ? 1 : 2;
|
||||
}
|
||||
|
||||
/// Установить размер буфера по количеству элементов хэш-таблицы. Используется при десериализации хэш-таблицы.
|
||||
/// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
|
||||
void set(size_t num_elems)
|
||||
{
|
||||
size_degree = num_elems <= 1
|
||||
@ -192,17 +192,17 @@ struct HashTableGrower
|
||||
};
|
||||
|
||||
|
||||
/** При использовании в качестве Grower-а, превращает хэш-таблицу в что-то типа lookup-таблицы.
|
||||
* Остаётся неоптимальность - в ячейках хранятся ключи.
|
||||
* Также компилятору не удаётся полностью удалить код хождения по цепочке разрешения коллизий, хотя он не нужен.
|
||||
* TODO Сделать полноценную lookup-таблицу.
|
||||
/** When used as a Grower, it turns a hash table into something like a lookup table.
|
||||
* It remains non-optimal - the cells store the keys.
|
||||
* Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
|
||||
* TODO Make a proper lookup table.
|
||||
*/
|
||||
template <size_t key_bits>
|
||||
struct HashTableFixedGrower
|
||||
{
|
||||
/// Fixed number of cells: 2^key_bits.
/// The shift is done on a 64-bit literal so that the result is not computed in int.
size_t bufSize() const { return 1ULL << key_bits; }
|
||||
size_t place(size_t x) const { return x; }
|
||||
/// Тут можно было бы написать __builtin_unreachable(), но компилятор не до конца всё оптимизирует, и получается менее эффективно.
|
||||
/// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently.
|
||||
size_t next(size_t pos) const { return pos + 1; }
|
||||
bool overflow(size_t elems) const { return false; }
|
||||
|
||||
@ -212,7 +212,7 @@ struct HashTableFixedGrower
|
||||
};
|
||||
|
||||
|
||||
/** Если нужно хранить нулевой ключ отдельно - место для его хранения. */
|
||||
/** If you want to store the zero key separately - a place to store it. */
|
||||
template <bool need_zero_value_storage, typename Cell>
|
||||
struct ZeroValueStorage;
|
||||
|
||||
@ -271,15 +271,15 @@ protected:
|
||||
using Self = HashTable<Key, Cell, Hash, Grower, Allocator>;
|
||||
using cell_type = Cell;
|
||||
|
||||
size_t m_size = 0; /// Количество элементов
|
||||
Cell * buf; /// Кусок памяти для всех элементов кроме элемента с ключём 0.
|
||||
size_t m_size = 0; /// Amount of elements
|
||||
Cell * buf; /// A piece of memory for all elements except the element with zero key.
|
||||
Grower grower;
|
||||
|
||||
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
||||
mutable size_t collisions = 0;
|
||||
#endif
|
||||
|
||||
/// Найти ячейку с тем же ключём или пустую ячейку, начиная с заданного места и далее по цепочке разрешения коллизий.
|
||||
/// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain.
|
||||
size_t ALWAYS_INLINE findCell(const Key & x, size_t hash_value, size_t place_value) const
|
||||
{
|
||||
while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value))
|
||||
@ -293,7 +293,7 @@ protected:
|
||||
return place_value;
|
||||
}
|
||||
|
||||
/// Найти пустую ячейку, начиная с заданного места и далее по цепочке разрешения коллизий.
|
||||
/// Find an empty cell, starting with the specified position and further along the collision resolution chain.
|
||||
size_t ALWAYS_INLINE findEmptyCell(const Key & x, size_t hash_value, size_t place_value) const
|
||||
{
|
||||
while (!buf[place_value].isZero(*this))
|
||||
@ -323,7 +323,7 @@ protected:
|
||||
}
|
||||
|
||||
|
||||
/// Увеличить размер буфера.
|
||||
/// Increase the size of the buffer.
|
||||
void resize(size_t for_num_elems = 0, size_t for_buf_size = 0)
|
||||
{
|
||||
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
|
||||
@ -332,10 +332,10 @@ protected:
|
||||
|
||||
size_t old_size = grower.bufSize();
|
||||
|
||||
/** Чтобы в случае исключения, объект остался в корректном состоянии,
|
||||
* изменение переменной grower (определяющией размер буфера хэш-таблицы)
|
||||
* откладываем на момент после реального изменения буфера.
|
||||
* Временная переменная new_grower используется, чтобы определить новый размер.
|
||||
/** So that the object remains in a correct state if an exception is thrown,
|
||||
* changing the variable `grower` (which determines the buffer size of the hash table)
|
||||
* is postponed until after the buffer has actually been changed.
|
||||
* The temporary variable `new_grower` is used to determine the new size.
|
||||
*/
|
||||
Grower new_grower = grower;
|
||||
|
||||
@ -354,29 +354,29 @@ protected:
|
||||
else
|
||||
new_grower.increaseSize();
|
||||
|
||||
/// Расширим пространство.
|
||||
/// Expand the space.
|
||||
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, getBufferSizeInBytes(), new_grower.bufSize() * sizeof(Cell)));
|
||||
grower = new_grower;
|
||||
|
||||
/** Теперь некоторые элементы может потребоваться переместить на новое место.
|
||||
* Элемент может остаться на месте, или переместиться в новое место "справа",
|
||||
* или переместиться левее по цепочке разрешения коллизий, из-за того, что элементы левее него были перемещены в новое место "справа".
|
||||
/** Now some items may need to be moved to a new location.
|
||||
* The element can stay in place, or move to a new location "on the right",
|
||||
* or move to the left of the collision resolution chain, because the elements to the left of it have been moved to the new "right" location.
|
||||
*/
|
||||
size_t i = 0;
|
||||
for (; i < old_size; ++i)
|
||||
if (!buf[i].isZero(*this) && !buf[i].isDeleted())
|
||||
reinsert(buf[i]);
|
||||
reinsert(buf[i], buf[i].getHash(*this));
|
||||
|
||||
/** Также имеется особый случай:
|
||||
* если элемент должен был быть в конце старого буфера, [ x]
|
||||
* но находится в начале из-за цепочки разрешения коллизий, [o x]
|
||||
* то после ресайза, он сначала снова окажется не на своём месте, [ xo ]
|
||||
* и для того, чтобы перенести его куда надо,
|
||||
* надо будет после переноса всех элементов из старой половинки [ o x ]
|
||||
* обработать ещё хвостик из цепочки разрешения коллизий сразу после неё [ o x ]
|
||||
/** There is also a special case:
|
||||
* if the element was to be at the end of the old buffer, [ x]
|
||||
* but is at the beginning because of the collision resolution chain, [o x]
|
||||
* then after resizing, it will first be out of place again, [ xo ]
|
||||
* and in order to transfer it where necessary,
|
||||
* after transferring all the elements from the old halves you need to [ o x ]
|
||||
* process tail from the collision resolution chain immediately after it [ o x ]
|
||||
*/
|
||||
for (; !buf[i].isZero(*this) && !buf[i].isDeleted(); ++i)
|
||||
reinsert(buf[i]);
|
||||
reinsert(buf[i], buf[i].getHash(*this));
|
||||
|
||||
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
|
||||
watch.stop();
|
||||
@ -387,30 +387,30 @@ protected:
|
||||
}
|
||||
|
||||
|
||||
/** Вставить в новый буфер значение, которое было в старом буфере.
|
||||
* Используется при увеличении размера буфера.
|
||||
/** Insert into the new buffer the value that was in the old buffer.
|
||||
* Used when increasing the buffer size.
|
||||
*/
|
||||
void reinsert(Cell & x)
|
||||
void reinsert(Cell & x, size_t hash_value)
|
||||
{
|
||||
size_t hash_value = x.getHash(*this);
|
||||
size_t place_value = grower.place(hash_value);
|
||||
|
||||
/// Если элемент на своём месте.
|
||||
/// If the element is in its place.
|
||||
if (&x == &buf[place_value])
|
||||
return;
|
||||
|
||||
/// Вычисление нового места, с учётом цепочки разрешения коллизий.
|
||||
/// Compute a new location, taking into account the collision resolution chain.
|
||||
place_value = findCell(Cell::getKey(x.getValue()), hash_value, place_value);
|
||||
|
||||
/// Если элемент остался на своём месте в старой цепочке разрешения коллизий.
|
||||
/// If the item remains in its place in the old collision resolution chain.
|
||||
if (!buf[place_value].isZero(*this))
|
||||
return;
|
||||
|
||||
/// Копирование на новое место и зануление старого.
|
||||
/// Copy to a new location and zero the old one.
|
||||
x.setHash(hash_value);
|
||||
memcpy(&buf[place_value], &x, sizeof(x));
|
||||
x.setZero();
|
||||
|
||||
/// Потом на старое место могут переместиться элементы, которые раньше были в коллизии с этим.
|
||||
/// Then the elements that previously were in collision with this can move to the old place.
|
||||
}
|
||||
|
||||
|
||||
@ -611,10 +611,10 @@ protected:
|
||||
iterator iteratorToZero() { return iteratorTo(this->zeroValue()); }
|
||||
|
||||
|
||||
/// Если ключ нулевой - вставить его в специальное место и вернуть true.
|
||||
bool ALWAYS_INLINE emplaceIfZero(Key x, iterator & it, bool & inserted)
|
||||
/// If the key is zero, insert it into a special place and return true.
|
||||
bool ALWAYS_INLINE emplaceIfZero(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
{
|
||||
/// Если утверждается, что нулевой ключ не могут вставить в таблицу.
|
||||
/// If it is claimed that the zero key can not be inserted into the table.
|
||||
if (!Cell::need_zero_value_storage)
|
||||
return false;
|
||||
|
||||
@ -625,7 +625,7 @@ protected:
|
||||
{
|
||||
++m_size;
|
||||
this->setHasZero();
|
||||
it.ptr->setHash(hash(x));
|
||||
it.ptr->setHash(hash_value);
|
||||
inserted = true;
|
||||
}
|
||||
else
|
||||
@ -638,7 +638,7 @@ protected:
|
||||
}
|
||||
|
||||
|
||||
/// Только для ненулевых ключей. Найти нужное место, вставить туда ключ, если его ещё нет, вернуть итератор на ячейку.
|
||||
/// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter.
|
||||
void ALWAYS_INLINE emplaceNonZero(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
{
|
||||
size_t place_value = findCell(x, hash_value, grower.place(hash_value));
|
||||
@ -664,9 +664,9 @@ protected:
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/** Если этого не делать, то будут проблемы.
|
||||
* Ведь останется ключ, но неинициализированное mapped-значение,
|
||||
* у которого, возможно, даже нельзя вызвать деструктор.
|
||||
/** If this is not done, there will be problems.
|
||||
* There remains a key, but uninitialized mapped-value,
|
||||
* for which, perhaps, the destructor cannot even be called.
|
||||
*/
|
||||
--m_size;
|
||||
buf[place_value].setZero();
|
||||
@ -679,13 +679,14 @@ protected:
|
||||
|
||||
|
||||
public:
|
||||
/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
|
||||
/// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
|
||||
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
|
||||
{
|
||||
std::pair<iterator, bool> res;
|
||||
|
||||
if (!emplaceIfZero(Cell::getKey(x), res.first, res.second))
|
||||
emplaceNonZero(Cell::getKey(x), res.first, res.second, hash(Cell::getKey(x)));
|
||||
size_t hash_value = hash(Cell::getKey(x));
|
||||
if (!emplaceIfZero(Cell::getKey(x), res.first, res.second, hash_value))
|
||||
emplaceNonZero(Cell::getKey(x), res.first, res.second, hash_value);
|
||||
|
||||
if (res.second)
|
||||
res.first.ptr->setMapped(x);
|
||||
@ -694,14 +695,21 @@ public:
|
||||
}
|
||||
|
||||
|
||||
/** Вставить ключ,
|
||||
* вернуть итератор на позицию, которую можно использовать для placement new значения,
|
||||
* а также флаг - был ли вставлен новый ключ.
|
||||
/// Reinsert node pointed to by iterator
|
||||
void ALWAYS_INLINE reinsert(iterator & it, size_t hash_value)
|
||||
{
|
||||
reinsert(*it.getPtr(), hash_value);
|
||||
}
|
||||
|
||||
|
||||
/** Insert the key,
|
||||
* return an iterator to a position that can be used for `placement new` of value,
|
||||
* as well as the flag - whether a new key was inserted.
|
||||
*
|
||||
* Вы обязаны сделать placement new значения, если был вставлен новый ключ,
|
||||
* так как при уничтожении хэш-таблицы для него будет вызываться деструктор!
|
||||
* You have to make `placement new` of value if you inserted a new key,
|
||||
* since the destructor will be called for it when the hash table is destroyed!
|
||||
*
|
||||
* Пример использования:
|
||||
* Example usage:
|
||||
*
|
||||
* Map::iterator it;
|
||||
* bool inserted;
|
||||
@ -711,20 +719,21 @@ public:
|
||||
*/
|
||||
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted)
|
||||
{
|
||||
if (!emplaceIfZero(x, it, inserted))
|
||||
emplaceNonZero(x, it, inserted, hash(x));
|
||||
}
|
||||
|
||||
|
||||
/// То же самое, но с заранее вычисленным значением хэш-функции.
|
||||
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
{
|
||||
if (!emplaceIfZero(x, it, inserted))
|
||||
size_t hash_value = hash(x);
|
||||
if (!emplaceIfZero(x, it, inserted, hash_value))
|
||||
emplaceNonZero(x, it, inserted, hash_value);
|
||||
}
|
||||
|
||||
|
||||
/// Скопировать ячейку из другой хэш-таблицы. Предполагается, что ячейка не нулевая, а также, что такого ключа в таблице ещё не было.
|
||||
/// Same, but with a precalculated value of hash function.
|
||||
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
{
|
||||
if (!emplaceIfZero(x, it, inserted, hash_value))
|
||||
emplaceNonZero(x, it, inserted, hash_value);
|
||||
}
|
||||
|
||||
|
||||
/// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.
|
||||
void ALWAYS_INLINE insertUniqueNonZero(const Cell * cell, size_t hash_value)
|
||||
{
|
||||
size_t place_value = findEmptyCell(cell->getKey(cell->getValue()), hash_value, grower.place(hash_value));
|
||||
@ -903,8 +912,8 @@ public:
|
||||
memset(buf, 0, grower.bufSize() * sizeof(*buf));
|
||||
}
|
||||
|
||||
/// После выполнения этой функции, таблицу можно только уничтожить,
|
||||
/// а также можно использовать методы size, empty, begin, end.
|
||||
/// After executing this function, the table can only be destroyed,
|
||||
/// and also you can use the methods `size`, `empty`, `begin`, `end`.
|
||||
void clearAndShrink()
|
||||
{
|
||||
destroyElements();
|
||||
|
@ -3,15 +3,15 @@
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
|
||||
/** Замена хэш-таблицы для маленького количества (единицы) ключей.
|
||||
* Реализована в виде массива с линейным поиском.
|
||||
* Массив расположен внутри объекта.
|
||||
* Интерфейс является подмножеством интерфейса HashTable.
|
||||
/** Replacement of the hash table for a small number (<10) of keys.
|
||||
* Implemented as an array with linear search.
|
||||
* The array is located inside the object.
|
||||
* The interface is a subset of the HashTable interface.
|
||||
*
|
||||
* Вставка возможна только если метод full возвращает false.
|
||||
* При неизвестном количестве различных ключей,
|
||||
* вы должны проверять, не заполнена ли таблица,
|
||||
* и делать fallback в этом случае (например, использовать полноценную хэш-таблицу).
|
||||
* Insert is possible only if the `full` method returns false.
|
||||
* With an unknown number of different keys,
|
||||
* you should check if the table is not full,
|
||||
* and do a `fallback` in this case (for example, use a real hash table).
|
||||
*/
|
||||
|
||||
template
|
||||
@ -32,11 +32,11 @@ protected:
|
||||
using Self = SmallTable<Key, Cell, capacity>;
|
||||
using cell_type = Cell;
|
||||
|
||||
size_t m_size = 0; /// Количество элементов.
|
||||
Cell buf[capacity]; /// Кусок памяти для всех элементов.
|
||||
size_t m_size = 0; /// Amount of elements.
|
||||
Cell buf[capacity]; /// A piece of memory for all elements.
|
||||
|
||||
|
||||
/// Найти ячейку с тем же ключём или пустую ячейку, начиная с заданного места и далее по цепочке разрешения коллизий.
|
||||
/// Find a cell with the same key or an empty cell, starting from the specified position and then by the collision resolution chain.
|
||||
const Cell * ALWAYS_INLINE findCell(const Key & x) const
|
||||
{
|
||||
const Cell * it = buf;
|
||||
@ -188,8 +188,8 @@ protected:
|
||||
|
||||
|
||||
public:
|
||||
/** Таблица переполнена.
|
||||
* В переполненную таблицу ничего нельзя вставлять.
|
||||
/** The table is full.
|
||||
* You can not insert anything into the full table.
|
||||
*/
|
||||
bool full()
|
||||
{
|
||||
@ -197,7 +197,7 @@ public:
|
||||
}
|
||||
|
||||
|
||||
/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
|
||||
/// Insert the value. In the case of any more complex values, it is better to use the `emplace` function.
|
||||
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
|
||||
{
|
||||
std::pair<iterator, bool> res;
|
||||
@ -211,14 +211,14 @@ public:
|
||||
}
|
||||
|
||||
|
||||
/** Вставить ключ,
|
||||
* вернуть итератор на позицию, которую можно использовать для placement new значения,
|
||||
* а также флаг - был ли вставлен новый ключ.
|
||||
/** Insert the key,
|
||||
* return an iterator to a position that can be used for `placement new` of value,
|
||||
* as well as the flag - whether a new key was inserted.
|
||||
*
|
||||
* Вы обязаны сделать placement new значения, если был вставлен новый ключ,
|
||||
* так как при уничтожении хэш-таблицы для него будет вызываться деструктор!
|
||||
* You have to make `placement new` of value if you inserted a new key,
|
||||
* since when destroying a hash table, a destructor will be called for it!
|
||||
*
|
||||
* Пример использования:
|
||||
* Example usage:
|
||||
*
|
||||
* Map::iterator it;
|
||||
* bool inserted;
|
||||
@ -239,7 +239,7 @@ public:
|
||||
}
|
||||
|
||||
|
||||
/// То же самое, но вернуть false, если переполнено.
|
||||
/// Same, but return false if it's full.
|
||||
bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted)
|
||||
{
|
||||
Cell * res = findCell(x);
|
||||
@ -257,7 +257,7 @@ public:
|
||||
}
|
||||
|
||||
|
||||
/// Скопировать ячейку из другой хэш-таблицы. Предполагается, что такого ключа в таблице ещё не было.
|
||||
/// Copy the cell from another hash table. It is assumed that there was no such key in the table yet.
|
||||
void ALWAYS_INLINE insertUnique(const Cell * cell)
|
||||
{
|
||||
memcpy(&buf[m_size], cell, sizeof(*cell));
|
||||
|
@ -3,21 +3,21 @@
|
||||
#include <Common/HashTable/HashTable.h>
|
||||
|
||||
|
||||
/** Двухуровневая хэш-таблица.
|
||||
* Представляет собой 256 (или 1 << BITS_FOR_BUCKET) маленьких хэш-таблиц (bucket-ов первого уровня).
|
||||
* Для определения, какую из них использовать, берётся один из байтов хэш-функции.
|
||||
/** Two-level hash table.
|
||||
* Represents 256 (or 1 << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
|
||||
* To determine which one to use, one of the bytes of the hash function is taken.
|
||||
*
|
||||
* Обычно работает чуть-чуть медленнее простой хэш-таблицы.
|
||||
* Тем не менее, обладает преимуществами в некоторых случаях:
|
||||
* - если надо мерджить две хэш-таблицы вместе, то это можно легко распараллелить по bucket-ам;
|
||||
* - лаг при ресайзах размазан, так как маленькие хэш-таблицы ресайзятся по-отдельности;
|
||||
* - по идее, ресайзы кэш-локальны в большем диапазоне размеров.
|
||||
* Usually works a little slower than a simple hash table.
|
||||
* However, it has advantages in some cases:
|
||||
* - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
|
||||
* - delay during resizes is amortized, since the small hash tables will be resized separately;
|
||||
* - in theory, resizes are cache-local in a larger range of sizes.
|
||||
*/
|
||||
|
||||
template <size_t initial_size_degree = 8>
|
||||
struct TwoLevelHashTableGrower : public HashTableGrower<initial_size_degree>
|
||||
{
|
||||
/// Увеличить размер хэш-таблицы.
|
||||
/// Increase the size of the hash table.
|
||||
void increaseSize()
|
||||
{
|
||||
this->size_degree += this->size_degree >= 15 ? 1 : 2;
|
||||
@ -52,7 +52,7 @@ public:
|
||||
|
||||
size_t hash(const Key & x) const { return Hash::operator()(x); }
|
||||
|
||||
/// NOTE Плохо для хэш-таблиц больше чем на 2^32 ячеек.
|
||||
/// NOTE Bad for hash tables with more than 2^32 cells.
|
||||
static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
|
||||
|
||||
protected:
|
||||
@ -89,13 +89,13 @@ public:
|
||||
|
||||
TwoLevelHashTable() {}
|
||||
|
||||
/// Скопировать данные из другой (обычной) хэш-таблицы. У неё должна быть такая же хэш-функция.
|
||||
/// Copy the data from another (normal) hash table. It should have the same hash function.
|
||||
template <typename Source>
|
||||
TwoLevelHashTable(const Source & src)
|
||||
{
|
||||
typename Source::const_iterator it = src.begin();
|
||||
|
||||
/// Предполагается, что нулевой ключ (хранящийся отдельно) при итерировании идёт первым.
|
||||
/// It is assumed that the zero key (stored separately) is first in iteration order.
|
||||
if (it != src.end() && it.getPtr()->isZero(src))
|
||||
{
|
||||
insert(*it);
|
||||
@ -205,7 +205,7 @@ public:
|
||||
iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
|
||||
|
||||
|
||||
/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
|
||||
/// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
|
||||
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
|
||||
{
|
||||
size_t hash_value = hash(Cell::getKey(x));
|
||||
@ -220,14 +220,14 @@ public:
|
||||
}
|
||||
|
||||
|
||||
/** Вставить ключ,
|
||||
* вернуть итератор на позицию, которую можно использовать для placement new значения,
|
||||
* а также флаг - был ли вставлен новый ключ.
|
||||
/** Insert the key,
|
||||
* return an iterator to a position that can be used for `placement new` of value,
|
||||
* as well as the flag - whether a new key was inserted.
|
||||
*
|
||||
* Вы обязаны сделать placement new значения, если был вставлен новый ключ,
|
||||
* так как при уничтожении хэш-таблицы для него будет вызываться деструктор!
|
||||
* You have to make `placement new` of value if you inserted a new key,
|
||||
* since when destroying a hash table, the destructor will be invoked for it!
|
||||
*
|
||||
* Пример использования:
|
||||
* Example usage:
|
||||
*
|
||||
* Map::iterator it;
|
||||
* bool inserted;
|
||||
@ -242,7 +242,7 @@ public:
|
||||
}
|
||||
|
||||
|
||||
/// То же самое, но с заранее вычисленным значением хэш-функции.
|
||||
/// Same, but with a precalculated value of hash function.
|
||||
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
{
|
||||
size_t buck = getBucketFromHash(hash_value);
|
||||
|
@ -7,10 +7,10 @@
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
/** Этот класс предоставляет способ, чтобы оценить погрешность результата применения алгоритма HyperLogLog.
|
||||
* Эмирические наблюдения показывают, что большие погрешности возникают при E < 5 * 2^precision, где
|
||||
* E - возвращаемое значение алгоритмом HyperLogLog, и precision - параметр точности HyperLogLog.
|
||||
* См. "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm".
|
||||
/** This class provides a way to evaluate the error in the result of applying the HyperLogLog algorithm.
|
||||
* Empirical observations show that large errors occur at E < 5 * 2^precision, where
|
||||
* E is the return value of the HyperLogLog algorithm, and `precision` is the HyperLogLog precision parameter.
|
||||
* See "HyperLogLog in Practice: Algorithmic Engineering of a State of the Art Cardinality Estimation Algorithm".
|
||||
* (S. Heule et al., Proceedings of the EDBT 2013 Conference).
|
||||
*/
|
||||
template <typename BiasData>
|
||||
@ -22,14 +22,14 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Предельное количество уникальных значений до которого должна примениться поправка
|
||||
/// из алгоритма LinearCounting.
|
||||
/// Maximum number of unique values up to which the correction
|
||||
/// from the LinearCounting algorithm should be applied.
|
||||
static double getThreshold()
|
||||
{
|
||||
return BiasData::getThreshold();
|
||||
}
|
||||
|
||||
/// Вернуть оценку погрешности.
|
||||
/// Return the error estimate.
|
||||
static double getBias(double raw_estimate)
|
||||
{
|
||||
const auto & estimates = BiasData::getRawEstimates();
|
||||
@ -52,7 +52,7 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Получаем оценку погрешности путём линейной интерполяции.
|
||||
/// We get the error estimate by linear interpolation.
|
||||
size_t index = std::distance(estimates.begin(), it);
|
||||
|
||||
double estimate1 = estimates[index - 1];
|
||||
@ -60,7 +60,7 @@ public:
|
||||
|
||||
double bias1 = biases[index - 1];
|
||||
double bias2 = biases[index];
|
||||
/// Предполагается, что условие estimate1 < estimate2 всегда выполнено.
|
||||
/// It is assumed that the estimate1 < estimate2 condition is always satisfied.
|
||||
double slope = (bias2 - bias1) / (estimate2 - estimate1);
|
||||
|
||||
return bias1 + slope * (raw_estimate - estimate1);
|
||||
@ -68,7 +68,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
/// Статические проверки.
|
||||
/// Static checks.
|
||||
using TRawEstimatesRef = decltype(BiasData::getRawEstimates());
|
||||
using TRawEstimates = typename std::remove_reference<TRawEstimatesRef>::type;
|
||||
|
||||
@ -82,10 +82,10 @@ private:
|
||||
"Bias estimator has inconsistent data");
|
||||
};
|
||||
|
||||
/** Тривиальный случай HyperLogLogBiasEstimator: употребляется, если не хотим исправить
|
||||
* погрешность. Это имеет смысль при маленьких значениях параметра точности, например 5 или 12.
|
||||
* Тогда применяются поправки из оригинальной версии алгоритма HyperLogLog.
|
||||
* См. "HyperLogLog: The analysis of a near-optimal cardinality estimation algorithm"
|
||||
/** Trivial case of HyperLogLogBiasEstimator: used if we do not want to fix
|
||||
* the error. This makes sense for small values of the accuracy parameter, for example 5 or 12.
|
||||
* Then the corrections from the original version of the HyperLogLog algorithm are applied.
|
||||
* See "HyperLogLog: The analysis of a near-optimal cardinality estimation algorithm"
|
||||
* (P. Flajolet et al., AOFA '07: Proceedings of the 2007 International Conference on Analysis
|
||||
* of Algorithms)
|
||||
*/
|
||||
|
@ -9,10 +9,10 @@ namespace DB
|
||||
{
|
||||
|
||||
|
||||
/** Для маленького количества ключей - массив фиксированного размера "на стеке".
|
||||
* Для большого - выделяется HyperLogLog.
|
||||
* Смотрите также более практичную реализацию в CombinedCardinalityEstimator.h,
|
||||
* где используется также хэш-таблица для множеств среднего размера.
|
||||
/** For a small number of keys - an array of fixed size "on the stack".
|
||||
* For large, HyperLogLog is allocated.
|
||||
* See also the more practical implementation in CombinedCardinalityEstimator.h,
|
||||
* where a hash table is also used for medium-sized sets.
|
||||
*/
|
||||
template
|
||||
<
|
||||
@ -39,7 +39,7 @@ private:
|
||||
{
|
||||
CurrentMemoryTracker::alloc(sizeof(large));
|
||||
|
||||
/// На время копирования данных из tiny, устанавливать значение large ещё нельзя (иначе оно перезатрёт часть данных).
|
||||
/// While data is being copied from `small`, the value of `large` cannot be set yet (otherwise it would overwrite some of the data).
|
||||
Large * tmp_large = new Large;
|
||||
|
||||
for (const auto & x : small)
|
||||
@ -99,7 +99,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Можно вызывать только для пустого объекта.
|
||||
/// Can only be called for an empty object.
|
||||
void read(DB::ReadBuffer & in)
|
||||
{
|
||||
bool is_large;
|
||||
|
@ -3,24 +3,24 @@
|
||||
#include <Common/CounterInFile.h>
|
||||
|
||||
|
||||
/** Позволяет получать авто-инкрементное число, храня его в файле.
|
||||
* Предназначен для редких вызовов (не рассчитан на производительность).
|
||||
/** Allows to get an auto-increment number, storing it in a file.
|
||||
* Intended for rare calls (not designed for performance).
|
||||
*/
|
||||
class Increment
|
||||
{
|
||||
public:
|
||||
/// path - имя файла, включая путь
|
||||
/// path - the name of the file, including the path
|
||||
Increment(const std::string & path_) : counter(path_) {}
|
||||
|
||||
/** Получить следующее число.
|
||||
* Если параметр create_if_need не установлен в true, то
|
||||
* в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
|
||||
/** Get the next number.
|
||||
* If the `create_if_need` parameter is not set to true, then
|
||||
* the file must already have a number written (if not - create the file manually with zero).
|
||||
*
|
||||
* Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
|
||||
* (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
|
||||
* To protect against race conditions between different processes, file locks are used.
|
||||
* (But when the file is first created, a race condition is possible, so it's better to create the file in advance.)
|
||||
*
|
||||
* locked_callback вызывается при заблокированном файле со счетчиком. В него передается новое значение.
|
||||
* locked_callback можно использовать, чтобы делать что-нибудь атомарно с увеличением счетчика (например, переименовывать файлы).
|
||||
* `locked_callback` is called when the counter file is locked. A new value is passed to it.
|
||||
* `locked_callback` can be used to do something atomically with the increment of the counter (for example, rename files).
|
||||
*/
|
||||
template <typename Callback>
|
||||
UInt64 get(Callback && locked_callback, bool create_if_need = false)
|
||||
@ -33,25 +33,25 @@ public:
|
||||
return getBunch(1, create_if_need);
|
||||
}
|
||||
|
||||
/// Посмотреть следующее значение.
|
||||
/// Peek the next value.
|
||||
UInt64 peek(bool create_if_need = false)
|
||||
{
|
||||
return getBunch(0, create_if_need);
|
||||
}
|
||||
|
||||
/** Получить следующее число и увеличить счетчик на count.
|
||||
* Если параметр create_if_need не установлен в true, то
|
||||
* в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
|
||||
/** Get the next number and increase the counter by `count`.
|
||||
* If the `create_if_need` parameter is not set to true, then
|
||||
* the file should already have a number written (if not - create the file manually with zero).
|
||||
*
|
||||
* Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
|
||||
* (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
|
||||
* To protect against race conditions between different processes, file locks are used.
|
||||
* (But when the file is first created, a race condition is possible, so it's better to create the file in advance.)
|
||||
*/
|
||||
UInt64 getBunch(UInt64 count, bool create_if_need = false)
|
||||
{
|
||||
return static_cast<UInt64>(counter.add(static_cast<Int64>(count), create_if_need) - count + 1);
|
||||
}
|
||||
|
||||
/// Изменить путь к файлу.
|
||||
/// Change the path to the file.
|
||||
void setPath(std::string path_)
|
||||
{
|
||||
counter.setPath(path_);
|
||||
@ -65,23 +65,3 @@ public:
|
||||
private:
|
||||
CounterInFile counter;
|
||||
};
|
||||
|
||||
|
||||
/** То же самое, но без хранения в файле.
|
||||
*/
|
||||
struct SimpleIncrement : private boost::noncopyable
|
||||
{
|
||||
std::atomic<UInt64> value;
|
||||
|
||||
SimpleIncrement(UInt64 start = 0) : value(start) {}
|
||||
|
||||
void set(UInt64 new_value)
|
||||
{
|
||||
value = new_value;
|
||||
}
|
||||
|
||||
UInt64 get()
|
||||
{
|
||||
return ++value;
|
||||
}
|
||||
};
|
||||
|
@ -4,10 +4,11 @@
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <map>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Раскрывает в строке макросы из конфига.
|
||||
/** Apply substitutions from the macros in config to the string.
|
||||
*/
|
||||
class Macros
|
||||
{
|
||||
@ -15,8 +16,8 @@ public:
|
||||
Macros();
|
||||
Macros(const Poco::Util::AbstractConfiguration & config, const String & key);
|
||||
|
||||
/** Заменить в строке подстроки вида {macro_name} на значение для macro_name, полученное из конфига.
|
||||
* level - уровень рекурсии.
|
||||
/** Replace the substring of the form {macro_name} with the value for macro_name, obtained from the config file.
|
||||
* level - the level of recursion.
|
||||
*/
|
||||
String expand(const String & s, size_t level = 0) const;
|
||||
|
||||
|
@ -102,10 +102,10 @@ public:
|
||||
};
|
||||
|
||||
|
||||
/** Объект MemoryTracker довольно трудно протащить во все места, где выделяются существенные объёмы памяти.
|
||||
* Поэтому, используется thread-local указатель на используемый MemoryTracker или nullptr, если его не нужно использовать.
|
||||
* Этот указатель выставляется, когда в данном потоке следует отслеживать потребление памяти.
|
||||
* Таким образом, его нужно всего-лишь протащить во все потоки, в которых обрабатывается один запрос.
|
||||
/** The MemoryTracker object is quite difficult to pass to all places where significant amounts of memory are allocated.
|
||||
* Therefore, a thread-local pointer to used MemoryTracker is set, or nullptr if MemoryTracker does not need to be used.
|
||||
* This pointer is set when memory consumption is monitored in current thread.
|
||||
* So, you just need to pass it to all the threads that handle one request.
|
||||
*/
|
||||
extern __thread MemoryTracker * current_memory_tracker;
|
||||
|
||||
|
@ -12,20 +12,22 @@
|
||||
#endif
|
||||
|
||||
|
||||
/** Использует два способа оптимизации регулярного выражения:
|
||||
* 1. Если регулярное выражение является тривиальным (сводится к поиску подстроки в строке),
|
||||
* то заменяет поиск на strstr или strcasestr.
|
||||
* 2. Если регулярное выражение содержит безальтернативную подстроку достаточной длины,
|
||||
* то перед проверкой используется strstr или strcasestr достаточной длины;
|
||||
* регулярное выражение проверяется полностью только если подстрока найдена.
|
||||
* 3. В остальных случаях, используется движок re2.
|
||||
/** Uses two ways to optimize a regular expression:
|
||||
* 1. If the regular expression is trivial (reduces to finding a substring in a string),
|
||||
* then replaces the search with strstr or strcasestr.
|
||||
* 2. If the regular expression contains a non-alternative substring of sufficient length,
|
||||
* then before testing, strstr or strcasestr of sufficient length is used;
|
||||
* regular expression is only fully checked if a substring is found.
|
||||
* 3. In other cases, the re2 engine is used.
|
||||
*
|
||||
* Это имеет смысл, так как strstr и strcasestr в libc под Linux хорошо оптимизированы.
|
||||
* This makes sense, since strstr and strcasestr in libc for Linux are well optimized.
|
||||
*
|
||||
* Подходит, если одновременно выполнены следующие условия:
|
||||
* - если в большинстве вызовов, регулярное выражение не матчится;
|
||||
* - если регулярное выражение совместимо с движком re2;
|
||||
* - можете использовать на свой риск, так как, возможно, не все случаи учтены.
|
||||
* Suitable if the following conditions are simultaneously met:
|
||||
* - if in most calls, the regular expression does not match;
|
||||
* - if the regular expression is compatible with the re2 engine;
|
||||
* - you can use at your own risk, since, probably, not all cases are taken into account.
|
||||
*
|
||||
* NOTE: Multi-character metasymbols such as \Pl are handled incorrectly.
|
||||
*/
|
||||
|
||||
namespace OptimizedRegularExpressionDetails
|
||||
@ -82,7 +84,7 @@ public:
|
||||
|
||||
unsigned getNumberOfSubpatterns() const { return number_of_subpatterns; }
|
||||
|
||||
/// Получить регексп re2 или nullptr, если шаблон тривиален (для вывода в лог).
|
||||
/// Get the regexp re2 or nullptr if the pattern is trivial (for output to the log).
|
||||
const std::unique_ptr<RegexType>& getRE2() const { return re2; }
|
||||
|
||||
static void analyze(const std::string & regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix);
|
||||
@ -105,4 +107,4 @@ private:
|
||||
|
||||
using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;
|
||||
|
||||
#include "OptimizedRegularExpression.inl"
|
||||
#include "OptimizedRegularExpression.inl.h"
|
||||
|
@ -1,431 +0,0 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <Poco/Exception.h>
|
||||
|
||||
#include <Common/OptimizedRegularExpression.h>
|
||||
|
||||
|
||||
#define MIN_LENGTH_FOR_STRSTR 3
|
||||
#define MAX_SUBPATTERNS 5
|
||||
|
||||
template <bool b>
|
||||
void OptimizedRegularExpressionImpl<b>::analyze(
|
||||
const std::string & regexp,
|
||||
std::string & required_substring,
|
||||
bool & is_trivial,
|
||||
bool & required_substring_is_prefix)
|
||||
{
|
||||
/** Выражение тривиально, если в нём все метасимволы эскейплены.
|
||||
* Безальтернативная строка - это
|
||||
* строка вне скобок,
|
||||
* в которой все метасимволы эскейплены,
|
||||
* а также если вне скобок нет '|',
|
||||
* а также избегаются подстроки вида http:// или www.
|
||||
*/
|
||||
const char * begin = regexp.data();
|
||||
const char * pos = begin;
|
||||
const char * end = regexp.data() + regexp.size();
|
||||
int depth = 0;
|
||||
is_trivial = true;
|
||||
required_substring_is_prefix = false;
|
||||
required_substring.clear();
|
||||
bool has_alternative_on_depth_0 = false;
|
||||
|
||||
/// Подстрока с позицией.
|
||||
typedef std::pair<std::string, size_t> Substring;
|
||||
|
||||
typedef std::vector<Substring> Substrings;
|
||||
Substrings trivial_substrings(1);
|
||||
Substring * last_substring = &trivial_substrings.back();
|
||||
|
||||
bool in_curly_braces = false;
|
||||
bool in_square_braces = false;
|
||||
|
||||
while (pos != end)
|
||||
{
|
||||
switch (*pos)
|
||||
{
|
||||
case '\0':
|
||||
pos = end;
|
||||
break;
|
||||
|
||||
case '\\':
|
||||
{
|
||||
++pos;
|
||||
if (pos == end)
|
||||
break;
|
||||
|
||||
switch (*pos)
|
||||
{
|
||||
case '|': case '(': case ')': case '^': case '$': case '.': case '[': case '?': case '*': case '+': case '{':
|
||||
if (depth == 0 && !in_curly_braces && !in_square_braces)
|
||||
{
|
||||
if (last_substring->first.empty())
|
||||
last_substring->second = pos - begin;
|
||||
last_substring->first.push_back(*pos);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/// все остальные escape-последовательности не поддерживаем
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
|
||||
case '|':
|
||||
if (depth == 0)
|
||||
has_alternative_on_depth_0 = true;
|
||||
is_trivial = false;
|
||||
if (!in_square_braces && !last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '(':
|
||||
if (!in_square_braces)
|
||||
{
|
||||
++depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '[':
|
||||
in_square_braces = true;
|
||||
++depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case ']':
|
||||
if (!in_square_braces)
|
||||
goto ordinary;
|
||||
|
||||
in_square_braces = false;
|
||||
--depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case ')':
|
||||
if (!in_square_braces)
|
||||
{
|
||||
--depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '^': case '$': case '.': case '+':
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty() && !in_square_braces)
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
/// Квантификаторы, допускающие нулевое количество.
|
||||
case '{':
|
||||
in_curly_braces = true;
|
||||
case '?': case '*':
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty() && !in_square_braces)
|
||||
{
|
||||
last_substring->first.resize(last_substring->first.size() - 1);
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '}':
|
||||
if (!in_curly_braces)
|
||||
goto ordinary;
|
||||
|
||||
in_curly_braces = false;
|
||||
++pos;
|
||||
break;
|
||||
|
||||
ordinary: /// Обычный, не заэскейпленный символ.
|
||||
default:
|
||||
if (depth == 0 && !in_curly_braces && !in_square_braces)
|
||||
{
|
||||
if (last_substring->first.empty())
|
||||
last_substring->second = pos - begin;
|
||||
last_substring->first.push_back(*pos);
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_substring && last_substring->first.empty())
|
||||
trivial_substrings.pop_back();
|
||||
|
||||
if (!is_trivial)
|
||||
{
|
||||
if (!has_alternative_on_depth_0)
|
||||
{
|
||||
/** Выберем безальтернативную подстроку максимальной длины, среди префиксов,
|
||||
* или безальтернативную подстроку максимальной длины.
|
||||
*/
|
||||
size_t max_length = 0;
|
||||
Substrings::const_iterator candidate_it = trivial_substrings.begin();
|
||||
for (Substrings::const_iterator it = trivial_substrings.begin(); it != trivial_substrings.end(); ++it)
|
||||
{
|
||||
if (((it->second == 0 && candidate_it->second != 0)
|
||||
|| ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
|
||||
/// Тюнинг для предметной области
|
||||
&& (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
|
||||
&& (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
|
||||
&& (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
|
||||
&& (it->first.size() > strlen("Windows ") || strncmp(it->first.data(), "Windows ", strlen("Windows "))))
|
||||
{
|
||||
max_length = it->first.size();
|
||||
candidate_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_length >= MIN_LENGTH_FOR_STRSTR)
|
||||
{
|
||||
required_substring = candidate_it->first;
|
||||
required_substring_is_prefix = candidate_it->second == 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
required_substring = trivial_substrings.front().first;
|
||||
required_substring_is_prefix = trivial_substrings.front().second == 0;
|
||||
}
|
||||
|
||||
/* std::cerr
|
||||
<< "regexp: " << regexp
|
||||
<< ", is_trivial: " << is_trivial
|
||||
<< ", required_substring: " << required_substring
|
||||
<< ", required_substring_is_prefix: " << required_substring_is_prefix
|
||||
<< std::endl;*/
|
||||
}
|
||||
|
||||
|
||||
template <bool b>
|
||||
OptimizedRegularExpressionImpl<b>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
|
||||
{
|
||||
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);
|
||||
|
||||
/// Поддерживаются 3 опции
|
||||
if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
|
||||
throw Poco::Exception("OptimizedRegularExpression: Unsupported option.");
|
||||
|
||||
is_case_insensitive = options & RE_CASELESS;
|
||||
bool is_no_capture = options & RE_NO_CAPTURE;
|
||||
bool is_dot_nl = options & RE_DOT_NL;
|
||||
|
||||
number_of_subpatterns = 0;
|
||||
if (!is_trivial)
|
||||
{
|
||||
/// Скомпилируем регулярное выражение re2.
|
||||
typename RegexType::Options options;
|
||||
|
||||
if (is_case_insensitive)
|
||||
options.set_case_sensitive(false);
|
||||
|
||||
if (is_dot_nl)
|
||||
options.set_dot_nl(true);
|
||||
|
||||
re2 = std::make_unique<RegexType>(regexp_, options);
|
||||
if (!re2->ok())
|
||||
throw Poco::Exception("OptimizedRegularExpression: cannot compile re2: " + regexp_ + ", error: " + re2->error());
|
||||
|
||||
if (!is_no_capture)
|
||||
{
|
||||
number_of_subpatterns = re2->NumberOfCapturingGroups();
|
||||
if (number_of_subpatterns > MAX_SUBPATTERNS)
|
||||
throw Poco::Exception("OptimizedRegularExpression: too many subpatterns in regexp: " + regexp_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <bool b>
|
||||
bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size) const
|
||||
{
|
||||
if (is_trivial)
|
||||
{
|
||||
if (is_case_insensitive)
|
||||
return nullptr != strcasestr(subject, required_substring.data());
|
||||
else
|
||||
return nullptr != strstr(subject, required_substring.data());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!required_substring.empty())
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (nullptr == pos)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <bool b>
|
||||
bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size, Match & match) const
|
||||
{
|
||||
if (is_trivial)
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (pos == nullptr)
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
match.offset = pos - subject;
|
||||
match.length = required_substring.size();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!required_substring.empty())
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (nullptr == pos)
|
||||
return 0;
|
||||
}
|
||||
|
||||
StringPieceType piece;
|
||||
|
||||
if (!RegexType::PartialMatch(StringPieceType(subject, subject_size), *re2, &piece))
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
match.offset = piece.data() - subject;
|
||||
match.length = piece.length();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <bool b>
|
||||
unsigned OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
|
||||
{
|
||||
matches.clear();
|
||||
|
||||
if (limit == 0)
|
||||
return 0;
|
||||
|
||||
if (limit > number_of_subpatterns + 1)
|
||||
limit = number_of_subpatterns + 1;
|
||||
|
||||
if (is_trivial)
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (pos == nullptr)
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
Match match;
|
||||
match.offset = pos - subject;
|
||||
match.length = required_substring.size();
|
||||
matches.push_back(match);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!required_substring.empty())
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (nullptr == pos)
|
||||
return 0;
|
||||
}
|
||||
|
||||
StringPieceType pieces[MAX_SUBPATTERNS];
|
||||
|
||||
if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces, limit))
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
matches.resize(limit);
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
{
|
||||
if (pieces[i] != nullptr)
|
||||
{
|
||||
matches[i].offset = pieces[i].data() - subject;
|
||||
matches[i].length = pieces[i].length();
|
||||
}
|
||||
else
|
||||
{
|
||||
matches[i].offset = std::string::npos;
|
||||
matches[i].length = 0;
|
||||
}
|
||||
}
|
||||
return limit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef MIN_LENGTH_FOR_STRSTR
|
||||
#undef MAX_SUBPATTERNS
|
||||
|
433
dbms/src/Common/OptimizedRegularExpression.inl.h
Normal file
433
dbms/src/Common/OptimizedRegularExpression.inl.h
Normal file
@ -0,0 +1,433 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <Poco/Exception.h>
|
||||
|
||||
#include <Common/OptimizedRegularExpression.h>
|
||||
|
||||
|
||||
#define MIN_LENGTH_FOR_STRSTR 3
|
||||
#define MAX_SUBPATTERNS 5
|
||||
|
||||
|
||||
template <bool thread_safe>
|
||||
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
const std::string & regexp,
|
||||
std::string & required_substring,
|
||||
bool & is_trivial,
|
||||
bool & required_substring_is_prefix)
|
||||
{
|
||||
/** The expression is trivial if all the metacharacters in it are escaped.
|
||||
* The non-alternative string is
|
||||
* a string outside parentheses,
|
||||
* in which all metacharacters are escaped,
|
||||
* and also if there are no '|' outside the brackets,
|
||||
* and also avoid substrings of the form `http://` or `www` and some other
|
||||
* (this is the hack for typical use case in Yandex.Metrica).
|
||||
*/
|
||||
const char * begin = regexp.data();
|
||||
const char * pos = begin;
|
||||
const char * end = regexp.data() + regexp.size();
|
||||
int depth = 0;
|
||||
is_trivial = true;
|
||||
required_substring_is_prefix = false;
|
||||
required_substring.clear();
|
||||
bool has_alternative_on_depth_0 = false;
|
||||
|
||||
/// Substring with a position.
|
||||
using Substring = std::pair<std::string, size_t>;
|
||||
using Substrings = std::vector<Substring>;
|
||||
|
||||
Substrings trivial_substrings(1);
|
||||
Substring * last_substring = &trivial_substrings.back();
|
||||
|
||||
bool in_curly_braces = false;
|
||||
bool in_square_braces = false;
|
||||
|
||||
while (pos != end)
|
||||
{
|
||||
switch (*pos)
|
||||
{
|
||||
case '\0':
|
||||
pos = end;
|
||||
break;
|
||||
|
||||
case '\\':
|
||||
{
|
||||
++pos;
|
||||
if (pos == end)
|
||||
break;
|
||||
|
||||
switch (*pos)
|
||||
{
|
||||
case '|': case '(': case ')': case '^': case '$': case '.': case '[': case '?': case '*': case '+': case '{':
|
||||
if (depth == 0 && !in_curly_braces && !in_square_braces)
|
||||
{
|
||||
if (last_substring->first.empty())
|
||||
last_substring->second = pos - begin;
|
||||
last_substring->first.push_back(*pos);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/// all other escape sequences are not supported
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
|
||||
case '|':
|
||||
if (depth == 0)
|
||||
has_alternative_on_depth_0 = true;
|
||||
is_trivial = false;
|
||||
if (!in_square_braces && !last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '(':
|
||||
if (!in_square_braces)
|
||||
{
|
||||
++depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '[':
|
||||
in_square_braces = true;
|
||||
++depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case ']':
|
||||
if (!in_square_braces)
|
||||
goto ordinary;
|
||||
|
||||
in_square_braces = false;
|
||||
--depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case ')':
|
||||
if (!in_square_braces)
|
||||
{
|
||||
--depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '^': case '$': case '.': case '+':
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty() && !in_square_braces)
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
/// Quantifiers that allow a zero number of occurences.
|
||||
case '{':
|
||||
in_curly_braces = true;
|
||||
case '?': case '*':
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty() && !in_square_braces)
|
||||
{
|
||||
last_substring->first.resize(last_substring->first.size() - 1);
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '}':
|
||||
if (!in_curly_braces)
|
||||
goto ordinary;
|
||||
|
||||
in_curly_braces = false;
|
||||
++pos;
|
||||
break;
|
||||
|
||||
ordinary: /// Normal, not escaped symbol.
|
||||
default:
|
||||
if (depth == 0 && !in_curly_braces && !in_square_braces)
|
||||
{
|
||||
if (last_substring->first.empty())
|
||||
last_substring->second = pos - begin;
|
||||
last_substring->first.push_back(*pos);
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_substring && last_substring->first.empty())
|
||||
trivial_substrings.pop_back();
|
||||
|
||||
if (!is_trivial)
|
||||
{
|
||||
if (!has_alternative_on_depth_0)
|
||||
{
|
||||
/** We choose the non-alternative substring of the maximum length, among the prefixes,
|
||||
* or a non-alternative substring of maximum length.
|
||||
*/
|
||||
size_t max_length = 0;
|
||||
Substrings::const_iterator candidate_it = trivial_substrings.begin();
|
||||
for (Substrings::const_iterator it = trivial_substrings.begin(); it != trivial_substrings.end(); ++it)
|
||||
{
|
||||
if (((it->second == 0 && candidate_it->second != 0)
|
||||
|| ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
|
||||
/// Tuning for typical usage domain
|
||||
&& (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
|
||||
&& (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
|
||||
&& (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
|
||||
&& (it->first.size() > strlen("Windows ") || strncmp(it->first.data(), "Windows ", strlen("Windows "))))
|
||||
{
|
||||
max_length = it->first.size();
|
||||
candidate_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_length >= MIN_LENGTH_FOR_STRSTR)
|
||||
{
|
||||
required_substring = candidate_it->first;
|
||||
required_substring_is_prefix = candidate_it->second == 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
required_substring = trivial_substrings.front().first;
|
||||
required_substring_is_prefix = trivial_substrings.front().second == 0;
|
||||
}
|
||||
|
||||
/* std::cerr
|
||||
<< "regexp: " << regexp
|
||||
<< ", is_trivial: " << is_trivial
|
||||
<< ", required_substring: " << required_substring
|
||||
<< ", required_substring_is_prefix: " << required_substring_is_prefix
|
||||
<< std::endl;*/
|
||||
}
|
||||
|
||||
|
||||
template <bool thread_safe>
|
||||
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
|
||||
{
|
||||
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);
|
||||
|
||||
/// Just three following options are supported
|
||||
if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
|
||||
throw Poco::Exception("OptimizedRegularExpression: Unsupported option.");
|
||||
|
||||
is_case_insensitive = options & RE_CASELESS;
|
||||
bool is_no_capture = options & RE_NO_CAPTURE;
|
||||
bool is_dot_nl = options & RE_DOT_NL;
|
||||
|
||||
number_of_subpatterns = 0;
|
||||
if (!is_trivial)
|
||||
{
|
||||
/// Compile the re2 regular expression.
|
||||
typename RegexType::Options options;
|
||||
|
||||
if (is_case_insensitive)
|
||||
options.set_case_sensitive(false);
|
||||
|
||||
if (is_dot_nl)
|
||||
options.set_dot_nl(true);
|
||||
|
||||
re2 = std::make_unique<RegexType>(regexp_, options);
|
||||
if (!re2->ok())
|
||||
throw Poco::Exception("OptimizedRegularExpression: cannot compile re2: " + regexp_ + ", error: " + re2->error());
|
||||
|
||||
if (!is_no_capture)
|
||||
{
|
||||
number_of_subpatterns = re2->NumberOfCapturingGroups();
|
||||
if (number_of_subpatterns > MAX_SUBPATTERNS)
|
||||
throw Poco::Exception("OptimizedRegularExpression: too many subpatterns in regexp: " + regexp_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <bool thread_safe>
|
||||
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size) const
|
||||
{
|
||||
if (is_trivial)
|
||||
{
|
||||
if (is_case_insensitive)
|
||||
return nullptr != strcasestr(subject, required_substring.data());
|
||||
else
|
||||
return nullptr != strstr(subject, required_substring.data());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!required_substring.empty())
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (nullptr == pos)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <bool thread_safe>
|
||||
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, Match & match) const
|
||||
{
|
||||
if (is_trivial)
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (pos == nullptr)
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
match.offset = pos - subject;
|
||||
match.length = required_substring.size();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!required_substring.empty())
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (nullptr == pos)
|
||||
return 0;
|
||||
}
|
||||
|
||||
StringPieceType piece;
|
||||
|
||||
if (!RegexType::PartialMatch(StringPieceType(subject, subject_size), *re2, &piece))
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
match.offset = piece.data() - subject;
|
||||
match.length = piece.length();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <bool thread_safe>
|
||||
unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
|
||||
{
|
||||
matches.clear();
|
||||
|
||||
if (limit == 0)
|
||||
return 0;
|
||||
|
||||
if (limit > number_of_subpatterns + 1)
|
||||
limit = number_of_subpatterns + 1;
|
||||
|
||||
if (is_trivial)
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (pos == nullptr)
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
Match match;
|
||||
match.offset = pos - subject;
|
||||
match.length = required_substring.size();
|
||||
matches.push_back(match);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!required_substring.empty())
|
||||
{
|
||||
const char * pos;
|
||||
if (is_case_insensitive)
|
||||
pos = strcasestr(subject, required_substring.data());
|
||||
else
|
||||
pos = strstr(subject, required_substring.data());
|
||||
|
||||
if (nullptr == pos)
|
||||
return 0;
|
||||
}
|
||||
|
||||
StringPieceType pieces[MAX_SUBPATTERNS];
|
||||
|
||||
if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces, limit))
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
matches.resize(limit);
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
{
|
||||
if (pieces[i] != nullptr)
|
||||
{
|
||||
matches[i].offset = pieces[i].data() - subject;
|
||||
matches[i].length = pieces[i].length();
|
||||
}
|
||||
else
|
||||
{
|
||||
matches[i].offset = std::string::npos;
|
||||
matches[i].length = 0;
|
||||
}
|
||||
}
|
||||
return limit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef MIN_LENGTH_FOR_STRSTR
|
||||
#undef MAX_SUBPATTERNS
|
||||
|
@ -19,33 +19,33 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Динамический массив для POD-типов.
|
||||
* Предназначен для небольшого количества больших массивов (а не большого количества маленьких).
|
||||
* А точнее - для использования в ColumnVector.
|
||||
* Отличается от std::vector тем, что не инициализирует элементы.
|
||||
/** A dynamic array for POD types.
|
||||
* Designed for a small number of large arrays (rather than a lot of small ones).
|
||||
* To be more precise - for use in ColumnVector.
|
||||
* It differs from std::vector in that it does not initialize the elements.
|
||||
*
|
||||
* Сделан некопируемым, чтобы не было случайных копий. Скопировать данные можно с помощью метода assign.
|
||||
* Made noncopyable so that there are no accidental copies. You can copy the data using the `assign` method.
|
||||
*
|
||||
* Поддерживается только часть интерфейса std::vector.
|
||||
* Only part of the std::vector interface is supported.
|
||||
*
|
||||
* Конструктор по-умолчанию создаёт пустой объект, который не выделяет память.
|
||||
* Затем выделяется память минимум в INITIAL_SIZE байт.
|
||||
* The default constructor creates an empty object that does not allocate memory.
|
||||
* Then the memory is allocated at least INITIAL_SIZE bytes.
|
||||
*
|
||||
* Если вставлять элементы push_back-ом, не делая reserve, то PODArray примерно в 2.5 раза быстрее std::vector.
|
||||
* If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
|
||||
*
|
||||
* Шаблонный параметр pad_right - всегда выделять в конце массива столько неиспользуемых байт.
|
||||
* Может использоваться для того, чтобы делать оптимистичное чтение, запись, копирование невыровненными SIMD-инструкциями.
|
||||
* The template parameter `pad_right` - always allocate this many unused bytes at the end of the array.
|
||||
* Can be used to make optimistic reading, writing, copying with unaligned SIMD instructions.
|
||||
*/
|
||||
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0>
|
||||
class PODArray : private boost::noncopyable, private TAllocator /// empty base optimization
|
||||
{
|
||||
private:
|
||||
/// Округление padding-а вверх до целого количества элементов, чтобы упростить арифметику.
|
||||
/// Round padding up to a whole number of elements to simplify arithmetic.
|
||||
static constexpr size_t pad_right = (pad_right_ + sizeof(T) - 1) / sizeof(T) * sizeof(T);
|
||||
|
||||
char * c_start = nullptr;
|
||||
char * c_end = nullptr;
|
||||
char * c_end_of_storage = nullptr; /// Не включает в себя pad_right.
|
||||
char * c_end_of_storage = nullptr; /// Does not include pad_right.
|
||||
|
||||
T * t_start() { return reinterpret_cast<T *>(c_start); }
|
||||
T * t_end() { return reinterpret_cast<T *>(c_end); }
|
||||
@ -55,10 +55,10 @@ private:
|
||||
const T * t_end() const { return reinterpret_cast<const T *>(c_end); }
|
||||
const T * t_end_of_storage() const { return reinterpret_cast<const T *>(c_end_of_storage); }
|
||||
|
||||
/// Количество памяти, занимаемое num_elements элементов.
|
||||
/// The amount of memory occupied by num_elements elements.
|
||||
static size_t byte_size(size_t num_elements) { return num_elements * sizeof(T); }
|
||||
|
||||
/// Минимальное количество памяти, которое нужно выделить для num_elements элементов, включая padding.
|
||||
/// Minimum amount of memory to allocate for num_elements, including padding.
|
||||
static size_t minimum_memory_for_elements(size_t num_elements) { return byte_size(num_elements) + pad_right; }
|
||||
|
||||
void alloc_for_num_elements(size_t num_elements)
|
||||
@ -112,7 +112,7 @@ public:
|
||||
|
||||
size_t allocated_size() const { return c_end_of_storage - c_start + pad_right; }
|
||||
|
||||
/// Просто typedef нельзя, так как возникает неоднозначность для конструкторов и функций assign.
|
||||
/// You can not just use `typedef`, because there is ambiguity for the constructors and `assign` functions.
|
||||
struct iterator : public boost::iterator_adaptor<iterator, T*>
|
||||
{
|
||||
iterator() {}
|
||||
@ -209,7 +209,7 @@ public:
|
||||
c_end = c_start + byte_size(n);
|
||||
}
|
||||
|
||||
/// Как resize, но обнуляет новые элементы.
|
||||
/// Same as resize, but zeroes new elements.
|
||||
void resize_fill(size_t n)
|
||||
{
|
||||
size_t old_size = size();
|
||||
@ -261,7 +261,7 @@ public:
|
||||
c_end -= byte_size(1);
|
||||
}
|
||||
|
||||
/// Не вставляйте в массив кусок самого себя. Потому что при ресайзе, итераторы на самого себя могут инвалидироваться.
|
||||
/// Do not insert a piece of the array into itself, because on resize the iterators pointing into the array itself can be invalidated.
|
||||
template <typename It1, typename It2>
|
||||
void insert(It1 from_begin, It2 from_end)
|
||||
{
|
||||
@ -458,7 +458,7 @@ void swap(PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & lhs, PODArray<T, I
|
||||
lhs.swap(rhs);
|
||||
}
|
||||
|
||||
/** Для столбцов. Padding-а хватает, чтобы читать и писать xmm-регистр по адресу последнего элемента. */
|
||||
/** For columns. Padding is enough to read and write xmm-register at the address of the last element. */
|
||||
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>>
|
||||
using PaddedPODArray = PODArray<T, INITIAL_SIZE, TAllocator, 15>;
|
||||
|
||||
|
@ -8,8 +8,17 @@
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
/** Класс, от которого можно унаследоваться и получить пул чего-нибудь. Используется для пулов соединений с БД.
|
||||
* Наследник должен предоставить метод для создания нового объекта для помещения в пул.
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/** A class from which you can inherit and get a pool of something. Used for database connection pools.
|
||||
* Descendant class must provide a method for creating a new object to place in the pool.
|
||||
*/
|
||||
|
||||
template <typename TObject>
|
||||
@ -22,7 +31,7 @@ public:
|
||||
|
||||
private:
|
||||
|
||||
/** Объект с флагом, используется ли он сейчас. */
|
||||
/** The object with the flag, whether it is currently used. */
|
||||
struct PooledObject
|
||||
{
|
||||
PooledObject(ObjectPtr object_, PoolBase & pool_)
|
||||
@ -37,8 +46,8 @@ private:
|
||||
|
||||
using Objects = std::vector<std::shared_ptr<PooledObject>>;
|
||||
|
||||
/** Помощник, который устанавливает флаг использования объекта, а в деструкторе - снимает,
|
||||
* а также уведомляет о событии с помощью condvar-а.
|
||||
/** The helper, which sets the in-use flag of the object and, in the destructor, clears it,
|
||||
* and also notifies the event using condvar.
|
||||
*/
|
||||
struct PoolEntryHelper
|
||||
{
|
||||
@ -54,19 +63,19 @@ private:
|
||||
};
|
||||
|
||||
public:
|
||||
/** То, что выдаётся пользователю. */
|
||||
/** What is given to the user. */
|
||||
class Entry
|
||||
{
|
||||
public:
|
||||
friend class PoolBase<Object>;
|
||||
|
||||
Entry() {} /// Для отложенной инициализации.
|
||||
Entry() {} /// For deferred initialization.
|
||||
|
||||
/** Объект Entry защищает ресурс от использования другим потоком.
|
||||
* Следующие методы запрещены для rvalue, чтобы нельзя было написать подобное
|
||||
/** The `Entry` object protects the resource from being used by another thread.
|
||||
* The following methods are forbidden for rvalues, so that you cannot write something like
|
||||
*
|
||||
* auto q = pool.Get()->query("SELECT .."); // Упс, после этой строчки Entry уничтожился
|
||||
* q.execute(); // Кто-то еще может использовать этот Connection
|
||||
* auto q = pool.Get()->query("SELECT .."); // Oops, after this line Entry was destroyed
|
||||
* q.execute(); // Someone else can use this Connection
|
||||
*/
|
||||
Object * operator->() && = delete;
|
||||
const Object * operator->() const && = delete;
|
||||
@ -83,7 +92,7 @@ public:
|
||||
PoolBase * getPool() const
|
||||
{
|
||||
if (!data)
|
||||
throw DB::Exception("attempt to get pool from uninitialized entry");
|
||||
throw DB::Exception("Attempt to get pool from uninitialized entry", DB::ErrorCodes::LOGICAL_ERROR);
|
||||
return &data->data.pool;
|
||||
}
|
||||
|
||||
@ -95,7 +104,7 @@ public:
|
||||
|
||||
virtual ~PoolBase() {}
|
||||
|
||||
/** Выделяет объект для работы. При timeout < 0 таймаут бесконечный. */
|
||||
/** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */
|
||||
Entry get(Poco::Timespan::TimeDiff timeout)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
@ -131,13 +140,13 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
/** Максимальный размер пула. */
|
||||
/** The maximum size of the pool. */
|
||||
unsigned max_items;
|
||||
|
||||
/** Пул. */
|
||||
/** Pool. */
|
||||
Objects items;
|
||||
|
||||
/** Блокировка для доступа к пулу. */
|
||||
/** Lock to access the pool. */
|
||||
std::mutex mutex;
|
||||
std::condition_variable available;
|
||||
|
||||
@ -151,7 +160,7 @@ protected:
|
||||
items.reserve(max_items);
|
||||
}
|
||||
|
||||
/** Создает новый объект для помещения в пул. */
|
||||
/** Creates a new object to put into the pool. */
|
||||
virtual ObjectPtr allocObject() = 0;
|
||||
};
|
||||
|
||||
|
@ -13,18 +13,18 @@
|
||||
#include <Core/Defines.h>
|
||||
|
||||
|
||||
/** Поразрядная сортировка, обладает следующей функциональностью:
|
||||
* Может сортировать unsigned, signed числа, а также float-ы.
|
||||
* Может сортировать массив элементов фиксированной длины, которые содержат что-то ещё кроме ключа.
|
||||
* Настраиваемый размер разряда.
|
||||
/** Radix sort, has the following functionality:
|
||||
* Can sort unsigned, signed numbers, and floats.
|
||||
* Can sort an array of fixed length elements that contain something else besides the key.
|
||||
* Customizable radix size.
|
||||
*
|
||||
* LSB, stable.
|
||||
* NOTE Для некоторых приложений имеет смысл добавить MSB-radix-sort,
|
||||
* а также алгоритмы radix-select, radix-partial-sort, radix-get-permutation на его основе.
|
||||
* NOTE For some applications it makes sense to add MSB-radix-sort,
|
||||
* as well as radix-select, radix-partial-sort, radix-get-permutation algorithms based on it.
|
||||
*/
|
||||
|
||||
|
||||
/** Используется в качестве параметра шаблона. См. ниже.
|
||||
/** Used as a template parameter. See below.
|
||||
*/
|
||||
struct RadixSortMallocAllocator
|
||||
{
|
||||
@ -40,16 +40,16 @@ struct RadixSortMallocAllocator
|
||||
};
|
||||
|
||||
|
||||
/** Преобразование, которое переводит битовое представление ключа в такое целое беззнаковое число,
|
||||
* что отношение порядка над ключами будет соответствовать отношению порядка над полученными беззнаковыми числами.
|
||||
* Для float-ов это преобразование делает следующее:
|
||||
* если выставлен знаковый бит, то переворачивает все остальные биты.
|
||||
* При этом, NaN-ы оказываются больше всех нормальных чисел.
|
||||
/** A transformation that converts the bit representation of a key into such an unsigned integer,
|
||||
* that the order relation over the keys will match the order relation over the obtained unsigned numbers.
|
||||
* For floats this conversion does the following:
|
||||
* if the sign bit is set, it flips all other bits.
|
||||
* In this case, NaN-s are bigger than all normal numbers.
|
||||
*/
|
||||
template <typename KeyBits>
|
||||
struct RadixSortFloatTransform
|
||||
{
|
||||
/// Стоит ли записывать результат в память, или лучше делать его каждый раз заново?
|
||||
/// Is it worth writing the result in memory, or is it better to do calculation every time again?
|
||||
static constexpr bool transform_is_simple = false;
|
||||
|
||||
static KeyBits forward(KeyBits x)
|
||||
@ -67,24 +67,24 @@ struct RadixSortFloatTransform
|
||||
template <typename Float>
|
||||
struct RadixSortFloatTraits
|
||||
{
|
||||
using Element = Float; /// Тип элемента. Это может быть структура с ключём и ещё каким-то payload-ом. Либо просто ключ.
|
||||
using Key = Float; /// Ключ, по которому нужно сортировать.
|
||||
using CountType = uint32_t; /// Тип для подсчёта гистограмм. В случае заведомо маленького количества элементов, может быть меньше чем size_t.
|
||||
using Element = Float; /// The type of the element. It can be a structure with a key and some other payload. Or just a key.
|
||||
using Key = Float; /// The key to sort.
|
||||
using CountType = uint32_t; /// Type for calculating histograms. In the case of a known small number of elements, it can be less than size_t.
|
||||
|
||||
/// Тип, в который переводится ключ, чтобы делать битовые операции. Это UInt такого же размера, как ключ.
|
||||
/// The type to which the key is transformed to do bit operations. This UInt is the same size as the key.
|
||||
using KeyBits = typename std::conditional<sizeof(Float) == 8, uint64_t, uint32_t>::type;
|
||||
|
||||
static constexpr size_t PART_SIZE_BITS = 8; /// Какими кусочками ключа в количестве бит делать один проход - перестановку массива.
|
||||
static constexpr size_t PART_SIZE_BITS = 8; /// With what pieces of the key, in bits, to do one pass - reshuffle of the array.
|
||||
|
||||
/// Преобразования ключа в KeyBits такое, что отношение порядка над ключём соответствует отношению порядка над KeyBits.
|
||||
/// Converting a key into KeyBits is such that the order relation over the key corresponds to the order relation over KeyBits.
|
||||
using Transform = RadixSortFloatTransform<KeyBits>;
|
||||
|
||||
/// Объект с функциями allocate и deallocate.
|
||||
/// Может быть использован, например, чтобы выделить память для временного массива на стеке.
|
||||
/// Для этого сам аллокатор создаётся на стеке.
|
||||
/// An object with the functions allocate and deallocate.
|
||||
/// Can be used, for example, to allocate memory for a temporary array on the stack.
|
||||
/// To do this, the allocator itself is created on the stack.
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
|
||||
/// Функция получения ключа из элемента массива.
|
||||
/// The function to get the key from an array element.
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
};
|
||||
|
||||
@ -122,7 +122,7 @@ struct RadixSortUIntTraits
|
||||
using Transform = RadixSortIdentityTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
|
||||
/// Функция получения ключа из элемента массива.
|
||||
/// The function to get the key from an array element.
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
};
|
||||
|
||||
@ -139,7 +139,7 @@ struct RadixSortIntTraits
|
||||
using Transform = RadixSortSignedTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
|
||||
/// Функция получения ключа из элемента массива.
|
||||
/// The function to get the key from an array element.
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
};
|
||||
|
||||
@ -172,19 +172,19 @@ private:
|
||||
public:
|
||||
static void execute(Element * arr, size_t size)
|
||||
{
|
||||
/// Если массив имеет размер меньше 256, то лучше использовать другой алгоритм.
|
||||
/// If the array is smaller than 256, then it is better to use another algorithm.
|
||||
|
||||
/// Здесь есть циклы по NUM_PASSES. Очень важно, что они разворачиваются в compile-time.
|
||||
/// There are loops of NUM_PASSES. It is very important that they are unfolded at compile-time.
|
||||
|
||||
/// Для каждого из NUM_PASSES кусков бит ключа, считаем, сколько раз каждое значение этого куска встретилось.
|
||||
/// For each of the NUM_PASSES bit ranges of the key, count how many times each value of this bit range occurs.
|
||||
CountType histograms[HISTOGRAM_SIZE * NUM_PASSES] = {0};
|
||||
|
||||
typename Traits::Allocator allocator;
|
||||
|
||||
/// Будем делать несколько проходов по массиву. На каждом проходе, данные перекладываются в другой массив. Выделим этот временный массив.
|
||||
/// We will do several passes through the array. On each pass, the data is transferred to another array. Let's allocate this temporary array.
|
||||
Element * swap_buffer = reinterpret_cast<Element *>(allocator.allocate(size * sizeof(Element)));
|
||||
|
||||
/// Трансформируем массив и вычисляем гистограмму.
|
||||
/// Transform the array and calculate the histogram.
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (!Traits::Transform::transform_is_simple)
|
||||
@ -195,7 +195,7 @@ public:
|
||||
}
|
||||
|
||||
{
|
||||
/// Заменяем гистограммы на суммы с накоплением: значение в позиции i равно сумме в предыдущих позициях минус один.
|
||||
/// Replace the histograms with the accumulated sums: the value in position i is the sum of the previous positions minus one.
|
||||
size_t sums[NUM_PASSES] = {0};
|
||||
|
||||
for (size_t i = 0; i < HISTOGRAM_SIZE; ++i)
|
||||
@ -209,7 +209,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Перекладываем элементы в порядке начиная от младшего куска бит, и далее делаем несколько проходов по количеству кусков.
|
||||
/// Move the elements, starting from the least significant bit piece, and then do as many passes as there are pieces.
|
||||
for (size_t j = 0; j < NUM_PASSES; ++j)
|
||||
{
|
||||
Element * writer = j % 2 ? arr : swap_buffer;
|
||||
@ -219,17 +219,18 @@ public:
|
||||
{
|
||||
size_t pos = getPart(j, keyToBits(Traits::extractKey(reader[i])));
|
||||
|
||||
/// Размещаем элемент на следующей свободной позиции.
|
||||
/// Place the element on the next free position.
|
||||
auto & dest = writer[++histograms[j * HISTOGRAM_SIZE + pos]];
|
||||
dest = reader[i];
|
||||
|
||||
/// На последнем перекладывании, делаем обратную трансформацию.
|
||||
/// On the last pass, we do the reverse transformation.
|
||||
if (!Traits::Transform::transform_is_simple && j == NUM_PASSES - 1)
|
||||
Traits::extractKey(dest) = bitsToKey(Traits::Transform::backward(keyToBits(Traits::extractKey(reader[i]))));
|
||||
}
|
||||
}
|
||||
|
||||
/// Если число проходов нечётное, то результирующий массив находится во временном буфере. Скопируем его на место исходного массива.
|
||||
/// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
|
||||
/// NOTE Sometimes it will be more optimal to provide non-destructive interface, that will not modify original array.
|
||||
if (NUM_PASSES % 2)
|
||||
memcpy(arr, swap_buffer, size * sizeof(Element));
|
||||
|
||||
|
@ -9,19 +9,19 @@ namespace DB
|
||||
{
|
||||
|
||||
|
||||
/** Позволяет запустить команду,
|
||||
* читать её stdout, stderr, писать в stdin,
|
||||
* дождаться завершения.
|
||||
/** Lets you run the command,
|
||||
* read its stdout and stderr, write to its stdin,
|
||||
* wait for completion.
|
||||
*
|
||||
* Реализация похожа на функцию popen из POSIX (посмотреть можно в исходниках libc).
|
||||
* The implementation is similar to the popen function from POSIX (see libc source code).
|
||||
*
|
||||
* Наиболее важное отличие: использует vfork вместо fork.
|
||||
* Это сделано, потому что fork не работает (с ошибкой о нехватке памяти),
|
||||
* при некоторых настройках overcommit-а, если размер адресного пространства процесса больше половины количества доступной памяти.
|
||||
* Также, изменение memory map-ов - довольно ресурсоёмкая операция.
|
||||
* The most important difference: uses vfork instead of fork.
|
||||
* This is done because fork does not work (with a memory shortage error),
|
||||
* with some overcommit settings, if the address space of the process is more than half the amount of available memory.
|
||||
* Also, changing memory maps is a fairly resource-intensive operation.
|
||||
*
|
||||
* Второе отличие - позволяет работать одновременно и с stdin, и с stdout, и с stderr запущенного процесса,
|
||||
* а также узнать код и статус завершения.
|
||||
* The second difference - allows to work simultaneously with stdin, and with stdout, and with stderr of running process,
|
||||
* and also to obtain the return code and completion status.
|
||||
*/
|
||||
class ShellCommand
|
||||
{
|
||||
@ -34,20 +34,20 @@ private:
|
||||
static std::unique_ptr<ShellCommand> executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only);
|
||||
|
||||
public:
|
||||
WriteBufferFromFile in; /// Если команда читает из stdin, то не забудьте вызвать in.close() после записи туда всех данных.
|
||||
WriteBufferFromFile in; /// If the command reads from stdin, do not forget to call in.close() after writing all the data there.
|
||||
ReadBufferFromFile out;
|
||||
ReadBufferFromFile err;
|
||||
|
||||
/// Выполнить команду с использованием /bin/sh -c
|
||||
/// Run the command using /bin/sh -c
|
||||
static std::unique_ptr<ShellCommand> execute(const std::string & command, bool pipe_stdin_only = false);
|
||||
|
||||
/// Выполнить исполняемый файл с указаннами аргументами. arguments - без argv[0].
|
||||
/// Run the executable with the specified arguments. `arguments` - without argv[0].
|
||||
static std::unique_ptr<ShellCommand> executeDirect(const std::string & path, const std::vector<std::string> & arguments);
|
||||
|
||||
/// Подождать завершения процесса, кинуть исключение, если код не 0 или если процесс был завершён не самостоятельно.
|
||||
/// Wait for the process to end, throw an exception if the code is not 0 or if the process was not completed by itself.
|
||||
void wait();
|
||||
|
||||
/// Подождать завершения процесса, узнать код возврата. Кинуть исключение, если процесс был завершён не самостоятельно.
|
||||
/// Wait for the process to finish and get its return code. Throws an exception if the process did not terminate on its own.
|
||||
int tryWait();
|
||||
};
|
||||
|
||||
|
@ -6,13 +6,13 @@
|
||||
#include <ext/function_traits.hpp>
|
||||
|
||||
|
||||
/** Простейший кэш для свободной функции.
|
||||
* Можете также передать статический метод класса или лямбду без захвата.
|
||||
* Размер неограничен. Значения не устаревают.
|
||||
* Для синхронизации используется mutex.
|
||||
* Подходит только для простейших случаев.
|
||||
/** The simplest cache for a free function.
|
||||
* You can also pass a static class method or lambda without captures.
|
||||
* The size is unlimited. Values are stored permanently and never evicted.
|
||||
* Mutex is used for synchronization.
|
||||
* Suitable only for the simplest cases.
|
||||
*
|
||||
* Использование:
|
||||
* Usage
|
||||
*
|
||||
* SimpleCache<decltype(func), &func> func_cached;
|
||||
* std::cerr << func_cached(args...);
|
||||
@ -41,7 +41,7 @@ public:
|
||||
return it->second;
|
||||
}
|
||||
|
||||
/// Сами вычисления делаются не под mutex-ом.
|
||||
/// The calculations themselves are not done under mutex.
|
||||
Result res = f(std::forward<Args>(args)...);
|
||||
|
||||
{
|
||||
|
24
dbms/src/Common/SimpleIncrement.h
Normal file
24
dbms/src/Common/SimpleIncrement.h
Normal file
@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/Types.h>
|
||||
#include <atomic>
|
||||
|
||||
|
||||
/** Is used for numbering of files.
|
||||
*/
|
||||
struct SimpleIncrement
|
||||
{
|
||||
std::atomic<UInt64> value;
|
||||
|
||||
SimpleIncrement(UInt64 start = 0) : value(start) {}
|
||||
|
||||
void set(UInt64 new_value)
|
||||
{
|
||||
value = new_value;
|
||||
}
|
||||
|
||||
UInt64 get()
|
||||
{
|
||||
return ++value;
|
||||
}
|
||||
};
|
@ -1,21 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
/** SipHash - быстрая криптографическая хэш функция для коротких строк.
|
||||
* Взято отсюда: https://www.131002.net/siphash/
|
||||
/** SipHash is a fast cryptographic hash function for short strings.
|
||||
* Taken from here: https://www.131002.net/siphash/
|
||||
*
|
||||
* Сделано два изменения:
|
||||
* - возвращает 128 бит, а не 64;
|
||||
* - сделано потоковой (можно вычислять по частям).
|
||||
* This is SipHash 2-4 variant.
|
||||
*
|
||||
* На коротких строках (URL, поисковые фразы) более чем в 3 раза быстрее MD5 от OpenSSL.
|
||||
* (~ 700 МБ/сек., 15 млн. строк в секунду)
|
||||
* Two changes are made:
|
||||
* - returns also 128 bits, not only 64;
|
||||
* - done streaming (can be calculated in parts).
|
||||
*
|
||||
* On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
|
||||
* (~ 700 MB/sec, 15 million strings per second)
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
#include <Core/Types.h>
|
||||
#include <common/Types.h>
|
||||
|
||||
#define ROTL(x,b) static_cast<u64>( ((x) << (b)) | ( (x) >> (64 - (b))) )
|
||||
#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
|
||||
|
||||
#define SIPROUND \
|
||||
do \
|
||||
@ -30,28 +30,25 @@
|
||||
class SipHash
|
||||
{
|
||||
private:
|
||||
using u64 = DB::UInt64;
|
||||
using u8 = DB::UInt8;
|
||||
/// State.
|
||||
UInt64 v0;
|
||||
UInt64 v1;
|
||||
UInt64 v2;
|
||||
UInt64 v3;
|
||||
|
||||
/// Состояние.
|
||||
u64 v0;
|
||||
u64 v1;
|
||||
u64 v2;
|
||||
u64 v3;
|
||||
/// How many bytes have been processed.
|
||||
UInt64 cnt;
|
||||
|
||||
/// Сколько байт обработано.
|
||||
u64 cnt;
|
||||
|
||||
/// Текущие 8 байт входных данных.
|
||||
/// The current 8 bytes of input data.
|
||||
union
|
||||
{
|
||||
u64 current_word;
|
||||
u8 current_bytes[8];
|
||||
UInt64 current_word;
|
||||
UInt8 current_bytes[8];
|
||||
};
|
||||
|
||||
void finalize()
|
||||
{
|
||||
/// В последний свободный байт пишем остаток от деления длины на 256.
|
||||
/// In the last free byte, we write the remainder of the length divided by 256.
|
||||
current_bytes[7] = cnt;
|
||||
|
||||
v3 ^= current_word;
|
||||
@ -67,10 +64,10 @@ private:
|
||||
}
|
||||
|
||||
public:
|
||||
/// Аргументы - seed.
|
||||
SipHash(u64 k0 = 0, u64 k1 = 0)
|
||||
/// Arguments - seed.
|
||||
SipHash(UInt64 k0 = 0, UInt64 k1 = 0)
|
||||
{
|
||||
/// Инициализируем состояние некоторыми случайными байтами и seed-ом.
|
||||
/// Initialize the state with some random bytes and seed.
|
||||
v0 = 0x736f6d6570736575ULL ^ k0;
|
||||
v1 = 0x646f72616e646f6dULL ^ k1;
|
||||
v2 = 0x6c7967656e657261ULL ^ k0;
|
||||
@ -80,11 +77,11 @@ public:
|
||||
current_word = 0;
|
||||
}
|
||||
|
||||
void update(const char * data, u64 size)
|
||||
void update(const char * data, UInt64 size)
|
||||
{
|
||||
const char * end = data + size;
|
||||
|
||||
/// Дообработаем остаток от предыдущего апдейта, если есть.
|
||||
/// Finish processing the remainder left over from the previous update, if any.
|
||||
if (cnt & 7)
|
||||
{
|
||||
while (cnt & 7 && data < end)
|
||||
@ -94,7 +91,7 @@ public:
|
||||
++cnt;
|
||||
}
|
||||
|
||||
/// Если всё ещё не хватает байт до восьмибайтового слова.
|
||||
/// If there are still not enough bytes to complete an 8-byte word.
|
||||
if (cnt & 7)
|
||||
return;
|
||||
|
||||
@ -108,7 +105,7 @@ public:
|
||||
|
||||
while (data + 8 <= end)
|
||||
{
|
||||
current_word = *reinterpret_cast<const u64 *>(data);
|
||||
current_word = *reinterpret_cast<const UInt64 *>(data);
|
||||
|
||||
v3 ^= current_word;
|
||||
SIPROUND;
|
||||
@ -118,7 +115,7 @@ public:
|
||||
data += 8;
|
||||
}
|
||||
|
||||
/// Заполняем остаток, которого не хватает до восьмибайтового слова.
|
||||
/// Pad the remainder, which is missing up to an 8-byte word.
|
||||
current_word = 0;
|
||||
switch (end - data)
|
||||
{
|
||||
@ -133,23 +130,23 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Получить результат в некотором виде. Это можно сделать только один раз!
|
||||
/// Get the result in some form. This can only be done once!
|
||||
|
||||
void get128(char * out)
|
||||
{
|
||||
finalize();
|
||||
reinterpret_cast<u64 *>(out)[0] = v0 ^ v1;
|
||||
reinterpret_cast<u64 *>(out)[1] = v2 ^ v3;
|
||||
reinterpret_cast<UInt64 *>(out)[0] = v0 ^ v1;
|
||||
reinterpret_cast<UInt64 *>(out)[1] = v2 ^ v3;
|
||||
}
|
||||
|
||||
void get128(u64 & lo, u64 & hi)
|
||||
void get128(UInt64 & lo, UInt64 & hi)
|
||||
{
|
||||
finalize();
|
||||
lo = v0 ^ v1;
|
||||
hi = v2 ^ v3;
|
||||
}
|
||||
|
||||
u64 get64()
|
||||
UInt64 get64()
|
||||
{
|
||||
finalize();
|
||||
return v0 ^ v1 ^ v2 ^ v3;
|
||||
@ -160,6 +157,7 @@ public:
|
||||
#undef ROTL
|
||||
#undef SIPROUND
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
inline void sipHash128(const char * data, const size_t size, char * out)
|
||||
{
|
||||
@ -168,7 +166,7 @@ inline void sipHash128(const char * data, const size_t size, char * out)
|
||||
hash.get128(out);
|
||||
}
|
||||
|
||||
inline DB::UInt64 sipHash64(const char * data, const size_t size)
|
||||
inline UInt64 sipHash64(const char * data, const size_t size)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(data, size);
|
||||
@ -177,7 +175,7 @@ inline DB::UInt64 sipHash64(const char * data, const size_t size)
|
||||
|
||||
#include <string>
|
||||
|
||||
inline DB::UInt64 sipHash64(const std::string & s)
|
||||
inline UInt64 sipHash64(const std::string & s)
|
||||
{
|
||||
return sipHash64(s.data(), s.size());
|
||||
}
|
||||
|
@ -73,7 +73,7 @@ public:
|
||||
free_list = block;
|
||||
}
|
||||
|
||||
/// Размер выделенного пула в байтах
|
||||
/// The size of the allocated pool in bytes
|
||||
size_t size() const
|
||||
{
|
||||
return pool.size();
|
||||
|
288
dbms/src/Common/SpaceSaving.h
Normal file
288
dbms/src/Common/SpaceSaving.h
Normal file
@ -0,0 +1,288 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
|
||||
#include <Common/UInt128.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/VarInt.h>
|
||||
|
||||
/*
|
||||
* Implementation of the Filtered Space-Saving for TopK streaming analysis.
|
||||
* http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf
|
||||
* It implements suggested reduce-and-combine algorithm from Parallel Space Saving:
|
||||
* https://arxiv.org/pdf/1401.0702.pdf
|
||||
*/
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template
|
||||
<
|
||||
typename TKey,
|
||||
typename HashKey = TKey,
|
||||
typename Hash = DefaultHash<HashKey>,
|
||||
typename Grower = HashTableGrower<>,
|
||||
typename Allocator = HashTableAllocator
|
||||
>
|
||||
class SpaceSaving
|
||||
{
|
||||
private:
|
||||
// Suggested constants in the paper "Finding top-k elements in data streams", chap 6. equation (24)
|
||||
// Round to nearest power of 2 for cheaper binning without modulo
|
||||
constexpr uint64_t nextAlphaSize (uint64_t x)
|
||||
{
|
||||
constexpr uint64_t ALPHA_MAP_ELEMENTS_PER_COUNTER = 6;
|
||||
return 1ULL<<(sizeof(uint64_t) * 8 - __builtin_clzll(x * ALPHA_MAP_ELEMENTS_PER_COUNTER));
|
||||
}
|
||||
|
||||
public:
|
||||
using Self = SpaceSaving<TKey, HashKey, Hash, Grower, Allocator>;
|
||||
|
||||
struct Counter
|
||||
{
|
||||
Counter() {}
|
||||
|
||||
Counter(const TKey & k, UInt64 c = 0, UInt64 e = 0, size_t h = 0)
|
||||
: key(k), slot(0), hash(h), count(c), error(e) {}
|
||||
|
||||
void write(WriteBuffer & wb) const
|
||||
{
|
||||
writeBinary(key, wb);
|
||||
writeVarUInt(count, wb);
|
||||
writeVarUInt(error, wb);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & rb)
|
||||
{
|
||||
readBinary(key, rb);
|
||||
readVarUInt(count, rb);
|
||||
readVarUInt(error, rb);
|
||||
}
|
||||
|
||||
// greater() taking slot error into account
|
||||
bool operator> (const Counter & b) const
|
||||
{
|
||||
return (count > b.count) || (count == b.count && error < b.error);
|
||||
}
|
||||
|
||||
TKey key;
|
||||
size_t slot, hash;
|
||||
UInt64 count;
|
||||
UInt64 error;
|
||||
};
|
||||
|
||||
SpaceSaving(size_t c = 10) : alpha_map(nextAlphaSize(c)), m_capacity(c) {}
|
||||
~SpaceSaving() { destroyElements(); }
|
||||
|
||||
inline size_t size() const
|
||||
{
|
||||
return counter_list.size();
|
||||
}
|
||||
|
||||
inline size_t capacity() const
|
||||
{
|
||||
return m_capacity;
|
||||
}
|
||||
|
||||
void resize(size_t new_capacity)
|
||||
{
|
||||
counter_list.reserve(new_capacity);
|
||||
alpha_map.resize(nextAlphaSize(new_capacity));
|
||||
m_capacity = new_capacity;
|
||||
}
|
||||
|
||||
void insert(const TKey & key, UInt64 increment = 1, UInt64 error = 0)
|
||||
{
|
||||
// Increase weight of a key that already exists
|
||||
// It uses hashtable for both value mapping as a presence test (c_i != 0)
|
||||
auto hash = counter_map.hash(key);
|
||||
auto it = counter_map.find(key, hash);
|
||||
if (it != counter_map.end())
|
||||
{
|
||||
auto c = it->second;
|
||||
c->count += increment;
|
||||
c->error += error;
|
||||
percolate(c);
|
||||
return;
|
||||
}
|
||||
// Key doesn't exist, but can fit in the top K
|
||||
else if (unlikely(size() < capacity()))
|
||||
{
|
||||
auto c = new Counter(key, increment, error, hash);
|
||||
push(c);
|
||||
return;
|
||||
}
|
||||
|
||||
auto min = counter_list.back();
|
||||
const size_t alpha_mask = alpha_map.size() - 1;
|
||||
auto & alpha = alpha_map[hash & alpha_mask];
|
||||
if (alpha + increment < min->count)
|
||||
{
|
||||
alpha += increment;
|
||||
return;
|
||||
}
|
||||
|
||||
// Erase the current minimum element
|
||||
alpha_map[min->hash & alpha_mask] = min->count;
|
||||
it = counter_map.find(min->key, min->hash);
|
||||
|
||||
// Replace minimum with newly inserted element
|
||||
if (it != counter_map.end())
|
||||
{
|
||||
min->hash = hash;
|
||||
min->key = key;
|
||||
min->count = alpha + increment;
|
||||
min->error = alpha + error;
|
||||
percolate(min);
|
||||
|
||||
it->second = min;
|
||||
it->first = key;
|
||||
counter_map.reinsert(it, hash);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Parallel Space Saving reduction and combine step from:
|
||||
* https://arxiv.org/pdf/1401.0702.pdf
|
||||
*/
|
||||
void merge(const Self & rhs)
|
||||
{
|
||||
UInt64 m1 = 0;
|
||||
UInt64 m2 = 0;
|
||||
|
||||
if (size() == capacity())
|
||||
{
|
||||
m1 = counter_list.back()->count;
|
||||
}
|
||||
|
||||
if (rhs.size() == rhs.capacity())
|
||||
{
|
||||
m2 = rhs.counter_list.back()->count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Updated algorithm to mutate current table in place
|
||||
* without mutating rhs table or creating new one
|
||||
* in the first step we expect that no elements overlap
|
||||
* and in the second sweep we correct the error if they do.
|
||||
*/
|
||||
if (m2 > 0)
|
||||
{
|
||||
for (auto counter : counter_list)
|
||||
{
|
||||
counter->count += m2;
|
||||
counter->error += m2;
|
||||
}
|
||||
}
|
||||
|
||||
// The list is sorted in descending order, we have to scan in reverse
|
||||
for (auto counter : boost::adaptors::reverse(rhs.counter_list))
|
||||
{
|
||||
if (counter_map.find(counter->key) != counter_map.end())
|
||||
{
|
||||
// Subtract m2 previously added, guaranteed not negative
|
||||
insert(counter->key, counter->count - m2, counter->error - m2);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Counters not monitored in S1
|
||||
insert(counter->key, counter->count + m1, counter->error + m1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Counter> topK(size_t k) const
|
||||
{
|
||||
std::vector<Counter> res;
|
||||
for (auto counter : counter_list)
|
||||
{
|
||||
res.push_back(*counter);
|
||||
if (res.size() == k)
|
||||
break;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & wb) const
|
||||
{
|
||||
writeVarUInt(size(), wb);
|
||||
for (auto counter : counter_list)
|
||||
counter->write(wb);
|
||||
for (auto alpha : alpha_map)
|
||||
writeVarUInt(alpha, wb);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & rb)
|
||||
{
|
||||
destroyElements();
|
||||
size_t count = 0;
|
||||
readVarUInt(count, rb);
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
auto counter = new Counter();
|
||||
counter->read(rb);
|
||||
counter->hash = counter_map.hash(counter->key);
|
||||
push(counter);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < nextAlphaSize(m_capacity); ++i)
|
||||
{
|
||||
UInt64 alpha = 0;
|
||||
readVarUInt(alpha, rb);
|
||||
alpha_map.push_back(alpha);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
void push(Counter * counter)
|
||||
{
|
||||
counter->slot = counter_list.size();
|
||||
counter_list.push_back(counter);
|
||||
counter_map[counter->key] = counter;
|
||||
percolate(counter);
|
||||
}
|
||||
|
||||
// This is equivallent to one step of bubble sort
|
||||
void percolate(Counter * counter)
|
||||
{
|
||||
while (counter->slot > 0)
|
||||
{
|
||||
auto next = counter_list[counter->slot - 1];
|
||||
if (*counter > *next)
|
||||
{
|
||||
std::swap(next->slot, counter->slot);
|
||||
std::swap(counter_list[next->slot], counter_list[counter->slot]);
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void destroyElements()
|
||||
{
|
||||
for (auto counter : counter_list)
|
||||
delete counter;
|
||||
|
||||
counter_map.clear();
|
||||
counter_list.clear();
|
||||
alpha_map.clear();
|
||||
}
|
||||
|
||||
HashMap<HashKey, Counter *, Hash, Grower, Allocator> counter_map;
|
||||
std::vector<Counter *> counter_list;
|
||||
std::vector<UInt64> alpha_map;
|
||||
size_t m_capacity;
|
||||
};
|
||||
|
||||
};
|
@ -6,14 +6,14 @@
|
||||
#define STACK_TRACE_MAX_DEPTH 32
|
||||
|
||||
|
||||
/// Позволяет получить стек-трейс
|
||||
/// Lets you get a stacktrace
|
||||
class StackTrace
|
||||
{
|
||||
public:
|
||||
/// Стектрейс снимается в момент создания объекта
|
||||
/// The stacktrace is captured when the object is created
|
||||
StackTrace();
|
||||
|
||||
/// Вывести в строку
|
||||
/// Print to string
|
||||
std::string toString() const;
|
||||
|
||||
private:
|
||||
|
@ -19,15 +19,14 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNSUPPORTED_PARAMETER;
|
||||
}
|
||||
|
||||
|
||||
/** Варианты поиска подстроки в строке.
|
||||
* В большинстве случаев, менее производительные, чем Volnitsky (см. Volnitsky.h).
|
||||
/** Variants for searching a substring in a string.
|
||||
* In most cases, performance is less than Volnitsky (see Volnitsky.h).
|
||||
*/
|
||||
|
||||
|
||||
@ -37,7 +36,7 @@ struct StringSearcherBase
|
||||
static constexpr auto n = sizeof(__m128i);
|
||||
const int page_size = getpagesize();
|
||||
|
||||
bool page_safe(const void * const ptr) const
|
||||
bool pageSafe(const void * const ptr) const
|
||||
{
|
||||
return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
|
||||
}
|
||||
@ -55,7 +54,7 @@ class StringSearcher<false, false> : private StringSearcherBase
|
||||
private:
|
||||
using UTF8SequenceBuffer = UInt8[6];
|
||||
|
||||
/// string to be searched for
|
||||
/// substring to be searched for
|
||||
const UInt8 * const needle;
|
||||
const std::size_t needle_size;
|
||||
const UInt8 * const needle_end = needle + needle_size;
|
||||
@ -135,8 +134,7 @@ public:
|
||||
if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
|
||||
throw DB::Exception{
|
||||
"UTF8 sequences with different lowercase and uppercase lengths are not supported",
|
||||
DB::ErrorCodes::UNSUPPORTED_PARAMETER
|
||||
};
|
||||
DB::ErrorCodes::UNSUPPORTED_PARAMETER};
|
||||
|
||||
cache_actual_len += src_len;
|
||||
if (cache_actual_len < n)
|
||||
@ -165,7 +163,7 @@ public:
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
if (pageSafe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
|
||||
@ -230,7 +228,7 @@ public:
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
|
||||
@ -249,7 +247,7 @@ public:
|
||||
const auto offset = __builtin_ctz(mask);
|
||||
haystack += offset;
|
||||
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
|
||||
@ -377,7 +375,7 @@ public:
|
||||
bool compare(const UInt8 * pos) const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
if (pageSafe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
|
||||
@ -429,7 +427,7 @@ public:
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
|
||||
@ -447,7 +445,7 @@ public:
|
||||
const auto offset = __builtin_ctz(mask);
|
||||
haystack += offset;
|
||||
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
|
||||
@ -559,7 +557,7 @@ public:
|
||||
bool compare(const UInt8 * pos) const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
if (pageSafe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache);
|
||||
@ -609,7 +607,7 @@ public:
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
/// find first character
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
@ -627,7 +625,7 @@ public:
|
||||
const auto offset = __builtin_ctz(mask);
|
||||
haystack += offset;
|
||||
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
/// check for first 16 octets
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
@ -693,10 +691,10 @@ using UTF8CaseSensitiveStringSearcher = StringSearcher<true, false>;
|
||||
using UTF8CaseInsensitiveStringSearcher = StringSearcher<false, false>;
|
||||
|
||||
|
||||
/** Используют функции из libc.
|
||||
* Имеет смысл использовать для коротких строк, когда требуется дешёвая инициализация.
|
||||
* Нет варианта для регистронезависимого поиска UTF-8 строк.
|
||||
* Требуется, чтобы за концом строк был нулевой байт.
|
||||
/** Uses functions from libc.
|
||||
* It makes sense to use only with short haystacks when cheap initialization is required.
|
||||
* There is no option for case-insensitive search for UTF-8 strings.
|
||||
* It is required that strings are zero-terminated.
|
||||
*/
|
||||
|
||||
struct LibCASCIICaseSensitiveStringSearcher
|
||||
|
@ -101,6 +101,12 @@ inline bool isWordCharASCII(char c)
|
||||
|| c == '_';
|
||||
}
|
||||
|
||||
inline bool isValidIdentifierBegin(char c)
|
||||
{
|
||||
return isAlphaASCII(c)
|
||||
|| c == '_';
|
||||
}
|
||||
|
||||
inline bool isWhitespaceASCII(char c)
|
||||
{
|
||||
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v';
|
||||
|
@ -1,11 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <time.h> /// nanosleep
|
||||
#include <mutex>
|
||||
#include <memory>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -15,12 +17,12 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/** Позволяет ограничить скорость чего либо (в штуках в секунду) с помощью sleep.
|
||||
* Особенности работы:
|
||||
* - считается только средняя скорость, от момента первого вызова функции add;
|
||||
* если были периоды с низкой скоростью, то в течение промежутка времени после них, скорость будет выше;
|
||||
/** Allows you to limit the speed of something (in entities per second) using sleep.
|
||||
* Specifics of work:
|
||||
* - only the average speed is considered, from the moment of the first call of `add` function;
|
||||
* if there were periods with low speed, then during some time after them, the speed will be higher;
|
||||
*
|
||||
* Также позволяет задать ограничение на максимальное количество в штуках. При превышении кидается исключение.
|
||||
* Also allows you to set a limit on the maximum number of entities. If exceeded, an exception will be thrown.
|
||||
*/
|
||||
class Throttler
|
||||
{
|
||||
@ -56,7 +58,7 @@ public:
|
||||
|
||||
if (max_speed)
|
||||
{
|
||||
/// Сколько должно было бы пройти времени, если бы скорость была равна max_speed.
|
||||
/// How much time would have had to pass if the speed were exactly `max_speed`.
|
||||
UInt64 desired_ns = new_count * 1000000000 / max_speed;
|
||||
|
||||
if (desired_ns > elapsed_ns)
|
||||
@ -65,7 +67,7 @@ public:
|
||||
timespec sleep_ts;
|
||||
sleep_ts.tv_sec = sleep_ns / 1000000000;
|
||||
sleep_ts.tv_nsec = sleep_ns % 1000000000;
|
||||
nanosleep(&sleep_ts, nullptr); /// NOTE Завершается раньше в случае сигнала. Это считается нормальным.
|
||||
nanosleep(&sleep_ts, nullptr); /// NOTE Returns early in case of a signal. This is considered normal.
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -73,7 +75,7 @@ public:
|
||||
private:
|
||||
size_t max_speed = 0;
|
||||
size_t count = 0;
|
||||
size_t limit = 0; /// 0 - не ограничено.
|
||||
size_t limit = 0; /// 0 - not limited.
|
||||
const char * limit_exceeded_exception_message = nullptr;
|
||||
Stopwatch watch {CLOCK_MONOTONIC_COARSE};
|
||||
std::mutex mutex;
|
||||
|
@ -4,12 +4,16 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#if __SSE4_2__
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
/// Для агрегации по SipHash или конкатенации нескольких полей.
|
||||
/// For aggregation by SipHash or concatenation of several fields.
|
||||
struct UInt128
|
||||
{
|
||||
/// Suppress gcc7 warnings: 'prev_key.DB::UInt128::first' may be used uninitialized in this function
|
||||
@ -42,22 +46,22 @@ struct UInt128Hash
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_2__
|
||||
|
||||
struct UInt128HashCRC32
|
||||
{
|
||||
size_t operator()(UInt128 x) const
|
||||
{
|
||||
UInt64 crc = -1ULL;
|
||||
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.first));
|
||||
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.second));
|
||||
crc = _mm_crc32_u64(crc, x.first);
|
||||
crc = _mm_crc32_u64(crc, x.second);
|
||||
return crc;
|
||||
}
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
|
||||
/// On other platforms the hash used is not necessarily CRC32. NOTE This can be confusing.
|
||||
struct UInt128HashCRC32 : public UInt128Hash {};
|
||||
|
||||
#endif
|
||||
@ -71,7 +75,7 @@ inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
|
||||
inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
|
||||
|
||||
|
||||
/** Используется при агрегации, для укладки большого количества ключей постоянной длины в хэш-таблицу.
|
||||
/** Used for aggregation, for putting a large number of constant-length keys in a hash table.
|
||||
*/
|
||||
struct UInt256
|
||||
{
|
||||
@ -91,7 +95,7 @@ struct UInt256
|
||||
{
|
||||
return a == rhs.a && b == rhs.b && c == rhs.c && d == rhs.d;
|
||||
|
||||
/* Так получается не лучше.
|
||||
/* This variant turns out to be no better.
|
||||
return 0xFFFF == _mm_movemask_epi8(_mm_and_si128(
|
||||
_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(&a)),
|
||||
@ -122,30 +126,30 @@ struct UInt256Hash
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_2__
|
||||
|
||||
struct UInt256HashCRC32
|
||||
{
|
||||
size_t operator()(UInt256 x) const
|
||||
{
|
||||
UInt64 crc = -1ULL;
|
||||
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.a));
|
||||
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.b));
|
||||
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.c));
|
||||
asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.d));
|
||||
crc = _mm_crc32_u64(crc, x.a);
|
||||
crc = _mm_crc32_u64(crc, x.b);
|
||||
crc = _mm_crc32_u64(crc, x.c);
|
||||
crc = _mm_crc32_u64(crc, x.d);
|
||||
return crc;
|
||||
}
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
|
||||
/// On other platforms the hash used is not necessarily CRC32. NOTE This can be confusing.
|
||||
struct UInt256HashCRC32
|
||||
{
|
||||
DefaultHash<UInt64> hash64;
|
||||
size_t operator()(UInt256 x) const
|
||||
{
|
||||
/// TODO Это не оптимально.
|
||||
/// TODO This is not optimal.
|
||||
return hash64(hash64(hash64(hash64(x.a) ^ x.b) ^ x.c) ^ x.d);
|
||||
}
|
||||
};
|
||||
|
@ -8,7 +8,7 @@
|
||||
#define UNICODE_BAR_CHAR_SIZE (strlen("█"))
|
||||
|
||||
|
||||
/** Позволяет нарисовать unicode-art полоску, ширина которой отображается с разрешением 1/8 символа.
|
||||
/** Allows you to draw a unicode-art bar whose width is displayed with a resolution of 1/8 character.
|
||||
*/
|
||||
|
||||
|
||||
@ -32,7 +32,7 @@ namespace UnicodeBar
|
||||
return ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE;
|
||||
}
|
||||
|
||||
/// В dst должно быть место для barWidthInBytes(width) символов и завершающего нуля.
|
||||
/// `dst` must have room for barWidthInBytes(width) characters and a trailing zero.
|
||||
inline void render(double width, char * dst)
|
||||
{
|
||||
size_t floor_width = floor(width);
|
||||
|
@ -16,23 +16,23 @@ class Context;
|
||||
namespace VirtualColumnUtils
|
||||
{
|
||||
|
||||
/// Вычислить минимальный числовый суффикс, который надо добавить к строке, чтобы она не присутствовала в множестве
|
||||
/// Calculate the minimum numeric suffix to add to the string so that it is not present in the set
|
||||
String chooseSuffix(const NamesAndTypesList & columns, const String & name);
|
||||
|
||||
/// Вычислить минимальный общий числовый суффикс, который надо добавить к каждой строке,
|
||||
/// чтобы ни одна не присутствовала в множестве.
|
||||
/// Calculate the minimum common numeric suffix to add to each string,
|
||||
/// so that none is present in the set.
|
||||
String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector<String> & names);
|
||||
|
||||
/// Добавляет в селект запрос секцию select column_name as value
|
||||
/// Например select _port as 9000.
|
||||
/// Adds to the select query section `select column_name as value`
|
||||
/// For example select _port as 9000.
|
||||
void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value);
|
||||
|
||||
/// Оставить в блоке только строки, подходящие под секции WHERE и PREWHERE запроса.
|
||||
/// Рассматриваются только элементы внешней конъюнкции, зависящие только от столбцов, присутствующих в блоке.
|
||||
/// Возвращает true, если хоть одна строка выброшена.
|
||||
/// Leave in the block only the rows that satisfy the WHERE and PREWHERE clauses of the query.
|
||||
/// Only those elements of the outer conjunction that depend solely on columns present in the block are considered.
|
||||
/// Returns true if at least one row is discarded.
|
||||
bool filterBlockWithQuery(ASTPtr query, Block & block, const Context & context);
|
||||
|
||||
/// Извлечь из входного потока множество значений столбца name
|
||||
/// Extract from the input stream a set of `name` column values
|
||||
template<typename T1>
|
||||
std::multiset<T1> extractSingleValueFromBlock(const Block & block, const String & name)
|
||||
{
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Common/StringSearcher.h>
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Poco/UTF8Encoding.h>
|
||||
#include <Poco/Unicode.h>
|
||||
#include <ext/range.hpp>
|
||||
@ -9,24 +10,24 @@
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/** Поиск подстроки в строке по алгоритму Вольницкого:
|
||||
/** Search for a substring in a string by Volnitsky's algorithm
|
||||
* http://volnitsky.com/project/str_search/
|
||||
*
|
||||
* haystack и needle могут содержать нулевые байты.
|
||||
* `haystack` and `needle` can contain zero bytes.
|
||||
*
|
||||
* Алгоритм:
|
||||
* - при слишком маленьком или слишком большом размере needle, или слишком маленьком haystack, используем std::search или memchr;
|
||||
* - при инициализации, заполняем open-addressing linear probing хэш-таблицу вида:
|
||||
* хэш от биграммы из needle -> позиция этой биграммы в needle + 1.
|
||||
* (прибавлена единица только чтобы отличить смещение ноль от пустой ячейки)
|
||||
* - в хэш-таблице ключи не хранятся, хранятся только значения;
|
||||
* - биграммы могут быть вставлены несколько раз, если они встречаются в needle несколько раз;
|
||||
* - при поиске, берём из haystack биграмму, которая должна соответствовать последней биграмме needle (сравниваем с конца);
|
||||
* - ищем её в хэш-таблице, если нашли - достаём смещение из хэш-таблицы и сравниваем строку побайтово;
|
||||
* - если сравнить не получилось - проверяем следующую ячейку хэш-таблицы из цепочки разрешения коллизий;
|
||||
* - если не нашли, пропускаем в haystack почти размер needle байт;
|
||||
* Algorithm:
|
||||
* - if the `needle` is too small or too large, or too small `haystack`, use std::search or memchr;
|
||||
* - when initializing, fill in an open-addressing linear probing hash table of the form
|
||||
* hash from the bigram of needle -> the position of this bigram in needle + 1.
|
||||
* (one is added only to distinguish zero offset from an empty cell)
|
||||
* - the keys are not stored in the hash table, only the values are stored;
|
||||
* - bigrams can be inserted several times if they occur in the needle several times;
|
||||
* - when searching, take from haystack bigram, which should correspond to the last bigram of needle (comparing from the end);
|
||||
* - look for it in the hash table, if found - get the offset from the hash table and compare the string bytewise;
|
||||
* - if it did not match, we check the next cell of the hash table from the collision resolution chain;
|
||||
* - if not found, skip ahead in the haystack by almost the size of the needle in bytes;
|
||||
*
|
||||
* Используется невыровненный доступ к памяти.
|
||||
* Unaligned memory access is used.
|
||||
*/
|
||||
|
||||
|
||||
@ -39,34 +40,35 @@ template <typename CRTP>
|
||||
class VolnitskyBase
|
||||
{
|
||||
protected:
|
||||
using offset_t = uint8_t; /// Смещение в needle. Для основного алгоритма, длина needle не должна быть больше 255.
|
||||
using ngram_t = uint16_t; /// n-грамма (2 байта).
|
||||
using Offset = UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
|
||||
using Ngram = UInt16; /// n-gram (2 bytes).
|
||||
|
||||
const UInt8 * const needle;
|
||||
const size_t needle_size;
|
||||
const UInt8 * const needle_end = needle + needle_size;
|
||||
/// На сколько двигаемся, если n-грамма из haystack не нашлась в хэш-таблице.
|
||||
const size_t step = needle_size - sizeof(ngram_t) + 1;
|
||||
/// How far we advance if the n-gram from haystack is not found in the hash table.
|
||||
const size_t step = needle_size - sizeof(Ngram) + 1;
|
||||
|
||||
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
|
||||
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
|
||||
static const size_t hash_size = 64 * 1024; /// Помещается в L2-кэш.
|
||||
offset_t hash[hash_size]; /// Хэш-таблица.
|
||||
static const size_t hash_size = 64 * 1024; /// Fits into the L2 cache (of common Intel CPUs).
|
||||
Offset hash[hash_size]; /// Hash table.
|
||||
|
||||
/// min haystack size to use main algorithm instead of fallback
|
||||
static constexpr auto min_haystack_size_for_algorithm = 20000;
|
||||
const bool fallback; /// Нужно ли использовать fallback алгоритм.
|
||||
const bool fallback; /// Do we need to use the fallback algorithm.
|
||||
|
||||
public:
|
||||
/** haystack_size_hint - ожидаемый суммарный размер haystack при вызовах search. Можно не указывать.
|
||||
* Если указать его достаточно маленьким, то будет использован fallback алгоритм,
|
||||
* так как считается, что тратить время на инициализацию хэш-таблицы не имеет смысла.
|
||||
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
|
||||
* If you specify it small enough, the fallback algorithm will be used,
|
||||
* since it is considered that it's useless to waste time initializing the hash table.
|
||||
*/
|
||||
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
|
||||
: needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
|
||||
fallback{
|
||||
needle_size < 2 * sizeof(ngram_t) || needle_size >= std::numeric_limits<offset_t>::max() ||
|
||||
(haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
|
||||
needle_size < 2 * sizeof(Ngram)
|
||||
|| needle_size >= std::numeric_limits<Offset>::max()
|
||||
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
|
||||
{
|
||||
if (fallback)
|
||||
return;
|
||||
@ -74,12 +76,12 @@ public:
|
||||
memset(hash, 0, sizeof(hash));
|
||||
|
||||
/// int is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
|
||||
for (auto i = static_cast<int>(needle_size - sizeof(ngram_t)); i >= 0; --i)
|
||||
for (auto i = static_cast<int>(needle_size - sizeof(Ngram)); i >= 0; --i)
|
||||
self().putNGram(this->needle + i, i + 1, this->needle);
|
||||
}
|
||||
|
||||
|
||||
/// Если не найдено - возвращается конец haystack.
|
||||
/// If not found, the end of the haystack is returned.
|
||||
const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
|
||||
{
|
||||
if (needle_size == 0)
|
||||
@ -90,15 +92,15 @@ public:
|
||||
if (needle_size == 1 || fallback || haystack_size <= needle_size)
|
||||
return self().search_fallback(haystack, haystack_end);
|
||||
|
||||
/// Будем "прикладывать" needle к haystack и сравнивать n-грам из конца needle.
|
||||
const auto * pos = haystack + needle_size - sizeof(ngram_t);
|
||||
/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
|
||||
const auto * pos = haystack + needle_size - sizeof(Ngram);
|
||||
for (; pos <= haystack_end - needle_size; pos += step)
|
||||
{
|
||||
/// Смотрим все ячейки хэш-таблицы, которые могут соответствовать n-граму из haystack.
|
||||
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
|
||||
for (size_t cell_num = toNGram(pos) % hash_size; hash[cell_num];
|
||||
cell_num = (cell_num + 1) % hash_size)
|
||||
{
|
||||
/// Когда нашли - сравниваем побайтово, используя смещение из хэш-таблицы.
|
||||
/// When found - compare bytewise, using the offset from the hash table.
|
||||
const auto res = pos - (hash[cell_num] - 1);
|
||||
|
||||
if (self().compare(res))
|
||||
@ -106,7 +108,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Оставшийся хвостик.
|
||||
/// The remaining tail.
|
||||
return self().search_fallback(pos - step + 1, haystack_end);
|
||||
}
|
||||
|
||||
@ -119,18 +121,18 @@ protected:
|
||||
CRTP & self() { return static_cast<CRTP &>(*this); }
|
||||
const CRTP & self() const { return const_cast<VolnitskyBase *>(this)->self(); }
|
||||
|
||||
static const ngram_t & toNGram(const UInt8 * const pos)
|
||||
static const Ngram & toNGram(const UInt8 * const pos)
|
||||
{
|
||||
return *reinterpret_cast<const ngram_t *>(pos);
|
||||
return *reinterpret_cast<const Ngram *>(pos);
|
||||
}
|
||||
|
||||
void putNGramBase(const ngram_t ngram, const int offset)
|
||||
void putNGramBase(const Ngram ngram, const int offset)
|
||||
{
|
||||
/// Кладём смещение для n-грама в соответствующую ему ячейку или ближайшую свободную.
|
||||
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
|
||||
size_t cell_num = ngram % hash_size;
|
||||
|
||||
while (hash[cell_num])
|
||||
cell_num = (cell_num + 1) % hash_size; /// Поиск следующей свободной ячейки.
|
||||
cell_num = (cell_num + 1) % hash_size; /// Search for the next free cell.
|
||||
|
||||
hash[cell_num] = offset;
|
||||
}
|
||||
@ -145,7 +147,7 @@ protected:
|
||||
|
||||
union
|
||||
{
|
||||
ngram_t n;
|
||||
Ngram n;
|
||||
Chars chars;
|
||||
};
|
||||
|
||||
@ -260,7 +262,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
|
||||
union
|
||||
{
|
||||
ngram_t n;
|
||||
Ngram n;
|
||||
Chars chars;
|
||||
};
|
||||
|
||||
@ -272,15 +274,17 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
}
|
||||
else
|
||||
{
|
||||
/** n-грам (в случае n = 2)
|
||||
* может быть целиком расположен внутри одной кодовой точки,
|
||||
* либо пересекаться с двумя кодовыми точками.
|
||||
/** n-gram (in the case of n = 2)
|
||||
* can be entirely located within one code point,
|
||||
* or intersect with two code points.
|
||||
*
|
||||
* В первом случае, нужно рассматривать до двух альтернатив - эта кодовая точка в верхнем и нижнем регистре,
|
||||
* а во втором случае - до четырёх альтернатив - фрагменты двух кодовых точек во всех комбинациях регистров.
|
||||
* In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
|
||||
* and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
|
||||
*
|
||||
* При этом не учитывается зависимость перевода между регистрами от локали (пример - турецкие Ii)
|
||||
* а также композиция/декомпозиция и другие особенности.
|
||||
* It does not take into account the dependence of case conversion on the locale (for example - Turkish `Ii`)
|
||||
* as well as composition / decomposition and other features.
|
||||
*
|
||||
* It also does not work if characters with lower and upper cases are represented by different number of bytes or code points.
|
||||
*/
|
||||
|
||||
using Seq = UInt8[6];
|
||||
@ -302,12 +306,12 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
putNGramBase(n, offset);
|
||||
else
|
||||
{
|
||||
/// where is the given ngram in respect to UTF-8 sequence start?
|
||||
/// where is the given ngram in respect to the start of UTF-8 sequence?
|
||||
const auto seq_ngram_offset = pos - seq_pos;
|
||||
|
||||
Seq seq;
|
||||
|
||||
/// put ngram from lowercase
|
||||
/// put ngram for lowercase
|
||||
utf8.convert(l_u32, seq, sizeof(seq));
|
||||
chars.c0 = seq[seq_ngram_offset];
|
||||
chars.c1 = seq[seq_ngram_offset + 1];
|
||||
@ -326,7 +330,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
/// first sequence may start before u_pos if it is not ASCII
|
||||
auto first_seq_pos = pos;
|
||||
UTF8::syncBackward(first_seq_pos, begin);
|
||||
/// where is the given ngram in respect to the first UTF-8 sequence start?
|
||||
/// where is the given ngram in respect to the start of first UTF-8 sequence?
|
||||
const auto seq_ngram_offset = pos - first_seq_pos;
|
||||
|
||||
const auto first_u32 = utf8.convert(first_seq_pos);
|
||||
|
@ -4,14 +4,14 @@
|
||||
#include <IO/WriteBuffer.h>
|
||||
|
||||
|
||||
/// Выводит переданный размер в байтах в виде 123.45 GiB.
|
||||
/// Displays the passed size in bytes as 123.45 GiB.
|
||||
void formatReadableSizeWithBinarySuffix(double value, DB::WriteBuffer & out, int precision = 2);
|
||||
std::string formatReadableSizeWithBinarySuffix(double value, int precision = 2);
|
||||
|
||||
/// Выводит переданный размер в байтах в виде 132.55 GB.
|
||||
/// Displays the passed size in bytes as 132.55 GB.
|
||||
void formatReadableSizeWithDecimalSuffix(double value, DB::WriteBuffer & out, int precision = 2);
|
||||
std::string formatReadableSizeWithDecimalSuffix(double value, int precision = 2);
|
||||
|
||||
/// Выводит число в виде 123.45 billion.
|
||||
/// Prints the number as 123.45 billion.
|
||||
void formatReadableQuantity(double value, DB::WriteBuffer & out, int precision = 2);
|
||||
std::string formatReadableQuantity(double value, int precision = 2);
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
/** Получить FQDN для локального сервера путём DNS-резолвинга hostname - аналогично вызову утилиты hostname с флагом -f.
|
||||
* Если не получилось отрезолвить, то вернуть hostname - аналогично вызову утилиты hostname без флагов или uname -n.
|
||||
/** Get the FQDN for the local server by resolving the hostname via DNS - similar to calling the 'hostname' tool with the -f flag.
|
||||
* If it cannot be resolved, return the hostname - similar to calling 'hostname' without flags or 'uname -n'.
|
||||
*/
|
||||
const std::string & getFQDNOrHostName();
|
||||
|
81
dbms/src/Common/iostream_debug_helpers.cpp
Normal file
81
dbms/src/Common/iostream_debug_helpers.cpp
Normal file
@ -0,0 +1,81 @@
|
||||
#include "iostream_debug_helpers.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Storages/IStorage.h>
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IBlockInputStream & what)
|
||||
{
|
||||
stream << "IBlockInputStream(id = " << what.getID() << ", name = " << what.getName() << ")";
|
||||
//what.dumpTree(stream); // todo: set const
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::Field & what)
|
||||
{
|
||||
stream << "Field(type = " << what.getTypeName() << ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::NameAndTypePair & what)
|
||||
{
|
||||
stream << "NameAndTypePair(name = " << what.name << ", type = " << what.type << ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IDataType & what)
|
||||
{
|
||||
stream << "IDataType(name = " << what.getName() << ", default = " << what.getDefault() << ", isNullable = " << what.isNullable()
|
||||
<< ", isNumeric = " << what.isNumeric() << ", behavesAsNumber = " << what.behavesAsNumber() << ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IStorage & what)
|
||||
{
|
||||
stream << "IStorage(name = " << what.getName() << ", tableName = " << what.getTableName() << ") {"
|
||||
<< what.getColumnsList().toString()
|
||||
<< "}";
|
||||
// isRemote supportsSampling supportsFinal supportsPrewhere supportsParallelReplicas
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::TableStructureReadLock & what)
|
||||
{
|
||||
stream << "TableStructureReadLock()";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IFunction & what)
|
||||
{
|
||||
stream << "IFunction(name = " << what.getName() << ", variadic = " << what.isVariadic() << ", args = " << what.getNumberOfArguments()
|
||||
<< ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::Block & what)
|
||||
{
|
||||
stream << "Block("
|
||||
<< "size = " << what.getColumns().size()
|
||||
<< ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::ColumnWithTypeAndName & what)
|
||||
{
|
||||
stream << "ColumnWithTypeAndName(name = " << what.name << ", type = " << what.type << ", column = " << what.column << ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IColumn & what)
|
||||
{
|
||||
stream << "IColumn(name = " << what.getName()
|
||||
// TODO: maybe many flags here
|
||||
<< ")";
|
||||
return stream;
|
||||
}
|
37
dbms/src/Common/iostream_debug_helpers.h
Normal file
37
dbms/src/Common/iostream_debug_helpers.h
Normal file
@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
|
||||
|
||||
namespace DB { class IBlockInputStream; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IBlockInputStream & what);
|
||||
|
||||
namespace DB { class Field; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::Field & what);
|
||||
|
||||
namespace DB { struct NameAndTypePair; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::NameAndTypePair & what);
|
||||
|
||||
namespace DB { class IDataType; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IDataType & what);
|
||||
|
||||
namespace DB { class IStorage; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IStorage & what);
|
||||
|
||||
namespace DB { class TableStructureReadLock; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::TableStructureReadLock & what);
|
||||
|
||||
namespace DB { class IFunction; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IFunction & what);
|
||||
|
||||
namespace DB { class Block; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::Block & what);
|
||||
|
||||
namespace DB { struct ColumnWithTypeAndName; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::ColumnWithTypeAndName & what);
|
||||
|
||||
namespace DB { class IColumn; }
|
||||
std::ostream & operator<<(std::ostream & stream, const DB::IColumn & what);
|
||||
|
||||
|
||||
/// some operator<< should be declared before operator<<(... std::shared_ptr<>)
|
||||
#include <common/iostream_debug_helpers.h>
|
@ -12,13 +12,13 @@ namespace Poco
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Позволяет проверить, похож ли адрес на localhost.
|
||||
* Цель этой проверки обычно состоит в том, чтобы сделать предположение,
|
||||
* что при хождении на этот адрес через интернет, мы попадём на себя.
|
||||
* Следует иметь ввиду, что эта проверка делается неточно:
|
||||
* - адрес просто сравнивается с адресами сетевых интерфейсов;
|
||||
* - для каждого сетевого интерфейса берётся только первый адрес;
|
||||
* - не проверяются правила маршрутизации, которые влияют, через какой сетевой интерфейс мы пойдём на заданный адрес.
|
||||
/** Lets you check if the address is similar to `localhost`.
|
||||
* The purpose of this check is usually to make an assumption,
|
||||
* that when we go to this address via the Internet, we'll get to ourselves.
|
||||
* Please note that this check is not accurate:
|
||||
* - the address is simply compared to the addresses of the network interfaces;
|
||||
* - only the first address is taken for each network interface;
|
||||
* - the routing rules that determine through which network interface we reach the specified address are not checked.
|
||||
*/
|
||||
bool isLocalAddress(const Poco::Net::SocketAddress & address);
|
||||
|
||||
|
@ -3,14 +3,14 @@
|
||||
#include <Poco/Path.h>
|
||||
|
||||
|
||||
/** Создаёт локальный (в той же точке монтирования) бэкап (снэпшот) директории.
|
||||
/** Creates a local (at the same mount point) backup (snapshot) of a directory.
|
||||
*
|
||||
* В указанной destination-директории создаёт hard link-и на все файлы source-директории
|
||||
* и во всех вложенных директориях, с сохранением (созданием) всех относительных путей;
|
||||
* а также делает chown, снимая разрешение на запись.
|
||||
* In the specified destination directory, it creates hard links to all files of the source directory
|
||||
* and in all nested directories, with saving (creating) all relative paths;
|
||||
* and also does `chown`, removing the write permission.
|
||||
*
|
||||
* Это защищает данные от случайного удаления или модификации,
|
||||
* и предназначено для использования как простое средство защиты от человеческой или программной ошибки,
|
||||
* но не от аппаратного сбоя.
|
||||
* This protects data from accidental deletion or modification,
|
||||
* and is intended to be used as a simple means of protection against a human or program error,
|
||||
* but not from a hardware failure.
|
||||
*/
|
||||
void localBackup(Poco::Path source_path, Poco::Path destination_path);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user