Merge branch 'ClickHouse:master' into master

This commit is contained in:
Selfuppen 2023-08-22 22:44:07 +08:00 committed by GitHub
commit e11408c901
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
258 changed files with 5758 additions and 1008 deletions

12
.gitmodules vendored
View File

@ -347,3 +347,15 @@
[submodule "contrib/incbin"]
path = contrib/incbin
url = https://github.com/graphitemaster/incbin.git
[submodule "contrib/usearch"]
path = contrib/usearch
url = https://github.com/unum-cloud/usearch.git
[submodule "contrib/SimSIMD"]
path = contrib/SimSIMD
url = https://github.com/ashvardanian/SimSIMD.git
[submodule "contrib/FP16"]
path = contrib/FP16
url = https://github.com/Maratyszcza/FP16.git
[submodule "contrib/robin-map"]
path = contrib/robin-map
url = https://github.com/Tessil/robin-map.git

View File

@ -11,6 +11,7 @@
#include <base/defines.h>
#include <base/types.h>
#include <base/unaligned.h>
#include <base/simd.h>
#include <city.h>
@ -29,6 +30,11 @@
#define CRC_INT __crc32cd
#endif
#if defined(__aarch64__) && defined(__ARM_NEON)
#include <arm_neon.h>
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
/**
* The std::string_view-like container to avoid creating strings to find substrings in the hash table.
@ -74,14 +80,14 @@ using StringRefs = std::vector<StringRef>;
* For more information, see hash_map_string_2.cpp
*/
inline bool compareSSE2(const char * p1, const char * p2)
inline bool compare8(const char * p1, const char * p2)
{
return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p1)),
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p2))));
}
inline bool compareSSE2x4(const char * p1, const char * p2)
inline bool compare64(const char * p1, const char * p2)
{
return 0xFFFF == _mm_movemask_epi8(
_mm_and_si128(
@ -101,7 +107,30 @@ inline bool compareSSE2x4(const char * p1, const char * p2)
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p2) + 3)))));
}
inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
#elif defined(__aarch64__) && defined(__ARM_NEON)
inline bool compare8(const char * p1, const char * p2)
{
uint64_t mask = getNibbleMask(vceqq_u8(
vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2))));
return 0xFFFFFFFFFFFFFFFF == mask;
}
inline bool compare64(const char * p1, const char * p2)
{
uint64_t mask = getNibbleMask(vandq_u8(
vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2))),
vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 16)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 16)))),
vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 32)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 32))),
vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 48)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 48))))));
return 0xFFFFFFFFFFFFFFFF == mask;
}
#endif
#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON))
inline bool memequalWide(const char * p1, const char * p2, size_t size)
{
/** The order of branches and the trick with overlapping comparisons
* are the same as in memcpy implementation.
@ -138,7 +167,7 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
while (size >= 64)
{
if (compareSSE2x4(p1, p2))
if (compare64(p1, p2))
{
p1 += 64;
p2 += 64;
@ -150,17 +179,16 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
switch (size / 16)
{
case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]];
case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]];
case 1: if (!compareSSE2(p1, p2)) return false;
case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]];
case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]];
case 1: if (!compare8(p1, p2)) return false;
}
return compareSSE2(p1 + size - 16, p2 + size - 16);
return compare8(p1 + size - 16, p2 + size - 16);
}
#endif
inline bool operator== (StringRef lhs, StringRef rhs)
{
if (lhs.size != rhs.size)
@ -169,8 +197,8 @@ inline bool operator== (StringRef lhs, StringRef rhs)
if (lhs.size == 0)
return true;
#if defined(__SSE2__)
return memequalSSE2Wide(lhs.data, rhs.data, lhs.size);
#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON))
return memequalWide(lhs.data, rhs.data, lhs.size);
#else
return 0 == memcmp(lhs.data, rhs.data, lhs.size);
#endif

14
base/base/simd.h Normal file
View File

@ -0,0 +1,14 @@
#pragma once
#if defined(__aarch64__) && defined(__ARM_NEON)
# include <arm_neon.h>
# pragma clang diagnostic ignored "-Wreserved-identifier"
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
inline uint64_t getNibbleMask(uint8x16_t res)
{
return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(res), 4)), 0);
}
#endif

View File

@ -4,10 +4,19 @@ macro(add_glob cur_list)
endmacro()
macro(add_headers_and_sources prefix common_path)
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
add_glob(${prefix}_headers ${common_path}/*.h)
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c)
endmacro()
macro(add_headers_only prefix common_path)
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_headers ${common_path}/*.h)
endmacro()
macro(extract_into_parent_list src_list dest_list)
list(REMOVE_ITEM ${src_list} ${ARGN})
get_filename_component(__dir_name ${CMAKE_CURRENT_SOURCE_DIR} NAME)
foreach(file IN ITEMS ${ARGN})
list(APPEND ${dest_list} ${__dir_name}/${file})
endforeach()
set(${dest_list} "${${dest_list}}" PARENT_SCOPE)
endmacro()

View File

@ -19,6 +19,19 @@ else ()
message (FATAL_ERROR "Platform ${CMAKE_SYSTEM_NAME} is not supported")
endif ()
# Since we always use toolchain files to generate hermetic builds, cmake will
# always think it's a cross-compilation, See
# https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING.html
#
# This will slow down cmake configuration and compilation. For instance, LLVM
# will try to configure NATIVE LLVM targets with all tests enabled (You'll see
# Building native llvm-tblgen...).
#
# Here, we set it manually by checking the system name and processor.
if (${CMAKE_SYSTEM_NAME} STREQUAL ${CMAKE_HOST_SYSTEM_NAME} AND ${CMAKE_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_HOST_SYSTEM_PROCESSOR})
set (CMAKE_CROSSCOMPILING 0)
endif ()
if (CMAKE_CROSSCOMPILING)
if (OS_DARWIN)
# FIXME: broken dependencies

View File

@ -196,6 +196,17 @@ if (ARCH_S390X)
add_contrib(crc32-s390x-cmake crc32-s390x)
endif()
add_contrib (annoy-cmake annoy)
option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
if (ENABLE_USEARCH)
add_contrib (FP16-cmake FP16)
add_contrib (robin-map-cmake robin-map)
add_contrib (SimSIMD-cmake SimSIMD)
add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimdSIMD
else ()
message(STATUS "Not using USearch")
endif ()
add_contrib (xxHash-cmake xxHash)
add_contrib (libbcrypt-cmake libbcrypt)

1
contrib/FP16 vendored Submodule

@ -0,0 +1 @@
Subproject commit 0a92994d729ff76a58f692d3028ca1b64b145d91

View File

@ -0,0 +1 @@
# See contrib/usearch-cmake/CMakeLists.txt

1
contrib/SimSIMD vendored Submodule

@ -0,0 +1 @@
Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf

View File

@ -0,0 +1 @@
# See contrib/usearch-cmake/CMakeLists.txt

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit bb179652862b528d94a9032a784796c4db846c3f
Subproject commit 063a9372b4ae304e869a5c5724971d0501552731

View File

@ -19,6 +19,12 @@ add_library (_boost_filesystem ${SRCS_FILESYSTEM})
add_library (boost::filesystem ALIAS _boost_filesystem)
target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
if (OS_LINUX)
target_compile_definitions (_boost_filesystem PRIVATE
BOOST_FILESYSTEM_HAS_POSIX_AT_APIS=1
)
endif ()
# headers-only
add_library (_boost_headers_only INTERFACE)

View File

@ -1,6 +1,7 @@
option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES})
if (ARCH_AARCH64)
# Disable ISA-L libray on aarch64.
# ISA-L is only available for x86-64, so it shall be disabled for other platforms
if (NOT ARCH_AMD64)
set (ENABLE_ISAL_LIBRARY OFF)
endif ()

2
contrib/krb5 vendored

@ -1 +1 @@
Subproject commit 1d5c970e9369f444caf81d1d06a231a6bad8581f
Subproject commit 71b06c2276009ae649c7703019f3b4605f66fd3d

View File

@ -147,7 +147,7 @@ target_compile_definitions(_libarchive PUBLIC
target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")
if (TARGET ch_contrib::xz)
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1)
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1 HAVE_LIBLZMA=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::xz)
endif()
@ -156,6 +156,16 @@ if (TARGET ch_contrib::zlib)
target_link_libraries(_libarchive PRIVATE ch_contrib::zlib)
endif()
if (TARGET ch_contrib::zstd)
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::zstd)
endif()
if (TARGET ch_contrib::bzip2)
target_compile_definitions(_libarchive PUBLIC HAVE_BZLIB_H=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::bzip2)
endif()
if (OS_LINUX)
target_compile_definitions(
_libarchive PUBLIC

@ -1 +1 @@
Subproject commit 4ef26de16c229429141e424375142c9b03234b66
Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0

2
contrib/orc vendored

@ -1 +1 @@
Subproject commit 568d1d60c250af1890f226c182bc15bd8cc94cf1
Subproject commit a20d1d9d7ad4a4be7b7ba97588e16ca8b9abb2b6

1
contrib/robin-map vendored Submodule

@ -0,0 +1 @@
Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d

View File

@ -0,0 +1 @@
# See contrib/usearch-cmake/CMakeLists.txt

2
contrib/snappy vendored

@ -1 +1 @@
Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf
Subproject commit 6ebb5b1ab8801ea3fde103c5c29f5ab86df5fe7a

1
contrib/usearch vendored Submodule

@ -0,0 +1 @@
Subproject commit 387b78b28b17b8954024ffc81e97cbcfa10d1f30

View File

@ -0,0 +1,17 @@
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include")
set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16")
set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map")
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map")
add_library(_usearch INTERFACE)
target_include_directories(_usearch SYSTEM INTERFACE
${FP16_PROJECT_DIR}/include
${ROBIN_MAP_PROJECT_DIR}/include
${SIMSIMD_PROJECT_DIR}/include
${USEARCH_SOURCE_DIR})
add_library(ch_contrib::usearch ALIAS _usearch)
target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH)

View File

@ -20,6 +20,9 @@ services:
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir1:-}
target: /var/lib/clickhouse-keeper
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir1:-}
target: /var/lib/clickhouse
entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config1.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log"
cap_add:
- SYS_PTRACE
@ -53,6 +56,9 @@ services:
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir2:-}
target: /var/lib/clickhouse-keeper
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir1:-}
target: /var/lib/clickhouse
entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config2.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log"
cap_add:
- SYS_PTRACE
@ -86,6 +92,9 @@ services:
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir3:-}
target: /var/lib/clickhouse-keeper
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir1:-}
target: /var/lib/clickhouse
entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config3.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log"
cap_add:
- SYS_PTRACE

View File

@ -19,9 +19,9 @@
<max_threads>12</max_threads>
<!-- disable JIT for perf tests -->
<compile_expressions>1</compile_expressions>
<compile_aggregate_expressions>1</compile_aggregate_expressions>
<compile_sort_description>1</compile_sort_description>
<compile_expressions>0</compile_expressions>
<compile_aggregate_expressions>0</compile_aggregate_expressions>
<compile_sort_description>0</compile_sort_description>
<!-- Don't fail some prewarm queries too early -->
<timeout_before_checking_execution_speed>60</timeout_before_checking_execution_speed>

View File

@ -63,6 +63,7 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@ -93,6 +94,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start

View File

@ -0,0 +1,45 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.9.55-lts (b9c5c8622d3) FIXME as compared to v23.3.8.21-lts (1675f2264f3)
#### Performance Improvement
* Backported in [#52213](https://github.com/ClickHouse/ClickHouse/issues/52213): Do not store blocks in `ANY` hash join if nothing is inserted. [#48633](https://github.com/ClickHouse/ClickHouse/pull/48633) ([vdimir](https://github.com/vdimir)).
* Backported in [#52826](https://github.com/ClickHouse/ClickHouse/issues/52826): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
#### Build/Testing/Packaging Improvement
* Backported in [#53019](https://github.com/ClickHouse/ClickHouse/issues/53019): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53288](https://github.com/ClickHouse/ClickHouse/issues/53288): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#53461](https://github.com/ClickHouse/ClickHouse/issues/53461): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).

View File

@ -13,7 +13,7 @@ If more than one table is required, it is highly recommended to use the [Materia
``` sql
CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64)
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_table', 'postgres_user', 'postgres_password')
PRIMARY KEY key;
```

View File

@ -142,13 +142,15 @@ was specified for ANN indexes, the default value is 100 million.
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch)
## Annoy {#annoy}
Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
disabled on ARM due to memory safety problems with the algorithm.
This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which is based on a recursive division of the
space in random linear surfaces (lines in 2D, planes in 3D etc.).
This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random
linear surfaces (lines in 2D, planes in 3D etc.).
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
@ -216,3 +218,60 @@ ORDER BY L2Distance(vectors, Point)
LIMIT N
SETTINGS annoy_index_search_k_nodes=100;
```
## USearch {#usearch}
This type of ANN index is based on the [the USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors,
that are expensive to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further
distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient
navigation around immutable persistent files, without loading them into RAM.
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/UMrhB3icP9w"
width="640"
height="360"
frameborder="0"
allow="autoplay;
fullscreen;
picture-in-picture"
allowfullscreen>
</iframe>
</div>
Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
```sql
CREATE TABLE table_with_usearch_index
(
id Int64,
vectors Array(Float32),
INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column:
```sql
CREATE TABLE table_with_usearch_index
(
id Int64,
vectors Tuple(Float32[, Float32[, ...]]),
INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
USearch currently supports two distance functions:
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
distance function was specified during index creation, `L2Distance` is used as default.

View File

@ -196,6 +196,7 @@ SELECT * FROM nestedt FORMAT TSV
- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`.
- [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`.
- [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
- [input_format_tsv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_allow_variable_number_of_columns) - allow variable number of columns in TSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
## TabSeparatedRaw {#tabseparatedraw}
@ -473,7 +474,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
## CSVWithNames {#csvwithnames}
@ -502,9 +503,10 @@ the types from input data will be compared with the types of the corresponding c
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings.
If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, trailing empty lines at the end of file will be skipped.
Additional settings:
- [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) - enables automatic detection of header with names and types if any. Default value - `true`.
- [input_format_custom_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_skip_trailing_empty_lines) - skip trailing empty lines at the end of file . Default value - `false`.
- [input_format_custom_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_custom_allow_variable_number_of_columns) - allow variable number of columns in CustomSeparated format, ignore extra columns and use default values on missing columns. Default value - `false`.
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
@ -1262,6 +1264,7 @@ SELECT * FROM json_each_row_nested
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`.
- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.

Binary file not shown.

After

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 246 KiB

View File

@ -6,7 +6,34 @@ sidebar_label: MySQL Interface
# MySQL Interface
ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
ClickHouse supports the MySQL wire protocol. This allow tools that are MySQL-compatible to interact with ClickHouse seamlessly (e.g. [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md)).
## Enabling the MySQL Interface On ClickHouse Cloud
1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab
![Credentials screen - Prompt](./images/mysql1.png)
2. Toggle the switch to enable the MySQL interface for this specific service. This will expose port `3306` for this service and prompt you with your MySQL connection screen that include your unique MySQL username. The password will be the same as the service's default user password.
![Credentials screen - Enabled MySQL](./images/mysql2.png)
Alternatively, in order to enable the MySQL interface for an existing service:
1. Ensure your service is in `Running` state then click on the "View connection string" button for the service you want to enable the MySQL interface for
![Connection screen - Prompt MySQL](./images/mysql3.png)
2. Toggle the switch to enable the MySQL interface for this specific service. This will prompt you to enter the default password.
![Connection screen - Prompt MySQL](./images/mysql4.png)
3. After entering the password, you will get prompted the MySQL connection string for this service
![Connection screen - MySQL Enabled](./images/mysql5.png)
## Enabling the MySQL Interface On Self-managed ClickHouse
Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files):
``` xml
<clickhouse>
@ -20,7 +47,7 @@ Startup your ClickHouse server and look for a log message similar to the followi
{} <Information> Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004
```
## Connect mysql to ClickHouse
## Connect MySQL to ClickHouse
The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse:

View File

@ -221,6 +221,10 @@ Default: 1024
Size of cache for index marks. Zero means disabled.
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 0
@ -230,6 +234,10 @@ Default: 0
Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 0
@ -255,6 +263,10 @@ Default: SLRU
Size of cache for marks (index of MergeTree family of tables).
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 5368709120
@ -288,7 +300,7 @@ Default: 1000
Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
:::
Type: UInt64
@ -300,7 +312,7 @@ Default: 0
Limit on total number of concurrent insert queries. Zero means Unlimited.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
:::
Type: UInt64
@ -312,7 +324,7 @@ Default: 0
Limit on total number of concurrently select queries. Zero means Unlimited.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
:::
Type: UInt64
@ -456,6 +468,10 @@ Sets the cache size (in bytes) for mapped files. This setting allows avoiding fr
Note that the amount of data in mapped files does not consume memory directly and is not accounted for in query or server memory usage — because this memory can be discarded similar to the OS page cache. The cache is dropped (the files are closed) automatically on the removal of old parts in tables of the MergeTree family, also it can be dropped manually by the `SYSTEM DROP MMAP CACHE` query.
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 1000
@ -605,6 +621,10 @@ There is one shared cache for the server. Memory is allocated on demand. The cac
The uncompressed cache is advantageous for very short queries in individual cases.
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 0

View File

@ -627,6 +627,13 @@ Column type should be String. If value is empty, default names `row_{i}`will be
Default value: ''.
### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns}
Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats.
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
Disabled by default.
## TSV format settings {#tsv-format-settings}
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}
@ -764,6 +771,13 @@ When enabled, trailing empty lines at the end of TSV file will be skipped.
Disabled by default.
### input_format_tsv_allow_variable_number_of_columns {#input_format_tsv_allow_variable_number_of_columns}
Allow variable number of columns in rows in TSV input format.
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
Disabled by default.
## CSV format settings {#csv-format-settings}
### format_csv_delimiter {#format_csv_delimiter}
@ -955,9 +969,11 @@ Result
```text
" string "
```
### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values.
Allow variable number of columns in rows in CSV input format.
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
Disabled by default.
@ -1571,6 +1587,13 @@ When enabled, trailing empty lines at the end of file in CustomSeparated format
Disabled by default.
### input_format_custom_allow_variable_number_of_columns {#input_format_custom_allow_variable_number_of_columns}
Allow variable number of columns in rows in CustomSeparated input format.
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
Disabled by default.
## Regexp format settings {#regexp-format-settings}
### format_regexp_escaping_rule {#format_regexp_escaping_rule}

View File

@ -23,6 +23,7 @@ Columns:
- `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database).
- `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database).
- `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown".
- `name` ([String](../../sql-reference/data-types/string.md)) - An alias to cluster.
**Example**

View File

@ -1819,6 +1819,72 @@ Result:
└────────────────────────────────────┘
```
## toUTCTimestamp
Convert DateTime/DateTime64 type value from other time zone to UTC timezone timestamp
**Syntax**
``` sql
toUTCTimestamp(time_val, time_zone)
```
**Arguments**
- `time_val` — A DateTime/DateTime64 type const value or a expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md)
**Returned value**
- DateTime/DateTime64 in text form
**Example**
``` sql
SELECT toUTCTimestamp(toDateTime('2023-03-16'), 'Asia/Shanghai');
```
Result:
``` text
┌─toUTCTimestamp(toDateTime('2023-03-16'),'Asia/Shanghai')┐
│ 2023-03-15 16:00:00 │
└─────────────────────────────────────────────────────────┘
```
## fromUTCTimestamp
Convert DateTime/DateTime64 type value from UTC timezone to other time zone timestamp
**Syntax**
``` sql
fromUTCTimestamp(time_val, time_zone)
```
**Arguments**
- `time_val` — A DateTime/DateTime64 type const value or a expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md)
**Returned value**
- DateTime/DateTime64 in text form
**Example**
``` sql
SELECT fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00', 3), 'Asia/Shanghai');
```
Result:
``` text
┌─fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00',3),'Asia/Shanghai')─┐
│ 2023-03-16 18:00:00.000 │
└─────────────────────────────────────────────────────────────────────────┘
```
## Related content
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)

View File

@ -66,13 +66,13 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
## DROP DNS CACHE
Resets ClickHouses internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
Clears ClickHouses internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters.
## DROP MARK CACHE
Resets the mark cache.
Clears the mark cache.
## DROP REPLICA
@ -106,22 +106,18 @@ Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica
## DROP UNCOMPRESSED CACHE
Reset the uncompressed data cache.
Clears the uncompressed data cache.
The uncompressed data cache is enabled/disabled with the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache).
Its size can be configured using the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size).
## DROP COMPILED EXPRESSION CACHE
Reset the compiled expression cache.
Clears the compiled expression cache.
The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions).
## DROP QUERY CACHE
Resets the [query cache](../../operations/query-cache.md).
```sql
SYSTEM DROP QUERY CACHE [ON CLUSTER cluster_name]
```
Clears the [query cache](../../operations/query-cache.md).
## FLUSH LOGS
@ -443,9 +439,9 @@ SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QU
```
- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped.
- If `QUERIES ALL` modifier is specified, all protocols are stopped.
- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped.
- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped.
- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause.
### SYSTEM START LISTEN

View File

@ -668,8 +668,7 @@ void LocalServer::processConfig()
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
if (uncompressed_cache_size)
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
@ -680,8 +679,7 @@ void LocalServer::processConfig()
mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
if (mark_cache_size)
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
if (index_uncompressed_cache_size > max_cache_size)
@ -689,8 +687,7 @@ void LocalServer::processConfig()
index_uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
if (index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
if (index_mark_cache_size > max_cache_size)
@ -698,8 +695,7 @@ void LocalServer::processConfig()
index_mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
if (index_mark_cache_size)
global_context->setIndexMarkCache(index_mark_cache_size);
global_context->setIndexMarkCache(index_mark_cache_size);
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
if (mmap_cache_size > max_cache_size)
@ -707,11 +703,10 @@ void LocalServer::processConfig()
mmap_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
global_context->setMMappedFileCache(mmap_cache_size);
/// In Server.cpp (./clickhouse-server), we would initialize the query cache here.
/// Intentionally not doing this in clickhouse-local as it doesn't make sense.
/// Initialize a dummy query cache.
global_context->setQueryCache(0, 0, 0, 0);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);

View File

@ -390,7 +390,10 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI
static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed)
{
const UInt128 & src = src_uuid.toUnderType();
auto src_copy = src_uuid;
transformEndianness<std::endian::little, std::endian::native>(src_copy);
const UInt128 & src = src_copy.toUnderType();
UInt128 & dst = dst_uuid.toUnderType();
SipHash hash;
@ -400,8 +403,9 @@ static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed)
/// Saving version and variant from an old UUID
dst = hash.get128();
dst.items[1] = (dst.items[1] & 0x1fffffffffffffffull) | (src.items[1] & 0xe000000000000000ull);
dst.items[0] = (dst.items[0] & 0xffffffffffff0fffull) | (src.items[0] & 0x000000000000f000ull);
const UInt64 trace[2] = {0x000000000000f000ull, 0xe000000000000000ull};
UUIDHelpers::getLowBytes(dst_uuid) = (UUIDHelpers::getLowBytes(dst_uuid) & (0xffffffffffffffffull - trace[1])) | (UUIDHelpers::getLowBytes(src_uuid) & trace[1]);
UUIDHelpers::getHighBytes(dst_uuid) = (UUIDHelpers::getHighBytes(dst_uuid) & (0xffffffffffffffffull - trace[0])) | (UUIDHelpers::getHighBytes(src_uuid) & trace[0]);
}
class FixedStringModel : public IModel

View File

@ -1105,6 +1105,69 @@ try
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
/// Set up caches.
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * server_settings.cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
String mark_cache_policy = server_settings.mark_cache_policy;
size_t mark_cache_size = server_settings.mark_cache_size;
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexMarkCache(index_mark_cache_size);
size_t mmap_cache_size = server_settings.mmap_cache_size;
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setMMappedFileCache(mmap_cache_size);
size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
size_t query_cache_query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
if (query_cache_max_size_in_bytes > max_cache_size)
{
query_cache_max_size_in_bytes = max_cache_size;
LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
/// Initialize main config reloader.
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
@ -1324,7 +1387,14 @@ try
global_context->updateStorageConfiguration(*config);
global_context->updateInterserverCredentials(*config);
global_context->updateUncompressedCacheConfiguration(*config);
global_context->updateMarkCacheConfiguration(*config);
global_context->updateIndexUncompressedCacheConfiguration(*config);
global_context->updateIndexMarkCacheConfiguration(*config);
global_context->updateMMappedFileCacheConfiguration(*config);
global_context->updateQueryCacheConfiguration(*config);
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
#if USE_SSL
CertificateReloader::instance().tryLoad(*config);
@ -1484,19 +1554,6 @@ try
/// Limit on total number of concurrently executed queries.
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
/// Set up caches.
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * server_settings.cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
@ -1512,61 +1569,6 @@ try
server_settings.async_insert_queue_flush_on_shutdown));
}
String mark_cache_policy = server_settings.mark_cache_policy;
size_t mark_cache_size = server_settings.mark_cache_size;
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
if (index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size);
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
if (index_mark_cache_size)
global_context->setIndexMarkCache(server_settings.index_mark_cache_size);
size_t mmap_cache_size = server_settings.mmap_cache_size;
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
if (mmap_cache_size)
global_context->setMMappedFileCache(server_settings.mmap_cache_size);
size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
size_t query_cache_query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
if (query_cache_max_size_in_bytes > max_cache_size)
{
query_cache_max_size_in_bytes = max_cache_size;
LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
/// Set path for format schema files
fs::path format_schema_path(config().getString("format_schema_path", path / "format_schemas/"));
global_context->setFormatSchemaPath(format_schema_path);
@ -2072,6 +2074,9 @@ void Server::createServers(
for (const auto & protocol : protocols)
{
if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol))
continue;
std::string prefix = "protocols." + protocol + ".";
std::string port_name = prefix + "port";
std::string description {"<undefined> protocol"};
@ -2081,9 +2086,6 @@ void Server::createServers(
if (!config.has(prefix + "port"))
continue;
if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name))
continue;
std::vector<std::string> hosts;
if (config.has(prefix + "host"))
hosts.push_back(config.getString(prefix + "host"));

View File

@ -11,6 +11,7 @@
--background: linear-gradient(to bottom, #00CCFF, #00D0D0);
--chart-background: white;
--shadow-color: rgba(0, 0, 0, 0.25);
--moving-shadow-color: rgba(0, 0, 0, 0.5);
--input-shadow-color: rgba(0, 255, 0, 1);
--error-color: red;
--auth-error-color: white;
@ -34,6 +35,7 @@
--background: #151C2C;
--chart-background: #1b2834;
--shadow-color: rgba(0, 0, 0, 0);
--moving-shadow-color: rgba(255, 255, 255, 0.25);
--input-shadow-color: rgba(255, 128, 0, 0.25);
--error-color: #F66;
--legend-background: rgba(255, 255, 255, 0.25);
@ -91,6 +93,21 @@
position: relative;
}
.chart-maximized {
flex: 1 100%;
height: 75vh
}
.chart-moving {
z-index: 11;
box-shadow: 0 0 2rem var(--moving-shadow-color);
}
.chart-displaced {
opacity: 75%;
filter: blur(1px);
}
.chart div { position: absolute; }
.inputs {
@ -303,6 +320,7 @@
}
.chart-buttons a {
margin-right: 0.25rem;
user-select: none;
}
.chart-buttons a:hover {
color: var(--chart-button-hover-color);
@ -454,11 +472,13 @@
let host = 'https://play.clickhouse.com/';
let user = 'explorer';
let password = '';
let add_http_cors_header = true;
/// If it is hosted on server, assume that it is the address of ClickHouse.
if (location.protocol != 'file:') {
host = location.origin;
user = 'default';
add_http_cors_header = false;
}
const errorCodeMessageMap = {
@ -793,6 +813,92 @@ function insertChart(i) {
let edit_buttons = document.createElement('div');
edit_buttons.className = 'chart-buttons';
let move = document.createElement('a');
let move_text = document.createTextNode('✥');
move.appendChild(move_text);
let is_dragging = false;
move.addEventListener('mousedown', e => {
const idx = getCurrentIndex();
is_dragging = true;
chart.className = 'chart chart-moving';
let offset_x = e.clientX;
let offset_y = e.clientY;
let displace_idx = null;
let displace_chart = null;
function mouseup(e) {
is_dragging = false;
chart.className = 'chart';
chart.style.left = null;
chart.style.top = null;
if (displace_idx !== null) {
const elem = queries[idx];
queries.splice(idx, 1);
queries.splice(displace_idx, 0, elem);
displace_chart.className = 'chart';
drawAll();
}
}
function mousemove(e) {
if (!is_dragging) {
document.body.removeEventListener('mousemove', mousemove);
document.body.removeEventListener('mouseup', mouseup);
return;
}
let x = e.clientX - offset_x;
let y = e.clientY - offset_y;
chart.style.left = `${x}px`;
chart.style.top = `${y}px`;
displace_idx = null;
displace_chart = null;
let current_idx = -1;
for (const elem of charts.querySelectorAll('.chart')) {
++current_idx;
if (current_idx == idx) {
continue;
}
const this_rect = chart.getBoundingClientRect();
const this_center_x = this_rect.left + this_rect.width / 2;
const this_center_y = this_rect.top + this_rect.height / 2;
const elem_rect = elem.getBoundingClientRect();
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
elem.className = 'chart chart-displaced';
displace_idx = current_idx;
displace_chart = elem;
} else {
elem.className = 'chart';
}
}
}
document.body.addEventListener('mouseup', mouseup);
document.body.addEventListener('mousemove', mousemove);
});
let maximize = document.createElement('a');
let maximize_text = document.createTextNode('🗖');
maximize.appendChild(maximize_text);
maximize.addEventListener('click', e => {
const idx = getCurrentIndex();
chart.className = (chart.className == 'chart' ? 'chart chart-maximized' : 'chart');
resize();
});
let edit = document.createElement('a');
let edit_text = document.createTextNode('✎');
edit.appendChild(edit_text);
@ -825,6 +931,8 @@ function insertChart(i) {
saveState();
});
edit_buttons.appendChild(move);
edit_buttons.appendChild(maximize);
edit_buttons.appendChild(edit);
edit_buttons.appendChild(trash);
@ -962,8 +1070,6 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
};
}
let add_http_cors_header = false;
async function draw(idx, chart, url_params, query) {
if (plots[idx]) {
plots[idx].destroy();

View File

@ -46,7 +46,7 @@ void MultipleAccessStorage::setStorages(const std::vector<StoragePtr> & storages
{
std::lock_guard lock{mutex};
nested_storages = std::make_shared<const Storages>(storages);
ids_cache.reset();
ids_cache.clear();
}
void MultipleAccessStorage::addStorage(const StoragePtr & new_storage)
@ -69,7 +69,7 @@ void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove)
auto new_storages = std::make_shared<Storages>(*nested_storages);
new_storages->erase(new_storages->begin() + index);
nested_storages = new_storages;
ids_cache.reset();
ids_cache.clear();
}
std::vector<StoragePtr> MultipleAccessStorage::getStorages()

View File

@ -11,6 +11,7 @@
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Common/TransformEndianness.hpp>
#include <Core/Settings.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
@ -49,6 +50,7 @@ namespace
md5.update(type_storage_chars, strlen(type_storage_chars));
UUID result;
memcpy(&result, md5.digest().data(), md5.digestLength());
transformEndianness<std::endian::native, std::endian::little>(result);
return result;
}

View File

@ -109,7 +109,7 @@ public:
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinary(this->data(place).numerator, buf);
writeBinaryLittleEndian(this->data(place).numerator, buf);
if constexpr (std::is_unsigned_v<Denominator>)
writeVarUInt(this->data(place).denominator, buf);
@ -119,7 +119,7 @@ public:
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinary(this->data(place).numerator, buf);
readBinaryLittleEndian(this->data(place).numerator, buf);
if constexpr (std::is_unsigned_v<Denominator>)
readVarUInt(this->data(place).denominator, buf);

View File

@ -100,6 +100,17 @@ void AggregateFunctionBoundingRatioData::deserialize(ReadBuffer & buf)
}
}
inline void writeBinary(const AggregateFunctionBoundingRatioData::Point & p, WriteBuffer & buf)
{
writePODBinary(p, buf);
}
inline void readBinary(AggregateFunctionBoundingRatioData::Point & p, ReadBuffer & buf)
{
readPODBinary(p, buf);
}
class AggregateFunctionBoundingRatio final : public IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>
{
private:

View File

@ -1,28 +1,26 @@
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(clickhouse_aggregate_functions .)
list(REMOVE_ITEM clickhouse_aggregate_functions_sources
extract_into_parent_list(clickhouse_aggregate_functions_sources dbms_sources
IAggregateFunction.cpp
AggregateFunctionFactory.cpp
AggregateFunctionCombinatorFactory.cpp
AggregateFunctionCount.cpp
AggregateFunctionState.cpp
AggregateFunctionCount.cpp
parseAggregateFunctionParameters.cpp
FactoryHelpers.cpp
)
list(REMOVE_ITEM clickhouse_aggregate_functions_headers
extract_into_parent_list(clickhouse_aggregate_functions_headers dbms_headers
IAggregateFunction.h
IAggregateFunctionCombinator.h
AggregateFunctionFactory.h
AggregateFunctionCombinatorFactory.h
AggregateFunctionCount.h
AggregateFunctionState.h
parseAggregateFunctionParameters.h
AggregateFunctionCount.cpp
FactoryHelpers.h
parseAggregateFunctionParameters.h
)
add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_sources})
add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_headers} ${clickhouse_aggregate_functions_sources})
target_link_libraries(clickhouse_aggregate_functions PRIVATE dbms PUBLIC ch_contrib::cityhash)
if(ENABLE_EXAMPLES)

View File

@ -783,6 +783,16 @@ public:
for (size_t i = 0; i < size; ++i)
result[i] = std::numeric_limits<float>::quiet_NaN();
}
friend void writeBinary(const Kind & x, WriteBuffer & buf)
{
writePODBinary(x, buf);
}
friend void readBinary(Kind & x, ReadBuffer & buf)
{
readPODBinary(x, buf);
}
};
#undef SMALL_THRESHOLD

View File

@ -276,3 +276,12 @@ private:
return NanLikeValueConstructor<ResultType, std::is_floating_point_v<ResultType>>::getValue();
}
};
namespace DB
{
template <typename T>
void readBinary(std::pair<T, UInt32> & x, ReadBuffer & buf)
{
readPODBinary(x, buf);
}
}

View File

@ -375,7 +375,7 @@ void BackupImpl::readBackupMetadata()
if (!archive_reader->fileExists(".backup"))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Archive {} is not a backup", backup_name_for_logging);
setCompressedSize();
in = archive_reader->readFile(".backup");
in = archive_reader->readFile(".backup", /*throw_on_not_found=*/true);
}
else
{
@ -685,7 +685,7 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFileImpl(const SizeAndChecks
{
/// Make `read_buffer` if there is data for this backup entry in this backup.
if (use_archive)
read_buffer = archive_reader->readFile(info.data_file_name);
read_buffer = archive_reader->readFile(info.data_file_name, /*throw_on_not_found=*/true);
else
read_buffer = reader->readFile(info.data_file_name);
}

View File

@ -563,8 +563,13 @@ void BackupsWorker::writeBackupEntries(BackupMutablePtr backup, BackupEntries &&
}
};
if (always_single_threaded || !backups_thread_pool->trySchedule([job] { job(true); }))
if (always_single_threaded)
{
job(false);
continue;
}
backups_thread_pool->scheduleOrThrowOnError([job] { job(true); });
}
{
@ -854,8 +859,7 @@ void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr
}
};
if (!thread_pool.trySchedule([job] { job(true); }))
job(false);
thread_pool.scheduleOrThrowOnError([job] { job(true); });
}
{

View File

@ -49,6 +49,8 @@ else()
add_definitions(-DENABLE_MULTITARGET_CODE=0)
endif()
set(dbms_headers)
set(dbms_sources)
add_subdirectory (Access)
add_subdirectory (Backups)
@ -78,10 +80,6 @@ add_subdirectory (Daemon)
add_subdirectory (Loggers)
add_subdirectory (Formats)
set(dbms_headers)
set(dbms_sources)
add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)
add_headers_and_sources(clickhouse_common_io IO)
@ -151,47 +149,7 @@ else()
message(STATUS "StorageFileLog is only supported on Linux")
endif ()
list (APPEND clickhouse_common_io_sources ${CONFIG_INCLUDE_PATH}/config_version.cpp)
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/FunctionsLogical.cpp Functions/indexHint.cpp)
list (APPEND dbms_headers Functions/IFunction.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/FunctionsLogical.h Functions/indexHint.h)
list (APPEND dbms_sources
AggregateFunctions/IAggregateFunction.cpp
AggregateFunctions/AggregateFunctionFactory.cpp
AggregateFunctions/AggregateFunctionCombinatorFactory.cpp
AggregateFunctions/AggregateFunctionState.cpp
AggregateFunctions/AggregateFunctionCount.cpp
AggregateFunctions/parseAggregateFunctionParameters.cpp)
list (APPEND dbms_headers
AggregateFunctions/IAggregateFunction.h
AggregateFunctions/IAggregateFunctionCombinator.h
AggregateFunctions/AggregateFunctionFactory.h
AggregateFunctions/AggregateFunctionCombinatorFactory.h
AggregateFunctions/AggregateFunctionState.h
AggregateFunctions/AggregateFunctionCount.cpp
AggregateFunctions/FactoryHelpers.h
AggregateFunctions/parseAggregateFunctionParameters.h)
list (APPEND dbms_sources
TableFunctions/ITableFunction.cpp
TableFunctions/TableFunctionView.cpp
TableFunctions/TableFunctionFactory.cpp)
list (APPEND dbms_headers
TableFunctions/ITableFunction.h
TableFunctions/TableFunctionView.h
TableFunctions/TableFunctionFactory.h)
list (APPEND dbms_sources
Dictionaries/DictionaryFactory.cpp
Dictionaries/DictionarySourceFactory.cpp
Dictionaries/DictionaryStructure.cpp
Dictionaries/getDictionaryConfigurationFromAST.cpp)
list (APPEND dbms_headers
Dictionaries/DictionaryFactory.h
Dictionaries/DictionarySourceFactory.h
Dictionaries/DictionaryStructure.h
Dictionaries/getDictionaryConfigurationFromAST.h)
list(APPEND clickhouse_common_io_sources ${CONFIG_INCLUDE_PATH}/config_version.cpp)
if (NOT ENABLE_SSL)
list (REMOVE_ITEM clickhouse_common_io_sources Common/OpenSSLHelpers.cpp)
@ -599,6 +557,10 @@ if (TARGET ch_contrib::annoy)
dbms_target_link_libraries(PUBLIC ch_contrib::annoy)
endif()
if (TARGET ch_contrib::usearch)
dbms_target_link_libraries(PUBLIC ch_contrib::usearch)
endif()
if (TARGET ch_rust::skim)
dbms_target_include_directories(PRIVATE $<TARGET_PROPERTY:ch_rust::skim,INTERFACE_INCLUDE_DIRECTORIES>)
dbms_target_link_libraries(PUBLIC ch_rust::skim)

View File

@ -865,10 +865,14 @@ ColumnPtr ColumnNullable::getNestedColumnWithDefaultOnNull() const
if (next_null_index != start)
res->insertRangeFrom(*nested_column, start, next_null_index - start);
if (next_null_index < end)
res->insertDefault();
size_t next_none_null_index = next_null_index;
while (next_none_null_index < end && null_map_data[next_none_null_index])
++next_none_null_index;
start = next_null_index + 1;
if (next_null_index != next_none_null_index)
res->insertManyDefaults(next_none_null_index - next_null_index);
start = next_none_null_index;
}
return res;
}

View File

@ -151,7 +151,7 @@ public:
std::lock_guard cache_lock(mutex);
/// Insert the new value only if the token is still in present in insert_tokens.
/// (The token may be absent because of a concurrent reset() call).
/// (The token may be absent because of a concurrent clear() call).
bool result = false;
auto token_it = insert_tokens.find(key);
if (token_it != insert_tokens.end() && token_it->second.get() == token)
@ -179,13 +179,13 @@ public:
return cache_policy->dump();
}
void reset()
void clear()
{
std::lock_guard lock(mutex);
insert_tokens.clear();
hits = 0;
misses = 0;
cache_policy->reset(lock);
cache_policy->clear(lock);
}
void remove(const Key & key)

View File

@ -270,8 +270,8 @@ std::unordered_set<String> DNSResolver::reverseResolve(const Poco::Net::IPAddres
void DNSResolver::dropCache()
{
impl->cache_host.reset();
impl->cache_address.reset();
impl->cache_host.clear();
impl->cache_address.clear();
std::scoped_lock lock(impl->update_mutex, impl->drop_mutex);

View File

@ -33,6 +33,7 @@ static struct InitFiu
#define APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE) \
ONCE(replicated_merge_tree_commit_zk_fail_after_op) \
REGULAR(use_delayed_remote_source) \
REGULAR(dummy_failpoint) \
PAUSEABLE_ONCE(dummy_pausable_failpoint_once) \
PAUSEABLE(dummy_pausable_failpoint)

View File

@ -20,7 +20,7 @@ template <typename T>
static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf)
{
writeChar('\'', buf);
writeText(x.getValue(), x.getScale(), buf, {});
writeText(x.getValue(), x.getScale(), buf, /* trailing_zeros */ true);
writeChar('\'', buf);
}

View File

@ -2,9 +2,10 @@
#include <city.h>
#include <Core/Types.h>
#include <Core/UUID.h>
#include <base/StringRef.h>
#include <base/types.h>
#include <base/unaligned.h>
#include <base/StringRef.h>
#include <type_traits>
@ -406,7 +407,7 @@ struct UInt128TrivialHash
struct UUIDTrivialHash
{
size_t operator()(DB::UUID x) const { return x.toUnderType().items[0]; }
size_t operator()(DB::UUID x) const { return DB::UUIDHelpers::getHighBytes(x); }
};
struct UInt256Hash

View File

@ -201,11 +201,11 @@ struct HashTableCell
void setMapped(const value_type & /*value*/) {}
/// Serialization, in binary and text form.
void write(DB::WriteBuffer & wb) const { DB::writeBinary(key, wb); }
void write(DB::WriteBuffer & wb) const { DB::writeBinaryLittleEndian(key, wb); }
void writeText(DB::WriteBuffer & wb) const { DB::writeDoubleQuoted(key, wb); }
/// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
void read(DB::ReadBuffer & rb) { DB::readBinaryLittleEndian(key, rb); }
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
/// When cell pointer is moved during erase, reinsert or resize operations

View File

@ -10,11 +10,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
template <typename T>
struct EqualWeightFunction
{
@ -46,8 +41,8 @@ public:
virtual size_t count(std::lock_guard<std::mutex> & /*cache_lock*/) const = 0;
virtual size_t maxSize(std::lock_guard<std::mutex>& /*cache_lock*/) const = 0;
virtual void setMaxCount(size_t /*max_count*/, std::lock_guard<std::mutex> & /* cache_lock */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for cache policy"); }
virtual void setMaxSize(size_t /*max_size_in_bytes*/, std::lock_guard<std::mutex> & /* cache_lock */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for cache policy"); }
virtual void setMaxCount(size_t /*max_count*/, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
virtual void setMaxSize(size_t /*max_size_in_bytes*/, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries, std::lock_guard<std::mutex> & /*cache_lock*/) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
/// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
@ -60,7 +55,7 @@ public:
virtual void remove(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
virtual void reset(std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
virtual void clear(std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
virtual std::vector<KeyMapped> dump() const = 0;
protected:

View File

@ -7,9 +7,8 @@
namespace DB
{
/// Cache policy LRU evicts entries which are not used for a long time.
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size)
/// of that value.
/// Cache policy LRU evicts entries which are not used for a long time. Also see cache policy SLRU for reference.
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size) of that value.
/// Cache starts to evict entries when their total weight exceeds max_size_in_bytes.
/// Value weight should not change after insertion.
/// To work with the thread-safe implementation of this class use a class "CacheBase" with first parameter "LRU"
@ -24,11 +23,12 @@ public:
using typename Base::OnWeightLossFunction;
/** Initialize LRUCachePolicy with max_size_in_bytes and max_count.
* max_size_in_bytes == 0 means the cache accepts no entries.
* max_count == 0 means no elements size restrictions.
*/
LRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, OnWeightLossFunction on_weight_loss_function_)
: Base(std::make_unique<NoCachePolicyUserQuota>())
, max_size_in_bytes(std::max(1uz, max_size_in_bytes_))
, max_size_in_bytes(max_size_in_bytes_)
, max_count(max_count_)
, on_weight_loss_function(on_weight_loss_function_)
{
@ -49,7 +49,19 @@ public:
return max_size_in_bytes;
}
void reset(std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
{
max_count = max_count_;
removeOverflow();
}
void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
{
max_size_in_bytes = max_size_in_bytes_;
removeOverflow();
}
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
{
queue.clear();
cells.clear();
@ -155,8 +167,8 @@ private:
/// Total weight of values.
size_t current_size_in_bytes = 0;
const size_t max_size_in_bytes;
const size_t max_count;
size_t max_size_in_bytes;
size_t max_count;
WeightFunction weight_function;
OnWeightLossFunction on_weight_loss_function;
@ -172,10 +184,7 @@ private:
auto it = cells.find(key);
if (it == cells.end())
{
// Queue became inconsistent
abort();
}
std::terminate(); // Queue became inconsistent
const auto & cell = it->second;
@ -190,10 +199,7 @@ private:
on_weight_loss_function(current_weight_lost);
if (current_size_in_bytes > (1ull << 63))
{
// Queue became inconsistent
abort();
}
std::terminate(); // Queue became inconsistent
}
};

View File

@ -5,6 +5,7 @@
#include <Common/Exception.h>
#include <base/hex.h>
#include <Core/Settings.h>
#include <Core/UUID.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -227,8 +228,8 @@ bool TracingContext::parseTraceparentHeader(std::string_view traceparent, String
++data;
this->trace_flags = unhex2(data);
this->trace_id.toUnderType().items[0] = trace_id_higher_64;
this->trace_id.toUnderType().items[1] = trace_id_lower_64;
UUIDHelpers::getHighBytes(this->trace_id) = trace_id_higher_64;
UUIDHelpers::getLowBytes(this->trace_id) = trace_id_lower_64;
this->span_id = span_id_64;
return true;
}
@ -239,8 +240,8 @@ String TracingContext::composeTraceparentHeader() const
// parent id.
return fmt::format(
"00-{:016x}{:016x}-{:016x}-{:02x}",
trace_id.toUnderType().items[0],
trace_id.toUnderType().items[1],
UUIDHelpers::getHighBytes(trace_id),
UUIDHelpers::getLowBytes(trace_id),
span_id,
// This cast is needed because fmt is being weird and complaining that
// "mixing character types is not allowed".
@ -335,8 +336,8 @@ TracingContextHolder::TracingContextHolder(
while (_parent_trace_context.trace_id == UUID())
{
// Make sure the random generated trace_id is not 0 which is an invalid id.
_parent_trace_context.trace_id.toUnderType().items[0] = thread_local_rng();
_parent_trace_context.trace_id.toUnderType().items[1] = thread_local_rng();
UUIDHelpers::getHighBytes(_parent_trace_context.trace_id) = thread_local_rng();
UUIDHelpers::getLowBytes(_parent_trace_context.trace_id) = thread_local_rng();
}
_parent_trace_context.span_id = 0;
}

View File

@ -9,9 +9,8 @@ namespace DB
{
/// Cache policy SLRU evicts entries which were used only once and are not used for a long time,
/// this policy protects entries which were used more then once from a sequential scan.
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size)
/// of that value.
/// this policy protects entries which were used more then once from a sequential scan. Also see cache policy LRU for reference.
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size) of that value.
/// Cache starts to evict entries when their total weight exceeds max_size_in_bytes.
/// Value weight should not change after insertion.
/// To work with the thread-safe implementation of this class use a class "CacheBase" with first parameter "SLRU"
@ -30,8 +29,9 @@ public:
* max_protected_size == 0 means that the default protected size is equal to half of the total max size.
*/
/// TODO: construct from special struct with cache policy parameters (also with max_protected_size).
SLRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, double size_ratio, OnWeightLossFunction on_weight_loss_function_)
SLRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, double size_ratio_, OnWeightLossFunction on_weight_loss_function_)
: Base(std::make_unique<NoCachePolicyUserQuota>())
, size_ratio(size_ratio_)
, max_protected_size(static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio)))
, max_size_in_bytes(max_size_in_bytes_)
, max_count(max_count_)
@ -54,7 +54,22 @@ public:
return max_size_in_bytes;
}
void reset(std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
{
max_count = max_count_;
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
}
void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
{
max_protected_size = static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio));
max_size_in_bytes = max_size_in_bytes_;
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
}
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
{
cells.clear();
probationary_queue.clear();
@ -68,12 +83,13 @@ public:
auto it = cells.find(key);
if (it == cells.end())
return;
auto & cell = it->second;
current_size_in_bytes -= cell.size;
if (cell.is_protected)
{
current_protected_size -= cell.size;
}
auto & queue = cell.is_protected ? protected_queue : probationary_queue;
queue.erase(cell.queue_iterator);
cells.erase(it);
@ -192,16 +208,17 @@ private:
Cells cells;
const double size_ratio;
size_t current_protected_size = 0;
size_t current_size_in_bytes = 0;
const size_t max_protected_size;
const size_t max_size_in_bytes;
const size_t max_count;
size_t max_protected_size;
size_t max_size_in_bytes;
size_t max_count;
WeightFunction weight_function;
OnWeightLossFunction on_weight_loss_function;
void removeOverflow(SLRUQueue & queue, const size_t max_weight_size, size_t & current_weight_size, bool is_protected)
void removeOverflow(SLRUQueue & queue, size_t max_weight_size, size_t & current_weight_size, bool is_protected)
{
size_t current_weight_lost = 0;
size_t queue_size = queue.size();
@ -223,8 +240,7 @@ private:
{
need_remove = [&]()
{
return ((max_count != 0 && cells.size() > max_count)
|| (current_weight_size > max_weight_size)) && (queue_size > 0);
return ((max_count != 0 && cells.size() > max_count) || (current_weight_size > max_weight_size)) && (queue_size > 0);
};
}
@ -234,10 +250,7 @@ private:
auto it = cells.find(key);
if (it == cells.end())
{
// Queue became inconsistent
abort();
}
std::terminate(); // Queue became inconsistent
auto & cell = it->second;
@ -262,10 +275,7 @@ private:
on_weight_loss_function(current_weight_lost);
if (current_size_in_bytes > (1ull << 63))
{
// Queue became inconsistent
abort();
}
std::terminate(); // Queue became inconsistent
}
};

View File

@ -101,6 +101,12 @@ bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds)
out.close();
err.close();
for (auto & [_, fd] : write_fds)
fd.close();
for (auto & [_, fd] : read_fds)
fd.close();
return waitForPid(pid, timeout_in_seconds);
}
@ -287,6 +293,12 @@ int ShellCommand::tryWait()
out.close();
err.close();
for (auto & [_, fd] : write_fds)
fd.close();
for (auto & [_, fd] : read_fds)
fd.close();
LOG_TRACE(getLogger(), "Will wait for shell command pid {}", pid);
int status = 0;

View File

@ -121,7 +121,7 @@ public:
max_size_in_bytes = max_size_in_bytes_;
}
void reset(std::lock_guard<std::mutex> & /* cache_lock */) override
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
{
cache.clear();
}

View File

@ -2,6 +2,7 @@
#include <optional>
#include <base/types.h>
#include <base/simd.h>
#include <Common/BitHelpers.h>
#include <Poco/UTF8Encoding.h>
@ -72,16 +73,13 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
res += __builtin_popcount(_mm_movemask_epi8(
_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(data)), threshold)));
#elif defined(__aarch64__) && defined(__ARM_NEON)
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
auto get_nibble_mask
= [](uint8x16_t input) -> uint64_t { return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); };
constexpr auto bytes_sse = 16;
const auto * src_end_sse = data + size / bytes_sse * bytes_sse;
const auto threshold = vdupq_n_s8(0xBF);
for (; data < src_end_sse; data += bytes_sse)
res += std::popcount(get_nibble_mask(vcgtq_s8(vld1q_s8(reinterpret_cast<const int8_t *>(data)), threshold)));
res += std::popcount(getNibbleMask(vcgtq_s8(vld1q_s8(reinterpret_cast<const int8_t *>(data)), threshold)));
res >>= 2;
#endif

View File

@ -4,6 +4,8 @@
#include <bit>
#include <cstdint>
#include <base/simd.h>
#include <Core/Defines.h>
@ -504,11 +506,6 @@ inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
# include <arm_neon.h>
# pragma clang diagnostic ignored "-Wreserved-identifier"
inline uint64_t getNibbleMask(uint8x16_t res)
{
return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(res), 4)), 0);
}
template <typename Char>
inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{

View File

@ -92,7 +92,7 @@ TEST(SLRUCache, removeFromProtected)
ASSERT_TRUE(value == nullptr);
}
TEST(SLRUCache, reset)
TEST(SLRUCache, clear)
{
using SimpleCacheBase = DB::CacheBase<int, int>;
auto slru_cache = SimpleCacheBase("SLRU", /*max_size_in_bytes=*/10, /*max_count=*/0, /*size_ratio*/0.5);
@ -101,7 +101,7 @@ TEST(SLRUCache, reset)
slru_cache.set(2, std::make_shared<int>(4)); /// add to protected_queue
slru_cache.reset();
slru_cache.clear();
auto value = slru_cache.get(1);
ASSERT_TRUE(value == nullptr);

View File

@ -73,8 +73,8 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
const char * const source_end = source + source_size;
while (source < source_end)
{
T curr_src = unalignedLoad<T>(source);
unalignedStore<T>(dest, curr_src - prev_src);
T curr_src = unalignedLoadLittleEndian<T>(source);
unalignedStoreLittleEndian<T>(dest, curr_src - prev_src);
prev_src = curr_src;
source += sizeof(T);
@ -94,10 +94,10 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
const char * const source_end = source + source_size;
while (source < source_end)
{
accumulator += unalignedLoad<T>(source);
accumulator += unalignedLoadLittleEndian<T>(source);
if (dest + sizeof(accumulator) > output_end) [[unlikely]]
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
unalignedStore<T>(dest, accumulator);
unalignedStoreLittleEndian<T>(dest, accumulator);
source += sizeof(T);
dest += sizeof(T);

View File

@ -86,6 +86,37 @@ struct DataTypeDecimalTrait
}
};
/// Calculates result = x * multiplier + delta.
/// If the multiplication or the addition overflows, returns false or throws DECIMAL_OVERFLOW.
template <typename T, bool throw_on_error>
inline bool multiplyAdd(const T & x, const T & multiplier, const T & delta, T & result)
{
T multiplied = 0;
if (common::mulOverflow(x, multiplier, multiplied))
{
if constexpr (throw_on_error)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
return false;
}
if (common::addOverflow(multiplied, delta, result))
{
if constexpr (throw_on_error)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
return false;
}
return true;
}
template <typename T>
inline T multiplyAdd(const T & x, const T & multiplier, const T & delta)
{
T res;
multiplyAdd<T, true>(x, multiplier, delta, res);
return res;
}
/** Make a decimal value from whole and fractional components with given scale multiplier.
* where scale_multiplier = scaleMultiplier<T>(scale)
* this is to reduce number of calls to scaleMultiplier when scale is known.
@ -104,23 +135,10 @@ inline bool decimalFromComponentsWithMultiplierImpl(
{
using T = typename DecimalType::NativeType;
const auto fractional_sign = whole < 0 ? -1 : 1;
T whole_scaled = 0;
if (common::mulOverflow(whole, scale_multiplier, whole_scaled))
{
if constexpr (throw_on_error)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
return false;
}
T value;
if (common::addOverflow(whole_scaled, fractional_sign * (fractional % scale_multiplier), value))
{
if constexpr (throw_on_error)
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
if (!multiplyAdd<T, throw_on_error>(
whole, scale_multiplier, fractional_sign * (fractional % scale_multiplier), value))
return false;
}
result = DecimalType(value);
return true;
}

View File

@ -138,7 +138,7 @@ template <typename T> bool decimalEqual(T x, T y, UInt32 x_scale, UInt32 y_scale
template <typename T> bool decimalLess(T x, T y, UInt32 x_scale, UInt32 y_scale);
template <typename T> bool decimalLessOrEqual(T x, T y, UInt32 x_scale, UInt32 y_scale);
template <typename T>
template <is_decimal T>
class DecimalField
{
public:
@ -838,7 +838,7 @@ template <> struct Field::EnumToType<Field::Types::Decimal32> { using Type = Dec
template <> struct Field::EnumToType<Field::Types::Decimal64> { using Type = DecimalField<Decimal64>; };
template <> struct Field::EnumToType<Field::Types::Decimal128> { using Type = DecimalField<Decimal128>; };
template <> struct Field::EnumToType<Field::Types::Decimal256> { using Type = DecimalField<Decimal256>; };
template <> struct Field::EnumToType<Field::Types::AggregateFunctionState> { using Type = DecimalField<AggregateFunctionStateData>; };
template <> struct Field::EnumToType<Field::Types::AggregateFunctionState> { using Type = AggregateFunctionStateData; };
template <> struct Field::EnumToType<Field::Types::CustomType> { using Type = CustomType; };
template <> struct Field::EnumToType<Field::Types::Bool> { using Type = UInt64; };

View File

@ -174,8 +174,8 @@ String GTIDSets::toPayload() const
for (const auto & set : sets)
{
// MySQL UUID is big-endian.
writeBinaryBigEndian(set.uuid.toUnderType().items[0], buffer);
writeBinaryBigEndian(set.uuid.toUnderType().items[1], buffer);
writeBinaryBigEndian(UUIDHelpers::getHighBytes(set.uuid), buffer);
writeBinaryBigEndian(UUIDHelpers::getLowBytes(set.uuid), buffer);
UInt64 intervals_size = set.intervals.size();
buffer.write(reinterpret_cast<const char *>(&intervals_size), 8);

View File

@ -940,13 +940,8 @@ namespace MySQLReplication
payload.readStrict(reinterpret_cast<char *>(&commit_flag), 1);
// MySQL UUID is big-endian.
UInt64 high = 0UL;
UInt64 low = 0UL;
readBigEndianStrict(payload, reinterpret_cast<char *>(&low), 8);
gtid.uuid.toUnderType().items[0] = low;
readBigEndianStrict(payload, reinterpret_cast<char *>(&high), 8);
gtid.uuid.toUnderType().items[1] = high;
readBinaryBigEndian(UUIDHelpers::getHighBytes(gtid.uuid), payload);
readBinaryBigEndian(UUIDHelpers::getLowBytes(gtid.uuid), payload);
payload.readStrict(reinterpret_cast<char *>(&gtid.seq_no), 8);

View File

@ -33,8 +33,10 @@ namespace MySQLReplication
inline void readBigEndianStrict(ReadBuffer & payload, char * to, size_t n)
{
payload.readStrict(to, n);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
char *start = to, *end = to + n;
std::reverse(start, end);
#endif
}
inline void readTimeFractionalPart(ReadBuffer & payload, UInt32 & factional, UInt16 meta)

View File

@ -39,7 +39,7 @@ namespace DB
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
M(Int32, max_connections, 1024, "Max server connections.", 0) \
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating asynchronous metrics.", 0) \
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
M(String, default_database, "default", "Default database name.", 0) \
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \

View File

@ -644,7 +644,7 @@ class IColumn;
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \
M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw'", 0) \
M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \
\
M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \
@ -779,6 +779,7 @@ class IColumn;
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
@ -876,8 +877,10 @@ class IColumn;
M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \
M(Bool, input_format_parquet_filter_push_down, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \
M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \
M(Bool, input_format_arrow_allow_missing_columns, false, "Allow missing columns while reading Arrow input formats", 0) \
@ -894,6 +897,10 @@ class IColumn;
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \
M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \
M(Bool, input_format_tsv_allow_variable_number_of_columns, false, "Ignore extra columns in TSV input (if file has more columns than expected) and treat missing fields in TSV input as default values", 0) \
M(Bool, input_format_custom_allow_variable_number_of_columns, false, "Ignore extra columns in CustomSeparated input (if file has more columns than expected) and treat missing fields in CustomSeparated input as default values", 0) \
M(Bool, input_format_json_compact_allow_variable_number_of_columns, false, "Ignore extra columns in JSONCompact(EachRow) input (if file has more columns than expected) and treat missing fields in JSONCompact(EachRow) input as default values", 0) \
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
@ -1042,7 +1049,6 @@ class IColumn;
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
\
M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \
M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \
// End of FORMAT_FACTORY_SETTINGS

View File

@ -9,10 +9,11 @@ namespace UUIDHelpers
{
UUID generateV4()
{
UInt128 res{thread_local_rng(), thread_local_rng()};
res.items[0] = (res.items[0] & 0xffffffffffff0fffull) | 0x0000000000004000ull;
res.items[1] = (res.items[1] & 0x3fffffffffffffffull) | 0x8000000000000000ull;
return UUID{res};
UUID uuid;
getHighBytes(uuid) = (thread_local_rng() & 0xffffffffffff0fffull) | 0x0000000000004000ull;
getLowBytes(uuid) = (thread_local_rng() & 0x3fffffffffffffffull) | 0x8000000000000000ull;
return uuid;
}
}

View File

@ -2,6 +2,59 @@
#include <Core/Types.h>
/**
* Implementation Details
* ^^^^^^^^^^^^^^^^^^^^^^
* The underlying implementation for a UUID has it represented as a 128-bit unsigned integer. Underlying this, a wide
* integer with a 64-bit unsigned integer as its base is utilized. This wide integer can be interfaced with as an array
* to access different components of the base. For example, on a Little Endian platform, accessing at index 0 will give
* you the 8 higher bytes, and index 1 will give you the 8 lower bytes. On a Big Endian platform, this is reversed where
* index 0 will give you the 8 lower bytes, and index 1 will give you the 8 higher bytes.
*
* uuid.toUnderType().items[0]
*
* // uint64_t uint64_t
* // [xxxxxxxx] [ ]
*
* uuid.toUnderType().items[1]
*
* // uint64_t uint64_t
* // [ ] [xxxxxxxx]
*
* The way that data is stored in the underlying wide integer treats the data as two 64-bit chunks sequenced in the
* array. On a Little Endian platform, this results in the following layout
*
* // Suppose uuid contains 61f0c404-5cb3-11e7-907b-a6006ad3dba0
*
* uuid.toUnderType().items[0]
*
* // uint64_t as HEX
* // [E7 11 B3 5C 04 C4 F0 61] [A0 DB D3 6A 00 A6 7B 90]
* // ^^^^^^^^^^^^^^^^^^^^^^^
*
* uuid.toUnderType().items[1]
*
* // uint64_t as HEX
* // [E7 11 B3 5C 04 C4 F0 61] [A0 DB D3 6A 00 A6 7B 90]
* // ^^^^^^^^^^^^^^^^^^^^^^^
*
* while on a Big Endian platform this would be
*
* // Suppose uuid contains 61f0c404-5cb3-11e7-907b-a6006ad3dba0
*
* uuid.toUnderType().items[0]
*
* // uint64_t as HEX
* // [90 7B A6 00 6A D3 DB A0] [61 F0 C4 04 5C B3 11 E7]
* // ^^^^^^^^^^^^^^^^^^^^^^^
*
* uuid.toUnderType().items[1]
*
* // uint64_t as HEX
* // [90 7B A6 00 6A D3 DB A0] [61 F0 C4 04 5C B3 11 E7]
* // ^^^^^^^^^^^^^^^^^^^^^^^
*/
namespace DB
{
@ -11,6 +64,29 @@ namespace UUIDHelpers
/// Generate random UUID.
UUID generateV4();
constexpr size_t HighBytes = (std::endian::native == std::endian::little) ? 0 : 1;
constexpr size_t LowBytes = (std::endian::native == std::endian::little) ? 1 : 0;
inline uint64_t getHighBytes(const UUID & uuid)
{
return uuid.toUnderType().items[HighBytes];
}
inline uint64_t & getHighBytes(UUID & uuid)
{
return uuid.toUnderType().items[HighBytes];
}
inline uint64_t getLowBytes(const UUID & uuid)
{
return uuid.toUnderType().items[LowBytes];
}
inline uint64_t & getLowBytes(UUID & uuid)
{
return uuid.toUnderType().items[LowBytes];
}
const UUID Nil{};
}

View File

@ -46,6 +46,7 @@ public:
bool canBeUsedInBooleanContext() const override { return dictionary_type->canBeUsedInBooleanContext(); }
bool isValueRepresentedByNumber() const override { return dictionary_type->isValueRepresentedByNumber(); }
bool isValueRepresentedByInteger() const override { return dictionary_type->isValueRepresentedByInteger(); }
bool isValueRepresentedByUnsignedInteger() const override { return dictionary_type->isValueRepresentedByUnsignedInteger(); }
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
bool haveMaximumSizeOfValue() const override { return dictionary_type->haveMaximumSizeOfValue(); }
size_t getMaximumSizeOfValueInMemory() const override { return dictionary_type->getMaximumSizeOfValueInMemory(); }

View File

@ -10,6 +10,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/parseDateTimeBestEffort.h>
#include <IO/ReadBufferFromString.h>
namespace DB
{
@ -145,12 +146,29 @@ void SerializationDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & is
char maybe_quote = *istr.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
{
++istr.position();
readText(x, istr, settings, time_zone, utc_time_zone);
if (maybe_quote == '\'' || maybe_quote == '\"')
readText(x, istr, settings, time_zone, utc_time_zone);
assertChar(maybe_quote, istr);
}
else
{
if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
{
readText(x, istr, settings, time_zone, utc_time_zone);
}
/// Best effort parsing supports datetime in format like "01.01.2000, 00:00:00"
/// and can mistakenly read comma as a part of datetime.
/// For example data "...,01.01.2000,some string,..." cannot be parsed correctly.
/// To fix this problem we first read CSV string and then try to parse it as datetime.
else
{
String datetime_str;
readCSVString(datetime_str, istr, settings.csv);
ReadBufferFromString buf(datetime_str);
readText(x, buf, settings, time_zone, utc_time_zone);
}
}
if (x < 0)
x = 0;

View File

@ -9,6 +9,7 @@
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <IO/parseDateTimeBestEffort.h>
#include <IO/ReadBufferFromString.h>
namespace DB
{
@ -143,12 +144,29 @@ void SerializationDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer &
char maybe_quote = *istr.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
{
++istr.position();
readText(x, scale, istr, settings, time_zone, utc_time_zone);
if (maybe_quote == '\'' || maybe_quote == '\"')
readText(x, scale, istr, settings, time_zone, utc_time_zone);
assertChar(maybe_quote, istr);
}
else
{
if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
{
readText(x, scale, istr, settings, time_zone, utc_time_zone);
}
/// Best effort parsing supports datetime in format like "01.01.2000, 00:00:00"
/// and can mistakenly read comma as a part of datetime.
/// For example data "...,01.01.2000,some string,..." cannot be parsed correctly.
/// To fix this problem we first read CSV string and then try to parse it as datetime.
else
{
String datetime_str;
readCSVString(datetime_str, istr, settings.csv);
ReadBufferFromString buf(datetime_str);
readText(x, scale, buf, settings, time_zone, utc_time_zone);
}
}
assert_cast<ColumnType &>(column).getData().push_back(x);
}

View File

@ -111,25 +111,25 @@ void SerializationUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
{
UUID x = field.get<UUID>();
writeBinary(x, ostr);
writeBinaryLittleEndian(x, ostr);
}
void SerializationUUID::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const
{
UUID x;
readBinary(x, istr);
readBinaryLittleEndian(x, istr);
field = NearestFieldType<UUID>(x);
}
void SerializationUUID::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeBinary(assert_cast<const ColumnVector<UUID> &>(column).getData()[row_num], ostr);
writeBinaryLittleEndian(assert_cast<const ColumnVector<UUID> &>(column).getData()[row_num], ostr);
}
void SerializationUUID::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
UUID x;
readBinary(x, istr);
readBinaryLittleEndian(x, istr);
assert_cast<ColumnVector<UUID> &>(column).getData().push_back(x);
}

View File

@ -830,6 +830,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
query_context->setSetting("allow_experimental_hash_functions", 1);
query_context->setSetting("allow_experimental_object_type", 1);
query_context->setSetting("allow_experimental_annoy_index", 1);
query_context->setSetting("allow_experimental_usearch_index", 1);
query_context->setSetting("allow_experimental_bigint_types", 1);
query_context->setSetting("allow_experimental_window_functions", 1);
query_context->setSetting("allow_experimental_geo_types", 1);

View File

@ -23,11 +23,10 @@ StoragePtr IDatabase::getTable(const String & name, ContextPtr context) const
return storage;
TableNameHints hints(this->shared_from_this(), context);
std::vector<String> names = hints.getHints(name);
if (!names.empty())
{
if (names.empty())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));
else
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist. Maybe you meant {}?", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name), backQuoteIfNeed(names[0]));
}
else throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));
}
std::vector<std::pair<ASTPtr, StoragePtr>> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const

View File

@ -16,10 +16,20 @@ if (OMIT_HEAVY_DEBUG_SYMBOLS)
PROPERTIES COMPILE_FLAGS -g0)
endif()
list(REMOVE_ITEM clickhouse_dictionaries_sources DictionaryFactory.cpp DictionarySourceFactory.cpp DictionaryStructure.cpp getDictionaryConfigurationFromAST.cpp)
list(REMOVE_ITEM clickhouse_dictionaries_headers DictionaryFactory.h DictionarySourceFactory.h DictionaryStructure.h getDictionaryConfigurationFromAST.h)
extract_into_parent_list(clickhouse_dictionaries_sources dbms_sources
DictionaryFactory.cpp
DictionarySourceFactory.cpp
DictionaryStructure.cpp
getDictionaryConfigurationFromAST.cpp
)
extract_into_parent_list(clickhouse_dictionaries_headers dbms_headers
DictionaryFactory.h
DictionarySourceFactory.h
DictionaryStructure.h
getDictionaryConfigurationFromAST.h
)
add_library(clickhouse_dictionaries ${clickhouse_dictionaries_sources})
add_library(clickhouse_dictionaries ${clickhouse_dictionaries_headers} ${clickhouse_dictionaries_sources})
target_link_libraries(clickhouse_dictionaries
PRIVATE

View File

@ -86,6 +86,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
format_settings.custom.try_detect_header = settings.input_format_custom_detect_header;
format_settings.custom.skip_trailing_empty_lines = settings.input_format_custom_skip_trailing_empty_lines;
format_settings.custom.allow_variable_number_of_columns = settings.input_format_custom_allow_variable_number_of_columns;
format_settings.date_time_input_format = settings.date_time_input_format;
format_settings.date_time_output_format = settings.date_time_output_format;
format_settings.interval.output_format = settings.interval_output_format;
@ -115,6 +116,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8;
format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name;
format_settings.json.allow_object_type = context->getSettingsRef().allow_experimental_object_type;
format_settings.json.compact_allow_variable_number_of_columns = settings.input_format_json_compact_allow_variable_number_of_columns;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size;
@ -122,6 +124,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.parquet.output_version = settings.output_format_parquet_version;
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
format_settings.parquet.preserve_order = settings.input_format_parquet_preserve_order;
format_settings.parquet.filter_push_down = settings.input_format_parquet_filter_push_down;
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference;
format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
@ -163,6 +166,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.tsv.skip_first_lines = settings.input_format_tsv_skip_first_lines;
format_settings.tsv.try_detect_header = settings.input_format_tsv_detect_header;
format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines;
format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns;
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
@ -186,6 +190,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string;
format_settings.orc.output_compression_method = settings.output_format_orc_compression_method;
format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder;
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference;

View File

@ -90,9 +90,6 @@ private:
const FormatSettings & settings)>;
// Incompatible with FileSegmentationEngine.
//
// In future we may also want to pass some information about WHERE conditions (SelectQueryInfo?)
// and get some information about projections (min/max/count per column per row group).
using RandomAccessInputCreator = std::function<InputFormatPtr(
ReadBuffer & buf,
const Block & header,

View File

@ -175,6 +175,7 @@ struct FormatSettings
EscapingRule escaping_rule = EscapingRule::Escaped;
bool try_detect_header = true;
bool skip_trailing_empty_lines = false;
bool allow_variable_number_of_columns = false;
} custom;
struct
@ -197,6 +198,7 @@ struct FormatSettings
bool validate_types_from_metadata = true;
bool validate_utf8 = false;
bool allow_object_type = false;
bool compact_allow_variable_number_of_columns = false;
} json;
struct
@ -229,6 +231,7 @@ struct FormatSettings
bool allow_missing_columns = false;
bool skip_columns_with_unsupported_types_in_schema_inference = false;
bool case_insensitive_column_matching = false;
bool filter_push_down = true;
std::unordered_set<int> skip_row_groups = {};
bool output_string_as_string = false;
bool output_fixed_string_as_fixed_byte_array = true;
@ -317,6 +320,7 @@ struct FormatSettings
UInt64 skip_first_lines = 0;
bool try_detect_header = true;
bool skip_trailing_empty_lines = false;
bool allow_variable_number_of_columns = false;
} tsv;
struct
@ -344,6 +348,7 @@ struct FormatSettings
std::unordered_set<int> skip_stripes = {};
bool output_string_as_string = false;
ORCCompression output_compression_method = ORCCompression::NONE;
bool use_fast_decoder = true;
} orc;
/// For capnProto format we should determine how to

View File

@ -3,10 +3,22 @@ add_subdirectory(divide)
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(clickhouse_functions .)
list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp extractTimeZoneFromFunctionArguments.cpp FunctionsLogical.cpp)
list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h FunctionHelpers.h extractTimeZoneFromFunctionArguments.h FunctionsLogical.h)
extract_into_parent_list(clickhouse_functions_sources dbms_sources
IFunction.cpp
FunctionFactory.cpp
FunctionHelpers.cpp
extractTimeZoneFromFunctionArguments.cpp
FunctionsLogical.cpp
)
extract_into_parent_list(clickhouse_functions_headers dbms_headers
IFunction.h
FunctionFactory.h
FunctionHelpers.h
extractTimeZoneFromFunctionArguments.h
FunctionsLogical.h
)
add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_sources})
add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources})
list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_obj>)

View File

@ -1,6 +1,7 @@
#pragma once
#include <type_traits>
#include <Core/AccurateComparison.h>
#include <Core/DecimalFunctions.h>
#include <Common/DateLUTImpl.h>
#include <DataTypes/DataTypeDate.h>
@ -14,7 +15,6 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/castTypeToEither.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/TransformDateTime64.h>
#include <IO/WriteHelpers.h>
@ -36,7 +36,9 @@ namespace ErrorCodes
/// Corresponding types:
/// - UInt16 => DataTypeDate
/// - UInt32 => DataTypeDateTime
/// - Int32 => DataTypeDate32
/// - DateTime64 => DataTypeDateTime64
/// - Int8 => error
/// Please note that INPUT and OUTPUT types may differ, e.g.:
/// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime'
/// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime'
@ -45,35 +47,27 @@ struct AddNanosecondsImpl
{
static constexpr auto name = "addNanoseconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
auto division = std::div(t.fractional * multiplier + delta, static_cast<Int64>(1000000000));
return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
return t * multiplier + delta;
return DateTime64(DecimalUtils::multiplyAdd(t.value, multiplier, delta));
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9);
return static_cast<UInt32>(t * multiplier + delta);
return DateTime64(DecimalUtils::multiplyAdd(static_cast<Int64>(t), multiplier, delta));
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addNanoSeconds() cannot be used with Date");
throw Exception(ErrorCodes::LOGICAL_ERROR, "addNanoseconds() cannot be used with Date");
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addNanoSeconds() cannot be used with Date32");
throw Exception(ErrorCodes::LOGICAL_ERROR, "addNanoseconds() cannot be used with Date32");
}
};
@ -81,43 +75,29 @@ struct AddMicrosecondsImpl
{
static constexpr auto name = "addMicroseconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
if (scale <= 6)
{
auto division = std::div((t.fractional + delta), static_cast<Int64>(10e6));
return {t.whole * multiplier + division.quot, division.rem};
}
else
{
auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(10e6 * multiplier));
return {t.whole + division.quot, division.rem};
}
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
return scale <= 6 ? t * multiplier + delta : t + delta * multiplier;
return DateTime64(scale <= 6
? DecimalUtils::multiplyAdd(t.value, multiplier, delta)
: DecimalUtils::multiplyAdd(delta, multiplier, t.value));
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(6);
return static_cast<UInt32>(t * multiplier + delta);
return DateTime64(DecimalUtils::multiplyAdd(static_cast<Int64>(t), multiplier, delta));
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMicroSeconds() cannot be used with Date");
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMicroseconds() cannot be used with Date");
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMicroSeconds() cannot be used with Date32");
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMicroseconds() cannot be used with Date32");
}
};
@ -125,43 +105,29 @@ struct AddMillisecondsImpl
{
static constexpr auto name = "addMilliseconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
if (scale <= 3)
{
auto division = std::div((t.fractional + delta), static_cast<Int64>(1000));
return {t.whole * multiplier + division.quot, division.rem};
}
else
{
auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(1000 * multiplier));
return {t.whole + division.quot,division.rem};
}
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
return scale <= 3 ? t * multiplier + delta : t + delta * multiplier;
return DateTime64(scale <= 3
? DecimalUtils::multiplyAdd(t.value, multiplier, delta)
: DecimalUtils::multiplyAdd(delta, multiplier, t.value));
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(3);
return static_cast<UInt32>(t * multiplier + delta);
return DateTime64(DecimalUtils::multiplyAdd(static_cast<Int64>(t), multiplier, delta));
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMilliSeconds() cannot be used with Date");
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMilliseconds() cannot be used with Date");
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMilliSeconds() cannot be used with Date32");
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMilliseconds() cannot be used with Date32");
}
};
@ -169,16 +135,10 @@ struct AddSecondsImpl
{
static constexpr auto name = "addSeconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return {t.whole + delta, t.fractional};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
return DateTime64(DecimalUtils::multiplyAdd(delta, DecimalUtils::scaleMultiplier<DateTime64>(scale), t.value));
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
@ -189,6 +149,7 @@ struct AddSecondsImpl
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
// use default datetime64 scale
static_assert(DataTypeDateTime64::default_scale == 3, "");
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000;
}
@ -202,12 +163,6 @@ struct AddMinutesImpl
{
static constexpr auto name = "addMinutes";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return {t.whole + delta * 60, t.fractional};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
@ -222,6 +177,7 @@ struct AddMinutesImpl
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
// use default datetime64 scale
static_assert(DataTypeDateTime64::default_scale == 3, "");
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000;
}
@ -235,12 +191,6 @@ struct AddHoursImpl
{
static constexpr auto name = "addHours";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return {t.whole + delta * 3600, t.fractional};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
@ -255,6 +205,7 @@ struct AddHoursImpl
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
// use default datetime64 scale
static_assert(DataTypeDateTime64::default_scale == 3, "");
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000;
}
@ -268,12 +219,6 @@ struct AddDaysImpl
{
static constexpr auto name = "addDays";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addDays(t.whole, delta), t.fractional};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
@ -302,12 +247,6 @@ struct AddWeeksImpl
{
static constexpr auto name = "addWeeks";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addWeeks(t.whole, delta), t.fractional};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
@ -336,12 +275,6 @@ struct AddMonthsImpl
{
static constexpr auto name = "addMonths";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addMonths(t.whole, delta), t.fractional};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
@ -370,12 +303,6 @@ struct AddQuartersImpl
{
static constexpr auto name = "addQuarters";
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addQuarters(t.whole, delta), t.fractional};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
@ -404,12 +331,6 @@ struct AddYearsImpl
{
static constexpr auto name = "addYears";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addYears(t.whole, delta), t.fractional};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
@ -581,11 +502,11 @@ namespace date_and_time_type_details
// Compile-time mapping of value (DataType::FieldType) types to corresponding DataType
template <typename FieldType> struct ResultDataTypeMap {};
template <> struct ResultDataTypeMap<UInt16> { using ResultDataType = DataTypeDate; };
template <> struct ResultDataTypeMap<Int16> { using ResultDataType = DataTypeDate; };
template <> struct ResultDataTypeMap<UInt32> { using ResultDataType = DataTypeDateTime; };
template <> struct ResultDataTypeMap<Int32> { using ResultDataType = DataTypeDate32; };
template <> struct ResultDataTypeMap<DateTime64> { using ResultDataType = DataTypeDateTime64; };
template <> struct ResultDataTypeMap<Int64> { using ResultDataType = DataTypeDateTime64; };
template <> struct ResultDataTypeMap<Int8> { using ResultDataType = DataTypeInt8; }; // error
}
template <typename Transform>
@ -705,6 +626,10 @@ public:
return std::make_shared<DataTypeDateTime64>(target_scale.value_or(DataTypeDateTime64::default_scale), std::move(timezone));
}
else if constexpr (std::is_same_v<ResultDataType, DataTypeInt8>)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot be used with {}", getName(), arguments[0].type->getName());
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type in datetime add interval function");
}

View File

@ -507,8 +507,8 @@ public:
// use executeOnUInt instead of using executeOneString
// because the latter one outputs the string in the memory order
Impl::executeOneUIntOrInt(uuid[i].toUnderType().items[0], end, false, false);
Impl::executeOneUIntOrInt(uuid[i].toUnderType().items[1], end, false, true);
Impl::executeOneUIntOrInt(UUIDHelpers::getHighBytes(uuid[i]), end, false, false);
Impl::executeOneUIntOrInt(UUIDHelpers::getLowBytes(uuid[i]), end, false, true);
pos += end - begin;
out_offsets[i] = pos;

View File

@ -203,18 +203,15 @@ struct ConvertImpl
}
}
if constexpr (std::is_same_v<FromDataType, DataTypeUUID> && std::is_same_v<ToDataType,DataTypeUInt128>)
if constexpr (std::is_same_v<FromDataType, DataTypeUUID> && std::is_same_v<ToDataType, DataTypeUInt128>)
{
static_assert(std::is_same_v<DataTypeUInt128::FieldType, DataTypeUUID::FieldType::UnderlyingType>, "UInt128 and UUID types must be same");
if constexpr (std::endian::native == std::endian::little)
{
vec_to[i].items[1] = vec_from[i].toUnderType().items[0];
vec_to[i].items[0] = vec_from[i].toUnderType().items[1];
}
else
{
vec_to[i] = vec_from[i].toUnderType();
}
static_assert(
std::is_same_v<DataTypeUInt128::FieldType, DataTypeUUID::FieldType::UnderlyingType>,
"UInt128 and UUID types must be same");
vec_to[i].items[1] = vec_from[i].toUnderType().items[0];
vec_to[i].items[0] = vec_from[i].toUnderType().items[1];
continue;
}

View File

@ -0,0 +1,141 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsDateTime.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Common/DateLUT.h>
#include <Common/LocalDateTime.h>
#include <Common/logger_useful.h>
#include <Core/DecimalFunctions.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/TimezoneMixin.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/Context.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
template <typename Name>
class UTCTimestampTransform : public IFunction
{
public:
static FunctionPtr create(ContextPtr) { return std::make_shared<UTCTimestampTransform>(); }
static constexpr auto name = Name::name;
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name);
WhichDataType which_type_first(arguments[0]);
WhichDataType which_type_second(arguments[1]);
if (!which_type_first.isDateTime() && !which_type_first.isDateTime64())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument type must be datetime.", name);
if (dynamic_cast<const TimezoneMixin *>(arguments[0].get())->hasExplicitTimeZone())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument should not have explicit time zone.", name);
if (!which_type_second.isString())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 2nd argument type must be string.", name);
DataTypePtr date_time_type;
if (which_type_first.isDateTime())
date_time_type = std::make_shared<DataTypeDateTime>();
else
{
const DataTypeDateTime64 * date_time_64 = static_cast<const DataTypeDateTime64 *>(arguments[0].get());
date_time_type = std::make_shared<DataTypeDateTime64>(date_time_64->getScale());
}
return date_time_type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
{
if (arguments.size() != 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name);
ColumnWithTypeAndName arg1 = arguments[0];
ColumnWithTypeAndName arg2 = arguments[1];
const auto * time_zone_const_col = checkAndGetColumnConstData<ColumnString>(arg2.column.get());
if (!time_zone_const_col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 2nd argument of function {}. Excepted const(String).", arg2.column->getName(), name);
String time_zone_val = time_zone_const_col->getDataAt(0).toString();
auto column = result_type->createColumn();
if (WhichDataType(arg1.type).isDateTime())
{
const auto * date_time_col = checkAndGetColumn<ColumnDateTime>(arg1.column.get());
for (size_t i = 0; i < date_time_col->size(); ++i)
{
UInt32 date_time_val = date_time_col->getElement(i);
LocalDateTime date_time(date_time_val, Name::to ? DateLUT::instance("UTC") : DateLUT::instance(time_zone_val));
time_t time_val = date_time.to_time_t(Name::from ? DateLUT::instance("UTC") : DateLUT::instance(time_zone_val));
column->insert(time_val);
}
}
else if (WhichDataType(arg1.type).isDateTime64())
{
const auto * date_time_col = checkAndGetColumn<ColumnDateTime64>(arg1.column.get());
const DataTypeDateTime64 * date_time_type = static_cast<const DataTypeDateTime64 *>(arg1.type.get());
Int64 scale_multiplier = DecimalUtils::scaleMultiplier<Int64>(date_time_type->getScale());
for (size_t i = 0; i < date_time_col->size(); ++i)
{
DateTime64 date_time_val = date_time_col->getElement(i);
Int64 seconds = date_time_val.value / scale_multiplier;
Int64 micros = date_time_val.value % scale_multiplier;
LocalDateTime date_time(seconds, Name::to ? DateLUT::instance("UTC") : DateLUT::instance(time_zone_val));
time_t time_val = date_time.to_time_t(Name::from ? DateLUT::instance("UTC") : DateLUT::instance(time_zone_val));
DateTime64 date_time_64(time_val * scale_multiplier + micros);
column->insert(date_time_64);
}
}
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument can only be datetime/datatime64. ", name);
return column;
}
};
struct NameToUTCTimestamp
{
static constexpr auto name = "toUTCTimestamp";
static constexpr auto from = false;
static constexpr auto to = true;
};
struct NameFromUTCTimestamp
{
static constexpr auto name = "fromUTCTimestamp";
static constexpr auto from = true;
static constexpr auto to = false;
};
using ToUTCTimestampFunction = UTCTimestampTransform<NameToUTCTimestamp>;
using FromUTCTimestampFunction = UTCTimestampTransform<NameFromUTCTimestamp>;
}
REGISTER_FUNCTION(UTCTimestampTransform)
{
factory.registerFunction<ToUTCTimestampFunction>();
factory.registerFunction<FromUTCTimestampFunction>();
factory.registerAlias("to_utc_timestamp", NameToUTCTimestamp::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("from_utc_timestamp", NameFromUTCTimestamp::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -55,6 +55,7 @@ REGISTER_FUNCTION(CurrentDatabase)
{
factory.registerFunction<FunctionCurrentDatabase>();
factory.registerAlias("DATABASE", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("SCHEMA", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("current_database", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive);
}

View File

@ -60,9 +60,8 @@ public:
{
/// https://tools.ietf.org/html/rfc4122#section-4.4
UInt128 & impl = uuid.toUnderType();
impl.items[0] = (impl.items[0] & 0xffffffffffff0fffull) | 0x0000000000004000ull;
impl.items[1] = (impl.items[1] & 0x3fffffffffffffffull) | 0x8000000000000000ull;
UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & 0xffffffffffff0fffull) | 0x0000000000004000ull;
UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & 0x3fffffffffffffffull) | 0x8000000000000000ull;
}
return col_res;

View File

@ -1,26 +1,27 @@
#include <Functions/FunctionFactory.h>
#include <Functions/castTypeToEither.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/castTypeToEither.h>
#include <Core/callOnTypeIndex.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <Common/typeid_cast.h>
#include <Common/TransformEndianness.hpp>
#include <Common/memcpySmall.h>
#include <Common/typeid_cast.h>
#include <base/unaligned.h>
@ -261,8 +262,10 @@ public:
memcpy(static_cast<void*>(&to[i]), static_cast<const void*>(&from[i]), copy_size);
else
{
size_t offset_to = sizeof(To) > sizeof(From) ? sizeof(To) - sizeof(From) : 0;
memcpy(reinterpret_cast<char*>(&to[i]) + offset_to, static_cast<const void*>(&from[i]), copy_size);
// Handle the cases of both 128-bit representation to 256-bit and 128-bit to 64-bit or lower.
const size_t offset_from = sizeof(From) > sizeof(To) ? sizeof(From) - sizeof(To) : 0;
const size_t offset_to = sizeof(To) > sizeof(From) ? sizeof(To) - sizeof(From) : 0;
memcpy(reinterpret_cast<char *>(&to[i]) + offset_to, reinterpret_cast<const char *>(&from[i]) + offset_from, copy_size);
}
}
@ -315,7 +318,11 @@ private:
{
std::string_view data = src.getDataAt(i).toView();
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
memcpy(&data_to[offset], data.data(), std::min(n, data.size()));
#else
reverseMemcpy(&data_to[offset], data.data(), std::min(n, data.size()));
#endif
offset += n;
}
}
@ -326,7 +333,11 @@ private:
ColumnFixedString::Chars & data_to = dst.getChars();
data_to.resize(n * rows);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
memcpy(data_to.data(), src.getRawData().data(), data_to.size());
#else
reverseMemcpy(data_to.data(), src.getRawData().data(), data_to.size());
#endif
}
static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst)

View File

@ -7,6 +7,8 @@
#include <string_view>
#include <base/simd.h>
#ifdef __SSE2__
# include <emmintrin.h>
#endif
@ -73,16 +75,13 @@ struct ToValidUTF8Impl
/// Fast skip of ASCII for aarch64.
static constexpr size_t SIMD_BYTES = 16;
const char * simd_end = p + (end - p) / SIMD_BYTES * SIMD_BYTES;
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
auto get_nibble_mask = [](uint8x16_t input) -> uint64_t
{ return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); };
/// Other options include
/// vmaxvq_u8(input) < 0b10000000;
/// Used by SIMDJSON, has latency 3 for M1, 6 for everything else
/// SIMDJSON uses it for 64 byte masks, so it's a little different.
/// vmaxvq_u32(vandq_u32(input, vdupq_n_u32(0x80808080))) // u32 version has latency 3
/// shrn version has universally <=3 cycles, on servers 2 cycles.
while (p < simd_end && get_nibble_mask(vcgeq_u8(vld1q_u8(reinterpret_cast<const uint8_t *>(p)), vdupq_n_u8(0x80))) == 0)
while (p < simd_end && getNibbleMask(vcgeq_u8(vld1q_u8(reinterpret_cast<const uint8_t *>(p)), vdupq_n_u8(0x80))) == 0)
p += SIMD_BYTES;
if (!(p < end))

View File

@ -50,8 +50,8 @@ public:
/// Starts reading a file from the archive. The function returns a read buffer,
/// you can read that buffer to extract uncompressed data from the archive.
/// Several read buffers can be used at the same time in parallel.
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) = 0;
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) = 0;
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename, bool throw_on_not_found) = 0;
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter, bool throw_on_not_found) = 0;
/// It's possible to convert a file enumerator to a read buffer and vice versa.
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) = 0;

Some files were not shown because too many files have changed in this diff Show More