mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge branch 'master' into fix-annoy-index-update
This commit is contained in:
commit
76016d9593
12
.gitmodules
vendored
12
.gitmodules
vendored
@ -347,3 +347,15 @@
|
||||
[submodule "contrib/incbin"]
|
||||
path = contrib/incbin
|
||||
url = https://github.com/graphitemaster/incbin.git
|
||||
[submodule "contrib/usearch"]
|
||||
path = contrib/usearch
|
||||
url = https://github.com/unum-cloud/usearch.git
|
||||
[submodule "contrib/SimSIMD"]
|
||||
path = contrib/SimSIMD
|
||||
url = https://github.com/ashvardanian/SimSIMD.git
|
||||
[submodule "contrib/FP16"]
|
||||
path = contrib/FP16
|
||||
url = https://github.com/Maratyszcza/FP16.git
|
||||
[submodule "contrib/robin-map"]
|
||||
path = contrib/robin-map
|
||||
url = https://github.com/Tessil/robin-map.git
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <base/defines.h>
|
||||
#include <base/types.h>
|
||||
#include <base/unaligned.h>
|
||||
#include <base/simd.h>
|
||||
|
||||
#include <city.h>
|
||||
|
||||
@ -29,6 +30,11 @@
|
||||
#define CRC_INT __crc32cd
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) && defined(__ARM_NEON)
|
||||
#include <arm_neon.h>
|
||||
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* The std::string_view-like container to avoid creating strings to find substrings in the hash table.
|
||||
@ -74,14 +80,14 @@ using StringRefs = std::vector<StringRef>;
|
||||
* For more information, see hash_map_string_2.cpp
|
||||
*/
|
||||
|
||||
inline bool compareSSE2(const char * p1, const char * p2)
|
||||
inline bool compare8(const char * p1, const char * p2)
|
||||
{
|
||||
return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p1)),
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p2))));
|
||||
}
|
||||
|
||||
inline bool compareSSE2x4(const char * p1, const char * p2)
|
||||
inline bool compare64(const char * p1, const char * p2)
|
||||
{
|
||||
return 0xFFFF == _mm_movemask_epi8(
|
||||
_mm_and_si128(
|
||||
@ -101,7 +107,30 @@ inline bool compareSSE2x4(const char * p1, const char * p2)
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p2) + 3)))));
|
||||
}
|
||||
|
||||
inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
|
||||
#elif defined(__aarch64__) && defined(__ARM_NEON)
|
||||
|
||||
/// NEON variant: compares 16 bytes at p1 with 16 bytes at p2 and returns true iff every byte is equal.
/// NOTE: despite the "8" in the name, one full 128-bit register (16 bytes) is loaded from each pointer,
/// and no bounds checking is done here.
inline bool compare8(const char * p1, const char * p2)
|
||||
{
|
||||
/// vceqq_u8 yields 0xFF for each equal byte and 0x00 otherwise; getNibbleMask folds that into 64 bits.
uint64_t mask = getNibbleMask(vceqq_u8(
|
||||
vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2))));
|
||||
/// All 64 bits set means all 16 byte comparisons matched.
return 0xFFFFFFFFFFFFFFFF == mask;
|
||||
}
|
||||
|
||||
/// NEON variant: compares 64 bytes at p1 with 64 bytes at p2 and returns true iff every byte is equal.
/// The range is processed as four 16-byte chunks (offsets 0, 16, 32, 48); the per-chunk comparison
/// results are AND-ed together so that a single mask check covers all 64 bytes.
inline bool compare64(const char * p1, const char * p2)
|
||||
{
|
||||
uint64_t mask = getNibbleMask(vandq_u8(
|
||||
vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2))),
|
||||
vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 16)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 16)))),
|
||||
vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 32)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 32))),
|
||||
vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 48)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 48))))));
|
||||
/// A byte that differed in any chunk clears its lane in the AND-ed result, so the mask is no longer all ones.
return 0xFFFFFFFFFFFFFFFF == mask;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON))
|
||||
|
||||
inline bool memequalWide(const char * p1, const char * p2, size_t size)
|
||||
{
|
||||
/** The order of branches and the trick with overlapping comparisons
|
||||
* are the same as in memcpy implementation.
|
||||
@ -138,7 +167,7 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
|
||||
|
||||
while (size >= 64)
|
||||
{
|
||||
if (compareSSE2x4(p1, p2))
|
||||
if (compare64(p1, p2))
|
||||
{
|
||||
p1 += 64;
|
||||
p2 += 64;
|
||||
@ -150,17 +179,16 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
|
||||
|
||||
switch (size / 16)
|
||||
{
|
||||
case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]];
|
||||
case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]];
|
||||
case 1: if (!compareSSE2(p1, p2)) return false;
|
||||
case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]];
|
||||
case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]];
|
||||
case 1: if (!compare8(p1, p2)) return false;
|
||||
}
|
||||
|
||||
return compareSSE2(p1 + size - 16, p2 + size - 16);
|
||||
return compare8(p1 + size - 16, p2 + size - 16);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
inline bool operator== (StringRef lhs, StringRef rhs)
|
||||
{
|
||||
if (lhs.size != rhs.size)
|
||||
@ -169,8 +197,8 @@ inline bool operator== (StringRef lhs, StringRef rhs)
|
||||
if (lhs.size == 0)
|
||||
return true;
|
||||
|
||||
#if defined(__SSE2__)
|
||||
return memequalSSE2Wide(lhs.data, rhs.data, lhs.size);
|
||||
#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON))
|
||||
return memequalWide(lhs.data, rhs.data, lhs.size);
|
||||
#else
|
||||
return 0 == memcmp(lhs.data, rhs.data, lhs.size);
|
||||
#endif
|
||||
|
14
base/base/simd.h
Normal file
14
base/base/simd.h
Normal file
@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(__aarch64__) && defined(__ARM_NEON)
|
||||
|
||||
# include <arm_neon.h>
|
||||
# pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
/// The input is viewed as eight 16-bit lanes; each lane is shifted right by 4 and narrowed to 8 bits,
/// so every one of the 16 input bytes contributes 4 bits to the result.
/// For a byte-wise comparison result (each byte 0x00 or 0xFF) the nibble for a byte is 0x0 or 0xF,
/// hence an all-equal comparison produces 0xFFFFFFFFFFFFFFFF.
|
||||
inline uint64_t getNibbleMask(uint8x16_t res)
|
||||
{
|
||||
return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(res), 4)), 0);
|
||||
}
|
||||
|
||||
#endif
|
11
contrib/CMakeLists.txt
vendored
11
contrib/CMakeLists.txt
vendored
@ -196,6 +196,17 @@ if (ARCH_S390X)
|
||||
add_contrib(crc32-s390x-cmake crc32-s390x)
|
||||
endif()
|
||||
add_contrib (annoy-cmake annoy)
|
||||
|
||||
option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
|
||||
if (ENABLE_USEARCH)
|
||||
add_contrib (FP16-cmake FP16)
|
||||
add_contrib (robin-map-cmake robin-map)
|
||||
add_contrib (SimSIMD-cmake SimSIMD)
|
||||
add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimSIMD
|
||||
else ()
|
||||
message(STATUS "Not using USearch")
|
||||
endif ()
|
||||
|
||||
add_contrib (xxHash-cmake xxHash)
|
||||
|
||||
add_contrib (libbcrypt-cmake libbcrypt)
|
||||
|
1
contrib/FP16
vendored
Submodule
1
contrib/FP16
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 0a92994d729ff76a58f692d3028ca1b64b145d91
|
1
contrib/FP16-cmake/CMakeLists.txt
Normal file
1
contrib/FP16-cmake/CMakeLists.txt
Normal file
@ -0,0 +1 @@
|
||||
# See contrib/usearch-cmake/CMakeLists.txt
|
1
contrib/SimSIMD
vendored
Submodule
1
contrib/SimSIMD
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf
|
1
contrib/SimSIMD-cmake/CMakeLists.txt
Normal file
1
contrib/SimSIMD-cmake/CMakeLists.txt
Normal file
@ -0,0 +1 @@
|
||||
# See contrib/usearch-cmake/CMakeLists.txt
|
2
contrib/boost
vendored
2
contrib/boost
vendored
@ -1 +1 @@
|
||||
Subproject commit bb179652862b528d94a9032a784796c4db846c3f
|
||||
Subproject commit 063a9372b4ae304e869a5c5724971d0501552731
|
@ -19,6 +19,12 @@ add_library (_boost_filesystem ${SRCS_FILESYSTEM})
|
||||
add_library (boost::filesystem ALIAS _boost_filesystem)
|
||||
target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
|
||||
|
||||
if (OS_LINUX)
|
||||
target_compile_definitions (_boost_filesystem PRIVATE
|
||||
BOOST_FILESYSTEM_HAS_POSIX_AT_APIS=1
|
||||
)
|
||||
endif ()
|
||||
|
||||
# headers-only
|
||||
|
||||
add_library (_boost_headers_only INTERFACE)
|
||||
|
@ -1,6 +1,7 @@
|
||||
option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES})
|
||||
if (ARCH_AARCH64)
|
||||
# Disable ISA-L libray on aarch64.
|
||||
|
||||
# ISA-L is only available for x86-64, so it shall be disabled for other platforms
|
||||
if (NOT ARCH_AMD64)
|
||||
set (ENABLE_ISAL_LIBRARY OFF)
|
||||
endif ()
|
||||
|
||||
|
@ -147,7 +147,7 @@ target_compile_definitions(_libarchive PUBLIC
|
||||
target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")
|
||||
|
||||
if (TARGET ch_contrib::xz)
|
||||
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1)
|
||||
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1 HAVE_LIBLZMA=1)
|
||||
target_link_libraries(_libarchive PRIVATE ch_contrib::xz)
|
||||
endif()
|
||||
|
||||
@ -156,6 +156,16 @@ if (TARGET ch_contrib::zlib)
|
||||
target_link_libraries(_libarchive PRIVATE ch_contrib::zlib)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::zstd)
|
||||
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1)
|
||||
target_link_libraries(_libarchive PRIVATE ch_contrib::zstd)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::bzip2)
|
||||
target_compile_definitions(_libarchive PUBLIC HAVE_BZLIB_H=1)
|
||||
target_link_libraries(_libarchive PRIVATE ch_contrib::bzip2)
|
||||
endif()
|
||||
|
||||
if (OS_LINUX)
|
||||
target_compile_definitions(
|
||||
_libarchive PUBLIC
|
||||
|
2
contrib/llvm-project
vendored
2
contrib/llvm-project
vendored
@ -1 +1 @@
|
||||
Subproject commit 4ef26de16c229429141e424375142c9b03234b66
|
||||
Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0
|
2
contrib/orc
vendored
2
contrib/orc
vendored
@ -1 +1 @@
|
||||
Subproject commit 568d1d60c250af1890f226c182bc15bd8cc94cf1
|
||||
Subproject commit a20d1d9d7ad4a4be7b7ba97588e16ca8b9abb2b6
|
1
contrib/robin-map
vendored
Submodule
1
contrib/robin-map
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d
|
1
contrib/robin-map-cmake/CMakeLists.txt
Normal file
1
contrib/robin-map-cmake/CMakeLists.txt
Normal file
@ -0,0 +1 @@
|
||||
# See contrib/usearch-cmake/CMakeLists.txt
|
2
contrib/snappy
vendored
2
contrib/snappy
vendored
@ -1 +1 @@
|
||||
Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf
|
||||
Subproject commit 6ebb5b1ab8801ea3fde103c5c29f5ab86df5fe7a
|
1
contrib/usearch
vendored
Submodule
1
contrib/usearch
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 387b78b28b17b8954024ffc81e97cbcfa10d1f30
|
17
contrib/usearch-cmake/CMakeLists.txt
Normal file
17
contrib/usearch-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,17 @@
|
||||
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
|
||||
set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include")
|
||||
|
||||
set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16")
|
||||
set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map")
|
||||
# The SimSIMD submodule is checked out at contrib/SimSIMD (see .gitmodules); its headers live in <dir>/include.
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
|
||||
|
||||
add_library(_usearch INTERFACE)
|
||||
|
||||
target_include_directories(_usearch SYSTEM INTERFACE
|
||||
${FP16_PROJECT_DIR}/include
|
||||
${ROBIN_MAP_PROJECT_DIR}/include
|
||||
${SIMSIMD_PROJECT_DIR}/include
|
||||
${USEARCH_SOURCE_DIR})
|
||||
|
||||
add_library(ch_contrib::usearch ALIAS _usearch)
|
||||
target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH)
|
@ -19,9 +19,9 @@
|
||||
<max_threads>12</max_threads>
|
||||
|
||||
<!-- disable JIT for perf tests -->
|
||||
<compile_expressions>1</compile_expressions>
|
||||
<compile_aggregate_expressions>1</compile_aggregate_expressions>
|
||||
<compile_sort_description>1</compile_sort_description>
|
||||
<compile_expressions>0</compile_expressions>
|
||||
<compile_aggregate_expressions>0</compile_aggregate_expressions>
|
||||
<compile_sort_description>0</compile_sort_description>
|
||||
|
||||
<!-- Don't fail some prewarm queries too early -->
|
||||
<timeout_before_checking_execution_speed>60</timeout_before_checking_execution_speed>
|
||||
|
@ -63,6 +63,7 @@ configure
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
|
||||
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
|
||||
start
|
||||
@ -93,6 +94,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
|
||||
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
|
||||
start
|
||||
|
45
docs/changelogs/v23.3.9.55-lts.md
Normal file
45
docs/changelogs/v23.3.9.55-lts.md
Normal file
@ -0,0 +1,45 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.3.9.55-lts (b9c5c8622d3) FIXME as compared to v23.3.8.21-lts (1675f2264f3)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#52213](https://github.com/ClickHouse/ClickHouse/issues/52213): Do not store blocks in `ANY` hash join if nothing is inserted. [#48633](https://github.com/ClickHouse/ClickHouse/pull/48633) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#52826](https://github.com/ClickHouse/ClickHouse/issues/52826): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#53019](https://github.com/ClickHouse/ClickHouse/issues/53019): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#53288](https://github.com/ClickHouse/ClickHouse/issues/53288): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud, the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#53461](https://github.com/ClickHouse/ClickHouse/issues/53461): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Init and destroy ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
|
||||
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
|
@ -13,7 +13,7 @@ If more than one table is required, it is highly recommended to use the [Materia
|
||||
|
||||
``` sql
|
||||
CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64)
|
||||
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
|
||||
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_table', 'postgres_user', 'postgres_password')
|
||||
PRIMARY KEY key;
|
||||
```
|
||||
|
||||
|
@ -142,13 +142,15 @@ was specified for ANN indexes, the default value is 100 million.
|
||||
|
||||
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
|
||||
|
||||
- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch)
|
||||
|
||||
## Annoy {#annoy}
|
||||
|
||||
Annoy indexes are currently experimental; to use them, you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
|
||||
disabled on ARM due to memory safety problems with the algorithm.
|
||||
|
||||
This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which is based on a recursive division of the
|
||||
space in random linear surfaces (lines in 2D, planes in 3D etc.).
|
||||
This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random
|
||||
linear surfaces (lines in 2D, planes in 3D etc.).
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
|
||||
@ -221,3 +223,59 @@ SETTINGS annoy_index_search_k_nodes=100;
|
||||
The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
|
||||
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
||||
:::
|
||||
## USearch {#usearch}
|
||||
|
||||
This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
|
||||
algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent
|
||||
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
|
||||
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors
|
||||
that are expensive to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further
|
||||
distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient
|
||||
navigation around immutable persistent files, without loading them into RAM.
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/UMrhB3icP9w"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
Syntax to create a USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_usearch_index
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Syntax to create a USearch index over a [Tuple](../../../sql-reference/data-types/tuple.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_usearch_index
|
||||
(
|
||||
id Int64,
|
||||
vectors Tuple(Float32[, Float32[, ...]]),
|
||||
INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
USearch currently supports two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, derived from cosine similarity, is one minus the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
@ -196,6 +196,7 @@ SELECT * FROM nestedt FORMAT TSV
|
||||
- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`.
|
||||
- [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`.
|
||||
- [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
|
||||
- [input_format_tsv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_allow_variable_number_of_columns) - allow variable number of columns in TSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
|
||||
|
||||
## TabSeparatedRaw {#tabseparatedraw}
|
||||
|
||||
@ -473,7 +474,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
|
||||
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
|
||||
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
|
||||
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
|
||||
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
|
||||
|
||||
## CSVWithNames {#csvwithnames}
|
||||
@ -502,9 +503,10 @@ the types from input data will be compared with the types of the corresponding c
|
||||
|
||||
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings.
|
||||
|
||||
If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
|
||||
|
||||
If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, trailing empty lines at the end of file will be skipped.
|
||||
Additional settings:
|
||||
- [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) - enables automatic detection of header with names and types if any. Default value - `true`.
|
||||
- [input_format_custom_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_skip_trailing_empty_lines) - skip trailing empty lines at the end of file. Default value - `false`.
|
||||
- [input_format_custom_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_custom_allow_variable_number_of_columns) - allow variable number of columns in CustomSeparated format, ignore extra columns and use default values on missing columns. Default value - `false`.
|
||||
|
||||
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
|
||||
|
||||
@ -1262,6 +1264,7 @@ SELECT * FROM json_each_row_nested
|
||||
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
|
||||
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
|
||||
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`.
|
||||
- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`.
|
||||
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
|
||||
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
|
||||
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
|
||||
|
BIN
docs/en/interfaces/images/mysql1.png
Normal file
BIN
docs/en/interfaces/images/mysql1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 232 KiB |
BIN
docs/en/interfaces/images/mysql2.png
Normal file
BIN
docs/en/interfaces/images/mysql2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 102 KiB |
BIN
docs/en/interfaces/images/mysql3.png
Normal file
BIN
docs/en/interfaces/images/mysql3.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 37 KiB |
BIN
docs/en/interfaces/images/mysql4.png
Normal file
BIN
docs/en/interfaces/images/mysql4.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 88 KiB |
BIN
docs/en/interfaces/images/mysql5.png
Normal file
BIN
docs/en/interfaces/images/mysql5.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 246 KiB |
@ -6,7 +6,34 @@ sidebar_label: MySQL Interface
|
||||
|
||||
# MySQL Interface
|
||||
|
||||
ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
|
||||
ClickHouse supports the MySQL wire protocol. This allows tools that are MySQL-compatible to interact with ClickHouse seamlessly (e.g. [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md)).
|
||||
|
||||
## Enabling the MySQL Interface On ClickHouse Cloud
|
||||
|
||||
1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab
|
||||
|
||||
![Credentials screen - Prompt](./images/mysql1.png)
|
||||
|
||||
2. Toggle the switch to enable the MySQL interface for this specific service. This will expose port `3306` for this service and prompt you with your MySQL connection screen that includes your unique MySQL username. The password will be the same as the service's default user password.
|
||||
|
||||
![Credentials screen - Enabled MySQL](./images/mysql2.png)
|
||||
|
||||
Alternatively, in order to enable the MySQL interface for an existing service:
|
||||
|
||||
1. Ensure your service is in the `Running` state, then click on the "View connection string" button for the service you want to enable the MySQL interface for
|
||||
|
||||
![Connection screen - Prompt MySQL](./images/mysql3.png)
|
||||
|
||||
2. Toggle the switch to enable the MySQL interface for this specific service. This will prompt you to enter the default password.
|
||||
|
||||
![Connection screen - Prompt MySQL](./images/mysql4.png)
|
||||
|
||||
3. After entering the password, you will be shown the MySQL connection string for this service
|
||||
![Connection screen - MySQL Enabled](./images/mysql5.png)
|
||||
|
||||
## Enabling the MySQL Interface On Self-managed ClickHouse
|
||||
|
||||
Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files):
|
||||
|
||||
``` xml
|
||||
<clickhouse>
|
||||
@ -20,7 +47,7 @@ Startup your ClickHouse server and look for a log message similar to the followi
|
||||
{} <Information> Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004
|
||||
```
|
||||
|
||||
## Connect mysql to ClickHouse
|
||||
## Connect MySQL to ClickHouse
|
||||
|
||||
The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse:
|
||||
|
||||
|
@ -221,6 +221,10 @@ Default: 1024
|
||||
|
||||
Size of cache for index marks. Zero means disabled.
|
||||
|
||||
:::note
|
||||
This setting can be modified at runtime and will take effect immediately.
|
||||
:::
|
||||
|
||||
Type: UInt64
|
||||
|
||||
Default: 0
|
||||
@ -230,6 +234,10 @@ Default: 0
|
||||
|
||||
Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.
|
||||
|
||||
:::note
|
||||
This setting can be modified at runtime and will take effect immediately.
|
||||
:::
|
||||
|
||||
Type: UInt64
|
||||
|
||||
Default: 0
|
||||
@ -255,6 +263,10 @@ Default: SLRU
|
||||
|
||||
Size of cache for marks (index of MergeTree family of tables).
|
||||
|
||||
:::note
|
||||
This setting can be modified at runtime and will take effect immediately.
|
||||
:::
|
||||
|
||||
Type: UInt64
|
||||
|
||||
Default: 5368709120
|
||||
@ -288,7 +300,7 @@ Default: 1000
|
||||
Limit on total number of concurrently executed queries. Zero means unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users.
|
||||
|
||||
:::note
|
||||
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
:::
|
||||
|
||||
Type: UInt64
|
||||
@ -300,7 +312,7 @@ Default: 0
|
||||
Limit on total number of concurrent insert queries. Zero means Unlimited.
|
||||
|
||||
:::note
|
||||
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
:::
|
||||
|
||||
Type: UInt64
|
||||
@ -312,7 +324,7 @@ Default: 0
|
||||
Limit on total number of concurrent select queries. Zero means Unlimited.
|
||||
|
||||
:::note
|
||||
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
:::
|
||||
|
||||
Type: UInt64
|
||||
@ -456,6 +468,10 @@ Sets the cache size (in bytes) for mapped files. This setting allows avoiding fr
|
||||
|
||||
Note that the amount of data in mapped files does not consume memory directly and is not accounted for in query or server memory usage — because this memory can be discarded similar to the OS page cache. The cache is dropped (the files are closed) automatically on the removal of old parts in tables of the MergeTree family, also it can be dropped manually by the `SYSTEM DROP MMAP CACHE` query.
|
||||
|
||||
:::note
|
||||
This setting can be modified at runtime and will take effect immediately.
|
||||
:::
|
||||
|
||||
Type: UInt64
|
||||
|
||||
Default: 1000
|
||||
@ -605,6 +621,10 @@ There is one shared cache for the server. Memory is allocated on demand. The cac
|
||||
|
||||
The uncompressed cache is advantageous for very short queries in individual cases.
|
||||
|
||||
:::note
|
||||
This setting can be modified at runtime and will take effect immediately.
|
||||
:::
|
||||
|
||||
Type: UInt64
|
||||
|
||||
Default: 0
|
||||
|
@ -627,6 +627,13 @@ Column type should be String. If value is empty, default names `row_{i}`will be
|
||||
|
||||
Default value: ''.
|
||||
|
||||
### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns}
|
||||
|
||||
Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats.
|
||||
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## TSV format settings {#tsv-format-settings}
|
||||
|
||||
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}
|
||||
@ -764,6 +771,13 @@ When enabled, trailing empty lines at the end of TSV file will be skipped.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### input_format_tsv_allow_variable_number_of_columns {#input_format_tsv_allow_variable_number_of_columns}
|
||||
|
||||
Allow variable number of columns in rows in TSV input format.
|
||||
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## CSV format settings {#csv-format-settings}
|
||||
|
||||
### format_csv_delimiter {#format_csv_delimiter}
|
||||
@ -955,9 +969,11 @@ Result
|
||||
```text
|
||||
" string "
|
||||
```
|
||||
|
||||
### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
|
||||
|
||||
ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values.
|
||||
Allow variable number of columns in rows in CSV input format.
|
||||
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
@ -1571,6 +1587,13 @@ When enabled, trailing empty lines at the end of file in CustomSeparated format
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### input_format_custom_allow_variable_number_of_columns {#input_format_custom_allow_variable_number_of_columns}
|
||||
|
||||
Allow variable number of columns in rows in CustomSeparated input format.
|
||||
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## Regexp format settings {#regexp-format-settings}
|
||||
|
||||
### format_regexp_escaping_rule {#format_regexp_escaping_rule}
|
||||
|
@ -1819,6 +1819,72 @@ Result:
|
||||
└────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## toUTCTimestamp
|
||||
|
||||
Converts a DateTime/DateTime64 value from another time zone to a UTC timestamp.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
toUTCTimestamp(time_val, time_zone)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `time_val` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
|
||||
- `time_zone` — A String type const value or an expression representing the time zone. [String types](../../sql-reference/data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- DateTime/DateTime64 in text form
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT toUTCTimestamp(toDateTime('2023-03-16'), 'Asia/Shanghai');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toUTCTimestamp(toDateTime('2023-03-16'),'Asia/Shanghai')┐
|
||||
│ 2023-03-15 16:00:00 │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## fromUTCTimestamp
|
||||
|
||||
Converts a DateTime/DateTime64 value from the UTC time zone to a timestamp in another time zone.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
fromUTCTimestamp(time_val, time_zone)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `time_val` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
|
||||
- `time_zone` — A String type const value or an expression representing the time zone. [String types](../../sql-reference/data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- DateTime/DateTime64 in text form
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00', 3), 'Asia/Shanghai');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00',3),'Asia/Shanghai')─┐
|
||||
│ 2023-03-16 18:00:00.000 │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Related content
|
||||
|
||||
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
|
||||
|
@ -66,13 +66,13 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
|
||||
|
||||
## DROP DNS CACHE
|
||||
|
||||
Resets ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
|
||||
Clears ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
|
||||
|
||||
For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters.
|
||||
|
||||
## DROP MARK CACHE
|
||||
|
||||
Resets the mark cache.
|
||||
Clears the mark cache.
|
||||
|
||||
## DROP REPLICA
|
||||
|
||||
@ -106,22 +106,18 @@ Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica
|
||||
|
||||
## DROP UNCOMPRESSED CACHE
|
||||
|
||||
Reset the uncompressed data cache.
|
||||
Clears the uncompressed data cache.
|
||||
The uncompressed data cache is enabled/disabled with the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache).
|
||||
Its size can be configured using the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size).
|
||||
|
||||
## DROP COMPILED EXPRESSION CACHE
|
||||
|
||||
Reset the compiled expression cache.
|
||||
Clears the compiled expression cache.
|
||||
The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions).
|
||||
|
||||
## DROP QUERY CACHE
|
||||
|
||||
Resets the [query cache](../../operations/query-cache.md).
|
||||
|
||||
```sql
|
||||
SYSTEM DROP QUERY CACHE [ON CLUSTER cluster_name]
|
||||
```
|
||||
Clears the [query cache](../../operations/query-cache.md).
|
||||
|
||||
## FLUSH LOGS
|
||||
|
||||
@ -443,9 +439,9 @@ SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QU
|
||||
```
|
||||
|
||||
- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped.
|
||||
- If `QUERIES ALL` modifier is specified, all protocols are stopped.
|
||||
- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped.
|
||||
- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped.
|
||||
- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause.
|
||||
- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause.
|
||||
- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause.
|
||||
|
||||
### SYSTEM START LISTEN
|
||||
|
||||
|
@ -668,8 +668,7 @@ void LocalServer::processConfig()
|
||||
uncompressed_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
if (uncompressed_cache_size)
|
||||
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
|
||||
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
|
||||
|
||||
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
|
||||
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
|
||||
@ -680,8 +679,7 @@ void LocalServer::processConfig()
|
||||
mark_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
|
||||
}
|
||||
if (mark_cache_size)
|
||||
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
|
||||
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
|
||||
|
||||
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
|
||||
if (index_uncompressed_cache_size > max_cache_size)
|
||||
@ -689,8 +687,7 @@ void LocalServer::processConfig()
|
||||
index_uncompressed_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
if (index_uncompressed_cache_size)
|
||||
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
|
||||
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
|
||||
|
||||
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
|
||||
if (index_mark_cache_size > max_cache_size)
|
||||
@ -698,8 +695,7 @@ void LocalServer::processConfig()
|
||||
index_mark_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
if (index_mark_cache_size)
|
||||
global_context->setIndexMarkCache(index_mark_cache_size);
|
||||
global_context->setIndexMarkCache(index_mark_cache_size);
|
||||
|
||||
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
|
||||
if (mmap_cache_size > max_cache_size)
|
||||
@ -707,11 +703,10 @@ void LocalServer::processConfig()
|
||||
mmap_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
if (mmap_cache_size)
|
||||
global_context->setMMappedFileCache(mmap_cache_size);
|
||||
global_context->setMMappedFileCache(mmap_cache_size);
|
||||
|
||||
/// In Server.cpp (./clickhouse-server), we would initialize the query cache here.
|
||||
/// Intentionally not doing this in clickhouse-local as it doesn't make sense.
|
||||
/// Initialize a dummy query cache.
|
||||
global_context->setQueryCache(0, 0, 0, 0);
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
|
||||
|
@ -1105,6 +1105,69 @@ try
|
||||
if (config().has("macros"))
|
||||
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
|
||||
|
||||
/// Set up caches.
|
||||
|
||||
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * server_settings.cache_size_to_ram_max_ratio);
|
||||
|
||||
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
|
||||
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
|
||||
if (uncompressed_cache_size > max_cache_size)
|
||||
{
|
||||
uncompressed_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
|
||||
|
||||
String mark_cache_policy = server_settings.mark_cache_policy;
|
||||
size_t mark_cache_size = server_settings.mark_cache_size;
|
||||
if (mark_cache_size > max_cache_size)
|
||||
{
|
||||
mark_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
|
||||
}
|
||||
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
|
||||
|
||||
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
|
||||
if (index_uncompressed_cache_size > max_cache_size)
|
||||
{
|
||||
index_uncompressed_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
|
||||
|
||||
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
|
||||
if (index_mark_cache_size > max_cache_size)
|
||||
{
|
||||
index_mark_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
global_context->setIndexMarkCache(index_mark_cache_size);
|
||||
|
||||
size_t mmap_cache_size = server_settings.mmap_cache_size;
|
||||
if (mmap_cache_size > max_cache_size)
|
||||
{
|
||||
mmap_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
global_context->setMMappedFileCache(mmap_cache_size);
|
||||
|
||||
size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
|
||||
size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
|
||||
size_t query_cache_query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
|
||||
size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
|
||||
if (query_cache_max_size_in_bytes > max_cache_size)
|
||||
{
|
||||
query_cache_max_size_in_bytes = max_cache_size;
|
||||
LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows);
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
|
||||
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
|
||||
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
|
||||
#endif
|
||||
|
||||
/// Initialize main config reloader.
|
||||
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
|
||||
|
||||
@ -1324,7 +1387,14 @@ try
|
||||
|
||||
global_context->updateStorageConfiguration(*config);
|
||||
global_context->updateInterserverCredentials(*config);
|
||||
|
||||
global_context->updateUncompressedCacheConfiguration(*config);
|
||||
global_context->updateMarkCacheConfiguration(*config);
|
||||
global_context->updateIndexUncompressedCacheConfiguration(*config);
|
||||
global_context->updateIndexMarkCacheConfiguration(*config);
|
||||
global_context->updateMMappedFileCacheConfiguration(*config);
|
||||
global_context->updateQueryCacheConfiguration(*config);
|
||||
|
||||
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
|
||||
#if USE_SSL
|
||||
CertificateReloader::instance().tryLoad(*config);
|
||||
@ -1484,19 +1554,6 @@ try
|
||||
/// Limit on total number of concurrently executed queries.
|
||||
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
|
||||
|
||||
/// Set up caches.
|
||||
|
||||
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * server_settings.cache_size_to_ram_max_ratio);
|
||||
|
||||
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
|
||||
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
|
||||
if (uncompressed_cache_size > max_cache_size)
|
||||
{
|
||||
uncompressed_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
|
||||
|
||||
/// Load global settings from default_profile and system_profile.
|
||||
global_context->setDefaultProfiles(config());
|
||||
|
||||
@ -1512,61 +1569,6 @@ try
|
||||
server_settings.async_insert_queue_flush_on_shutdown));
|
||||
}
|
||||
|
||||
String mark_cache_policy = server_settings.mark_cache_policy;
|
||||
size_t mark_cache_size = server_settings.mark_cache_size;
|
||||
if (!mark_cache_size)
|
||||
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
|
||||
if (mark_cache_size > max_cache_size)
|
||||
{
|
||||
mark_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
|
||||
}
|
||||
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
|
||||
|
||||
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
|
||||
if (index_uncompressed_cache_size > max_cache_size)
|
||||
{
|
||||
index_uncompressed_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
if (index_uncompressed_cache_size)
|
||||
global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size);
|
||||
|
||||
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
|
||||
if (index_mark_cache_size > max_cache_size)
|
||||
{
|
||||
index_mark_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
if (index_mark_cache_size)
|
||||
global_context->setIndexMarkCache(server_settings.index_mark_cache_size);
|
||||
|
||||
size_t mmap_cache_size = server_settings.mmap_cache_size;
|
||||
if (mmap_cache_size > max_cache_size)
|
||||
{
|
||||
mmap_cache_size = max_cache_size;
|
||||
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
if (mmap_cache_size)
|
||||
global_context->setMMappedFileCache(server_settings.mmap_cache_size);
|
||||
|
||||
size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
|
||||
size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
|
||||
size_t query_cache_query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
|
||||
size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
|
||||
if (query_cache_max_size_in_bytes > max_cache_size)
|
||||
{
|
||||
query_cache_max_size_in_bytes = max_cache_size;
|
||||
LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
|
||||
}
|
||||
global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows);
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
|
||||
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
|
||||
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
|
||||
#endif
|
||||
|
||||
/// Set path for format schema files
|
||||
fs::path format_schema_path(config().getString("format_schema_path", path / "format_schemas/"));
|
||||
global_context->setFormatSchemaPath(format_schema_path);
|
||||
@ -2072,6 +2074,9 @@ void Server::createServers(
|
||||
|
||||
for (const auto & protocol : protocols)
|
||||
{
|
||||
if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol))
|
||||
continue;
|
||||
|
||||
std::string prefix = "protocols." + protocol + ".";
|
||||
std::string port_name = prefix + "port";
|
||||
std::string description {"<undefined> protocol"};
|
||||
@ -2081,9 +2086,6 @@ void Server::createServers(
|
||||
if (!config.has(prefix + "port"))
|
||||
continue;
|
||||
|
||||
if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name))
|
||||
continue;
|
||||
|
||||
std::vector<std::string> hosts;
|
||||
if (config.has(prefix + "host"))
|
||||
hosts.push_back(config.getString(prefix + "host"));
|
||||
|
@ -11,6 +11,7 @@
|
||||
--background: linear-gradient(to bottom, #00CCFF, #00D0D0);
|
||||
--chart-background: white;
|
||||
--shadow-color: rgba(0, 0, 0, 0.25);
|
||||
--moving-shadow-color: rgba(0, 0, 0, 0.5);
|
||||
--input-shadow-color: rgba(0, 255, 0, 1);
|
||||
--error-color: red;
|
||||
--auth-error-color: white;
|
||||
@ -34,6 +35,7 @@
|
||||
--background: #151C2C;
|
||||
--chart-background: #1b2834;
|
||||
--shadow-color: rgba(0, 0, 0, 0);
|
||||
--moving-shadow-color: rgba(255, 255, 255, 0.25);
|
||||
--input-shadow-color: rgba(255, 128, 0, 0.25);
|
||||
--error-color: #F66;
|
||||
--legend-background: rgba(255, 255, 255, 0.25);
|
||||
@ -91,6 +93,21 @@
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.chart-maximized {
|
||||
flex: 1 100%;
|
||||
height: 75vh
|
||||
}
|
||||
|
||||
.chart-moving {
|
||||
z-index: 11;
|
||||
box-shadow: 0 0 2rem var(--moving-shadow-color);
|
||||
}
|
||||
|
||||
.chart-displaced {
|
||||
opacity: 75%;
|
||||
filter: blur(1px);
|
||||
}
|
||||
|
||||
.chart div { position: absolute; }
|
||||
|
||||
.inputs {
|
||||
@ -303,6 +320,7 @@
|
||||
}
|
||||
.chart-buttons a {
|
||||
margin-right: 0.25rem;
|
||||
user-select: none;
|
||||
}
|
||||
.chart-buttons a:hover {
|
||||
color: var(--chart-button-hover-color);
|
||||
@ -454,11 +472,13 @@
|
||||
let host = 'https://play.clickhouse.com/';
|
||||
let user = 'explorer';
|
||||
let password = '';
|
||||
let add_http_cors_header = true;
|
||||
|
||||
/// If it is hosted on server, assume that it is the address of ClickHouse.
|
||||
if (location.protocol != 'file:') {
|
||||
host = location.origin;
|
||||
user = 'default';
|
||||
add_http_cors_header = false;
|
||||
}
|
||||
|
||||
const errorCodeMessageMap = {
|
||||
@ -793,6 +813,92 @@ function insertChart(i) {
|
||||
let edit_buttons = document.createElement('div');
|
||||
edit_buttons.className = 'chart-buttons';
|
||||
|
||||
let move = document.createElement('a');
|
||||
let move_text = document.createTextNode('✥');
|
||||
move.appendChild(move_text);
|
||||
|
||||
let is_dragging = false;
|
||||
move.addEventListener('mousedown', e => {
|
||||
const idx = getCurrentIndex();
|
||||
is_dragging = true;
|
||||
chart.className = 'chart chart-moving';
|
||||
|
||||
let offset_x = e.clientX;
|
||||
let offset_y = e.clientY;
|
||||
|
||||
let displace_idx = null;
|
||||
let displace_chart = null;
|
||||
|
||||
function mouseup(e) {
|
||||
is_dragging = false;
|
||||
chart.className = 'chart';
|
||||
chart.style.left = null;
|
||||
chart.style.top = null;
|
||||
|
||||
if (displace_idx !== null) {
|
||||
const elem = queries[idx];
|
||||
queries.splice(idx, 1);
|
||||
queries.splice(displace_idx, 0, elem);
|
||||
|
||||
displace_chart.className = 'chart';
|
||||
drawAll();
|
||||
}
|
||||
}
|
||||
|
||||
function mousemove(e) {
|
||||
if (!is_dragging) {
|
||||
document.body.removeEventListener('mousemove', mousemove);
|
||||
document.body.removeEventListener('mouseup', mouseup);
|
||||
return;
|
||||
}
|
||||
|
||||
let x = e.clientX - offset_x;
|
||||
let y = e.clientY - offset_y;
|
||||
|
||||
chart.style.left = `${x}px`;
|
||||
chart.style.top = `${y}px`;
|
||||
|
||||
displace_idx = null;
|
||||
displace_chart = null;
|
||||
let current_idx = -1;
|
||||
for (const elem of charts.querySelectorAll('.chart')) {
|
||||
++current_idx;
|
||||
if (current_idx == idx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const this_rect = chart.getBoundingClientRect();
|
||||
const this_center_x = this_rect.left + this_rect.width / 2;
|
||||
const this_center_y = this_rect.top + this_rect.height / 2;
|
||||
|
||||
const elem_rect = elem.getBoundingClientRect();
|
||||
|
||||
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
|
||||
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
|
||||
|
||||
elem.className = 'chart chart-displaced';
|
||||
displace_idx = current_idx;
|
||||
displace_chart = elem;
|
||||
} else {
|
||||
elem.className = 'chart';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
document.body.addEventListener('mouseup', mouseup);
|
||||
document.body.addEventListener('mousemove', mousemove);
|
||||
});
|
||||
|
||||
let maximize = document.createElement('a');
|
||||
let maximize_text = document.createTextNode('🗖');
|
||||
maximize.appendChild(maximize_text);
|
||||
|
||||
maximize.addEventListener('click', e => {
|
||||
const idx = getCurrentIndex();
|
||||
chart.className = (chart.className == 'chart' ? 'chart chart-maximized' : 'chart');
|
||||
resize();
|
||||
});
|
||||
|
||||
let edit = document.createElement('a');
|
||||
let edit_text = document.createTextNode('✎');
|
||||
edit.appendChild(edit_text);
|
||||
@ -825,6 +931,8 @@ function insertChart(i) {
|
||||
saveState();
|
||||
});
|
||||
|
||||
edit_buttons.appendChild(move);
|
||||
edit_buttons.appendChild(maximize);
|
||||
edit_buttons.appendChild(edit);
|
||||
edit_buttons.appendChild(trash);
|
||||
|
||||
@ -962,8 +1070,6 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
|
||||
};
|
||||
}
|
||||
|
||||
let add_http_cors_header = false;
|
||||
|
||||
async function draw(idx, chart, url_params, query) {
|
||||
if (plots[idx]) {
|
||||
plots[idx].destroy();
|
||||
|
@ -46,7 +46,7 @@ void MultipleAccessStorage::setStorages(const std::vector<StoragePtr> & storages
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
nested_storages = std::make_shared<const Storages>(storages);
|
||||
ids_cache.reset();
|
||||
ids_cache.clear();
|
||||
}
|
||||
|
||||
void MultipleAccessStorage::addStorage(const StoragePtr & new_storage)
|
||||
@ -69,7 +69,7 @@ void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove)
|
||||
auto new_storages = std::make_shared<Storages>(*nested_storages);
|
||||
new_storages->erase(new_storages->begin() + index);
|
||||
nested_storages = new_storages;
|
||||
ids_cache.reset();
|
||||
ids_cache.clear();
|
||||
}
|
||||
|
||||
std::vector<StoragePtr> MultipleAccessStorage::getStorages()
|
||||
|
@ -109,7 +109,7 @@ public:
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
writeBinary(this->data(place).numerator, buf);
|
||||
writeBinaryLittleEndian(this->data(place).numerator, buf);
|
||||
|
||||
if constexpr (std::is_unsigned_v<Denominator>)
|
||||
writeVarUInt(this->data(place).denominator, buf);
|
||||
@ -119,7 +119,7 @@ public:
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readBinary(this->data(place).numerator, buf);
|
||||
readBinaryLittleEndian(this->data(place).numerator, buf);
|
||||
|
||||
if constexpr (std::is_unsigned_v<Denominator>)
|
||||
readVarUInt(this->data(place).denominator, buf);
|
||||
|
@ -375,7 +375,7 @@ void BackupImpl::readBackupMetadata()
|
||||
if (!archive_reader->fileExists(".backup"))
|
||||
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Archive {} is not a backup", backup_name_for_logging);
|
||||
setCompressedSize();
|
||||
in = archive_reader->readFile(".backup");
|
||||
in = archive_reader->readFile(".backup", /*throw_on_not_found=*/true);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -685,7 +685,7 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFileImpl(const SizeAndChecks
|
||||
{
|
||||
/// Make `read_buffer` if there is data for this backup entry in this backup.
|
||||
if (use_archive)
|
||||
read_buffer = archive_reader->readFile(info.data_file_name);
|
||||
read_buffer = archive_reader->readFile(info.data_file_name, /*throw_on_not_found=*/true);
|
||||
else
|
||||
read_buffer = reader->readFile(info.data_file_name);
|
||||
}
|
||||
|
@ -599,6 +599,10 @@ if (TARGET ch_contrib::annoy)
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::annoy)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::usearch)
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::usearch)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_rust::skim)
|
||||
dbms_target_include_directories(PRIVATE $<TARGET_PROPERTY:ch_rust::skim,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
dbms_target_link_libraries(PUBLIC ch_rust::skim)
|
||||
|
@ -151,7 +151,7 @@ public:
|
||||
std::lock_guard cache_lock(mutex);
|
||||
|
||||
/// Insert the new value only if the token is still in present in insert_tokens.
|
||||
/// (The token may be absent because of a concurrent reset() call).
|
||||
/// (The token may be absent because of a concurrent clear() call).
|
||||
bool result = false;
|
||||
auto token_it = insert_tokens.find(key);
|
||||
if (token_it != insert_tokens.end() && token_it->second.get() == token)
|
||||
@ -179,13 +179,13 @@ public:
|
||||
return cache_policy->dump();
|
||||
}
|
||||
|
||||
void reset()
|
||||
void clear()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
insert_tokens.clear();
|
||||
hits = 0;
|
||||
misses = 0;
|
||||
cache_policy->reset(lock);
|
||||
cache_policy->clear(lock);
|
||||
}
|
||||
|
||||
void remove(const Key & key)
|
||||
|
@ -270,8 +270,8 @@ std::unordered_set<String> DNSResolver::reverseResolve(const Poco::Net::IPAddres
|
||||
|
||||
void DNSResolver::dropCache()
|
||||
{
|
||||
impl->cache_host.reset();
|
||||
impl->cache_address.reset();
|
||||
impl->cache_host.clear();
|
||||
impl->cache_address.clear();
|
||||
|
||||
std::scoped_lock lock(impl->update_mutex, impl->drop_mutex);
|
||||
|
||||
|
@ -20,7 +20,7 @@ template <typename T>
|
||||
static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf)
|
||||
{
|
||||
writeChar('\'', buf);
|
||||
writeText(x.getValue(), x.getScale(), buf, {});
|
||||
writeText(x.getValue(), x.getScale(), buf, /* trailing_zeros */ true);
|
||||
writeChar('\'', buf);
|
||||
}
|
||||
|
||||
|
@ -201,11 +201,11 @@ struct HashTableCell
|
||||
void setMapped(const value_type & /*value*/) {}
|
||||
|
||||
/// Serialization, in binary and text form.
|
||||
void write(DB::WriteBuffer & wb) const { DB::writeBinary(key, wb); }
|
||||
void write(DB::WriteBuffer & wb) const { DB::writeBinaryLittleEndian(key, wb); }
|
||||
void writeText(DB::WriteBuffer & wb) const { DB::writeDoubleQuoted(key, wb); }
|
||||
|
||||
/// Deserialization, in binary and text form.
|
||||
void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
|
||||
void read(DB::ReadBuffer & rb) { DB::readBinaryLittleEndian(key, rb); }
|
||||
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
|
||||
|
||||
/// When cell pointer is moved during erase, reinsert or resize operations
|
||||
|
@ -10,11 +10,6 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct EqualWeightFunction
|
||||
{
|
||||
@ -46,8 +41,8 @@ public:
|
||||
virtual size_t count(std::lock_guard<std::mutex> & /*cache_lock*/) const = 0;
|
||||
virtual size_t maxSize(std::lock_guard<std::mutex>& /*cache_lock*/) const = 0;
|
||||
|
||||
virtual void setMaxCount(size_t /*max_count*/, std::lock_guard<std::mutex> & /* cache_lock */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for cache policy"); }
|
||||
virtual void setMaxSize(size_t /*max_size_in_bytes*/, std::lock_guard<std::mutex> & /* cache_lock */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for cache policy"); }
|
||||
virtual void setMaxCount(size_t /*max_count*/, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
|
||||
virtual void setMaxSize(size_t /*max_size_in_bytes*/, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
|
||||
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries, std::lock_guard<std::mutex> & /*cache_lock*/) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
|
||||
|
||||
/// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
|
||||
@ -60,7 +55,7 @@ public:
|
||||
|
||||
virtual void remove(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
|
||||
|
||||
virtual void reset(std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
|
||||
virtual void clear(std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
|
||||
virtual std::vector<KeyMapped> dump() const = 0;
|
||||
|
||||
protected:
|
||||
|
@ -7,9 +7,8 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Cache policy LRU evicts entries which are not used for a long time.
|
||||
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size)
|
||||
/// of that value.
|
||||
/// Cache policy LRU evicts entries which are not used for a long time. Also see cache policy SLRU for reference.
|
||||
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size) of that value.
|
||||
/// Cache starts to evict entries when their total weight exceeds max_size_in_bytes.
|
||||
/// Value weight should not change after insertion.
|
||||
/// To work with the thread-safe implementation of this class use a class "CacheBase" with first parameter "LRU"
|
||||
@ -24,11 +23,12 @@ public:
|
||||
using typename Base::OnWeightLossFunction;
|
||||
|
||||
/** Initialize LRUCachePolicy with max_size_in_bytes and max_count.
|
||||
* max_size_in_bytes == 0 means the cache accepts no entries.
|
||||
* max_count == 0 means no elements size restrictions.
|
||||
*/
|
||||
LRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, OnWeightLossFunction on_weight_loss_function_)
|
||||
: Base(std::make_unique<NoCachePolicyUserQuota>())
|
||||
, max_size_in_bytes(std::max(1uz, max_size_in_bytes_))
|
||||
, max_size_in_bytes(max_size_in_bytes_)
|
||||
, max_count(max_count_)
|
||||
, on_weight_loss_function(on_weight_loss_function_)
|
||||
{
|
||||
@ -49,7 +49,19 @@ public:
|
||||
return max_size_in_bytes;
|
||||
}
|
||||
|
||||
void reset(std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
{
|
||||
max_count = max_count_;
|
||||
removeOverflow();
|
||||
}
|
||||
|
||||
void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
{
|
||||
max_size_in_bytes = max_size_in_bytes_;
|
||||
removeOverflow();
|
||||
}
|
||||
|
||||
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
{
|
||||
queue.clear();
|
||||
cells.clear();
|
||||
@ -155,8 +167,8 @@ private:
|
||||
|
||||
/// Total weight of values.
|
||||
size_t current_size_in_bytes = 0;
|
||||
const size_t max_size_in_bytes;
|
||||
const size_t max_count;
|
||||
size_t max_size_in_bytes;
|
||||
size_t max_count;
|
||||
|
||||
WeightFunction weight_function;
|
||||
OnWeightLossFunction on_weight_loss_function;
|
||||
@ -172,10 +184,7 @@ private:
|
||||
|
||||
auto it = cells.find(key);
|
||||
if (it == cells.end())
|
||||
{
|
||||
// Queue became inconsistent
|
||||
abort();
|
||||
}
|
||||
std::terminate(); // Queue became inconsistent
|
||||
|
||||
const auto & cell = it->second;
|
||||
|
||||
@ -190,10 +199,7 @@ private:
|
||||
on_weight_loss_function(current_weight_lost);
|
||||
|
||||
if (current_size_in_bytes > (1ull << 63))
|
||||
{
|
||||
// Queue became inconsistent
|
||||
abort();
|
||||
}
|
||||
std::terminate(); // Queue became inconsistent
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -9,9 +9,8 @@ namespace DB
|
||||
{
|
||||
|
||||
/// Cache policy SLRU evicts entries which were used only once and are not used for a long time,
|
||||
/// this policy protects entries which were used more then once from a sequential scan.
|
||||
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size)
|
||||
/// of that value.
|
||||
/// this policy protects entries which were used more then once from a sequential scan. Also see cache policy LRU for reference.
|
||||
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size) of that value.
|
||||
/// Cache starts to evict entries when their total weight exceeds max_size_in_bytes.
|
||||
/// Value weight should not change after insertion.
|
||||
/// To work with the thread-safe implementation of this class use a class "CacheBase" with first parameter "SLRU"
|
||||
@ -30,8 +29,9 @@ public:
|
||||
* max_protected_size == 0 means that the default protected size is equal to half of the total max size.
|
||||
*/
|
||||
/// TODO: construct from special struct with cache policy parameters (also with max_protected_size).
|
||||
SLRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, double size_ratio, OnWeightLossFunction on_weight_loss_function_)
|
||||
SLRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, double size_ratio_, OnWeightLossFunction on_weight_loss_function_)
|
||||
: Base(std::make_unique<NoCachePolicyUserQuota>())
|
||||
, size_ratio(size_ratio_)
|
||||
, max_protected_size(static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio)))
|
||||
, max_size_in_bytes(max_size_in_bytes_)
|
||||
, max_count(max_count_)
|
||||
@ -54,7 +54,22 @@ public:
|
||||
return max_size_in_bytes;
|
||||
}
|
||||
|
||||
void reset(std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
{
|
||||
max_count = max_count_;
|
||||
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
|
||||
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
|
||||
}
|
||||
|
||||
void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
{
|
||||
max_protected_size = static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio));
|
||||
max_size_in_bytes = max_size_in_bytes_;
|
||||
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
|
||||
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
|
||||
}
|
||||
|
||||
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
{
|
||||
cells.clear();
|
||||
probationary_queue.clear();
|
||||
@ -68,12 +83,13 @@ public:
|
||||
auto it = cells.find(key);
|
||||
if (it == cells.end())
|
||||
return;
|
||||
|
||||
auto & cell = it->second;
|
||||
|
||||
current_size_in_bytes -= cell.size;
|
||||
if (cell.is_protected)
|
||||
{
|
||||
current_protected_size -= cell.size;
|
||||
}
|
||||
|
||||
auto & queue = cell.is_protected ? protected_queue : probationary_queue;
|
||||
queue.erase(cell.queue_iterator);
|
||||
cells.erase(it);
|
||||
@ -192,16 +208,17 @@ private:
|
||||
|
||||
Cells cells;
|
||||
|
||||
const double size_ratio;
|
||||
size_t current_protected_size = 0;
|
||||
size_t current_size_in_bytes = 0;
|
||||
const size_t max_protected_size;
|
||||
const size_t max_size_in_bytes;
|
||||
const size_t max_count;
|
||||
size_t max_protected_size;
|
||||
size_t max_size_in_bytes;
|
||||
size_t max_count;
|
||||
|
||||
WeightFunction weight_function;
|
||||
OnWeightLossFunction on_weight_loss_function;
|
||||
|
||||
void removeOverflow(SLRUQueue & queue, const size_t max_weight_size, size_t & current_weight_size, bool is_protected)
|
||||
void removeOverflow(SLRUQueue & queue, size_t max_weight_size, size_t & current_weight_size, bool is_protected)
|
||||
{
|
||||
size_t current_weight_lost = 0;
|
||||
size_t queue_size = queue.size();
|
||||
@ -223,8 +240,7 @@ private:
|
||||
{
|
||||
need_remove = [&]()
|
||||
{
|
||||
return ((max_count != 0 && cells.size() > max_count)
|
||||
|| (current_weight_size > max_weight_size)) && (queue_size > 0);
|
||||
return ((max_count != 0 && cells.size() > max_count) || (current_weight_size > max_weight_size)) && (queue_size > 0);
|
||||
};
|
||||
}
|
||||
|
||||
@ -234,10 +250,7 @@ private:
|
||||
|
||||
auto it = cells.find(key);
|
||||
if (it == cells.end())
|
||||
{
|
||||
// Queue became inconsistent
|
||||
abort();
|
||||
}
|
||||
std::terminate(); // Queue became inconsistent
|
||||
|
||||
auto & cell = it->second;
|
||||
|
||||
@ -262,10 +275,7 @@ private:
|
||||
on_weight_loss_function(current_weight_lost);
|
||||
|
||||
if (current_size_in_bytes > (1ull << 63))
|
||||
{
|
||||
// Queue became inconsistent
|
||||
abort();
|
||||
}
|
||||
std::terminate(); // Queue became inconsistent
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -121,7 +121,7 @@ public:
|
||||
max_size_in_bytes = max_size_in_bytes_;
|
||||
}
|
||||
|
||||
void reset(std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
{
|
||||
cache.clear();
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <optional>
|
||||
#include <base/types.h>
|
||||
#include <base/simd.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Poco/UTF8Encoding.h>
|
||||
|
||||
@ -72,16 +73,13 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
|
||||
res += __builtin_popcount(_mm_movemask_epi8(
|
||||
_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(data)), threshold)));
|
||||
#elif defined(__aarch64__) && defined(__ARM_NEON)
|
||||
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
|
||||
auto get_nibble_mask
|
||||
= [](uint8x16_t input) -> uint64_t { return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); };
|
||||
constexpr auto bytes_sse = 16;
|
||||
const auto * src_end_sse = data + size / bytes_sse * bytes_sse;
|
||||
|
||||
const auto threshold = vdupq_n_s8(0xBF);
|
||||
|
||||
for (; data < src_end_sse; data += bytes_sse)
|
||||
res += std::popcount(get_nibble_mask(vcgtq_s8(vld1q_s8(reinterpret_cast<const int8_t *>(data)), threshold)));
|
||||
res += std::popcount(getNibbleMask(vcgtq_s8(vld1q_s8(reinterpret_cast<const int8_t *>(data)), threshold)));
|
||||
res >>= 2;
|
||||
#endif
|
||||
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include <bit>
|
||||
#include <cstdint>
|
||||
|
||||
#include <base/simd.h>
|
||||
|
||||
#include <Core/Defines.h>
|
||||
|
||||
|
||||
@ -504,11 +506,6 @@ inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
|
||||
# include <arm_neon.h>
|
||||
# pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
inline uint64_t getNibbleMask(uint8x16_t res)
|
||||
{
|
||||
return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(res), 4)), 0);
|
||||
}
|
||||
|
||||
template <typename Char>
|
||||
inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
|
||||
{
|
||||
|
@ -92,7 +92,7 @@ TEST(SLRUCache, removeFromProtected)
|
||||
ASSERT_TRUE(value == nullptr);
|
||||
}
|
||||
|
||||
TEST(SLRUCache, reset)
|
||||
TEST(SLRUCache, clear)
|
||||
{
|
||||
using SimpleCacheBase = DB::CacheBase<int, int>;
|
||||
auto slru_cache = SimpleCacheBase("SLRU", /*max_size_in_bytes=*/10, /*max_count=*/0, /*size_ratio*/0.5);
|
||||
@ -101,7 +101,7 @@ TEST(SLRUCache, reset)
|
||||
|
||||
slru_cache.set(2, std::make_shared<int>(4)); /// add to protected_queue
|
||||
|
||||
slru_cache.reset();
|
||||
slru_cache.clear();
|
||||
|
||||
auto value = slru_cache.get(1);
|
||||
ASSERT_TRUE(value == nullptr);
|
||||
|
@ -73,8 +73,8 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
|
||||
const char * const source_end = source + source_size;
|
||||
while (source < source_end)
|
||||
{
|
||||
T curr_src = unalignedLoad<T>(source);
|
||||
unalignedStore<T>(dest, curr_src - prev_src);
|
||||
T curr_src = unalignedLoadLittleEndian<T>(source);
|
||||
unalignedStoreLittleEndian<T>(dest, curr_src - prev_src);
|
||||
prev_src = curr_src;
|
||||
|
||||
source += sizeof(T);
|
||||
@ -94,10 +94,10 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
|
||||
const char * const source_end = source + source_size;
|
||||
while (source < source_end)
|
||||
{
|
||||
accumulator += unalignedLoad<T>(source);
|
||||
accumulator += unalignedLoadLittleEndian<T>(source);
|
||||
if (dest + sizeof(accumulator) > output_end) [[unlikely]]
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
|
||||
unalignedStore<T>(dest, accumulator);
|
||||
unalignedStoreLittleEndian<T>(dest, accumulator);
|
||||
|
||||
source += sizeof(T);
|
||||
dest += sizeof(T);
|
||||
|
@ -86,6 +86,37 @@ struct DataTypeDecimalTrait
|
||||
}
|
||||
};
|
||||
|
||||
/// Calculates result = x * multiplier + delta.
|
||||
/// If the multiplication or the addition overflows, returns false or throws DECIMAL_OVERFLOW.
|
||||
template <typename T, bool throw_on_error>
|
||||
inline bool multiplyAdd(const T & x, const T & multiplier, const T & delta, T & result)
|
||||
{
|
||||
T multiplied = 0;
|
||||
if (common::mulOverflow(x, multiplier, multiplied))
|
||||
{
|
||||
if constexpr (throw_on_error)
|
||||
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (common::addOverflow(multiplied, delta, result))
|
||||
{
|
||||
if constexpr (throw_on_error)
|
||||
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T multiplyAdd(const T & x, const T & multiplier, const T & delta)
|
||||
{
|
||||
T res;
|
||||
multiplyAdd<T, true>(x, multiplier, delta, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Make a decimal value from whole and fractional components with given scale multiplier.
|
||||
* where scale_multiplier = scaleMultiplier<T>(scale)
|
||||
* this is to reduce number of calls to scaleMultiplier when scale is known.
|
||||
@ -104,23 +135,10 @@ inline bool decimalFromComponentsWithMultiplierImpl(
|
||||
{
|
||||
using T = typename DecimalType::NativeType;
|
||||
const auto fractional_sign = whole < 0 ? -1 : 1;
|
||||
|
||||
T whole_scaled = 0;
|
||||
if (common::mulOverflow(whole, scale_multiplier, whole_scaled))
|
||||
{
|
||||
if constexpr (throw_on_error)
|
||||
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
|
||||
return false;
|
||||
}
|
||||
|
||||
T value;
|
||||
if (common::addOverflow(whole_scaled, fractional_sign * (fractional % scale_multiplier), value))
|
||||
{
|
||||
if constexpr (throw_on_error)
|
||||
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
|
||||
if (!multiplyAdd<T, throw_on_error>(
|
||||
whole, scale_multiplier, fractional_sign * (fractional % scale_multiplier), value))
|
||||
return false;
|
||||
}
|
||||
|
||||
result = DecimalType(value);
|
||||
return true;
|
||||
}
|
||||
|
@ -138,7 +138,7 @@ template <typename T> bool decimalEqual(T x, T y, UInt32 x_scale, UInt32 y_scale
|
||||
template <typename T> bool decimalLess(T x, T y, UInt32 x_scale, UInt32 y_scale);
|
||||
template <typename T> bool decimalLessOrEqual(T x, T y, UInt32 x_scale, UInt32 y_scale);
|
||||
|
||||
template <typename T>
|
||||
template <is_decimal T>
|
||||
class DecimalField
|
||||
{
|
||||
public:
|
||||
@ -838,7 +838,7 @@ template <> struct Field::EnumToType<Field::Types::Decimal32> { using Type = Dec
|
||||
template <> struct Field::EnumToType<Field::Types::Decimal64> { using Type = DecimalField<Decimal64>; };
|
||||
template <> struct Field::EnumToType<Field::Types::Decimal128> { using Type = DecimalField<Decimal128>; };
|
||||
template <> struct Field::EnumToType<Field::Types::Decimal256> { using Type = DecimalField<Decimal256>; };
|
||||
template <> struct Field::EnumToType<Field::Types::AggregateFunctionState> { using Type = DecimalField<AggregateFunctionStateData>; };
|
||||
template <> struct Field::EnumToType<Field::Types::AggregateFunctionState> { using Type = AggregateFunctionStateData; };
|
||||
template <> struct Field::EnumToType<Field::Types::CustomType> { using Type = CustomType; };
|
||||
template <> struct Field::EnumToType<Field::Types::Bool> { using Type = UInt64; };
|
||||
|
||||
|
@ -39,7 +39,7 @@ namespace DB
|
||||
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
|
||||
M(Int32, max_connections, 1024, "Max server connections.", 0) \
|
||||
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
|
||||
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating asynchronous metrics.", 0) \
|
||||
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
|
||||
M(String, default_database, "default", "Default database name.", 0) \
|
||||
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
|
||||
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \
|
||||
|
@ -644,7 +644,7 @@ class IColumn;
|
||||
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
|
||||
M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \
|
||||
M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \
|
||||
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
|
||||
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw'", 0) \
|
||||
M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \
|
||||
\
|
||||
M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \
|
||||
@ -779,6 +779,7 @@ class IColumn;
|
||||
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
|
||||
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
|
||||
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
|
||||
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
|
||||
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
|
||||
M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \
|
||||
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
|
||||
@ -876,8 +877,10 @@ class IColumn;
|
||||
M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
|
||||
M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
|
||||
M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \
|
||||
M(Bool, input_format_parquet_filter_push_down, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \
|
||||
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
|
||||
M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
|
||||
M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \
|
||||
M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
|
||||
M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \
|
||||
M(Bool, input_format_arrow_allow_missing_columns, false, "Allow missing columns while reading Arrow input formats", 0) \
|
||||
@ -894,6 +897,10 @@ class IColumn;
|
||||
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
|
||||
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
|
||||
M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \
|
||||
M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \
|
||||
M(Bool, input_format_tsv_allow_variable_number_of_columns, false, "Ignore extra columns in TSV input (if file has more columns than expected) and treat missing fields in TSV input as default values", 0) \
|
||||
M(Bool, input_format_custom_allow_variable_number_of_columns, false, "Ignore extra columns in CustomSeparated input (if file has more columns than expected) and treat missing fields in CustomSeparated input as default values", 0) \
|
||||
M(Bool, input_format_json_compact_allow_variable_number_of_columns, false, "Ignore extra columns in JSONCompact(EachRow) input (if file has more columns than expected) and treat missing fields in JSONCompact(EachRow) input as default values", 0) \
|
||||
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
|
||||
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
|
||||
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
|
||||
@ -1042,7 +1049,6 @@ class IColumn;
|
||||
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
|
||||
\
|
||||
M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
|
||||
M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \
|
||||
M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \
|
||||
|
||||
// End of FORMAT_FACTORY_SETTINGS
|
||||
|
@ -46,6 +46,7 @@ public:
|
||||
bool canBeUsedInBooleanContext() const override { return dictionary_type->canBeUsedInBooleanContext(); }
|
||||
bool isValueRepresentedByNumber() const override { return dictionary_type->isValueRepresentedByNumber(); }
|
||||
bool isValueRepresentedByInteger() const override { return dictionary_type->isValueRepresentedByInteger(); }
|
||||
bool isValueRepresentedByUnsignedInteger() const override { return dictionary_type->isValueRepresentedByUnsignedInteger(); }
|
||||
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
|
||||
bool haveMaximumSizeOfValue() const override { return dictionary_type->haveMaximumSizeOfValue(); }
|
||||
size_t getMaximumSizeOfValueInMemory() const override { return dictionary_type->getMaximumSizeOfValueInMemory(); }
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/parseDateTimeBestEffort.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -145,12 +146,29 @@ void SerializationDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & is
|
||||
char maybe_quote = *istr.position();
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
{
|
||||
++istr.position();
|
||||
|
||||
readText(x, istr, settings, time_zone, utc_time_zone);
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
readText(x, istr, settings, time_zone, utc_time_zone);
|
||||
assertChar(maybe_quote, istr);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
|
||||
{
|
||||
readText(x, istr, settings, time_zone, utc_time_zone);
|
||||
}
|
||||
/// Best effort parsing supports datetime in format like "01.01.2000, 00:00:00"
|
||||
/// and can mistakenly read comma as a part of datetime.
|
||||
/// For example data "...,01.01.2000,some string,..." cannot be parsed correctly.
|
||||
/// To fix this problem we first read CSV string and then try to parse it as datetime.
|
||||
else
|
||||
{
|
||||
String datetime_str;
|
||||
readCSVString(datetime_str, istr, settings.csv);
|
||||
ReadBufferFromString buf(datetime_str);
|
||||
readText(x, buf, settings, time_zone, utc_time_zone);
|
||||
}
|
||||
}
|
||||
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/parseDateTimeBestEffort.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -143,12 +144,29 @@ void SerializationDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer &
|
||||
char maybe_quote = *istr.position();
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
{
|
||||
++istr.position();
|
||||
|
||||
readText(x, scale, istr, settings, time_zone, utc_time_zone);
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
readText(x, scale, istr, settings, time_zone, utc_time_zone);
|
||||
assertChar(maybe_quote, istr);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
|
||||
{
|
||||
readText(x, scale, istr, settings, time_zone, utc_time_zone);
|
||||
}
|
||||
/// Best effort parsing supports datetime in format like "01.01.2000, 00:00:00"
|
||||
/// and can mistakenly read comma as a part of datetime.
|
||||
/// For example data "...,01.01.2000,some string,..." cannot be parsed correctly.
|
||||
/// To fix this problem we first read CSV string and then try to parse it as datetime.
|
||||
else
|
||||
{
|
||||
String datetime_str;
|
||||
readCSVString(datetime_str, istr, settings.csv);
|
||||
ReadBufferFromString buf(datetime_str);
|
||||
readText(x, scale, buf, settings, time_zone, utc_time_zone);
|
||||
}
|
||||
}
|
||||
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
}
|
||||
|
@ -830,6 +830,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
|
||||
query_context->setSetting("allow_experimental_hash_functions", 1);
|
||||
query_context->setSetting("allow_experimental_object_type", 1);
|
||||
query_context->setSetting("allow_experimental_annoy_index", 1);
|
||||
query_context->setSetting("allow_experimental_usearch_index", 1);
|
||||
query_context->setSetting("allow_experimental_bigint_types", 1);
|
||||
query_context->setSetting("allow_experimental_window_functions", 1);
|
||||
query_context->setSetting("allow_experimental_geo_types", 1);
|
||||
|
@ -23,11 +23,10 @@ StoragePtr IDatabase::getTable(const String & name, ContextPtr context) const
|
||||
return storage;
|
||||
TableNameHints hints(this->shared_from_this(), context);
|
||||
std::vector<String> names = hints.getHints(name);
|
||||
if (!names.empty())
|
||||
{
|
||||
if (names.empty())
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));
|
||||
else
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist. Maybe you meant {}?", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name), backQuoteIfNeed(names[0]));
|
||||
}
|
||||
else throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));
|
||||
}
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
|
||||
|
@ -86,6 +86,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
|
||||
format_settings.custom.try_detect_header = settings.input_format_custom_detect_header;
|
||||
format_settings.custom.skip_trailing_empty_lines = settings.input_format_custom_skip_trailing_empty_lines;
|
||||
format_settings.custom.allow_variable_number_of_columns = settings.input_format_custom_allow_variable_number_of_columns;
|
||||
format_settings.date_time_input_format = settings.date_time_input_format;
|
||||
format_settings.date_time_output_format = settings.date_time_output_format;
|
||||
format_settings.interval.output_format = settings.interval_output_format;
|
||||
@ -115,6 +116,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8;
|
||||
format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name;
|
||||
format_settings.json.allow_object_type = context->getSettingsRef().allow_experimental_object_type;
|
||||
format_settings.json.compact_allow_variable_number_of_columns = settings.input_format_json_compact_allow_variable_number_of_columns;
|
||||
format_settings.null_as_default = settings.input_format_null_as_default;
|
||||
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
|
||||
format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size;
|
||||
@ -122,6 +124,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.parquet.output_version = settings.output_format_parquet_version;
|
||||
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
|
||||
format_settings.parquet.preserve_order = settings.input_format_parquet_preserve_order;
|
||||
format_settings.parquet.filter_push_down = settings.input_format_parquet_filter_push_down;
|
||||
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
|
||||
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
|
||||
@ -163,6 +166,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.tsv.skip_first_lines = settings.input_format_tsv_skip_first_lines;
|
||||
format_settings.tsv.try_detect_header = settings.input_format_tsv_detect_header;
|
||||
format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines;
|
||||
format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns;
|
||||
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
|
||||
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
|
||||
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
|
||||
@ -186,6 +190,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
|
||||
format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string;
|
||||
format_settings.orc.output_compression_method = settings.output_format_orc_compression_method;
|
||||
format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder;
|
||||
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
|
||||
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
|
||||
format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference;
|
||||
|
@ -90,9 +90,6 @@ private:
|
||||
const FormatSettings & settings)>;
|
||||
|
||||
// Incompatible with FileSegmentationEngine.
|
||||
//
|
||||
// In future we may also want to pass some information about WHERE conditions (SelectQueryInfo?)
|
||||
// and get some information about projections (min/max/count per column per row group).
|
||||
using RandomAccessInputCreator = std::function<InputFormatPtr(
|
||||
ReadBuffer & buf,
|
||||
const Block & header,
|
||||
|
@ -175,6 +175,7 @@ struct FormatSettings
|
||||
EscapingRule escaping_rule = EscapingRule::Escaped;
|
||||
bool try_detect_header = true;
|
||||
bool skip_trailing_empty_lines = false;
|
||||
bool allow_variable_number_of_columns = false;
|
||||
} custom;
|
||||
|
||||
struct
|
||||
@ -197,6 +198,7 @@ struct FormatSettings
|
||||
bool validate_types_from_metadata = true;
|
||||
bool validate_utf8 = false;
|
||||
bool allow_object_type = false;
|
||||
bool compact_allow_variable_number_of_columns = false;
|
||||
} json;
|
||||
|
||||
struct
|
||||
@ -229,6 +231,7 @@ struct FormatSettings
|
||||
bool allow_missing_columns = false;
|
||||
bool skip_columns_with_unsupported_types_in_schema_inference = false;
|
||||
bool case_insensitive_column_matching = false;
|
||||
bool filter_push_down = true;
|
||||
std::unordered_set<int> skip_row_groups = {};
|
||||
bool output_string_as_string = false;
|
||||
bool output_fixed_string_as_fixed_byte_array = true;
|
||||
@ -317,6 +320,7 @@ struct FormatSettings
|
||||
UInt64 skip_first_lines = 0;
|
||||
bool try_detect_header = true;
|
||||
bool skip_trailing_empty_lines = false;
|
||||
bool allow_variable_number_of_columns = false;
|
||||
} tsv;
|
||||
|
||||
struct
|
||||
@ -344,6 +348,7 @@ struct FormatSettings
|
||||
std::unordered_set<int> skip_stripes = {};
|
||||
bool output_string_as_string = false;
|
||||
ORCCompression output_compression_method = ORCCompression::NONE;
|
||||
bool use_fast_decoder = true;
|
||||
} orc;
|
||||
|
||||
/// For capnProto format we should determine how to
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
#include <type_traits>
|
||||
#include <Core/AccurateComparison.h>
|
||||
#include <Core/DecimalFunctions.h>
|
||||
#include <Common/DateLUTImpl.h>
|
||||
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
@ -14,7 +15,6 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/castTypeToEither.h>
|
||||
#include <Functions/extractTimeZoneFromFunctionArguments.h>
|
||||
#include <Functions/TransformDateTime64.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
@ -36,7 +36,9 @@ namespace ErrorCodes
|
||||
/// Corresponding types:
|
||||
/// - UInt16 => DataTypeDate
|
||||
/// - UInt32 => DataTypeDateTime
|
||||
/// - Int32 => DataTypeDate32
|
||||
/// - DateTime64 => DataTypeDateTime64
|
||||
/// - Int8 => error
|
||||
/// Please note that INPUT and OUTPUT types may differ, e.g.:
|
||||
/// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime'
|
||||
/// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime'
|
||||
@ -45,35 +47,27 @@ struct AddNanosecondsImpl
|
||||
{
|
||||
static constexpr auto name = "addNanoseconds";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
|
||||
auto division = std::div(t.fractional * multiplier + delta, static_cast<Int64>(1000000000));
|
||||
return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
|
||||
return t * multiplier + delta;
|
||||
return DateTime64(DecimalUtils::multiplyAdd(t.value, multiplier, delta));
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9);
|
||||
return static_cast<UInt32>(t * multiplier + delta);
|
||||
return DateTime64(DecimalUtils::multiplyAdd(static_cast<Int64>(t), multiplier, delta));
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addNanoSeconds() cannot be used with Date");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addNanoseconds() cannot be used with Date");
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addNanoSeconds() cannot be used with Date32");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addNanoseconds() cannot be used with Date32");
|
||||
}
|
||||
};
|
||||
|
||||
@ -81,43 +75,29 @@ struct AddMicrosecondsImpl
|
||||
{
|
||||
static constexpr auto name = "addMicroseconds";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
|
||||
if (scale <= 6)
|
||||
{
|
||||
auto division = std::div((t.fractional + delta), static_cast<Int64>(10e6));
|
||||
return {t.whole * multiplier + division.quot, division.rem};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(10e6 * multiplier));
|
||||
return {t.whole + division.quot, division.rem};
|
||||
}
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
|
||||
return scale <= 6 ? t * multiplier + delta : t + delta * multiplier;
|
||||
return DateTime64(scale <= 6
|
||||
? DecimalUtils::multiplyAdd(t.value, multiplier, delta)
|
||||
: DecimalUtils::multiplyAdd(delta, multiplier, t.value));
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(6);
|
||||
return static_cast<UInt32>(t * multiplier + delta);
|
||||
return DateTime64(DecimalUtils::multiplyAdd(static_cast<Int64>(t), multiplier, delta));
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMicroSeconds() cannot be used with Date");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMicroseconds() cannot be used with Date");
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMicroSeconds() cannot be used with Date32");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMicroseconds() cannot be used with Date32");
|
||||
}
|
||||
};
|
||||
|
||||
@ -125,43 +105,29 @@ struct AddMillisecondsImpl
|
||||
{
|
||||
static constexpr auto name = "addMilliseconds";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
|
||||
if (scale <= 3)
|
||||
{
|
||||
auto division = std::div((t.fractional + delta), static_cast<Int64>(1000));
|
||||
return {t.whole * multiplier + division.quot, division.rem};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(1000 * multiplier));
|
||||
return {t.whole + division.quot,division.rem};
|
||||
}
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
|
||||
return scale <= 3 ? t * multiplier + delta : t + delta * multiplier;
|
||||
return DateTime64(scale <= 3
|
||||
? DecimalUtils::multiplyAdd(t.value, multiplier, delta)
|
||||
: DecimalUtils::multiplyAdd(delta, multiplier, t.value));
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(3);
|
||||
return static_cast<UInt32>(t * multiplier + delta);
|
||||
return DateTime64(DecimalUtils::multiplyAdd(static_cast<Int64>(t), multiplier, delta));
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMilliSeconds() cannot be used with Date");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMilliseconds() cannot be used with Date");
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "addMilliSeconds() cannot be used with Date32");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMilliseconds() cannot be used with Date32");
|
||||
}
|
||||
};
|
||||
|
||||
@ -169,16 +135,10 @@ struct AddSecondsImpl
|
||||
{
|
||||
static constexpr auto name = "addSeconds";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return {t.whole + delta, t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
|
||||
return DateTime64(DecimalUtils::multiplyAdd(delta, DecimalUtils::scaleMultiplier<DateTime64>(scale), t.value));
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
@ -189,6 +149,7 @@ struct AddSecondsImpl
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
// use default datetime64 scale
|
||||
static_assert(DataTypeDateTime64::default_scale == 3, "");
|
||||
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000;
|
||||
}
|
||||
|
||||
@ -202,12 +163,6 @@ struct AddMinutesImpl
|
||||
{
|
||||
static constexpr auto name = "addMinutes";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return {t.whole + delta * 60, t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
@ -222,6 +177,7 @@ struct AddMinutesImpl
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
// use default datetime64 scale
|
||||
static_assert(DataTypeDateTime64::default_scale == 3, "");
|
||||
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000;
|
||||
}
|
||||
|
||||
@ -235,12 +191,6 @@ struct AddHoursImpl
|
||||
{
|
||||
static constexpr auto name = "addHours";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return {t.whole + delta * 3600, t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
@ -255,6 +205,7 @@ struct AddHoursImpl
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
// use default datetime64 scale
|
||||
static_assert(DataTypeDateTime64::default_scale == 3, "");
|
||||
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000;
|
||||
}
|
||||
|
||||
@ -268,12 +219,6 @@ struct AddDaysImpl
|
||||
{
|
||||
static constexpr auto name = "addDays";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addDays(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
|
||||
{
|
||||
@ -302,12 +247,6 @@ struct AddWeeksImpl
|
||||
{
|
||||
static constexpr auto name = "addWeeks";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addWeeks(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
|
||||
{
|
||||
@ -336,12 +275,6 @@ struct AddMonthsImpl
|
||||
{
|
||||
static constexpr auto name = "addMonths";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addMonths(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
|
||||
{
|
||||
@ -370,12 +303,6 @@ struct AddQuartersImpl
|
||||
{
|
||||
static constexpr auto name = "addQuarters";
|
||||
|
||||
static inline DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addQuarters(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
|
||||
{
|
||||
@ -404,12 +331,6 @@ struct AddYearsImpl
|
||||
{
|
||||
static constexpr auto name = "addYears";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addYears(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
|
||||
{
|
||||
@ -581,11 +502,11 @@ namespace date_and_time_type_details
|
||||
// Compile-time mapping of value (DataType::FieldType) types to corresponding DataType
|
||||
template <typename FieldType> struct ResultDataTypeMap {};
|
||||
template <> struct ResultDataTypeMap<UInt16> { using ResultDataType = DataTypeDate; };
|
||||
template <> struct ResultDataTypeMap<Int16> { using ResultDataType = DataTypeDate; };
|
||||
template <> struct ResultDataTypeMap<UInt32> { using ResultDataType = DataTypeDateTime; };
|
||||
template <> struct ResultDataTypeMap<Int32> { using ResultDataType = DataTypeDate32; };
|
||||
template <> struct ResultDataTypeMap<DateTime64> { using ResultDataType = DataTypeDateTime64; };
|
||||
template <> struct ResultDataTypeMap<Int64> { using ResultDataType = DataTypeDateTime64; };
|
||||
template <> struct ResultDataTypeMap<Int8> { using ResultDataType = DataTypeInt8; }; // error
|
||||
}
|
||||
|
||||
template <typename Transform>
|
||||
@ -705,6 +626,10 @@ public:
|
||||
|
||||
return std::make_shared<DataTypeDateTime64>(target_scale.value_or(DataTypeDateTime64::default_scale), std::move(timezone));
|
||||
}
|
||||
else if constexpr (std::is_same_v<ResultDataType, DataTypeInt8>)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot be used with {}", getName(), arguments[0].type->getName());
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type in datetime add interval function");
|
||||
}
|
||||
|
141
src/Functions/UTCTimestampTransform.cpp
Normal file
141
src/Functions/UTCTimestampTransform.cpp
Normal file
@ -0,0 +1,141 @@
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnsDateTime.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/DateLUT.h>
|
||||
#include <Common/LocalDateTime.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Core/DecimalFunctions.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/TimezoneMixin.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename Name>
|
||||
class UTCTimestampTransform : public IFunction
|
||||
{
|
||||
public:
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<UTCTimestampTransform>(); }
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.size() != 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name);
|
||||
WhichDataType which_type_first(arguments[0]);
|
||||
WhichDataType which_type_second(arguments[1]);
|
||||
if (!which_type_first.isDateTime() && !which_type_first.isDateTime64())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument type must be datetime.", name);
|
||||
if (dynamic_cast<const TimezoneMixin *>(arguments[0].get())->hasExplicitTimeZone())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument should not have explicit time zone.", name);
|
||||
if (!which_type_second.isString())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 2nd argument type must be string.", name);
|
||||
DataTypePtr date_time_type;
|
||||
if (which_type_first.isDateTime())
|
||||
date_time_type = std::make_shared<DataTypeDateTime>();
|
||||
else
|
||||
{
|
||||
const DataTypeDateTime64 * date_time_64 = static_cast<const DataTypeDateTime64 *>(arguments[0].get());
|
||||
date_time_type = std::make_shared<DataTypeDateTime64>(date_time_64->getScale());
|
||||
}
|
||||
return date_time_type;
|
||||
}
|
||||
|
||||
/// Converts every row of the 1st argument (DateTime or DateTime64) using the time zone named by the
/// constant String 2nd argument. Each stored value is read as a LocalDateTime through one DateLUT and
/// converted back to a timestamp through the other; Name::to / Name::from select which side is UTC.
/// For DateTime64 the sub-second part is carried over unchanged.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH, ILLEGAL_COLUMN or ILLEGAL_TYPE_OF_ARGUMENT on unsupported input.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
{
    if (arguments.size() != 2)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name);

    /// Bind by reference: copying ColumnWithTypeAndName would copy two shared pointers and a string.
    const ColumnWithTypeAndName & arg1 = arguments[0];
    const ColumnWithTypeAndName & arg2 = arguments[1];

    const auto * time_zone_const_col = checkAndGetColumnConstData<ColumnString>(arg2.column.get());
    if (!time_zone_const_col)
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 2nd argument of function {}. Expected const(String).", arg2.column->getName(), name);

    const String time_zone_val = time_zone_const_col->getDataAt(0).toString();

    const WhichDataType which_first(arg1.type);
    if (!which_first.isDateTime() && !which_first.isDateTime64())
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 1st argument can only be datetime/datetime64.", name);

    /// The time zone is constant for the whole column, so resolve both lookup tables once
    /// instead of once (twice, in fact) per row.
    const auto & utc_lut = DateLUT::instance("UTC");
    const auto & zone_lut = DateLUT::instance(time_zone_val);
    const auto & decode_lut = Name::to ? utc_lut : zone_lut;   /// used to interpret the input value
    const auto & encode_lut = Name::from ? utc_lut : zone_lut; /// used to produce the output value

    auto column = result_type->createColumn();

    if (which_first.isDateTime())
    {
        const auto * date_time_col = checkAndGetColumn<ColumnDateTime>(arg1.column.get());
        if (!date_time_col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 1st argument of function {}.", arg1.column->getName(), name);

        const size_t rows = date_time_col->size();
        column->reserve(rows);
        for (size_t i = 0; i < rows; ++i)
        {
            const UInt32 date_time_val = date_time_col->getElement(i);
            LocalDateTime date_time(date_time_val, decode_lut);
            const time_t time_val = date_time.to_time_t(encode_lut);
            column->insert(time_val);
        }
    }
    else
    {
        const auto * date_time_col = checkAndGetColumn<ColumnDateTime64>(arg1.column.get());
        if (!date_time_col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 1st argument of function {}.", arg1.column->getName(), name);

        const auto * date_time_type = static_cast<const DataTypeDateTime64 *>(arg1.type.get());
        const Int64 scale_multiplier = DecimalUtils::scaleMultiplier<Int64>(date_time_type->getScale());

        const size_t rows = date_time_col->size();
        column->reserve(rows);
        for (size_t i = 0; i < rows; ++i)
        {
            const DateTime64 date_time_val = date_time_col->getElement(i);

            /// Split into whole seconds and the sub-second remainder (in units of 1 / scale_multiplier).
            const Int64 seconds = date_time_val.value / scale_multiplier;
            const Int64 fractional = date_time_val.value % scale_multiplier;

            LocalDateTime date_time(seconds, decode_lut);
            const time_t time_val = date_time.to_time_t(encode_lut);

            DateTime64 date_time_64(time_val * scale_multiplier + fractional);
            column->insert(date_time_64);
        }
    }

    return column;
}
|
||||
|
||||
};
|
||||
|
||||
/// Tag type for the `toUTCTimestamp` function: carries the SQL-visible name and the two
/// direction flags that UTCTimestampTransform reads as Name::from / Name::to.
struct NameToUTCTimestamp
{
    static constexpr const char * name = "toUTCTimestamp";
    static constexpr bool from = false;
    static constexpr bool to = true;
};
|
||||
|
||||
/// Tag type for the `fromUTCTimestamp` function: carries the SQL-visible name and the two
/// direction flags that UTCTimestampTransform reads as Name::from / Name::to.
struct NameFromUTCTimestamp
{
    static constexpr const char * name = "fromUTCTimestamp";
    static constexpr bool from = true;
    static constexpr bool to = false;
};
|
||||
|
||||
/// Concrete function types: UTCTimestampTransform instantiated with each of the two name/direction tag structs.
using ToUTCTimestampFunction = UTCTimestampTransform<NameToUTCTimestamp>;
|
||||
using FromUTCTimestampFunction = UTCTimestampTransform<NameFromUTCTimestamp>;
|
||||
}
|
||||
|
||||
/// Registers both UTC conversion functions, each followed by its case-insensitive snake_case alias.
REGISTER_FUNCTION(UTCTimestampTransform)
{
    /// toUTCTimestamp / to_utc_timestamp
    factory.registerFunction<ToUTCTimestampFunction>();
    factory.registerAlias("to_utc_timestamp", NameToUTCTimestamp::name, FunctionFactory::CaseInsensitive);

    /// fromUTCTimestamp / from_utc_timestamp
    factory.registerFunction<FromUTCTimestampFunction>();
    factory.registerAlias("from_utc_timestamp", NameFromUTCTimestamp::name, FunctionFactory::CaseInsensitive);
}
|
||||
|
||||
}
|
@ -7,6 +7,8 @@
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include <base/simd.h>
|
||||
|
||||
#ifdef __SSE2__
|
||||
# include <emmintrin.h>
|
||||
#endif
|
||||
@ -73,16 +75,13 @@ struct ToValidUTF8Impl
|
||||
/// Fast skip of ASCII for aarch64.
|
||||
static constexpr size_t SIMD_BYTES = 16;
|
||||
const char * simd_end = p + (end - p) / SIMD_BYTES * SIMD_BYTES;
|
||||
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
|
||||
auto get_nibble_mask = [](uint8x16_t input) -> uint64_t
|
||||
{ return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); };
|
||||
/// Other options include
|
||||
/// vmaxvq_u8(input) < 0b10000000;
|
||||
/// Used by SIMDJSON, has latency 3 for M1, 6 for everything else
|
||||
/// SIMDJSON uses it for 64 byte masks, so it's a little different.
|
||||
/// vmaxvq_u32(vandq_u32(input, vdupq_n_u32(0x80808080))) // u32 version has latency 3
|
||||
/// shrn version has universally <=3 cycles, on servers 2 cycles.
|
||||
while (p < simd_end && get_nibble_mask(vcgeq_u8(vld1q_u8(reinterpret_cast<const uint8_t *>(p)), vdupq_n_u8(0x80))) == 0)
|
||||
while (p < simd_end && getNibbleMask(vcgeq_u8(vld1q_u8(reinterpret_cast<const uint8_t *>(p)), vdupq_n_u8(0x80))) == 0)
|
||||
p += SIMD_BYTES;
|
||||
|
||||
if (!(p < end))
|
||||
|
@ -50,8 +50,8 @@ public:
|
||||
/// Starts reading a file from the archive. The function returns a read buffer,
|
||||
/// you can read that buffer to extract uncompressed data from the archive.
|
||||
/// Several read buffers can be used at the same time in parallel.
|
||||
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) = 0;
|
||||
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) = 0;
|
||||
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename, bool throw_on_not_found) = 0;
|
||||
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter, bool throw_on_not_found) = 0;
|
||||
|
||||
/// It's possible to convert a file enumerator to a read buffer and vice versa.
|
||||
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) = 0;
|
||||
|
@ -155,7 +155,7 @@ private:
|
||||
archive_read_support_filter_all(archive);
|
||||
archive_read_support_format_all(archive);
|
||||
if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK)
|
||||
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive: {}", quoteString(path_to_archive));
|
||||
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive {}: {}", quoteString(path_to_archive), archive_error_string(archive));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -293,17 +293,21 @@ std::unique_ptr<LibArchiveReader::FileEnumerator> LibArchiveReader::firstFile()
|
||||
return std::make_unique<FileEnumeratorImpl>(std::move(handle));
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(const String & filename)
|
||||
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(const String & filename, bool throw_on_not_found)
|
||||
{
|
||||
return readFile([&](const std::string & file) { return file == filename; });
|
||||
return readFile([&](const std::string & file) { return file == filename; }, throw_on_not_found);
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(NameFilter filter)
|
||||
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(NameFilter filter, bool throw_on_not_found)
|
||||
{
|
||||
Handle handle(path_to_archive, lock_on_reading);
|
||||
if (!handle.locateFile(filter))
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive);
|
||||
{
|
||||
if (throw_on_not_found)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive);
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<ReadBufferFromLibArchive>(std::move(handle), path_to_archive);
|
||||
}
|
||||
|
||||
|
@ -34,8 +34,8 @@ public:
|
||||
/// Starts reading a file from the archive. The function returns a read buffer,
|
||||
/// you can read that buffer to extract uncompressed data from the archive.
|
||||
/// Several read buffers can be used at the same time in parallel.
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) override;
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) override;
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename, bool throw_on_not_found) override;
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter, bool throw_on_not_found) override;
|
||||
|
||||
/// It's possible to convert a file enumerator to a read buffer and vice versa.
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) override;
|
||||
|
@ -75,21 +75,22 @@ public:
|
||||
RawHandle getRawHandle() const { return raw_handle; }
|
||||
std::shared_ptr<ZipArchiveReader> getReader() const { return reader; }
|
||||
|
||||
void locateFile(const String & file_name_)
|
||||
bool locateFile(const String & file_name_)
|
||||
{
|
||||
resetFileInfo();
|
||||
bool case_sensitive = true;
|
||||
int err = unzLocateFile(raw_handle, file_name_.c_str(), reinterpret_cast<unzFileNameComparer>(static_cast<size_t>(case_sensitive)));
|
||||
if (err == UNZ_END_OF_LIST_OF_FILE)
|
||||
showError("File " + quoteString(file_name_) + " not found");
|
||||
return false;
|
||||
file_name = file_name_;
|
||||
return true;
|
||||
}
|
||||
|
||||
void locateFile(NameFilter filter)
|
||||
bool locateFile(NameFilter filter)
|
||||
{
|
||||
int err = unzGoToFirstFile(raw_handle);
|
||||
if (err == UNZ_END_OF_LIST_OF_FILE)
|
||||
showError("No file was found satisfying the filter");
|
||||
return false;
|
||||
|
||||
do
|
||||
{
|
||||
@ -97,12 +98,12 @@ public:
|
||||
resetFileInfo();
|
||||
retrieveFileInfo();
|
||||
if (filter(getFileName()))
|
||||
return;
|
||||
return true;
|
||||
|
||||
err = unzGoToNextFile(raw_handle);
|
||||
} while (err != UNZ_END_OF_LIST_OF_FILE);
|
||||
|
||||
showError("No file was found satisfying the filter");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool tryLocateFile(const String & file_name_)
|
||||
@ -513,7 +514,9 @@ bool ZipArchiveReader::fileExists(const String & filename)
|
||||
ZipArchiveReader::FileInfo ZipArchiveReader::getFileInfo(const String & filename)
|
||||
{
|
||||
auto handle = acquireHandle();
|
||||
handle.locateFile(filename);
|
||||
if (!handle.locateFile(filename))
|
||||
showError(fmt::format("File {} was not found in archive", quoteString(filename)));
|
||||
|
||||
return handle.getFileInfo();
|
||||
}
|
||||
|
||||
@ -525,17 +528,31 @@ std::unique_ptr<ZipArchiveReader::FileEnumerator> ZipArchiveReader::firstFile()
|
||||
return std::make_unique<FileEnumeratorImpl>(std::move(handle));
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(const String & filename)
|
||||
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(const String & filename, bool throw_on_not_found)
|
||||
{
|
||||
auto handle = acquireHandle();
|
||||
handle.locateFile(filename);
|
||||
if (!handle.locateFile(filename))
|
||||
{
|
||||
if (throw_on_not_found)
|
||||
showError(fmt::format("File {} was not found in archive", quoteString(filename)));
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return std::make_unique<ReadBufferFromZipArchive>(std::move(handle));
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(NameFilter filter)
|
||||
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(NameFilter filter, bool throw_on_not_found)
|
||||
{
|
||||
auto handle = acquireHandle();
|
||||
handle.locateFile(filter);
|
||||
if (!handle.locateFile(filter))
|
||||
{
|
||||
if (throw_on_not_found)
|
||||
showError(fmt::format("No file satisfying filter in archive"));
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return std::make_unique<ReadBufferFromZipArchive>(std::move(handle));
|
||||
}
|
||||
|
||||
|
@ -41,8 +41,8 @@ public:
|
||||
/// Starts reading a file from the archive. The function returns a read buffer,
|
||||
/// you can read that buffer to extract uncompressed data from the archive.
|
||||
/// Several read buffers can be used at the same time in parallel.
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) override;
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) override;
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename, bool throw_on_not_found) override;
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter, bool throw_on_not_found) override;
|
||||
|
||||
/// It's possible to convert a file enumerator to a read buffer and vice versa.
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) override;
|
||||
|
@ -24,6 +24,18 @@ std::shared_ptr<IArchiveReader> createArchiveReader(
|
||||
[[maybe_unused]] const std::function<std::unique_ptr<SeekableReadBuffer>()> & archive_read_function,
|
||||
[[maybe_unused]] size_t archive_size)
|
||||
{
|
||||
using namespace std::literals;
|
||||
static constexpr std::array tar_extensions
|
||||
{
|
||||
".tar"sv,
|
||||
".tar.gz"sv,
|
||||
".tgz"sv,
|
||||
".tar.zst"sv,
|
||||
".tzst"sv,
|
||||
".tar.xz"sv,
|
||||
".tar.bz2"sv
|
||||
};
|
||||
|
||||
if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx"))
|
||||
{
|
||||
#if USE_MINIZIP
|
||||
@ -32,7 +44,8 @@ std::shared_ptr<IArchiveReader> createArchiveReader(
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled");
|
||||
#endif
|
||||
}
|
||||
else if (path_to_archive.ends_with(".tar") || path_to_archive.ends_with("tar.gz"))
|
||||
else if (std::any_of(
|
||||
tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); }))
|
||||
{
|
||||
#if USE_LIBARCHIVE
|
||||
return std::make_shared<TarArchiveReader>(path_to_archive);
|
||||
|
@ -19,7 +19,10 @@ public:
|
||||
class ReadBufferFromOwnString : public String, public ReadBufferFromString
|
||||
{
|
||||
public:
|
||||
explicit ReadBufferFromOwnString(const String & s_): String(s_), ReadBufferFromString(*this) {}
|
||||
template <typename S>
|
||||
explicit ReadBufferFromOwnString(S && s_) : String(std::forward<S>(s_)), ReadBufferFromString(*this)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -12,6 +12,8 @@
|
||||
#include <cstdlib>
|
||||
#include <bit>
|
||||
|
||||
#include <base/simd.h>
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
@ -819,14 +821,11 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
|
||||
auto rc = vdupq_n_u8('\r');
|
||||
auto nc = vdupq_n_u8('\n');
|
||||
auto dc = vdupq_n_u8(delimiter);
|
||||
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
|
||||
auto get_nibble_mask = [](uint8x16_t input) -> uint64_t
|
||||
{ return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); };
|
||||
for (; next_pos + 15 < buf.buffer().end(); next_pos += 16)
|
||||
{
|
||||
uint8x16_t bytes = vld1q_u8(reinterpret_cast<const uint8_t *>(next_pos));
|
||||
auto eq = vorrq_u8(vorrq_u8(vceqq_u8(bytes, rc), vceqq_u8(bytes, nc)), vceqq_u8(bytes, dc));
|
||||
uint64_t bit_mask = get_nibble_mask(eq);
|
||||
uint64_t bit_mask = getNibbleMask(eq);
|
||||
if (bit_mask)
|
||||
{
|
||||
next_pos += std::countr_zero(bit_mask) >> 2;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Poco/UTF8Encoding.h>
|
||||
#include <IO/WriteBufferValidUTF8.h>
|
||||
#include <base/types.h>
|
||||
#include <base/simd.h>
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
@ -84,16 +85,13 @@ void WriteBufferValidUTF8::nextImpl()
|
||||
/// Fast skip of ASCII for aarch64.
|
||||
static constexpr size_t SIMD_BYTES = 16;
|
||||
const char * simd_end = p + (pos - p) / SIMD_BYTES * SIMD_BYTES;
|
||||
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
|
||||
auto get_nibble_mask = [](uint8x16_t input) -> uint64_t
|
||||
{ return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(input), 4)), 0); };
|
||||
/// Other options include
|
||||
/// vmaxvq_u8(input) < 0b10000000;
|
||||
/// Used by SIMDJSON, has latency 3 for M1, 6 for everything else
|
||||
/// SIMDJSON uses it for 64 byte masks, so it's a little different.
|
||||
/// vmaxvq_u32(vandq_u32(input, vdupq_n_u32(0x80808080))) // u32 version has latency 3
|
||||
/// shrn version has universally <=3 cycles, on servers 2 cycles.
|
||||
while (p < simd_end && get_nibble_mask(vcgeq_u8(vld1q_u8(reinterpret_cast<const uint8_t *>(p)), vdupq_n_u8(0x80))) == 0)
|
||||
while (p < simd_end && getNibbleMask(vcgeq_u8(vld1q_u8(reinterpret_cast<const uint8_t *>(p)), vdupq_n_u8(0x80))) == 0)
|
||||
p += SIMD_BYTES;
|
||||
|
||||
if (!(p < pos))
|
||||
|
@ -113,11 +113,11 @@ TEST_P(ArchiveReaderAndWriterTest, EmptyArchive)
|
||||
|
||||
EXPECT_FALSE(reader->fileExists("nofile.txt"));
|
||||
|
||||
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found",
|
||||
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive",
|
||||
[&]{ reader->getFileInfo("nofile.txt"); });
|
||||
|
||||
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found",
|
||||
[&]{ reader->readFile("nofile.txt"); });
|
||||
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive",
|
||||
[&]{ reader->readFile("nofile.txt", /*throw_on_not_found=*/true); });
|
||||
|
||||
EXPECT_EQ(reader->firstFile(), nullptr);
|
||||
}
|
||||
@ -145,7 +145,7 @@ TEST_P(ArchiveReaderAndWriterTest, SingleFileInArchive)
|
||||
EXPECT_GT(file_info.compressed_size, 0);
|
||||
|
||||
{
|
||||
auto in = reader->readFile("a.txt");
|
||||
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents);
|
||||
@ -215,14 +215,14 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive)
|
||||
EXPECT_EQ(reader->getFileInfo("b/c.txt").uncompressed_size, c_contents.size());
|
||||
|
||||
{
|
||||
auto in = reader->readFile("a.txt");
|
||||
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, a_contents);
|
||||
}
|
||||
|
||||
{
|
||||
auto in = reader->readFile("b/c.txt");
|
||||
auto in = reader->readFile("b/c.txt", /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, c_contents);
|
||||
@ -230,7 +230,7 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive)
|
||||
|
||||
{
|
||||
/// Read a.txt again.
|
||||
auto in = reader->readFile("a.txt");
|
||||
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, a_contents);
|
||||
@ -302,14 +302,14 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory)
|
||||
EXPECT_EQ(reader->getFileInfo("b.txt").uncompressed_size, b_contents.size());
|
||||
|
||||
{
|
||||
auto in = reader->readFile("a.txt");
|
||||
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, a_contents);
|
||||
}
|
||||
|
||||
{
|
||||
auto in = reader->readFile("b.txt");
|
||||
auto in = reader->readFile("b.txt", /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, b_contents);
|
||||
@ -317,7 +317,7 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory)
|
||||
|
||||
{
|
||||
/// Read a.txt again.
|
||||
auto in = reader->readFile("a.txt");
|
||||
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, a_contents);
|
||||
@ -343,19 +343,19 @@ TEST_P(ArchiveReaderAndWriterTest, Password)
|
||||
|
||||
/// Try to read without a password.
|
||||
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required",
|
||||
[&]{ reader->readFile("a.txt"); });
|
||||
[&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); });
|
||||
|
||||
{
|
||||
/// Try to read with a wrong password.
|
||||
reader->setPassword("123Qwe");
|
||||
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password",
|
||||
[&]{ reader->readFile("a.txt"); });
|
||||
[&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); });
|
||||
}
|
||||
|
||||
{
|
||||
/// Reading with the right password is successful.
|
||||
reader->setPassword("Qwe123");
|
||||
auto in = reader->readFile("a.txt");
|
||||
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents);
|
||||
@ -387,7 +387,7 @@ TEST(TarArchiveReaderTest, ReadFile) {
|
||||
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{filename, contents}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
auto in = reader->readFile(filename);
|
||||
auto in = reader->readFile(filename, /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents);
|
||||
@ -405,11 +405,11 @@ TEST(TarArchiveReaderTest, ReadTwoFiles) {
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
EXPECT_EQ(reader->fileExists(file1), true);
|
||||
EXPECT_EQ(reader->fileExists(file2), true);
|
||||
auto in = reader->readFile(file1);
|
||||
auto in = reader->readFile(file1, /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents1);
|
||||
in = reader->readFile(file2);
|
||||
in = reader->readFile(file2, /*throw_on_not_found=*/true);
|
||||
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents2);
|
||||
@ -448,7 +448,7 @@ TEST(SevenZipArchiveReaderTest, ReadFile) {
|
||||
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{filename, contents}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
auto in = reader->readFile(filename);
|
||||
auto in = reader->readFile(filename, /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents);
|
||||
@ -479,11 +479,11 @@ TEST(SevenZipArchiveReaderTest, ReadTwoFiles) {
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
EXPECT_EQ(reader->fileExists(file1), true);
|
||||
EXPECT_EQ(reader->fileExists(file2), true);
|
||||
auto in = reader->readFile(file1);
|
||||
auto in = reader->readFile(file1, /*throw_on_not_found=*/true);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents1);
|
||||
in = reader->readFile(file2);
|
||||
in = reader->readFile(file2, /*throw_on_not_found=*/true);
|
||||
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents2);
|
||||
|
@ -471,6 +471,21 @@ std::unique_ptr<SourceFromChunks> QueryCache::Reader::getSourceExtremes()
|
||||
return std::move(source_from_chunks_extremes);
|
||||
}
|
||||
|
||||
/// Builds the underlying cache with a TTL policy backed by a per-user quota object, then applies
/// the four size limits through updateConfiguration().
QueryCache::QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_)
    : cache(
        std::make_unique<TTLCachePolicy<Key, Entry, KeyHasher, QueryCacheEntryWeight, IsStale>>(
            std::make_unique<PerUserTTLCachePolicyUserQuota>()))
{
    updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes_, max_entry_size_in_rows_);
}
|
||||
|
||||
/// Applies new limits while holding `mutex`: total size and entry count go to the underlying cache,
/// the per-entry byte/row limits are stored in this object.
void QueryCache::updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_)
{
    std::lock_guard guard(mutex);

    /// Per-entry limits kept locally.
    max_entry_size_in_bytes = max_entry_size_in_bytes_;
    max_entry_size_in_rows = max_entry_size_in_rows_;

    /// Whole-cache limits forwarded to the cache (size first, then count, as before).
    cache.setMaxSize(max_size_in_bytes);
    cache.setMaxCount(max_entries);
}
|
||||
|
||||
QueryCache::Reader QueryCache::createReader(const Key & key)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
@ -488,9 +503,9 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis
|
||||
return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size);
|
||||
}
|
||||
|
||||
void QueryCache::reset()
|
||||
void QueryCache::clear()
|
||||
{
|
||||
cache.reset();
|
||||
cache.clear();
|
||||
std::lock_guard lock(mutex);
|
||||
times_executed.clear();
|
||||
}
|
||||
@ -521,19 +536,4 @@ std::vector<QueryCache::Cache::KeyMapped> QueryCache::dump() const
|
||||
return cache.dump();
|
||||
}
|
||||
|
||||
QueryCache::QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_)
|
||||
: cache(std::make_unique<TTLCachePolicy<Key, Entry, KeyHasher, QueryCacheEntryWeight, IsStale>>(std::make_unique<PerUserTTLCachePolicyUserQuota>()))
|
||||
{
|
||||
updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes_, max_entry_size_in_rows_);
|
||||
}
|
||||
|
||||
void QueryCache::updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
cache.setMaxSize(max_size_in_bytes);
|
||||
cache.setMaxCount(max_entries);
|
||||
max_entry_size_in_bytes = max_entry_size_in_bytes_;
|
||||
max_entry_size_in_rows = max_entry_size_in_rows_;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ public:
|
||||
Reader createReader(const Key & key);
|
||||
Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size, size_t max_query_cache_size_in_bytes_quota, size_t max_query_cache_entries_quota);
|
||||
|
||||
void reset();
|
||||
void clear();
|
||||
|
||||
size_t weight() const;
|
||||
size_t count() const;
|
||||
|
@ -548,7 +548,7 @@ struct ContextSharedPart : boost::noncopyable
|
||||
*/
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache())
|
||||
cache->reset();
|
||||
cache->clear();
|
||||
#endif
|
||||
|
||||
/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
|
||||
@ -2278,6 +2278,16 @@ void Context::setUncompressedCache(const String & uncompressed_cache_policy, siz
|
||||
shared->uncompressed_cache = std::make_shared<UncompressedCache>(uncompressed_cache_policy, max_size_in_bytes);
|
||||
}
|
||||
|
||||
/// Reads `uncompressed_cache_size` from `config` (default: DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE) and
/// applies it to the existing uncompressed cache. Throws LOGICAL_ERROR if the cache does not exist.
void Context::updateUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
    auto lock = getLock();

    const auto & cache = shared->uncompressed_cache;
    if (cache == nullptr)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache was not created yet.");

    const size_t new_max_size = config.getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
    cache->setMaxSize(new_max_size);
}
|
||||
|
||||
UncompressedCachePtr Context::getUncompressedCache() const
|
||||
{
|
||||
@ -2285,14 +2295,13 @@ UncompressedCachePtr Context::getUncompressedCache() const
|
||||
return shared->uncompressed_cache;
|
||||
}
|
||||
|
||||
|
||||
void Context::clearUncompressedCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (shared->uncompressed_cache)
|
||||
shared->uncompressed_cache->reset();
|
||||
}
|
||||
|
||||
if (shared->uncompressed_cache)
|
||||
shared->uncompressed_cache->clear();
|
||||
}
|
||||
|
||||
void Context::setMarkCache(const String & mark_cache_policy, size_t cache_size_in_bytes)
|
||||
{
|
||||
@ -2304,6 +2313,17 @@ void Context::setMarkCache(const String & mark_cache_policy, size_t cache_size_i
|
||||
shared->mark_cache = std::make_shared<MarkCache>(mark_cache_policy, cache_size_in_bytes);
|
||||
}
|
||||
|
||||
/// Reads `mark_cache_size` from `config` (default: DEFAULT_MARK_CACHE_MAX_SIZE) and applies it to
/// the existing mark cache. Throws LOGICAL_ERROR if the cache does not exist.
void Context::updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
    auto lock = getLock();

    const auto & cache = shared->mark_cache;
    if (cache == nullptr)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet.");

    const size_t new_max_size = config.getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
    cache->setMaxSize(new_max_size);
}
|
||||
|
||||
MarkCachePtr Context::getMarkCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
@ -2313,8 +2333,9 @@ MarkCachePtr Context::getMarkCache() const
|
||||
void Context::clearMarkCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
if (shared->mark_cache)
|
||||
shared->mark_cache->reset();
|
||||
shared->mark_cache->clear();
|
||||
}
|
||||
|
||||
ThreadPool & Context::getLoadMarksThreadpool() const
|
||||
@ -2342,20 +2363,30 @@ void Context::setIndexUncompressedCache(size_t max_size_in_bytes)
|
||||
shared->index_uncompressed_cache = std::make_shared<UncompressedCache>(max_size_in_bytes);
|
||||
}
|
||||
|
||||
/// Reads `index_uncompressed_cache_size` from `config` (default: DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE)
/// and applies it to the existing index uncompressed cache. Throws LOGICAL_ERROR if the cache does not exist.
void Context::updateIndexUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
    auto lock = getLock();

    const auto & cache = shared->index_uncompressed_cache;
    if (cache == nullptr)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache was not created yet.");

    const size_t new_max_size = config.getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
    cache->setMaxSize(new_max_size);
}
|
||||
|
||||
UncompressedCachePtr Context::getIndexUncompressedCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
return shared->index_uncompressed_cache;
|
||||
}
|
||||
|
||||
|
||||
void Context::clearIndexUncompressedCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (shared->index_uncompressed_cache)
|
||||
shared->index_uncompressed_cache->reset();
|
||||
}
|
||||
|
||||
if (shared->index_uncompressed_cache)
|
||||
shared->index_uncompressed_cache->clear();
|
||||
}
|
||||
|
||||
void Context::setIndexMarkCache(size_t cache_size_in_bytes)
|
||||
{
|
||||
@ -2367,6 +2398,17 @@ void Context::setIndexMarkCache(size_t cache_size_in_bytes)
|
||||
shared->index_mark_cache = std::make_shared<MarkCache>(cache_size_in_bytes);
|
||||
}
|
||||
|
||||
/// Reads `index_mark_cache_size` from `config` (default: DEFAULT_INDEX_MARK_CACHE_MAX_SIZE) and
/// applies it to the existing index mark cache. Throws LOGICAL_ERROR if the cache does not exist.
void Context::updateIndexMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
    auto lock = getLock();

    const auto & cache = shared->index_mark_cache;
    if (cache == nullptr)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache was not created yet.");

    const size_t new_max_size = config.getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
    cache->setMaxSize(new_max_size);
}
|
||||
|
||||
MarkCachePtr Context::getIndexMarkCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
@ -2376,8 +2418,9 @@ MarkCachePtr Context::getIndexMarkCache() const
|
||||
void Context::clearIndexMarkCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
if (shared->index_mark_cache)
|
||||
shared->index_mark_cache->reset();
|
||||
shared->index_mark_cache->clear();
|
||||
}
|
||||
|
||||
void Context::setMMappedFileCache(size_t cache_size_in_num_entries)
|
||||
@ -2390,6 +2433,17 @@ void Context::setMMappedFileCache(size_t cache_size_in_num_entries)
|
||||
shared->mmap_cache = std::make_shared<MMappedFileCache>(cache_size_in_num_entries);
|
||||
}
|
||||
|
||||
/// Reads `mmap_cache_size` from `config` (default: DEFAULT_MMAP_CACHE_MAX_SIZE) and applies it to
/// the existing mmapped file cache. Throws LOGICAL_ERROR if the cache does not exist.
void Context::updateMMappedFileCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
    auto lock = getLock();

    const auto & cache = shared->mmap_cache;
    if (cache == nullptr)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache was not created yet.");

    /// NOTE(review): the limit is presumably a number of entries rather than bytes — confirm against MMappedFileCache.
    const size_t new_limit = config.getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
    cache->setMaxSize(new_limit);
}
|
||||
|
||||
MMappedFileCachePtr Context::getMMappedFileCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
@ -2399,8 +2453,9 @@ MMappedFileCachePtr Context::getMMappedFileCache() const
|
||||
void Context::clearMMappedFileCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
if (shared->mmap_cache)
|
||||
shared->mmap_cache->reset();
|
||||
shared->mmap_cache->clear();
|
||||
}
|
||||
|
||||
void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows)
|
||||
@ -2416,14 +2471,15 @@ void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t
|
||||
/// Re-reads the `query_cache.*` limits from `config` (falling back to the DEFAULT_QUERY_CACHE_* constants)
/// and applies them to the existing query cache. Throws LOGICAL_ERROR if the cache has not been created.
void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
    auto lock = getLock();

    if (!shared->query_cache)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache was not created yet.");

    const size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
    const size_t max_entries = config.getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
    const size_t max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);

    /// The key used to be spelled "query_cache.max_entry_rows_in_rows", which does not match the
    /// `max_entry_size_in_bytes` sibling, so a user-configured `max_entry_size_in_rows` was ignored.
    /// Read the correctly named key first and keep the old spelling as a fallback for existing configs.
    const size_t max_entry_size_in_rows = config.getUInt64(
        "query_cache.max_entry_size_in_rows",
        config.getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS));

    shared->query_cache->updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows);
}
|
||||
|
||||
QueryCachePtr Context::getQueryCache() const
|
||||
@ -2435,30 +2491,36 @@ QueryCachePtr Context::getQueryCache() const
|
||||
void Context::clearQueryCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
if (shared->query_cache)
|
||||
shared->query_cache->reset();
|
||||
shared->query_cache->clear();
|
||||
}
|
||||
|
||||
void Context::clearCaches() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
if (shared->uncompressed_cache)
|
||||
shared->uncompressed_cache->reset();
|
||||
if (!shared->uncompressed_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache was not created yet.");
|
||||
shared->uncompressed_cache->clear();
|
||||
|
||||
if (shared->mark_cache)
|
||||
shared->mark_cache->reset();
|
||||
if (!shared->mark_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet.");
|
||||
shared->mark_cache->clear();
|
||||
|
||||
if (shared->index_uncompressed_cache)
|
||||
shared->index_uncompressed_cache->reset();
|
||||
if (!shared->index_uncompressed_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache was not created yet.");
|
||||
shared->index_uncompressed_cache->clear();
|
||||
|
||||
if (shared->index_mark_cache)
|
||||
shared->index_mark_cache->reset();
|
||||
if (!shared->index_mark_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache was not created yet.");
|
||||
shared->index_mark_cache->clear();
|
||||
|
||||
if (shared->mmap_cache)
|
||||
shared->mmap_cache->reset();
|
||||
if (!shared->mmap_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mmapped file cache was not created yet.");
|
||||
shared->mmap_cache->clear();
|
||||
|
||||
/// Intentionally not dropping the query cache which is transactionally inconsistent by design.
|
||||
/// Intentionally not clearing the query cache which is transactionally inconsistent by design.
|
||||
}
|
||||
|
||||
ThreadPool & Context::getPrefetchThreadpool() const
|
||||
|
@ -922,33 +922,32 @@ public:
|
||||
|
||||
/// --- Caches ------------------------------------------------------------------------------------------
|
||||
|
||||
/// Create a cache of uncompressed blocks of specified size. This can be done only once.
|
||||
void setUncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes);
|
||||
void updateUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
|
||||
std::shared_ptr<UncompressedCache> getUncompressedCache() const;
|
||||
void clearUncompressedCache() const;
|
||||
|
||||
/// Create a cache of marks of specified size. This can be done only once.
|
||||
void setMarkCache(const String & mark_cache_policy, size_t cache_size_in_bytes);
|
||||
void updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
|
||||
std::shared_ptr<MarkCache> getMarkCache() const;
|
||||
void clearMarkCache() const;
|
||||
ThreadPool & getLoadMarksThreadpool() const;
|
||||
|
||||
/// Create a cache of index uncompressed blocks of specified size. This can be done only once.
|
||||
void setIndexUncompressedCache(size_t max_size_in_bytes);
|
||||
void updateIndexUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
|
||||
std::shared_ptr<UncompressedCache> getIndexUncompressedCache() const;
|
||||
void clearIndexUncompressedCache() const;
|
||||
|
||||
/// Create a cache of index marks of specified size. This can be done only once.
|
||||
void setIndexMarkCache(size_t cache_size_in_bytes);
|
||||
void updateIndexMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
|
||||
std::shared_ptr<MarkCache> getIndexMarkCache() const;
|
||||
void clearIndexMarkCache() const;
|
||||
|
||||
/// Create a cache of mapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
|
||||
void setMMappedFileCache(size_t cache_size_in_num_entries);
|
||||
void updateMMappedFileCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
|
||||
std::shared_ptr<MMappedFileCache> getMMappedFileCache() const;
|
||||
void clearMMappedFileCache() const;
|
||||
|
||||
/// Create a cache of query results for statements which run repeatedly.
|
||||
void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows);
|
||||
void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
|
||||
std::shared_ptr<QueryCache> getQueryCache() const;
|
||||
|
@ -341,14 +341,10 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
|
||||
{
|
||||
TableNameHints hints(this->tryGetDatabase(table_id.getDatabaseName()), getContext());
|
||||
std::vector<String> names = hints.getHints(table_id.getTableName());
|
||||
if (!names.empty())
|
||||
{
|
||||
/// There is two options: first is to print just the name of the table
|
||||
/// and the second is to print the result in format: db_name.table_name. I'll comment out the second option below
|
||||
/// I also leave possibility to print several suggestions
|
||||
if (names.empty())
|
||||
exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_id.getNameForLogs()));
|
||||
else
|
||||
exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist. Maybe you meant {}?", table_id.getNameForLogs(), backQuoteIfNeed(names[0])));
|
||||
}
|
||||
else exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_id.getNameForLogs()));
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
@ -704,6 +704,9 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
|
||||
if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index is disabled. Turn on allow_experimental_annoy_index");
|
||||
|
||||
if (index_desc.type == "usearch" && !settings.allow_experimental_usearch_index)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index is disabled. Turn on allow_experimental_usearch_index");
|
||||
|
||||
properties.indices.push_back(index_desc);
|
||||
}
|
||||
if (create.columns_list->projections)
|
||||
|
@ -345,7 +345,7 @@ BlockIO InterpreterSystemQuery::execute()
|
||||
case Type::DROP_COMPILED_EXPRESSION_CACHE:
|
||||
getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE);
|
||||
if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache())
|
||||
cache->reset();
|
||||
cache->clear();
|
||||
break;
|
||||
#endif
|
||||
#if USE_AWS_S3
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Interpreters/Cache/FileCache.h>
|
||||
#include <Interpreters/Cache/FileCacheFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/Cache/QueryCache.h>
|
||||
#include <Interpreters/JIT/CompiledExpressionCache.h>
|
||||
|
||||
#include <Databases/IDatabase.h>
|
||||
|
@ -64,8 +64,8 @@ inline bool operator==(SmallStringRef lhs, SmallStringRef rhs)
|
||||
if (lhs.size == 0)
|
||||
return true;
|
||||
|
||||
#ifdef __SSE2__
|
||||
return memequalSSE2Wide(lhs.data(), rhs.data(), lhs.size);
|
||||
#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON))
|
||||
return memequalWide(lhs.data(), rhs.data(), lhs.size);
|
||||
#else
|
||||
return 0 == memcmp(lhs.data(), rhs.data(), lhs.size);
|
||||
#endif
|
||||
|
@ -14,6 +14,7 @@ class ASTIndexDeclaration : public IAST
|
||||
public:
|
||||
static const auto DEFAULT_INDEX_GRANULARITY = 1uz;
|
||||
static const auto DEFAULT_ANNOY_INDEX_GRANULARITY = 100'000'000uz;
|
||||
static const auto DEFAULT_USEARCH_INDEX_GRANULARITY = 100'000'000uz;
|
||||
|
||||
String name;
|
||||
IAST * expr;
|
||||
|
@ -204,7 +204,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
|
||||
}
|
||||
else if (type == Type::SUSPEND)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR "
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR "
|
||||
<< (settings.hilite ? hilite_none : "") << seconds
|
||||
<< (settings.hilite ? hilite_keyword : "") << " SECOND"
|
||||
<< (settings.hilite ? hilite_none : "");
|
||||
@ -232,12 +232,50 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
|
||||
}
|
||||
else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " " << ServerType::serverTypeToString(server_type.type)
|
||||
<< (settings.hilite ? hilite_none : "");
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " "
|
||||
<< ServerType::serverTypeToString(server_type.type) << (settings.hilite ? hilite_none : "");
|
||||
|
||||
if (server_type.type == ServerType::CUSTOM)
|
||||
if (server_type.type == ServerType::Type::CUSTOM)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_identifier : "") << " " << backQuoteIfNeed(server_type.custom_name);
|
||||
settings.ostr << " " << quoteString(server_type.custom_name);
|
||||
}
|
||||
|
||||
bool comma = false;
|
||||
|
||||
if (!server_type.exclude_types.empty())
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "")
|
||||
<< " EXCEPT" << (settings.hilite ? hilite_none : "");
|
||||
|
||||
for (auto cur_type : server_type.exclude_types)
|
||||
{
|
||||
if (cur_type == ServerType::Type::CUSTOM)
|
||||
continue;
|
||||
|
||||
if (comma)
|
||||
settings.ostr << ",";
|
||||
else
|
||||
comma = true;
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " "
|
||||
<< ServerType::serverTypeToString(cur_type) << (settings.hilite ? hilite_none : "");
|
||||
}
|
||||
|
||||
if (server_type.exclude_types.contains(ServerType::Type::CUSTOM))
|
||||
{
|
||||
for (const auto & cur_name : server_type.exclude_custom_names)
|
||||
{
|
||||
if (comma)
|
||||
settings.ostr << ",";
|
||||
else
|
||||
comma = true;
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " "
|
||||
<< ServerType::serverTypeToString(ServerType::Type::CUSTOM) << (settings.hilite ? hilite_none : "");
|
||||
|
||||
settings.ostr << " " << quoteString(cur_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -66,6 +66,8 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
|
||||
{
|
||||
if (index->type && index->type->name == "annoy")
|
||||
index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY;
|
||||
else if (index->type && index->type->name == "usearch")
|
||||
index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY;
|
||||
else
|
||||
index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY;
|
||||
}
|
||||
|
@ -148,6 +148,8 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
|
||||
{
|
||||
if (index->type->name == "annoy")
|
||||
index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY;
|
||||
else if (index->type->name == "usearch")
|
||||
index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY;
|
||||
else
|
||||
index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY;
|
||||
}
|
||||
|
@ -458,32 +458,71 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
|
||||
if (!parseQueryWithOnCluster(res, pos, expected))
|
||||
return false;
|
||||
|
||||
ServerType::Type current_type = ServerType::Type::END;
|
||||
std::string current_custom_name;
|
||||
|
||||
for (const auto & type : magic_enum::enum_values<ServerType::Type>())
|
||||
auto parse_server_type = [&](ServerType::Type & type, std::string & custom_name) -> bool
|
||||
{
|
||||
if (ParserKeyword{ServerType::serverTypeToString(type)}.ignore(pos, expected))
|
||||
type = ServerType::Type::END;
|
||||
custom_name = "";
|
||||
|
||||
for (const auto & cur_type : magic_enum::enum_values<ServerType::Type>())
|
||||
{
|
||||
current_type = type;
|
||||
break;
|
||||
if (ParserKeyword{ServerType::serverTypeToString(cur_type)}.ignore(pos, expected))
|
||||
{
|
||||
type = cur_type;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == ServerType::Type::END)
|
||||
return false;
|
||||
|
||||
if (type == ServerType::CUSTOM)
|
||||
{
|
||||
ASTPtr ast;
|
||||
|
||||
if (!ParserStringLiteral{}.parse(pos, ast, expected))
|
||||
return false;
|
||||
|
||||
custom_name = ast->as<ASTLiteral &>().value.get<const String &>();
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
ServerType::Type base_type;
|
||||
std::string base_custom_name;
|
||||
|
||||
ServerType::Types exclude_type;
|
||||
ServerType::CustomNames exclude_custom_names;
|
||||
|
||||
if (!parse_server_type(base_type, base_custom_name))
|
||||
return false;
|
||||
|
||||
if (ParserKeyword{"EXCEPT"}.ignore(pos, expected))
|
||||
{
|
||||
if (base_type != ServerType::Type::QUERIES_ALL &&
|
||||
base_type != ServerType::Type::QUERIES_DEFAULT &&
|
||||
base_type != ServerType::Type::QUERIES_CUSTOM)
|
||||
return false;
|
||||
|
||||
ServerType::Type current_type;
|
||||
std::string current_custom_name;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (!exclude_type.empty() && !ParserToken(TokenType::Comma).ignore(pos, expected))
|
||||
break;
|
||||
|
||||
if (!parse_server_type(current_type, current_custom_name))
|
||||
return false;
|
||||
|
||||
exclude_type.insert(current_type);
|
||||
|
||||
if (current_type == ServerType::Type::CUSTOM)
|
||||
exclude_custom_names.insert(current_custom_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (current_type == ServerType::Type::END)
|
||||
return false;
|
||||
|
||||
if (current_type == ServerType::CUSTOM)
|
||||
{
|
||||
ASTPtr ast;
|
||||
|
||||
if (!ParserStringLiteral{}.parse(pos, ast, expected))
|
||||
return false;
|
||||
|
||||
current_custom_name = ast->as<ASTLiteral &>().value.get<const String &>();
|
||||
}
|
||||
|
||||
res->server_type = ServerType(current_type, current_custom_name);
|
||||
res->server_type = ServerType(base_type, base_custom_name, exclude_type, exclude_custom_names);
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -75,10 +75,13 @@ void DelayedPortsProcessor::finishPair(PortsPair & pair)
|
||||
pair.input_port->close();
|
||||
|
||||
pair.is_finished = true;
|
||||
++num_finished_pairs;
|
||||
++num_finished_inputs;
|
||||
|
||||
if (pair.output_port)
|
||||
++num_finished_outputs;
|
||||
|
||||
if (!pair.is_delayed)
|
||||
++num_finished_main_inputs;
|
||||
}
|
||||
}
|
||||
|
||||
@ -112,9 +115,15 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool DelayedPortsProcessor::shouldSkipDelayed() const
|
||||
{
|
||||
return num_finished_main_inputs + num_delayed_ports < port_pairs.size();
|
||||
}
|
||||
|
||||
IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs)
|
||||
{
|
||||
bool skip_delayed = (num_finished_pairs + num_delayed_ports) < port_pairs.size();
|
||||
bool skip_delayed = shouldSkipDelayed();
|
||||
bool need_data = false;
|
||||
|
||||
if (!are_inputs_initialized && !updated_outputs.empty())
|
||||
@ -154,14 +163,14 @@ IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_in
|
||||
}
|
||||
|
||||
/// In case if main streams are finished at current iteration, start processing delayed streams.
|
||||
if (skip_delayed && (num_finished_pairs + num_delayed_ports) >= port_pairs.size())
|
||||
if (skip_delayed && !shouldSkipDelayed())
|
||||
{
|
||||
for (auto & pair : port_pairs)
|
||||
if (pair.is_delayed)
|
||||
need_data = processPair(pair) || need_data;
|
||||
}
|
||||
|
||||
if (num_finished_pairs == port_pairs.size())
|
||||
if (num_finished_inputs == port_pairs.size())
|
||||
return Status::Finished;
|
||||
|
||||
if (need_data)
|
||||
|
@ -29,14 +29,16 @@ private:
|
||||
|
||||
std::vector<PortsPair> port_pairs;
|
||||
const size_t num_delayed_ports;
|
||||
size_t num_finished_pairs = 0;
|
||||
size_t num_finished_inputs = 0;
|
||||
size_t num_finished_outputs = 0;
|
||||
size_t num_finished_main_inputs = 0;
|
||||
|
||||
std::vector<size_t> output_to_pair;
|
||||
bool are_inputs_initialized = false;
|
||||
|
||||
bool processPair(PortsPair & pair);
|
||||
void finishPair(PortsPair & pair);
|
||||
bool shouldSkipDelayed() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct SelectQueryInfo;
|
||||
|
||||
using ColumnMappingPtr = std::shared_ptr<ColumnMapping>;
|
||||
|
||||
/** Input format is a source, that reads data from ReadBuffer.
|
||||
@ -21,9 +23,13 @@ protected:
|
||||
ReadBuffer * in [[maybe_unused]] = nullptr;
|
||||
|
||||
public:
|
||||
// ReadBuffer can be nullptr for random-access formats.
|
||||
/// ReadBuffer can be nullptr for random-access formats.
|
||||
IInputFormat(Block header, ReadBuffer * in_);
|
||||
|
||||
/// If the format is used by a SELECT query, this method may be called.
|
||||
/// The format may use it for filter pushdown.
|
||||
virtual void setQueryInfo(const SelectQueryInfo &, ContextPtr) {}
|
||||
|
||||
/** In some usecase (hello Kafka) we need to read a lot of tiny streams in exactly the same format.
|
||||
* The recreating of parser for each small stream takes too long, so we introduce a method
|
||||
* resetParser() which allow to reset the state of parser to continue reading of
|
||||
|
@ -115,21 +115,24 @@ NamesAndTypesList IRowSchemaReader::readSchema()
|
||||
"Cannot read rows to determine the schema, the maximum number of rows (or bytes) to read is set to 0. "
|
||||
"Most likely setting input_format_max_rows_to_read_for_schema_inference or input_format_max_bytes_to_read_for_schema_inference is set to 0");
|
||||
|
||||
DataTypes data_types = readRowAndGetDataTypes();
|
||||
auto data_types_maybe = readRowAndGetDataTypes();
|
||||
|
||||
/// Check that we read at list one column.
|
||||
if (data_types.empty())
|
||||
if (!data_types_maybe)
|
||||
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data");
|
||||
|
||||
DataTypes data_types = std::move(*data_types_maybe);
|
||||
|
||||
/// If column names weren't set, use default names 'c1', 'c2', ...
|
||||
if (column_names.empty())
|
||||
bool use_default_column_names = column_names.empty();
|
||||
if (use_default_column_names)
|
||||
{
|
||||
column_names.reserve(data_types.size());
|
||||
for (size_t i = 0; i != data_types.size(); ++i)
|
||||
column_names.push_back("c" + std::to_string(i + 1));
|
||||
}
|
||||
/// If column names were set, check that the number of names match the number of types.
|
||||
else if (column_names.size() != data_types.size())
|
||||
else if (column_names.size() != data_types.size() && !allowVariableNumberOfColumns())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
@ -137,6 +140,9 @@ NamesAndTypesList IRowSchemaReader::readSchema()
|
||||
}
|
||||
else
|
||||
{
|
||||
if (column_names.size() != data_types.size())
|
||||
data_types.resize(column_names.size());
|
||||
|
||||
std::unordered_set<std::string_view> names_set;
|
||||
for (const auto & name : column_names)
|
||||
{
|
||||
@ -155,13 +161,39 @@ NamesAndTypesList IRowSchemaReader::readSchema()
|
||||
|
||||
for (rows_read = 1; rows_read < max_rows_to_read && in.count() < max_bytes_to_read; ++rows_read)
|
||||
{
|
||||
DataTypes new_data_types = readRowAndGetDataTypes();
|
||||
if (new_data_types.empty())
|
||||
auto new_data_types_maybe = readRowAndGetDataTypes();
|
||||
if (!new_data_types_maybe)
|
||||
/// We reached eof.
|
||||
break;
|
||||
|
||||
DataTypes new_data_types = std::move(*new_data_types_maybe);
|
||||
|
||||
if (new_data_types.size() != data_types.size())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");
|
||||
{
|
||||
if (!allowVariableNumberOfColumns())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");
|
||||
|
||||
if (use_default_column_names)
|
||||
{
|
||||
/// Current row contains new columns, add new default names.
|
||||
if (new_data_types.size() > data_types.size())
|
||||
{
|
||||
for (size_t i = data_types.size(); i < new_data_types.size(); ++i)
|
||||
column_names.push_back("c" + std::to_string(i + 1));
|
||||
data_types.resize(new_data_types.size());
|
||||
}
|
||||
/// Current row contain less columns than previous rows.
|
||||
else
|
||||
{
|
||||
new_data_types.resize(data_types.size());
|
||||
}
|
||||
}
|
||||
/// If names were explicitly set, ignore all extra columns.
|
||||
else
|
||||
{
|
||||
new_data_types.resize(column_names.size());
|
||||
}
|
||||
}
|
||||
|
||||
for (field_index = 0; field_index != data_types.size(); ++field_index)
|
||||
{
|
||||
|
@ -93,11 +93,13 @@ protected:
|
||||
/// Read one row and determine types of columns in it.
|
||||
/// Return types in the same order in which the values were in the row.
|
||||
/// If it's impossible to determine the type for some column, return nullptr for it.
|
||||
/// Return empty list if can't read more data.
|
||||
virtual DataTypes readRowAndGetDataTypes() = 0;
|
||||
/// Return std::nullopt if can't read more data.
|
||||
virtual std::optional<DataTypes> readRowAndGetDataTypes() = 0;
|
||||
|
||||
void setColumnNames(const std::vector<String> & names) { column_names = names; }
|
||||
|
||||
virtual bool allowVariableNumberOfColumns() const { return false; }
|
||||
|
||||
size_t field_index;
|
||||
|
||||
private:
|
||||
|
@ -284,7 +284,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CSVFormatReader::allowVariableNumberOfColumns()
|
||||
bool CSVFormatReader::allowVariableNumberOfColumns() const
|
||||
{
|
||||
return format_settings.csv.allow_variable_number_of_columns;
|
||||
}
|
||||
@ -410,19 +410,22 @@ CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_t
|
||||
{
|
||||
}
|
||||
|
||||
std::pair<std::vector<String>, DataTypes> CSVSchemaReader::readRowAndGetFieldsAndDataTypes()
|
||||
std::optional<std::pair<std::vector<String>, DataTypes>> CSVSchemaReader::readRowAndGetFieldsAndDataTypes()
|
||||
{
|
||||
if (buf.eof())
|
||||
return {};
|
||||
|
||||
auto fields = reader.readRow();
|
||||
auto data_types = tryInferDataTypesByEscapingRule(fields, format_settings, FormatSettings::EscapingRule::CSV);
|
||||
return {fields, data_types};
|
||||
return std::make_pair(std::move(fields), std::move(data_types));
|
||||
}
|
||||
|
||||
DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl()
|
||||
std::optional<DataTypes> CSVSchemaReader::readRowAndGetDataTypesImpl()
|
||||
{
|
||||
return std::move(readRowAndGetFieldsAndDataTypes().second);
|
||||
auto fields_with_types = readRowAndGetFieldsAndDataTypes();
|
||||
if (!fields_with_types)
|
||||
return {};
|
||||
return std::move(fields_with_types->second);
|
||||
}
|
||||
|
||||
|
||||
|
@ -70,7 +70,7 @@ public:
|
||||
void skipPrefixBeforeHeader() override;
|
||||
|
||||
bool checkForEndOfRow() override;
|
||||
bool allowVariableNumberOfColumns() override;
|
||||
bool allowVariableNumberOfColumns() const override;
|
||||
|
||||
std::vector<String> readNames() override { return readHeaderRow(); }
|
||||
std::vector<String> readTypes() override { return readHeaderRow(); }
|
||||
@ -102,8 +102,10 @@ public:
|
||||
CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);
|
||||
|
||||
private:
|
||||
DataTypes readRowAndGetDataTypesImpl() override;
|
||||
std::pair<std::vector<String>, DataTypes> readRowAndGetFieldsAndDataTypes() override;
|
||||
bool allowVariableNumberOfColumns() const override { return format_settings.csv.allow_variable_number_of_columns; }
|
||||
|
||||
std::optional<DataTypes> readRowAndGetDataTypesImpl() override;
|
||||
std::optional<std::pair<std::vector<String>, DataTypes>> readRowAndGetFieldsAndDataTypes() override;
|
||||
|
||||
PeekableReadBuffer buf;
|
||||
CSVFormatReader reader;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user