Merge branch 'master' into Follow_up_Backup_Restore_concurrency_check_node_2
Commit f20901d9d3
.gitmodules | 3
@@ -253,6 +253,9 @@
 [submodule "contrib/qpl"]
     path = contrib/qpl
     url = https://github.com/intel/qpl
+[submodule "contrib/idxd-config"]
+    path = contrib/idxd-config
+    url = https://github.com/intel/idxd-config
 [submodule "contrib/wyhash"]
     path = contrib/wyhash
     url = https://github.com/wangyi-fudan/wyhash
@@ -57,7 +57,7 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
     # set CPU time limit to 1000 seconds
     set (RLIMIT_CPU 1000)

-    # gcc10/gcc10/clang -fsanitize=memory is too heavy
+    # -fsanitize=memory is too heavy
     if (SANITIZE STREQUAL "memory")
         set (RLIMIT_DATA 10000000000) # 10G
     endif()
@@ -280,7 +280,7 @@ set (CMAKE_C_STANDARD 11)
 set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
 set (CMAKE_C_STANDARD_REQUIRED ON)

-# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc
+# Compiler-specific coverage flags e.g. -fcoverage-mapping
 option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)

 if (COMPILER_CLANG)
@@ -522,6 +522,26 @@ include (cmake/print_flags.cmake)

 if (ENABLE_RUST)
     add_subdirectory (rust)
+
+    # With LTO Rust adds a few symbols with global visibility, the most common is
+    # rust_eh_personality. And this leads to linking errors because multiple
+    # Rust libraries contain the same symbol.
+    #
+    # If it were a shared library, we could use a linker version script to
+    # hide these symbols, but the libraries are static.
+    #
+    # We could in theory compile everything into one library, but this would be a
+    # mess.
+    #
+    # But this should be OK, since CI has lots of other builds that are done
+    # without LTO and they will find multiple definitions if there are any.
+    #
+    # More information about this behaviour in Rust can be found here
+    # - https://github.com/rust-lang/rust/issues/44322
+    # - https://alanwu.space/post/symbol-hygiene/
+    if (ENABLE_THINLTO)
+        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-multiple-definition")
+    endif()
 endif()

 add_subdirectory (base)
@@ -73,18 +73,6 @@
 #    endif
 #endif

-#if defined(ADDRESS_SANITIZER)
-#    define BOOST_USE_ASAN 1
-#    define BOOST_USE_UCONTEXT 1
-#endif
-
-#if defined(THREAD_SANITIZER)
-#    define BOOST_USE_TSAN 1
-#    define BOOST_USE_UCONTEXT 1
-#endif
-
 /// TODO: Strange enough, there is no way to detect UB sanitizer.

 /// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute.
 /// It is useful in case when compiler cannot see (and exploit) it, but UBSan can.
 /// Example: multiplication of signed integers with possibility of overflow when both sides are from user input.
@@ -53,7 +53,7 @@


 // Define if no <locale> header is available (such as on WinCE)
-// #define POCO_NO_LOCALE
+#define POCO_NO_LOCALE


 // Define to desired default thread stack size
@@ -30,9 +30,6 @@
 #include <cctype>
 #include <cmath>
 #include <limits>
-#if !defined(POCO_NO_LOCALE)
-#    include <locale>
-#endif


 // binary numbers are supported, thus 64 (bits) + 1 (string terminating zero)
@@ -53,11 +50,7 @@ inline char decimalSeparator()
 	/// Returns decimal separator from global locale or
 	/// default '.' for platforms where locale is unavailable.
 {
-#if !defined(POCO_NO_LOCALE)
-	return std::use_facet<std::numpunct<char>>(std::locale()).decimal_point();
-#else
 	return '.';
-#endif
 }
@@ -65,11 +58,7 @@ inline char thousandSeparator()
 	/// Returns thousand separator from global locale or
 	/// default ',' for platforms where locale is unavailable.
 {
-#if !defined(POCO_NO_LOCALE)
-	return std::use_facet<std::numpunct<char>>(std::locale()).thousands_sep();
-#else
 	return ',';
-#endif
 }
@@ -16,9 +16,6 @@
 #include "Poco/Exception.h"
 #include "Poco/Ascii.h"
 #include <sstream>
-#if !defined(POCO_NO_LOCALE)
-#include <locale>
-#endif
 #include <cstddef>
@@ -147,9 +144,6 @@ namespace
 	void formatOne(std::string& result, std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt, std::vector<Any>::const_iterator& itVal)
 	{
 		std::ostringstream str;
-#if !defined(POCO_NO_LOCALE)
-		str.imbue(std::locale::classic());
-#endif
 		try
 		{
 			parseFlags(str, itFmt, endFmt);
@@ -15,9 +15,6 @@
 #include "Poco/NumberFormatter.h"
 #include "Poco/MemoryStream.h"
 #include <iomanip>
-#if !defined(POCO_NO_LOCALE)
-#include <locale>
-#endif
 #include <cstdio>
@@ -19,9 +19,6 @@
 #include <cstdio>
 #include <cctype>
 #include <stdlib.h>
-#if !defined(POCO_NO_LOCALE)
-#include <locale>
-#endif


 #if defined(POCO_LONG_IS_64_BIT)
@@ -9,27 +9,19 @@ if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MA
     return()
 endif()

-set(ENABLE_CCACHE "default" CACHE STRING "Deprecated, use COMPILER_CACHE=(auto|ccache|sccache|disabled)")
-if (NOT ENABLE_CCACHE STREQUAL "default")
-    message(WARNING "The -DENABLE_CCACHE is deprecated in favor of -DCOMPILER_CACHE")
-endif()
-
 set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (ccache, then sccache), 'ccache', 'sccache', or 'disabled'")

-# It has pretty complex logic, because the ENABLE_CCACHE is deprecated, but still should
-# control the COMPILER_CACHE
-# After it will be completely removed, the following block will be much simpler
-if (COMPILER_CACHE STREQUAL "ccache" OR (ENABLE_CCACHE AND NOT ENABLE_CCACHE STREQUAL "default"))
-    find_program (CCACHE_EXECUTABLE ccache)
-elseif(COMPILER_CACHE STREQUAL "disabled" OR NOT ENABLE_CCACHE STREQUAL "default")
-    message(STATUS "Using *ccache: no (disabled via configuration)")
-    return()
-elseif(COMPILER_CACHE STREQUAL "auto")
+if(COMPILER_CACHE STREQUAL "auto")
     find_program (CCACHE_EXECUTABLE ccache sccache)
+elseif (COMPILER_CACHE STREQUAL "ccache")
+    find_program (CCACHE_EXECUTABLE ccache)
+elseif(COMPILER_CACHE STREQUAL "sccache")
+    find_program (CCACHE_EXECUTABLE sccache)
+elseif(COMPILER_CACHE STREQUAL "disabled")
+    message(STATUS "Using *ccache: no (disabled via configuration)")
+    return()
 else()
-    message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|ccache|sccache|disabled), given '${COMPILER_CACHE}'")
+    message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|ccache|sccache|disabled), value: '${COMPILER_CACHE}'")
 endif()
@@ -92,6 +92,8 @@ add_library (boost::system ALIAS _boost_system)
 target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR})

 # context
+option (BOOST_USE_UCONTEXT "Use ucontext_t for context switching of boost::fiber within boost::context" OFF)
+
 enable_language(ASM)
 SET(ASM_OPTIONS "-x assembler-with-cpp")
@@ -100,20 +102,6 @@ set (SRCS_CONTEXT
     "${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp"
 )

-if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread"))
-    add_compile_definitions(BOOST_USE_UCONTEXT)
-
-    if (SANITIZE STREQUAL "address")
-        add_compile_definitions(BOOST_USE_ASAN)
-    elseif (SANITIZE STREQUAL "thread")
-        add_compile_definitions(BOOST_USE_TSAN)
-    endif()
-
-    set (SRCS_CONTEXT ${SRCS_CONTEXT}
-        "${LIBRARY_DIR}/libs/context/src/fiber.cpp"
-        "${LIBRARY_DIR}/libs/context/src/continuation.cpp"
-    )
-endif()
 if (ARCH_AARCH64)
     set (SRCS_CONTEXT ${SRCS_CONTEXT}
         "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S"
@@ -152,10 +140,27 @@ else()
     )
 endif()

+if (SANITIZE OR BOOST_USE_UCONTEXT)
+    list (APPEND SRCS_CONTEXT
+        "${LIBRARY_DIR}/libs/context/src/fiber.cpp"
+        "${LIBRARY_DIR}/libs/context/src/continuation.cpp"
+    )
+endif()
+
 add_library (_boost_context ${SRCS_CONTEXT})
 add_library (boost::context ALIAS _boost_context)
 target_include_directories (_boost_context PRIVATE ${LIBRARY_DIR})

+if (SANITIZE OR BOOST_USE_UCONTEXT)
+    target_compile_definitions(_boost_context PUBLIC BOOST_USE_UCONTEXT)
+endif()
+
+if (SANITIZE STREQUAL "address")
+    target_compile_definitions(_boost_context PUBLIC BOOST_USE_ASAN)
+elseif (SANITIZE STREQUAL "thread")
+    target_compile_definitions(_boost_context PUBLIC BOOST_USE_TSAN)
+endif()
+
 # coroutine

 set (SRCS_COROUTINE
contrib/idxd-config | 1 (new submodule)
@@ -0,0 +1 @@
+Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
contrib/llvm-project | 2
@@ -1 +1 @@
-Subproject commit 2aedf7598a4040b23881dbe05b6afaca25a337ef
+Subproject commit d857c707fccd50423bea1c4710dc469cf89607a9
@@ -187,7 +187,9 @@ target_include_directories(qplcore_avx512
 set_target_properties(qplcore_avx512 PROPERTIES
     $<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)

-target_link_libraries(qplcore_avx512 ${CMAKE_DL_LIBS} isal)
+target_link_libraries(qplcore_avx512
+    PRIVATE isal
+    PRIVATE ${CMAKE_DL_LIBS})

 target_compile_options(qplcore_avx512
     PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
@@ -217,7 +219,9 @@ target_include_directories(qplcore_px
 set_target_properties(qplcore_px PROPERTIES
     $<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)

-target_link_libraries(qplcore_px isal ${CMAKE_DL_LIBS})
+target_link_libraries(qplcore_px
+    PRIVATE isal
+    PRIVATE ${CMAKE_DL_LIBS})

 target_compile_options(qplcore_px
     PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
@@ -339,6 +343,7 @@ target_compile_definitions(_qpl

 target_link_libraries(_qpl
+    PRIVATE accel-config
     PRIVATE ch_contrib::isal
     PRIVATE ${CMAKE_DL_LIBS})

 add_library (ch_contrib::qpl ALIAS _qpl)
@@ -1,6 +1,6 @@
 ## The bare minimum ClickHouse Docker image.

-It is intented as a showcase to check the amount of implicit dependencies of ClickHouse from the OS in addition to the OS kernel.
+It is intended as a showcase to check the amount of implicit dependencies of ClickHouse from the OS in addition to the OS kernel.

 Example usage:
@@ -59,12 +59,16 @@ def process_test_log(log_path, broken_tests):

         total += 1
         if TIMEOUT_SIGN in line:
-            failed += 1
-            test_results.append((test_name, "Timeout", test_time, []))
+            if test_name in broken_tests:
+                success += 1
+                test_results.append((test_name, "BROKEN", test_time, []))
+            else:
+                failed += 1
+                test_results.append((test_name, "Timeout", test_time, []))
         elif FAIL_SIGN in line:
             if test_name in broken_tests:
                 success += 1
-                test_results.append((test_name, "OK", test_time, []))
+                test_results.append((test_name, "BROKEN", test_time, []))
             else:
                 failed += 1
                 test_results.append((test_name, "FAIL", test_time, []))
@@ -76,11 +80,11 @@ def process_test_log(log_path, broken_tests):
             test_results.append((test_name, "SKIPPED", test_time, []))
         else:
             if OK_SIGN in line and test_name in broken_tests:
-                failed += 1
+                skipped += 1
                 test_results.append(
                     (
                         test_name,
-                        "SKIPPED",
+                        "NOT_FAILED",
                         test_time,
                         ["This test passed. Update broken_tests.txt.\n"],
                     )
@@ -98,7 +98,7 @@ A hand-written recursive descent parser parses a query. For example, `ParserSele

 ## Interpreters {#interpreters}

-Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the INSERT query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.
+Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the `INSERT` query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.

 `InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations of query.
@@ -184,6 +184,15 @@ sudo yum install -y yum-utils
 sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
 ```

+For systems with `zypper` package manager (openSUSE, SLES):
+
+``` bash
+sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g
+sudo zypper --gpg-auto-import-keys refresh clickhouse-stable
+```
+
+Later any `yum install` can be replaced by `zypper install`. To specify a particular version, add `-$VERSION` to the end of the package name, e.g. `clickhouse-client-22.2.2.22`.
+
 #### Install ClickHouse server and client

 ```bash
@@ -23,6 +23,6 @@ Additional cache types:
 - [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
 - Schema inference cache.
 - [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
-- [(Experimental) Query cache](query-cache.md).
+- [Query cache](query-cache.md).

 To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements.
@@ -1,10 +1,10 @@
 ---
 slug: /en/operations/query-cache
 sidebar_position: 65
-sidebar_label: Query Cache [experimental]
+sidebar_label: Query Cache
 ---

-# Query Cache [experimental]
+# Query Cache

 The query cache allows to compute `SELECT` queries just once and to serve further executions of the same query directly from the cache.
 Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
@@ -29,21 +29,10 @@ Transactionally inconsistent caching is traditionally provided by client tools o
 the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
 This reduces maintenance effort and avoids redundancy.

-:::note
-The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
-processing) where wrong results are returned.
-:::
-
 ## Configuration Settings and Usage

-As long as the result cache is experimental it must be activated using the following configuration setting:
-
-```sql
-SET allow_experimental_query_cache = true;
-```
-
-Afterwards, setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries
-of the current session should utilize the query cache. For example, the first execution of query
+Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the
+current session should utilize the query cache. For example, the first execution of query

 ```sql
 SELECT some_expensive_calculation(column_1, column_2)
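The hunk above is cut off in the middle of the docs' example. For orientation only (not part of the diff), the usage pattern the paragraph refers to is a plain per-query setting; `some_expensive_calculation` is the docs' own placeholder:

```sql
-- The first execution computes and caches the result; subsequent identical
-- queries with the setting enabled are answered from the query cache.
SELECT some_expensive_calculation(column_1, column_2)
SETTINGS use_query_cache = true;
```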
@@ -1514,7 +1514,7 @@ Default value: `0`.

 ## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}

-The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited.
+The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.

 Possible values:
@@ -1524,7 +1524,7 @@ Default value: 0 (no restriction).

 ## query_cache_max_entries {#query-cache-max-entries}

-The maximum number of query results the current user may store in the query cache. 0 means unlimited.
+The maximum number of query results the current user may store in the [query cache](../query-cache.md). 0 means unlimited.

 Possible values:
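An aside, not part of the diff: both settings documented above are ordinary per-user settings, so a sketch of constraining them for the current session could look like this (the values are hypothetical):

```sql
SET query_cache_max_size_in_bytes = 10000000; -- cap this user's query cache memory at ~10 MB
SET query_cache_max_entries = 100;            -- cap the number of stored query results at 100
```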
@@ -13,11 +13,11 @@ groupBitAnd(expr)

 **Arguments**

-`expr` – An expression that results in `UInt*` type.
+`expr` – An expression that results in `UInt*` or `Int*` type.

 **Return value**

-Value of the `UInt*` type.
+Value of the `UInt*` or `Int*` type.

 **Example**
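The hunk stops at the **Example** heading. As an illustration (not part of the diff) of what `groupBitAnd` computes, using a hypothetical table `t`:

```sql
-- groupBitAnd folds bitwise AND across all rows of a group.
CREATE TABLE t (num UInt8) ENGINE = Memory;
INSERT INTO t VALUES (7), (5), (13);   -- 0b0111, 0b0101, 0b1101
SELECT groupBitAnd(num) AS res FROM t; -- 5, i.e. 0b0101
```

With the change above, signed arguments such as `Int8` are now accepted as well.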
@@ -13,11 +13,11 @@ groupBitOr(expr)

 **Arguments**

-`expr` – An expression that results in `UInt*` type.
+`expr` – An expression that results in `UInt*` or `Int*` type.

 **Returned value**

-Value of the `UInt*` type.
+Value of the `UInt*` or `Int*` type.

 **Example**
@@ -13,11 +13,11 @@ groupBitXor(expr)

 **Arguments**

-`expr` – An expression that results in `UInt*` type.
+`expr` – An expression that results in `UInt*` or `Int*` type.

 **Return value**

-Value of the `UInt*` type.
+Value of the `UInt*` or `Int*` type.

 **Example**
@@ -78,6 +78,22 @@ GROUP BY
 │           1 │ Bobruisk │ Firefox │
 └─────────────┴──────────┴─────────┘
 ```
+### Important note!
+Using multiple `arrayJoin` with the same expression may not produce the expected results due to optimizations.
+In such cases, consider modifying the repeated array expression with extra operations that do not affect the join result - e.g. `arrayJoin(arraySort(arr))`, `arrayJoin(arrayConcat(arr, []))`
+
+Example:
+```sql
+SELECT
+    arrayJoin(dice) as first_throw,
+    /* arrayJoin(dice) as second_throw */ -- is technically correct, but will annihilate result set
+    arrayJoin(arrayConcat(dice, [])) as second_throw -- intentionally changed expression to force re-evaluation
+FROM (
+    SELECT [1, 2, 3, 4, 5, 6] as dice
+);
+```
+
+
 Note the [ARRAY JOIN](../statements/select/array-join.md) syntax in the SELECT query, which provides broader possibilities.
 `ARRAY JOIN` allows you to convert multiple arrays with the same number of elements at a time.
@@ -26,19 +26,27 @@ SELECT

 ## makeDate

-Creates a [Date](../../sql-reference/data-types/date.md) from a year, month and day argument.
+Creates a [Date](../../sql-reference/data-types/date.md)
+- from a year, month and day argument, or
+- from a year and day of year argument.

 **Syntax**

 ``` sql
-makeDate(year, month, day)
+makeDate(year, month, day);
+makeDate(year, day_of_year);
 ```

+Alias:
+- `MAKEDATE(year, month, day);`
+- `MAKEDATE(year, day_of_year);`
+
 **Arguments**

 - `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
 - `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
 - `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
+- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).

 **Returned value**
@@ -48,6 +56,8 @@ Type: [Date](../../sql-reference/data-types/date.md).

 **Example**

+Create a Date from a year, month and day:
+
 ``` sql
 SELECT makeDate(2023, 2, 28) AS Date;
 ```
@@ -60,6 +70,19 @@ Result:
 └────────────┘
 ```

+Create a Date from a year and day of year argument:
+
+``` sql
+SELECT makeDate(2023, 42) AS Date;
+```
+
+Result:
+
+``` text
+┌───────date─┐
+│ 2023-02-11 │
+└────────────┘
+```
 ## makeDate32

 Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md).
@@ -108,6 +131,12 @@ Result:

 Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md).

+**Syntax**
+
+``` sql
+makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
+```
+
 ## timeZone

 Returns the timezone of the server.
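An illustrative aside on the `makeDateTime64` syntax added above (not part of the diff; the argument values are arbitrary, and the fraction/precision semantics are assumed from the signature):

```sql
-- year, month, day, hour, minute, second, then optional fraction and precision
SELECT makeDateTime64(2023, 2, 28, 17, 12, 33, 123, 3) AS dt64; -- 2023-02-28 17:12:33.123
```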
@@ -1215,96 +1215,3 @@ Result:
 │ A240             │
 └──────────────────┘
 ```
-
-## extractKeyValuePairs
-
-Extracts key-value pairs from any string. The string does not need to be 100% structured in a key value pair format;
-
-It can contain noise (e.g. log files). The key-value pair format to be interpreted should be specified via function arguments.
-
-A key-value pair consists of a key followed by a `key_value_delimiter` and a value. Quoted keys and values are also supported. Key value pairs must be separated by pair delimiters.
-
-**Syntax**
-``` sql
-extractKeyValuePairs(data, [key_value_delimiter], [pair_delimiter], [quoting_character])
-```
-
-**Arguments**
-- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to `\space`, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-
-**Returned values**
-- The extracted key-value pairs in a Map(String, String).
-
-**Examples**
-
-Query:
-
-**Simple case**
-``` sql
-arthur :) select extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
-
-SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
-
-Query id: f9e0ca6f-3178-4ee2-aa2c-a5517abb9cee
-
-┌─kv──────────────────────────────────────────────────────────────────────┐
-│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-**Single quote as quoting character**
-``` sql
-arthur :) select extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
-
-SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
-
-Query id: 0e22bf6b-9844-414a-99dc-32bf647abd5e
-
-┌─kv───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
-│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'} │
-└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
-```
-
-**Escape sequences without escape sequences support**
-``` sql
-arthur :) select extractKeyValuePairs('age:a\\x0A\\n\\0') as kv
-
-SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv
-
-Query id: e9fd26ee-b41f-4a11-b17f-25af6fd5d356
-
-┌─kv─────────────────────┐
-│ {'age':'a\\x0A\\n\\0'} │
-└────────────────────────┘
-```
-
-## extractKeyValuePairsWithEscaping
-
-Same as `extractKeyValuePairs` but with escaping support.
-
-Escape sequences supported: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
-Non standard escape sequences are returned as it is (including the backslash) unless they are one of the following:
-`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31).
-
-This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following
-input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbbb`.
-- Pre-escaping: Pre-escaping it will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa`
-- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as it is.
-
-Leading escape sequences will be skipped in keys and will be considered invalid for values.
-
-**Escape sequences with escape sequence support turned on**
-``` sql
-arthur :) select extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') as kv
-
-SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv
-
-Query id: 44c114f0-5658-4c75-ab87-4574de3a1645
-
-┌─kv────────────────┐
-│ {'age':'a\n\n\0'} │
-└───────────────────┘
-```
@@ -109,6 +109,108 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
 └───────────────────────────────────────────────────────┘
 ```

+## extractKeyValuePairs
+
+Extracts key-value pairs, i.e. a [Map(String, String)](../../sql-reference/data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).
+
+A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported.
+
+**Syntax**
+
+``` sql
+extractKeyValuePairs(data[, key_value_delimiter[, pair_delimiter[, quoting_character]]])
+```
+
+Alias:
+- `str_to_map`
+- `mapFromString`
+
+**Arguments**
+
+- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `pair_delimiters` - Set of characters to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+
+**Returned values**
+
+- A [Map(String, String)](../../sql-reference/data-types/map.md) of key-value pairs.
+
+**Examples**
+
+Simple case:
+
+``` sql
+SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
+```
+
+Result:
+
+``` text
+┌─kv──────────────────────────────────────────────────────────────────────┐
+│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
+Single quote as quoting character:
+
+``` sql
+SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
+```
+
+Result:
+
+``` text
+┌─kv───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'} │
+└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+Escape sequences without escape sequences support:
+
+``` sql
+SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv
+```
+
+Result:
+
+``` text
+┌─kv─────────────────────┐
+│ {'age':'a\\x0A\\n\\0'} │
+└────────────────────────┘
+```
+
+## extractKeyValuePairsWithEscaping
+
+Same as `extractKeyValuePairs` but with escaping support.
+
+Supported escape sequences: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
+Non standard escape sequences are returned as is (including the backslash) unless they are one of the following:
+`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31).
+
+This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following
+input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbbb`.
+- Pre-escaping: Pre-escaping it will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa`
+- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as it is.
+
+Leading escape sequences will be skipped in keys and will be considered invalid for values.
+
+**Examples**
+
+Escape sequences with escape sequence support turned on:
+
+``` sql
+SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv
+```
+
+Result:
+
+``` text
+┌─kv────────────────┐
+│ {'age':'a\n\n\0'} │
+└───────────────────┘
+```
+
 ## mapAdd

 Collect all the keys and sum corresponding values.
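A small aside (not part of the diff): the `str_to_map` alias registered above behaves exactly like `extractKeyValuePairs`, e.g.:

```sql
SELECT str_to_map('name:neymar, age:31') AS kv; -- {'name':'neymar','age':'31'}
```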
@@ -105,22 +105,6 @@ Result:

 - [CREATE DATABASE](https://clickhouse.com/docs/en/sql-reference/statements/create/database/#query-language-create-database)

-## SHOW PROCESSLIST
-
-``` sql
-SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format]
-```
-
-Outputs the content of the [system.processes](../../operations/system-tables/processes.md#system_tables-processes) table, that contains a list of queries that is being processed at the moment, excepting `SHOW PROCESSLIST` queries.
-
-The `SELECT * FROM system.processes` query returns data about all the current queries.
-
-Tip (execute in the console):
-
-``` bash
-$ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
-```
-
 ## SHOW TABLES

 Displays a list of tables.
@@ -284,6 +268,77 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
 └──────────────┘
 ```

+## SHOW INDEX
+
+Displays a list of primary and data skipping indexes of a table.
+
+```sql
+SHOW [EXTENDED] {INDEX | INDEXES | KEYS } {FROM | IN} <table> [{FROM | IN} <db>] [WHERE <expr>] [INTO OUTFILE <filename>] [FORMAT <format>]
+```
+
+The database and table name can be specified in abbreviated form as `<db>.<table>`, i.e. `FROM tab FROM db` and `FROM db.tab` are
+equivalent. If no database is specified, the query assumes the current database.
+
+The optional keyword `EXTENDED` currently has no effect; it only exists for MySQL compatibility.
+
+`SHOW INDEX` produces a result table with the following structure:
+- table - The name of the table (String)
+- non_unique - 0 if the index can contain duplicates, 1 otherwise (UInt8)
+- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
+- seq_in_index - Currently unused
+- column_name - Currently unused
+- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String))
+- cardinality - Currently unused
+- sub_part - Currently unused
+- packed - Currently unused
+- null - Currently unused
+- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String)
+- comment - Currently unused
+- index_comment - Currently unused
+- visible - If the index is visible to the optimizer, always `YES` (String)
+- expression - The index expression (String)
+
+**Examples**
+
+Getting information about all indexes in table 'tbl':
+
+```sql
+SHOW INDEX FROM 'tbl'
+```
+
+Result:
+
+``` text
+┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
+│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │
+│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │
+│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │
+│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │
+│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │
+└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
+```
+
+**See also**
+
+- [system.tables](../../operations/system-tables/tables.md)
+- [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md)
+
+## SHOW PROCESSLIST
+
+``` sql
+SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format]
+```
+
+Outputs the content of the [system.processes](../../operations/system-tables/processes.md#system_tables-processes) table, that contains a list of queries that is being processed at the moment, excepting `SHOW PROCESSLIST` queries.
+
+The `SELECT * FROM system.processes` query returns data about all the current queries.
+
+Tip (execute in the console):
+
+``` bash
+$ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
+```
+
 ## SHOW GRANTS

 Shows privileges for a user.
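One more aside (not part of the diff): the `[WHERE <expr>]` clause in the `SHOW INDEX` syntax above filters on the listed result columns, so a hypothetical query for only the min-max indexes of `tbl` would be:

```sql
SHOW INDEX FROM tbl WHERE index_type = 'minmax';
```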
docs/en/sql-reference/table-functions/gcs.md | 184 (new file)
@@ -0,0 +1,184 @@
+---
+slug: /en/sql-reference/table-functions/gcs
+sidebar_position: 45
+sidebar_label: gcs
+keywords: [gcs, bucket]
+---
+
+# gcs Table Function
+
+Provides a table-like interface to select/insert files in [Google Cloud Storage](https://cloud.google.com/storage/).
+
+**Syntax**
+
+``` sql
+gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression])
+```
+
+:::tip GCS
+The GCS Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs](https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC.
+:::
+
+**Arguments**
+
+- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
+
+  :::note GCS
+  The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API:
+  ```
+  https://storage.googleapis.com/<bucket>/<folder>/<filename(s)>
+  ```
+  and not ~~https://storage.cloud.google.com~~.
+  :::
+
+- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
+- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.
+
+**Returned value**
+
+A table with the specified structure for reading or writing data in the specified file.
+
+**Examples**
+
+Selecting the first two rows from the table from GCS file `https://storage.googleapis.com/my-test-bucket-768/data.csv`:
+
+``` sql
+SELECT *
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
+LIMIT 2;
+```
+
+``` text
+┌─column1─┬─column2─┬─column3─┐
+│ 1 │ 2 │ 3 │
+│ 3 │ 2 │ 1 │
+└─────────┴─────────┴─────────┘
+```
+
+The same, but from a file with `gzip` compression:
+
+``` sql
+SELECT *
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
+LIMIT 2;
+```
+
+``` text
+┌─column1─┬─column2─┬─column3─┐
+│ 1 │ 2 │ 3 │
+│ 3 │ 2 │ 1 │
+└─────────┴─────────┴─────────┘
+```
+
+## Usage
+
+Suppose that we have several files with the following URIs on GCS:
+
+- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_1.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_2.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_3.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_4.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_1.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_2.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_3.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_4.csv'
+
+Count the number of rows in files ending with numbers from 1 to 3:
+
+``` sql
+SELECT count(*)
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32')
+```
+
+``` text
+┌─count()─┐
+│ 18 │
+└─────────┘
+```
+
+Count the total number of rows in all files in these two directories:
+
+``` sql
+SELECT count(*)
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32')
+```
+
+``` text
+┌─count()─┐
+│ 24 │
+└─────────┘
+```
+
+:::warning
+If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
+:::
+
+Count the total number of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
+
+``` sql
+SELECT count(*)
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
+```
+
+``` text
+┌─count()─┐
+│ 12 │
+└─────────┘
+```
+
+Insert data into file `test-data.csv.gz`:
+
+``` sql
+INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
+VALUES ('test-data', 1), ('test-data-2', 2);
+```
+
+Insert data into file `test-data.csv.gz` from an existing table:
+
+``` sql
+INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
+SELECT name, value FROM existing_table;
+```
+
+Glob `**` can be used for recursive directory traversal. Consider the example below; it will fetch all files from the `my-test-bucket-768` directory recursively:
+
+``` sql
+SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip');
+```
+
+The query below gets data from all `test-data.csv.gz` files from any folder inside the `my-test-bucket` directory recursively:
+
+``` sql
+SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
+```
+
+## Partitioned Write
+
+If you specify a `PARTITION BY` expression when inserting data into a `GCS` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency.
+
+**Examples**
+
+1. Using partition ID in a key creates separate files:
+
+```sql
+INSERT INTO TABLE FUNCTION
+    gcs('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32')
+    PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24);
+```
+As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`.
+
+2. Using partition ID in a bucket name creates files in different buckets:
+
+```sql
+INSERT INTO TABLE FUNCTION
+    gcs('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32')
+    PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24);
+```
+As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`.
+
+**See Also**
+
+- [S3 table function](s3.md)
+- [S3 engine](../../engines/table-engines/integrations/s3.md)
@@ -77,15 +77,37 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.

 The ClickHouse team at Yandex recommends using the official precompiled `rpm` packages for CentOS, RedHat, and all other rpm-based Linux distributions.

+#### Setting up the official repository
+
+First, you need to connect the official repository:
+
 ``` bash
 sudo yum install -y yum-utils
 sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
-sudo yum install -y clickhouse-server clickhouse-client
 ```

-sudo /etc/init.d/clickhouse-server start
-clickhouse-client # or "clickhouse-client --password" if you set up a password.
+For systems with the `zypper` package manager (openSUSE, SLES):
+
+``` bash
+sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g
+sudo zypper --gpg-auto-import-keys refresh clickhouse-stable
+```
+
+After that, any `yum install` command can be replaced with `zypper install`. To specify a particular version, add `-$VERSION` to the end of the package name, e.g. `clickhouse-client-22.2.2.22`.
+
+#### Installing the server and client
+
+``` bash
+sudo yum install -y clickhouse-server clickhouse-client
+```
+
+#### Starting the server
+
+``` bash
+sudo systemctl enable clickhouse-server
+sudo systemctl start clickhouse-server
+sudo systemctl status clickhouse-server
+clickhouse-client # or "clickhouse-client --password" if a password is set
+```

 <details markdown="1">
@@ -84,6 +84,17 @@ sudo /etc/init.d/clickhouse-server start
 clickhouse-client # or "clickhouse-client --password" if you set up a password.
 ```

+For systems with `zypper` package manager (openSUSE, SLES):
+
+``` bash
+sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g
+sudo zypper --gpg-auto-import-keys refresh clickhouse-stable
+sudo zypper install -y clickhouse-server clickhouse-client
+
+sudo /etc/init.d/clickhouse-server start
+clickhouse-client # or "clickhouse-client --password" if you set up a password.
+```
+
 <details markdown="1">

 <summary>Deprecated Method for installing rpm-packages</summary>
@@ -27,7 +27,6 @@
 #include <Common/ConcurrencyControl.h>
 #include <Common/Macros.h>
 #include <Common/ShellCommand.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/ZooKeeper/ZooKeeperNodeCache.h>
 #include <Common/getMultipleKeysFromConfig.h>
@@ -98,9 +97,7 @@
 #include "config_version.h"

 #if defined(OS_LINUX)
 #    include <cstddef>
 #    include <cstdlib>
 #    include <sys/socket.h>
 #    include <sys/un.h>
 #    include <sys/mman.h>
 #    include <sys/ptrace.h>
@@ -108,7 +105,6 @@
 #endif

 #if USE_SSL
 #    include <Poco/Net/Context.h>
 #    include <Poco/Net/SecureServerSocket.h>
 #endif
@@ -517,11 +517,12 @@
     let previous_query = '';

     const current_url = new URL(window.location);
+    const opened_locally = location.protocol == 'file:';

     const server_address = current_url.searchParams.get('url');
     if (server_address) {
         document.getElementById('url').value = server_address;
-    } else if (location.protocol != 'file:') {
+    } else if (!opened_locally) {
         /// Substitute the address of the server where the page is served.
         document.getElementById('url').value = location.origin;
     }
@@ -532,6 +533,19 @@
         document.getElementById('user').value = user_from_url;
     }

+    const pass_from_url = current_url.searchParams.get('password');
+    if (pass_from_url) {
+        document.getElementById('password').value = pass_from_url;
+        /// Browsers don't allow manipulating history for the 'file:' protocol.
+        if (!opened_locally) {
+            let replaced_pass = current_url.searchParams;
+            replaced_pass.delete('password');
+            window.history.replaceState(null, '',
+                window.location.origin + window.location.pathname + '?'
+                + replaced_pass.toString() + window.location.hash);
+        }
+    }
+
     function postImpl(posted_request_num, query)
     {
         const user = document.getElementById('user').value;
@@ -548,7 +562,7 @@
             '&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';

         // If play.html is opened locally, append username and password to the URL parameter to avoid CORS issue.
-        if (document.location.href.startsWith("file://")) {
+        if (opened_locally) {
             url += '&user=' + encodeURIComponent(user) +
                 '&password=' + encodeURIComponent(password)
         }
@@ -557,7 +571,7 @@

         xhr.open('POST', url, true);
         // If play.html is open normally, use Basic auth to prevent username and password being exposed in URL parameters
-        if (!document.location.href.startsWith("file://")) {
+        if (!opened_locally) {
             xhr.setRequestHeader("Authorization", "Basic " + btoa(user+":"+password));
         }
         xhr.onreadystatechange = function()
@@ -11,3 +11,10 @@ libc = "0.2.132"
 [lib]
 crate-type = ["staticlib"]
+
+[profile.release]
+debug = true
+
+[profile.release-thinlto]
+inherits = "release"
+# BLAKE3 module requires "full" LTO (not "thin") to get additional 10% performance benefit
+lto = true
@@ -34,9 +34,18 @@ function(clickhouse_import_crate)
     else()
         set(CMAKE_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE};debug")
     endif()
-    # NOTE: we may use LTO for rust too

-    corrosion_import_crate(NO_STD ${ARGN})
+    if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
+        set(profile "")
+    else()
+        if (ENABLE_THINLTO)
+            set(profile "release-thinlto")
+        else()
+            set(profile "release")
+        endif()
+    endif()
+
+    corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile})
 endfunction()

 # Add crate from the build directory.
@@ -18,3 +18,8 @@ crate-type = ["staticlib"]

 [profile.release]
 debug = true
+
+[profile.release-thinlto]
+inherits = "release"
+# We use LTO here as well to slightly decrease binary size
+lto = true
@@ -27,7 +27,7 @@ AggregateFunctionPtr createAggregateFunctionBitwise(const std::string & name, co
             "is illegal, because it cannot be used in bitwise operations",
             argument_types[0]->getName(), name);

-    AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionBitwise, Data>(*argument_types[0], argument_types[0]));
+    AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionBitwise, Data>(*argument_types[0], argument_types[0]));

     if (!res)
         throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
@@ -1,6 +1,5 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/Helpers.h>
 #include <DataTypes/DataTypeAggregateFunction.h>

 // TODO include this last because of a broken roaring header. See the comment inside.
@@ -13,6 +13,11 @@ namespace DB
 {
 struct Settings;

+namespace ErrorCodes
+{
+    extern const int INCORRECT_DATA;
+}
+

 /** Aggregate function that takes arbitrary number of arbitrary arguments and does nothing.
   */
@@ -69,7 +74,8 @@ public:
     {
         [[maybe_unused]] char symbol;
         readChar(symbol, buf);
-        assert(symbol == '\0');
+        if (symbol != '\0')
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect state of aggregate function 'nothing', it should contain exactly one zero byte, while it is {}.", static_cast<UInt32>(symbol));
     }

     void insertResultInto(AggregateDataPtr __restrict, IColumn & to, Arena *) const override
@@ -100,6 +100,28 @@ static IAggregateFunction * createWithUnsignedIntegerType(const IDataType & argu
     return nullptr;
 }

+template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
+static IAggregateFunction * createWithSignedIntegerType(const IDataType & argument_type, TArgs && ... args)
+{
+    WhichDataType which(argument_type);
+    if (which.idx == TypeIndex::Int8) return new AggregateFunctionTemplate<Int8, Data<Int8>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Int16) return new AggregateFunctionTemplate<Int16, Data<Int16>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate<Int32, Data<Int32>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate<Int64, Data<Int64>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Int128) return new AggregateFunctionTemplate<Int128, Data<Int128>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Int256) return new AggregateFunctionTemplate<Int256, Data<Int256>>(std::forward<TArgs>(args)...);
+    return nullptr;
+}
+
+template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
+static IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs && ... args)
+{
+    IAggregateFunction * f = createWithUnsignedIntegerType<AggregateFunctionTemplate, Data>(argument_type, std::forward<TArgs>(args)...);
+    if (f)
+        return f;
+    return createWithSignedIntegerType<AggregateFunctionTemplate, Data>(argument_type, std::forward<TArgs>(args)...);
+}
+
 template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
 static IAggregateFunction * createWithBasicNumberOrDateOrDateTime(const IDataType & argument_type, TArgs &&... args)
 {
@@ -335,7 +335,7 @@ public:
         if constexpr (std::endian::native == std::endian::little)
             hash_value = hash(x);
         else
-            hash_value = __builtin_bswap32(hash(x));
+            hash_value = std::byteswap(hash(x));

         if (!good(hash_value))
             return;
@@ -544,7 +544,8 @@ if (TARGET ch_contrib::qpl)
     dbms_target_link_libraries(PUBLIC ch_contrib::qpl)
 endif ()

-dbms_target_link_libraries(PRIVATE _boost_context)
+target_link_libraries(clickhouse_common_io PUBLIC boost::context)
+dbms_target_link_libraries(PUBLIC boost::context)

 if (ENABLE_NLP)
     dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
@@ -60,13 +60,7 @@ bool ColumnFixedString::isDefaultAt(size_t index) const
 void ColumnFixedString::insert(const Field & x)
 {
     const String & s = x.get<const String &>();
-
-    if (s.size() > n)
-        throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string '{}' for FixedString column", s);
-
-    size_t old_size = chars.size();
-    chars.resize_fill(old_size + n);
-    memcpy(chars.data() + old_size, s.data(), s.size());
+    insertData(s.data(), s.size());
 }

 void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
@ -87,8 +81,9 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string for FixedString column");
|
||||
|
||||
size_t old_size = chars.size();
|
||||
chars.resize_fill(old_size + n);
|
||||
chars.resize(old_size + n);
|
||||
memcpy(chars.data() + old_size, pos, length);
|
||||
memset(chars.data() + old_size + length, 0, n - length);
|
||||
}
|
||||
|
||||
StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const
|
||||
@ -278,7 +273,7 @@ void ColumnFixedString::expand(const IColumn::Filter & mask, bool inverted)

    ssize_t index = mask.size() - 1;
    ssize_t from = size() - 1;
    chars.resize_fill(mask.size() * n, 0);
    chars.resize_fill(mask.size() * n);
    while (index >= 0)
    {
        if (!!mask[index] ^ inverted)
@ -485,13 +485,8 @@ void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique)
ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const
{
    auto sub_positions = IColumn::mutate(idx.getPositions()->cut(start, length));
    /// Create column with new indexes and old dictionary.
    /// Dictionary is shared, but will be recreated after compactInplace call.
    auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), std::move(sub_positions));
    /// Will create new dictionary.
    column->compactInplace();

    return column;
    auto new_column_unique = Dictionary::compact(dictionary.getColumnUnique(), sub_positions);
    return ColumnLowCardinality::create(std::move(new_column_unique), std::move(sub_positions));
}

void ColumnLowCardinality::compactInplace()
@ -589,7 +584,7 @@ size_t ColumnLowCardinality::Index::getSizeOfIndexType(const IColumn & column, s
        column.getName());
}

void ColumnLowCardinality::Index::attachPositions(ColumnPtr positions_)
void ColumnLowCardinality::Index::attachPositions(MutableColumnPtr positions_)
{
    positions = std::move(positions_);
    updateSizeOfType();
@ -820,21 +815,23 @@ void ColumnLowCardinality::Dictionary::setShared(const ColumnPtr & column_unique
    shared = true;
}

void ColumnLowCardinality::Dictionary::compact(ColumnPtr & positions)
void ColumnLowCardinality::Dictionary::compact(MutableColumnPtr & positions)
{
    auto new_column_unique = column_unique->cloneEmpty();
    column_unique = compact(getColumnUnique(), positions);
    shared = false;
}

    auto & unique = getColumnUnique();
MutableColumnPtr ColumnLowCardinality::Dictionary::compact(const IColumnUnique & unique, MutableColumnPtr & positions)
{
    auto new_column_unique = unique.cloneEmpty();
    auto & new_unique = static_cast<IColumnUnique &>(*new_column_unique);

    auto indexes = mapUniqueIndex(positions->assumeMutableRef());
    auto indexes = mapUniqueIndex(*positions);
    auto sub_keys = unique.getNestedColumn()->index(*indexes, 0);
    auto new_indexes = new_unique.uniqueInsertRangeFrom(*sub_keys, 0, sub_keys->size());

    positions = IColumn::mutate(new_indexes->index(*positions, 0));
    column_unique = std::move(new_column_unique);

    shared = false;
    return new_column_unique;
}

ColumnPtr ColumnLowCardinality::cloneWithDefaultOnNull() const
@ -160,7 +160,9 @@ public:

    void reserve(size_t n) override { idx.reserve(n); }

    size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); }
    /// Don't count the dictionary size as it can be shared between different blocks.
    size_t byteSize() const override { return idx.getPositions()->byteSize(); }

    size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); }
    size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }

@ -301,8 +303,8 @@ public:

        void checkSizeOfType();

        ColumnPtr detachPositions() { return std::move(positions); }
        void attachPositions(ColumnPtr positions_);
        MutableColumnPtr detachPositions() { return IColumn::mutate(std::move(positions)); }
        void attachPositions(MutableColumnPtr positions_);

        void countKeys(ColumnUInt64::Container & counts) const;

@ -350,7 +352,9 @@ private:
        bool isShared() const { return shared; }

        /// Create new dictionary with only keys that are mentioned in positions.
        void compact(ColumnPtr & positions);
        void compact(MutableColumnPtr & positions);

        static MutableColumnPtr compact(const IColumnUnique & column_unique, MutableColumnPtr & positions);

    private:
        WrappedPtr column_unique;
@ -214,7 +214,7 @@ void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n)
void ColumnNullable::insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length)
{
    getNestedColumn().insertRangeFrom(src, start, length);
    getNullMapData().resize_fill(getNullMapData().size() + length, 0);
    getNullMapData().resize_fill(getNullMapData().size() + length);
}

void ColumnNullable::insertManyFromNotNullable(const IColumn & src, size_t position, size_t length)
@ -176,7 +176,7 @@ void ColumnString::expand(const IColumn::Filter & mask, bool inverted)
    /// (if not, one of exceptions below will throw) and we can calculate the resulting chars size.
    UInt64 last_offset = offsets_data[from] + (mask.size() - offsets_data.size());
    offsets_data.resize(mask.size());
    chars_data.resize_fill(last_offset, 0);
    chars_data.resize_fill(last_offset);
    while (index >= 0)
    {
        offsets_data[index] = last_offset;
@ -26,9 +26,7 @@ using namespace DB;
template <typename T>
void checkColumn(
    const WeakHash32::Container & hash,
    const PaddedPODArray<T> & eq_class,
    size_t allowed_collisions = 0,
    size_t max_collisions_to_print = 10)
    const PaddedPODArray<T> & eq_class)
{
    ASSERT_EQ(hash.size(), eq_class.size());

@ -52,41 +50,6 @@ void checkColumn(
            }
        }
    }

    /// Check have not many collisions.
    {
        std::unordered_map<UInt32, T> map;
        size_t num_collisions = 0;

        std::stringstream collisions_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
        collisions_str.exceptions(std::ios::failbit);

        for (size_t i = 0; i < eq_class.size(); ++i)
        {
            auto & val = eq_class[i];
            auto it = map.find(hash[i]);

            if (it == map.end())
                map[hash[i]] = val;
            else if (it->second != val)
            {
                ++num_collisions;

                if (num_collisions <= max_collisions_to_print)
                {
                    collisions_str << "Collision:\n";
                }

                if (num_collisions > allowed_collisions)
                {
                    std::cerr << collisions_str.rdbuf();
                    break;
                }
            }
        }

        ASSERT_LE(num_collisions, allowed_collisions);
    }
}

TEST(WeakHash32, ColumnVectorU8)
@ -374,10 +337,7 @@ TEST(WeakHash32, ColumnString2)
    WeakHash32 hash(col->size());
    col->updateWeakHash32(hash);

    /// Now there is single collision between 'k' * 544 and 'q' * 2512 (which is calculated twice)
    size_t allowed_collisions = 4;

    checkColumn(hash.getData(), data, allowed_collisions);
    checkColumn(hash.getData(), data);
}

TEST(WeakHash32, ColumnString3)
@ -717,8 +677,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
    WeakHash32 hash(col_tuple->size());
    col_tuple->updateWeakHash32(hash);

    size_t allowed_collisions = 8;
    checkColumn(hash.getData(), eq, allowed_collisions);
    checkColumn(hash.getData(), eq);
}

TEST(WeakHash32, ColumnTupleUInt64FixedString)
@ -803,10 +762,5 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
    WeakHash32 hash(col_tuple->size());
    col_tuple->updateWeakHash32(hash);

    /// There are 2 collisions right now (repeated 2 times each):
    /// (0, [array of size 1212 with values 7]) vs (0, [array of size 2265 with values 17])
    /// (0, [array of size 558 with values 5]) vs (1, [array of size 879 with values 21])

    size_t allowed_collisions = 8;
    checkColumn(hash.getData(), eq_data, allowed_collisions);
    checkColumn(hash.getData(), eq_data);
}
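For context, a minimal test-style usage of the simplified `checkColumn` contract, following the pattern in the tests above (the column contents here are illustrative):

// Rows in the same equivalence class must receive equal 32-bit hashes.
auto col = ColumnUInt64::create();
PaddedPODArray<UInt64> eq_class;
for (UInt64 i = 0; i < 1024; ++i)
{
    col->insertValue(i % 32);
    eq_class.push_back(i % 32);
}

WeakHash32 hash(col->size());
col->updateWeakHash32(hash);
checkColumn(hash.getData(), eq_class);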
173
src/Common/ConcurrencyControl.cpp
Normal file
@ -0,0 +1,173 @@
#include <Common/ConcurrencyControl.h>
#include <Common/Exception.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

ConcurrencyControl::Slot::~Slot()
{
    allocation->release();
}

ConcurrencyControl::Slot::Slot(AllocationPtr && allocation_)
    : allocation(std::move(allocation_))
{
}

ConcurrencyControl::Allocation::~Allocation()
{
    // We have to lock parent's mutex to avoid race with grant()
    // NOTE: shortcut can be added, but it requires Allocation::mutex lock even to check if shortcut is possible
    parent.free(this);
}

[[nodiscard]] ConcurrencyControl::SlotPtr ConcurrencyControl::Allocation::tryAcquire()
{
    SlotCount value = granted.load();
    while (value)
    {
        if (granted.compare_exchange_strong(value, value - 1))
        {
            std::unique_lock lock{mutex};
            return SlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor
        }
    }
    return {}; // avoid unnecessary locking
}

ConcurrencyControl::SlotCount ConcurrencyControl::Allocation::grantedCount() const
{
    return granted;
}

ConcurrencyControl::Allocation::Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_)
    : parent(parent_)
    , limit(limit_)
    , allocated(granted_)
    , granted(granted_)
    , waiter(waiter_)
{
    if (allocated < limit)
        *waiter = this;
}

// Grant single slot to allocation, returns true iff more slot(s) are required
bool ConcurrencyControl::Allocation::grant()
{
    std::unique_lock lock{mutex};
    granted++;
    allocated++;
    return allocated < limit;
}

// Release one slot and grant it to other allocation if required
void ConcurrencyControl::Allocation::release()
{
    parent.release(1);
    std::unique_lock lock{mutex};
    released++;
    if (released > allocated)
        abort();
}

ConcurrencyControl::ConcurrencyControl()
    : cur_waiter(waiters.end())
{
}

ConcurrencyControl::~ConcurrencyControl()
{
    if (!waiters.empty())
        abort();
}

[[nodiscard]] ConcurrencyControl::AllocationPtr ConcurrencyControl::allocate(SlotCount min, SlotCount max)
{
    if (min > max)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "ConcurrencyControl: invalid allocation requirements");

    std::unique_lock lock{mutex};

    // Acquire as many slots as we can, but not lower than `min`
    SlotCount granted = std::max(min, std::min(max, available(lock)));
    cur_concurrency += granted;

    // Create allocation and start waiting if more slots are required
    if (granted < max)
        return AllocationPtr(new Allocation(*this, max, granted,
            waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */)));
    else
        return AllocationPtr(new Allocation(*this, max, granted));
}

void ConcurrencyControl::setMaxConcurrency(ConcurrencyControl::SlotCount value)
{
    std::unique_lock lock{mutex};
    max_concurrency = std::max<SlotCount>(1, value); // never allow max_concurrency to be zero
    schedule(lock);
}

ConcurrencyControl & ConcurrencyControl::instance()
{
    static ConcurrencyControl result;
    return result;
}

void ConcurrencyControl::free(Allocation * allocation)
{
    // Allocation is allowed to be canceled even if there are:
    //  - `amount`: granted slots (acquired slots are not possible, because Slot holds AllocationPtr)
    //  - `waiter`: active waiting for more slots to be allocated
    // Thus Allocation destruction may require the following lock, to avoid race conditions
    std::unique_lock lock{mutex};
    auto [amount, waiter] = allocation->cancel();

    cur_concurrency -= amount;
    if (waiter)
    {
        if (cur_waiter == *waiter)
            cur_waiter = waiters.erase(*waiter);
        else
            waiters.erase(*waiter);
    }
    schedule(lock);
}

void ConcurrencyControl::release(SlotCount amount)
{
    std::unique_lock lock{mutex};
    cur_concurrency -= amount;
    schedule(lock);
}

// Round-robin scheduling of available slots among waiting allocations
void ConcurrencyControl::schedule(std::unique_lock<std::mutex> &)
{
    while (cur_concurrency < max_concurrency && !waiters.empty())
    {
        cur_concurrency++;
        if (cur_waiter == waiters.end())
            cur_waiter = waiters.begin();
        Allocation * allocation = *cur_waiter;
        if (allocation->grant())
            ++cur_waiter;
        else
            cur_waiter = waiters.erase(cur_waiter); // last required slot has just been granted -- stop waiting
    }
}

ConcurrencyControl::SlotCount ConcurrencyControl::available(std::unique_lock<std::mutex> &) const
{
    if (cur_concurrency < max_concurrency)
        return max_concurrency - cur_concurrency;
    else
        return 0;
}

}
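A minimal usage sketch under the contract documented above (allocate a slot window, acquire slots one at a time before spawning worker threads); the work done inside each thread is a placeholder:

#include <Common/ConcurrencyControl.h>
#include <thread>
#include <vector>

void runWithControlledConcurrency()
{
    // Ask for at least 1 and at most 4 slots; slots not granted now may be
    // granted later by schedule() when other allocations release theirs.
    auto allocation = DB::ConcurrencyControl::instance().allocate(1, 4);

    std::vector<std::thread> workers;
    while (auto slot = allocation->tryAcquire())
        workers.emplace_back([s = std::move(slot)] { /* do a chunk of work */ });

    for (auto & worker : workers)
        worker.join(); // each Slot releases itself when the thread destroys it
}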
@ -5,17 +5,10 @@
#include <mutex>
#include <memory>
#include <list>
#include <condition_variable>

#include <Common/Exception.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}
}

/*
 * Controls how many threads can be allocated for a query (or another activity).
@ -53,17 +46,12 @@ public:
    // Scoped guard for acquired slot, see Allocation::tryAcquire()
    struct Slot : boost::noncopyable
    {
        ~Slot()
        {
            allocation->release();
        }
        ~Slot();

    private:
        friend struct Allocation; // for ctor

        explicit Slot(AllocationPtr && allocation_)
            : allocation(std::move(allocation_))
        {}
        explicit Slot(AllocationPtr && allocation_);

        AllocationPtr allocation;
    };
@ -74,47 +62,18 @@ public:
    // Manages group of slots for a single query, see ConcurrencyControl::allocate(min, max)
    struct Allocation : std::enable_shared_from_this<Allocation>, boost::noncopyable
    {
        ~Allocation()
        {
            // We have to lock parent's mutex to avoid race with grant()
            // NOTE: shortcut can be added, but it requires Allocation::mutex lock even to check if shortcut is possible
            parent.free(this);
        }
        ~Allocation();

        // Take one already granted slot if available. Lock-free iff there is no granted slot.
        [[nodiscard]] SlotPtr tryAcquire()
        {
            SlotCount value = granted.load();
            while (value)
            {
                if (granted.compare_exchange_strong(value, value - 1))
                {
                    std::unique_lock lock{mutex};
                    return SlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor
                }
            }
            return {}; // avoid unnecessary locking
        }
        [[nodiscard]] SlotPtr tryAcquire();

        SlotCount grantedCount() const
        {
            return granted;
        }
        SlotCount grantedCount() const;

    private:
        friend struct Slot; // for release()
        friend class ConcurrencyControl; // for grant(), free() and ctor

        Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_ = {})
            : parent(parent_)
            , limit(limit_)
            , allocated(granted_)
            , granted(granted_)
            , waiter(waiter_)
        {
            if (allocated < limit)
                *waiter = this;
        }
        Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_ = {});

        auto cancel()
        {
@ -126,23 +85,10 @@ public:
        }

        // Grant single slot to allocation, returns true iff more slot(s) are required
        bool grant()
        {
            std::unique_lock lock{mutex};
            granted++;
            allocated++;
            return allocated < limit;
        }
        bool grant();

        // Release one slot and grant it to other allocation if required
        void release()
        {
            parent.release(1);
            std::unique_lock lock{mutex};
            released++;
            if (released > allocated)
                abort();
        }
        void release();

        ConcurrencyControl & parent;
        const SlotCount limit;
@ -157,106 +103,32 @@ public:
    };

public:
    ConcurrencyControl()
        : cur_waiter(waiters.end())
    {}
    ConcurrencyControl();

    // WARNING: all Allocation objects MUST be destructed before ConcurrencyControl
    // NOTE: Recommended way to achieve this is to use `instance()` and do graceful shutdown of queries
    ~ConcurrencyControl()
    {
        if (!waiters.empty())
            abort();
    }
    ~ConcurrencyControl();

    // Allocate at least `min` and at most `max` slots.
    // If not all `max` slots were successfully allocated, a subscription for later allocation is created
    // Use `Allocation::tryAcquire()` to acquire allocated slot, before running a thread.
    [[nodiscard]] AllocationPtr allocate(SlotCount min, SlotCount max)
    {
        if (min > max)
            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "ConcurrencyControl: invalid allocation requirements");

        std::unique_lock lock{mutex};

        // Acquire as much slots as we can, but not lower than `min`
        SlotCount granted = std::max(min, std::min(max, available(lock)));
        cur_concurrency += granted;

        // Create allocation and start waiting if more slots are required
        if (granted < max)
            return AllocationPtr(new Allocation(*this, max, granted,
                waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */)));
        else
            return AllocationPtr(new Allocation(*this, max, granted));
    }
    [[nodiscard]] AllocationPtr allocate(SlotCount min, SlotCount max);

    void setMaxConcurrency(SlotCount value)
    {
        std::unique_lock lock{mutex};
        max_concurrency = std::max<SlotCount>(1, value); // never allow max_concurrency to be zero
        schedule(lock);
    }
    void setMaxConcurrency(SlotCount value);

    static ConcurrencyControl & instance()
    {
        static ConcurrencyControl result;
        return result;
    }
    static ConcurrencyControl & instance();

private:
    friend struct Allocation; // for free() and release()

    void free(Allocation * allocation)
    {
        // Allocation is allowed to be canceled even if there are:
        //  - `amount`: granted slots (acquired slots are not possible, because Slot holds AllocationPtr)
        //  - `waiter`: active waiting for more slots to be allocated
        // Thus Allocation destruction may require the following lock, to avoid race conditions
        std::unique_lock lock{mutex};
        auto [amount, waiter] = allocation->cancel();

        cur_concurrency -= amount;
        if (waiter)
        {
            if (cur_waiter == *waiter)
                cur_waiter = waiters.erase(*waiter);
            else
                waiters.erase(*waiter);
        }
        schedule(lock);
    }
    void free(Allocation * allocation);

    void release(SlotCount amount)
    {
        std::unique_lock lock{mutex};
        cur_concurrency -= amount;
        schedule(lock);
    }
    void release(SlotCount amount);

    // Round-robin scheduling of available slots among waiting allocations
    void schedule(std::unique_lock<std::mutex> &)
    {
        while (cur_concurrency < max_concurrency && !waiters.empty())
        {
            cur_concurrency++;
            if (cur_waiter == waiters.end())
                cur_waiter = waiters.begin();
            Allocation * allocation = *cur_waiter;
            if (allocation->grant())
                ++cur_waiter;
            else
                cur_waiter = waiters.erase(cur_waiter); // last required slot has just been granted -- stop waiting
        }
    }
    void schedule(std::unique_lock<std::mutex> &);

    SlotCount available(std::unique_lock<std::mutex> &) const
    {
        if (cur_concurrency < max_concurrency)
            return max_concurrency - cur_concurrency;
        else
            return 0;
    }
    SlotCount available(std::unique_lock<std::mutex> &) const;

    std::mutex mutex;
    Waiters waiters;
@ -264,3 +136,5 @@ private:
    SlotCount max_concurrency = Unlimited;
    SlotCount cur_concurrency = 0;
};

}
@ -191,10 +191,8 @@
    \
    M(InsertedWideParts, "Number of parts inserted in Wide format.") \
    M(InsertedCompactParts, "Number of parts inserted in Compact format.") \
    M(InsertedInMemoryParts, "Number of parts inserted in InMemory format.") \
    M(MergedIntoWideParts, "Number of parts merged into Wide format.") \
    M(MergedIntoCompactParts, "Number of parts merged into Compact format.") \
    M(MergedIntoInMemoryParts, "Number of parts merged into InMemory format.") \
    \
    M(MergeTreeDataProjectionWriterRows, "Number of rows INSERTed to MergeTree tables projection.") \
    M(MergeTreeDataProjectionWriterUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) INSERTed to MergeTree tables projection.") \
@ -9,6 +9,10 @@
#include <Common/MemorySanitizer.h>
#include <Common/SymbolIndex.h>

#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>

#include <atomic>
#include <filesystem>
#include <map>
@ -340,8 +344,6 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
        return callback("<Empty trace>");

#if defined(__ELF__) && !defined(OS_FREEBSD)
    std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
    out.exceptions(std::ios::failbit);

    using enum DB::Dwarf::LocationInfoMode;
    const auto mode = fatal ? FULL_WITH_INLINE : FAST;
@ -358,6 +360,7 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
        uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0;
        const void * physical_addr = reinterpret_cast<const void *>(uintptr_t(virtual_addr) - virtual_offset);

        DB::WriteBufferFromOwnString out;
        out << i << ". ";

        if (std::error_code ec; object && std::filesystem::exists(object->name, ec) && !ec)
@ -376,7 +379,10 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
            out << "?";

        if (shouldShowAddress(physical_addr))
            out << " @ " << physical_addr;
        {
            out << " @ ";
            DB::writePointerHex(physical_addr, out);
        }

        out << " in " << (object ? object->name : "?");

@ -393,7 +399,6 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
        }

        callback(out.str());
        out.str({});
    }
#else
    for (size_t i = stack_trace.offset; i < stack_trace.size; ++i)
@ -431,8 +436,7 @@ String toStringCached(const StackTrace::FramePointers & pointers, size_t offset,
        return it->second;
    else
    {
        std::ostringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
        out.exceptions(std::ios::failbit);
        DB::WriteBufferFromOwnString out;
        toStringEveryLineImpl(false, key, [&](std::string_view str) { out << str << '\n'; });

        return cache.emplace(StackTraceTriple{pointers, offset, size}, out.str()).first->second;
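The recurring substitution in this commit, `std::stringstream` replaced by `DB::WriteBufferFromOwnString`, follows one pattern; a minimal sketch of it (the helper function name is illustrative, and the usual `IO/Operators.h` stream operators are assumed):

#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>

std::string formatFrame(size_t i, const void * addr)
{
    DB::WriteBufferFromOwnString out; // accumulates into an owned std::string
    out << i << ". ";
    DB::writePointerHex(addr, out);   // hex-formats the pointer, e.g. "0x7f12..."
    return out.str();
}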
@ -6,6 +6,9 @@
#include <sstream>
#include <stdexcept>

#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>


namespace
{
@ -153,19 +156,17 @@ std::pair<bool, std::string> StudentTTest::compareAndReport(size_t confidence_le

    double mean_confidence_interval = table_value * t_statistic;

    std::stringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
    ss.exceptions(std::ios::failbit);
    DB::WriteBufferFromOwnString out;

    if (mean_difference > mean_confidence_interval && (mean_difference - mean_confidence_interval > 0.0001)) /// difference must be more than 0.0001, to take into account connection latency.
    {
        ss << "Difference at " << confidence_level[confidence_level_index] << "% confidence: ";
        ss << std::fixed << std::setprecision(8) << "mean difference is " << mean_difference << ", but confidence interval is " << mean_confidence_interval;
        return {false, ss.str()};
        out << "Difference at " << confidence_level[confidence_level_index] << "% confidence: ";
        out << "mean difference is " << mean_difference << ", but confidence interval is " << mean_confidence_interval;
        return {false, out.str()};
    }
    else
    {
        ss << "No difference proven at " << confidence_level[confidence_level_index] << "% confidence";
        return {true, ss.str()};
        out << "No difference proven at " << confidence_level[confidence_level_index] << "% confidence";
        return {true, out.str()};
    }
}
@ -397,7 +397,10 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_

            /// We don't run jobs after `shutdown` is set, but we have to properly dequeue all jobs and finish them.
            if (shutdown)
            {
                job_is_done = true;
                continue;
            }
        }

        ALLOW_ALLOCATIONS_IN_SCOPE;
@ -1,7 +1,6 @@
#if defined(OS_LINUX)
#include <Common/TimerDescriptor.h>
#include <Common/Exception.h>
#include <base/defines.h>

#include <sys/timerfd.h>
#include <fcntl.h>
@ -17,10 +17,10 @@ namespace Poco { class Logger; }

namespace
{
    [[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; };
    [[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); };
    [[maybe_unused]] std::unique_ptr<LogToStrImpl> getLogger(std::unique_ptr<LogToStrImpl> && logger) { return logger; };
    [[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLogger(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; };
    [[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; }
    [[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); }
    [[maybe_unused]] std::unique_ptr<LogToStrImpl> getLogger(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
    [[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLogger(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
}

#define LOG_IMPL_FIRST_ARG(X, ...) X
@ -43,7 +43,8 @@ void setThreadName(const char * name)
#else
    if (0 != prctl(PR_SET_NAME, name, 0, 0, 0))
#endif
        DB::throwFromErrno("Cannot set thread name with prctl(PR_SET_NAME, ...)", DB::ErrorCodes::PTHREAD_ERROR);
        if (errno != ENOSYS) /// It's ok if the syscall is unsupported in some environments.
            DB::throwFromErrno("Cannot set thread name with prctl(PR_SET_NAME, ...)", DB::ErrorCodes::PTHREAD_ERROR);

    memcpy(thread_name, name, std::min<size_t>(1 + strlen(name), THREAD_NAME_SIZE - 1));
}
@ -62,7 +63,8 @@ const char * getThreadName()
    // throw DB::Exception(DB::ErrorCodes::PTHREAD_ERROR, "Cannot get thread name with pthread_get_name_np()");
#else
    if (0 != prctl(PR_GET_NAME, thread_name, 0, 0, 0))
        DB::throwFromErrno("Cannot get thread name with prctl(PR_GET_NAME)", DB::ErrorCodes::PTHREAD_ERROR);
        if (errno != ENOSYS) /// It's ok if the syscall is unsupported in some environments.
            DB::throwFromErrno("Cannot get thread name with prctl(PR_GET_NAME)", DB::ErrorCodes::PTHREAD_ERROR);
#endif

    return thread_name;
@ -9,6 +9,8 @@
#include <Common/ConcurrencyControl.h>
#include <Common/randomSeed.h>

using namespace DB;

struct ConcurrencyControlTest
{
    ConcurrencyControl cc;
@ -276,9 +278,9 @@ TEST(ConcurrencyControl, MultipleThreads)
            queries.emplace_back([&, max_threads = max_threads_distribution(rng)]
            {
                run_query(max_threads);
                finished++;
                ++finished;
            });
            started++;
            ++started;
        }
        sleepForMicroseconds(5); // wait some queries to finish
        t.cc.setMaxConcurrency(cfg_max_concurrency - started % 3); // emulate configuration updates
@ -51,3 +51,28 @@ TEST(ThreadPool, ExceptionFromSchedule)
{
    EXPECT_TRUE(check());
}

static bool check2()
{
    ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, 2);

    pool.scheduleOrThrowOnError([&]{ throw std::runtime_error("Hello, world!"); });
    pool.scheduleOrThrowOnError([]{});

    try
    {
        pool.wait();
    }
    catch (const std::runtime_error &)
    {
        return true;
    }

    return false;
}

TEST(ThreadPool, ExceptionFromWait)
{
    for (size_t i = 0; i < 1000; ++i)
        EXPECT_TRUE(check2());
}
@ -96,7 +96,7 @@ qpl_job * DeflateQplJobHWPool::acquireJob(UInt32 & job_id)
    if (isJobPoolReady())
    {
        UInt32 retry = 0;
        auto index = distribution(random_engine);
        UInt32 index = distribution(random_engine);
        while (!tryLockJob(index))
        {
            index = distribution(random_engine);
@ -712,15 +712,15 @@ class IColumn;
    M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \
    \
    M(String, workload, "default", "Name of workload to be used to access resources", 0) \
    M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \
    \
    M(Bool, parallelize_output_from_storages, false, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
    M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
    \
    /** Experimental functions */ \
    M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
    M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
    M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
    M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
    M(Bool, allow_experimental_query_cache, false, "Enable experimental query cache", 0) \
    M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
    M(String, ann_index_select_query_params, "", "Parameters passed to ANN indexes in SELECT queries, the format is 'param1=x, param2=y, ...'", 0) \
    M(UInt64, max_limit_for_ann_queries, 1000000, "Maximum limit value for using ANN indexes is used to prevent memory overflow in search queries for indexes", 0) \
@ -734,7 +734,7 @@ class IColumn;
    M(Bool, multiple_joins_try_to_keep_original_names, false, "Do not add aliases to top level expression list on multiple joins rewrite", 0) \
    M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
    M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
    M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
    M(Bool, optimize_distinct_in_order, false, "This optimization has a bug and it is disabled. Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
    M(Bool, optimize_sorting_by_input_stream_properties, true, "Optimize sorting by sorting properties of input stream", 0) \
    M(UInt64, insert_keeper_max_retries, 20, "Max retries for keeper operations during insert", 0) \
    M(UInt64, insert_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for keeper operations during insert", 0) \
@ -826,7 +826,7 @@ class IColumn;
    M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
    M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
    /* TODO: Consider unifying this with https://github.com/ClickHouse/ClickHouse/issues/38755 */ \
    M(Bool, input_format_parquet_preserve_order, true, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \
    M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \
    M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
    M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
    M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
@ -904,6 +904,7 @@ class IColumn;
    M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
    M(Bool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \
    M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \
    M(Milliseconds, output_format_pretty_squash_ms, 100, "Squash blocks in Pretty formats if the time passed after the previous block is not greater than the specified threshold in milliseconds. This avoids printing multiple small blocks.", 0) \
    M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \
    M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \
    M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \
@ -80,8 +80,10 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
    {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}}},
    {"23.4", {{"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"},
    {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
              {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}}},
    {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"},
              {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"},
              {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"},
              {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}}},
    {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"},
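A sketch of how a `compatibility` target can consume this history: for every release strictly newer than the requested version, roll the listed settings back to their previous values. The helper and its field accesses below are assumptions for illustration, not the actual implementation:

// Hypothetical application of the change history to a Settings object.
void applyCompatibility(Settings & settings, const ClickHouseVersion & target)
{
    for (const auto & [version, changes] : settings_changes_history)
        if (target < version)                                     // change postdates the target release
            for (const auto & change : changes)
                settings.set(change.name, change.previous_value); // restore the old default
}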
@ -2,7 +2,6 @@

#include <Daemon/BaseDaemon.h>
#include <Daemon/SentryWriter.h>
#include <Parsers/toOneLineQuery.h>
#include <base/errnoToString.h>
#include <base/defines.h>

@ -359,10 +358,13 @@ private:
        /// NOTE: This still requires memory allocations and mutex lock inside logger.
        /// BTW we can also print it to stderr using write syscalls.

        std::stringstream bare_stacktrace; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
        bare_stacktrace << "Stack trace:";
        DB::WriteBufferFromOwnString bare_stacktrace;
        DB::writeString("Stack trace:", bare_stacktrace);
        for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
            bare_stacktrace << ' ' << stack_trace.getFramePointers()[i];
        {
            DB::writeChar(' ', bare_stacktrace);
            DB::writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace);
        }

        LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
    }
@ -264,7 +264,7 @@ void RegExpTreeDictionary::initGraph()
        if (regex_nodes.contains(pid))
            regex_nodes[pid]->children.push_back(id);
        else
            throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Unknown parent id {}", pid);
            throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Unknown parent id {} in regexp tree dictionary", pid);
    }
    std::set<UInt64> visited;
    UInt64 topology_id = 0;
@ -936,6 +936,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()

    if (result)
    {
        bool download_current_segment_succeeded = false;
        if (download_current_segment)
        {
            chassert(file_offset_of_buffer_end + size - 1 <= file_segment.range().right);
@ -954,6 +955,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
                    || file_segment.getCurrentWriteOffset(false) == implementation_buffer->getFileOffsetOfBufferEnd());

                LOG_TEST(log, "Successfully written {} bytes", size);
                download_current_segment_succeeded = true;

                // The implementation_buffer is valid and positioned correctly (at file_segment->getCurrentWriteOffset()).
                // Later reads for this file segment can reuse it.
@ -962,14 +964,15 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
                implementation_buffer_can_be_reused = true;
            }
            else
            {
                chassert(file_segment.state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
                LOG_TRACE(log, "Bypassing cache because writeCache method failed");
            }
        }
        else
        {
            LOG_TRACE(log, "No space left in cache to reserve {} bytes, will continue without cache download", size);

            if (!success)
            {
                read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
                chassert(file_segment.state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
            }
        }

@ -990,6 +993,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()

        file_offset_of_buffer_end += size;

        if (download_current_segment && download_current_segment_succeeded)
            chassert(file_segment.getCurrentWriteOffset(false) >= file_offset_of_buffer_end);
        chassert(file_offset_of_buffer_end <= read_until_position);
    }

@ -14,11 +14,6 @@
namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(
    ReadBufferCreator && read_buffer_creator_,
    const StoredObjects & blobs_to_read_,
@ -27,11 +22,13 @@ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(
    , read_buffer_creator(std::move(read_buffer_creator_))
    , blobs_to_read(blobs_to_read_)
    , settings(settings_)
    , current_object(!blobs_to_read_.empty() ? blobs_to_read_.front() : throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read zero number of objects"))
    , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "")
    , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather"))
    , enable_cache_log(!query_id.empty() && settings.enable_filesystem_cache_log)
{
    if (!blobs_to_read.empty())
        current_object = blobs_to_read.front();

    with_cache = settings.remote_fs_cache
        && settings.enable_filesystem_cache
        && (!query_id.empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache);
@ -72,7 +69,9 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c

void ReadBufferFromRemoteFSGather::appendFilesystemCacheLog()
{
    chassert(!current_object.remote_path.empty());
    if (current_object.remote_path.empty())
        return;

    FilesystemCacheLogElement elem
    {
        .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
@ -99,9 +98,7 @@ IAsynchronousReader::Result ReadBufferFromRemoteFSGather::readInto(char * data,
    file_offset_of_buffer_end = offset;
    bytes_to_ignore = ignore;

    assert(!bytes_to_ignore || initialized());

    auto result = nextImpl();
    const auto result = nextImpl();

    if (result)
        return { working_buffer.size(), BufferBase::offset(), nullptr };
@ -111,6 +108,9 @@ IAsynchronousReader::Result ReadBufferFromRemoteFSGather::readInto(char * data,

void ReadBufferFromRemoteFSGather::initialize()
{
    if (blobs_to_read.empty())
        return;

    /// One clickhouse file can be split into multiple files in remote fs.
    auto current_buf_offset = file_offset_of_buffer_end;
    for (size_t i = 0; i < blobs_to_read.size(); ++i)
@ -144,21 +144,14 @@ bool ReadBufferFromRemoteFSGather::nextImpl()
    if (!current_buf)
        initialize();

    /// If current buffer has remaining data - use it.
    if (current_buf)
    {
        if (readImpl())
            return true;
    }
    else
    {
    if (!current_buf)
        return false;
    }

    if (readImpl())
        return true;

    if (!moveToNextBuffer())
    {
        return false;
    }

    return readImpl();
}
@ -128,6 +128,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.pretty.max_rows = settings.output_format_pretty_max_rows;
    format_settings.pretty.max_value_width = settings.output_format_pretty_max_value_width;
    format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
    format_settings.pretty.squash_milliseconds = static_cast<UInt64>(settings.output_format_pretty_squash_ms);
    format_settings.protobuf.input_flatten_google_wrappers = settings.input_format_protobuf_flatten_google_wrappers;
    format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers;
    format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference;
@ -809,6 +810,14 @@ bool FormatFactory::checkIfOutputFormatPrefersLargeBlocks(const String & name) c
    return target.prefers_large_blocks;
}

bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, ContextPtr context) const
{
    if (name == "Parquet" && context->getSettingsRef().input_format_parquet_preserve_order)
        return false;

    return true;
}

void FormatFactory::checkFormatName(const String & name) const
{
    auto it = dict.find(name);
@ -250,6 +250,8 @@ public:
    bool checkIfFormatHasAnySchemaReader(const String & name) const;
    bool checkIfOutputFormatPrefersLargeBlocks(const String & name) const;

    bool checkParallelizeOutputAfterReading(const String & name, ContextPtr context) const;

    void registerAdditionalInfoForSchemaCacheGetter(const String & name, AdditionalInfoForSchemaCacheGetter additional_info_for_schema_cache_getter);
    String getAdditionalInfoForSchemaCache(const String & name, ContextPtr context, const std::optional<FormatSettings> & format_settings_ = std::nullopt);

@ -227,6 +227,7 @@ struct FormatSettings
        UInt64 max_rows = 10000;
        UInt64 max_column_pad_width = 250;
        UInt64 max_value_width = 10000;
        UInt64 squash_milliseconds = 100;
        bool color = true;

        bool output_format_pretty_row_numbers = false;
@ -7,8 +7,8 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/callOnTypeIndex.h>
@ -808,23 +808,20 @@ struct ImplBLAKE3
    static constexpr auto name = "BLAKE3";
    enum { length = 32 };

#if !USE_BLAKE3
    [[noreturn]] static void apply(const char * begin, const size_t size, unsigned char* out_char_data)
#if !USE_BLAKE3
    [[noreturn]] static void apply(const char * /*begin*/, const size_t /*size*/, unsigned char * /*out_char_data*/)
    {
        UNUSED(begin);
        UNUSED(size);
        UNUSED(out_char_data);
        throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available. Rust code or BLAKE3 itself may be disabled.");
    }
#else
#else
    static void apply(const char * begin, const size_t size, unsigned char* out_char_data)
    {
#if defined(MEMORY_SANITIZER)
#    if defined(MEMORY_SANITIZER)
        auto err_msg = blake3_apply_shim_msan_compat(begin, safe_cast<uint32_t>(size), out_char_data);
        __msan_unpoison(out_char_data, length);
#else
#    else
        auto err_msg = blake3_apply_shim(begin, safe_cast<uint32_t>(size), out_char_data);
#endif
#    endif
        if (err_msg != nullptr)
        {
            auto err_st = std::string(err_msg);
@ -832,7 +829,7 @@ struct ImplBLAKE3
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function returned error message: {}", err_st);
        }
    }
#endif
#endif
};

template <typename Impl>
@ -3,6 +3,7 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Common/SipHash.h>
#include <Core/Block.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>

@ -228,6 +228,8 @@ REGISTER_FUNCTION(ExtractKeyValuePairs)
        └──────────────────┘
        ```)")
    );

    factory.registerAlias("str_to_map", NameExtractKeyValuePairs::name, FunctionFactory::CaseInsensitive);
    factory.registerAlias("mapFromString", NameExtractKeyValuePairs::name);
}

}
@ -1,5 +1,6 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDate32.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
@ -20,7 +21,6 @@ namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
@ -28,13 +28,7 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
/// A helper function to simplify comparisons of valid YYYY-MM-DD values for <,>,=
|
||||
inline constexpr Int64 YearMonthDayToSingleInt(Int64 year, Int64 month, Int64 day)
|
||||
{
|
||||
return year * 512 + month * 32 + day;
|
||||
}
|
||||
|
||||
/// Common logic to handle numeric arguments like year, month, day, hour, minute, second
|
||||
/// Functions common to makeDate, makeDate32, makeDateTime, makeDateTime64
|
||||
class FunctionWithNumericParamsBase : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -49,36 +43,23 @@ public:
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
protected:
|
||||
template <class ArgumentNames>
|
||||
void checkRequiredArguments(const ColumnsWithTypeAndName & arguments, const ArgumentNames & argument_names, const size_t optional_argument_count) const
|
||||
{
|
||||
if (arguments.size() < argument_names.size() || arguments.size() > argument_names.size() + optional_argument_count)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Function {} requires {} to {} arguments, but {} given",
|
||||
getName(), argument_names.size(), argument_names.size() + optional_argument_count, arguments.size());
|
||||
|
||||
for (size_t i = 0; i < argument_names.size(); ++i)
|
||||
{
|
||||
DataTypePtr argument_type = arguments[i].type;
|
||||
if (!isNumber(argument_type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument '{}' for function {} must be a number", std::string(argument_names[i]), getName());
|
||||
}
|
||||
}
|
||||
|
||||
template <class ArgumentNames>
|
||||
void convertRequiredArguments(const ColumnsWithTypeAndName & arguments, const ArgumentNames & argument_names, Columns & converted_arguments) const
|
||||
Columns convertMandatoryArguments(const ColumnsWithTypeAndName & arguments, const ArgumentNames & argument_names) const
|
||||
{
|
||||
Columns converted_arguments;
|
||||
const DataTypePtr converted_argument_type = std::make_shared<DataTypeFloat32>();
|
||||
converted_arguments.clear();
|
||||
converted_arguments.reserve(arguments.size());
|
||||
for (size_t i = 0; i < argument_names.size(); ++i)
|
||||
{
|
||||
ColumnPtr argument_column = castColumn(arguments[i], converted_argument_type);
|
||||
argument_column = argument_column->convertToFullColumnIfConst();
|
||||
converted_arguments.push_back(argument_column);
|
||||
}
|
||||
return converted_arguments;
|
||||
}
|
||||
};
|
||||
|
||||
@ -87,7 +68,8 @@ template <typename Traits>
|
||||
class FunctionMakeDate : public FunctionWithNumericParamsBase
|
||||
{
|
||||
private:
|
||||
static constexpr std::array argument_names = {"year", "month", "day"};
|
||||
static constexpr std::array mandatory_argument_names_year_month_day = {"year", "month", "day"};
|
||||
static constexpr std::array mandatory_argument_names_year_dayofyear = {"year", "dayofyear"};
|
||||
|
||||
public:
|
||||
static constexpr auto name = Traits::name;
|
||||
@ -96,56 +78,103 @@ public:
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return argument_names.size(); }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
checkRequiredArguments(arguments, argument_names, 0);
|
||||
const bool isYearMonthDayVariant = (arguments.size() == 3);
|
||||
|
||||
if (isYearMonthDayVariant)
|
||||
{
|
||||
FunctionArgumentDescriptors args{
|
||||
{mandatory_argument_names_year_month_day[0], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names_year_month_day[1], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names_year_month_day[2], &isNumber<IDataType>, nullptr, "Number"}
|
||||
};
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
}
|
||||
else
|
||||
{
|
||||
FunctionArgumentDescriptors args{
|
||||
{mandatory_argument_names_year_dayofyear[0], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names_year_dayofyear[1], &isNumber<IDataType>, nullptr, "Number"}
|
||||
};
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
}
|
||||
|
||||
return std::make_shared<typename Traits::ReturnDataType>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
const bool isYearMonthDayVariant = (arguments.size() == 3);
|
||||
|
||||
Columns converted_arguments;
|
||||
convertRequiredArguments(arguments, argument_names, converted_arguments);
|
||||
if (isYearMonthDayVariant)
|
||||
converted_arguments = convertMandatoryArguments(arguments, mandatory_argument_names_year_month_day);
|
||||
else
|
||||
converted_arguments = convertMandatoryArguments(arguments, mandatory_argument_names_year_dayofyear);
|
||||
|
||||
auto res_column = Traits::ReturnDataType::ColumnType::create(input_rows_count);
|
||||
auto & result_data = res_column->getData();
|
||||
|
||||
const auto & year_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[0]).getData();
|
||||
const auto & month_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[1]).getData();
|
||||
const auto & day_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[2]).getData();
|
||||
|
||||
const auto & date_lut = DateLUT::instance();
|
||||
const Int32 max_days_since_epoch = date_lut.makeDayNum(Traits::MAX_DATE[0], Traits::MAX_DATE[1], Traits::MAX_DATE[2]);
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
if (isYearMonthDayVariant)
|
||||
{
|
||||
const auto year = year_data[i];
|
||||
const auto month = month_data[i];
|
||||
const auto day = day_data[i];
|
||||
const auto & year_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[0]).getData();
|
||||
const auto & month_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[1]).getData();
|
||||
const auto & day_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[2]).getData();
|
||||
|
||||
Int32 day_num = 0;
|
||||
|
||||
if (year >= Traits::MIN_YEAR &&
|
||||
year <= Traits::MAX_YEAR &&
|
||||
month >= 1 && month <= 12 &&
|
||||
day >= 1 && day <= 31 &&
|
||||
YearMonthDayToSingleInt(static_cast<Int64>(year), static_cast<Int64>(month), static_cast<Int64>(day)) <= Traits::MAX_DATE)
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
day_num = date_lut.makeDayNum(static_cast<Int16>(year), static_cast<UInt8>(month), static_cast<UInt8>(day));
|
||||
}
|
||||
const auto year = year_data[i];
|
||||
const auto month = month_data[i];
|
||||
const auto day = day_data[i];
|
||||
|
||||
result_data[i] = day_num;
|
||||
Int32 day_num = 0;
|
||||
|
||||
if (year >= Traits::MIN_YEAR &&
|
||||
year <= Traits::MAX_YEAR &&
|
||||
month >= 1 && month <= 12 &&
|
||||
day >= 1 && day <= 31)
|
||||
{
|
||||
Int32 days_since_epoch = date_lut.makeDayNum(static_cast<Int16>(year), static_cast<UInt8>(month), static_cast<UInt8>(day));
|
||||
if (days_since_epoch <= max_days_since_epoch)
|
||||
day_num = days_since_epoch;
|
||||
}
|
||||
|
||||
result_data[i] = day_num;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & year_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[0]).getData();
|
||||
const auto & dayofyear_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[1]).getData();
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
const auto year = year_data[i];
|
||||
const auto dayofyear = dayofyear_data[i];
|
||||
|
||||
Int32 day_num = 0;
|
||||
|
||||
if (year >= Traits::MIN_YEAR &&
|
||||
year <= Traits::MAX_YEAR &&
|
||||
dayofyear >= 1 && dayofyear <= 365)
|
||||
{
|
||||
Int32 days_since_epoch = date_lut.makeDayNum(static_cast<Int16>(year), 1, 1) + static_cast<Int32>(dayofyear) - 1;
|
||||
if (days_since_epoch <= max_days_since_epoch)
|
||||
day_num = days_since_epoch;
|
||||
}
|
||||
|
||||
result_data[i] = day_num;
|
||||
}
|
||||
}
|
||||
|
||||
return res_column;
|
||||
}
|
||||
};
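The loop above maps any out-of-range input to day number 0, i.e. 1970-01-01, instead of throwing. A minimal standalone sketch of that policy; the bounds and the function name are hypothetical stand-ins for the Traits values:

#include <cstdio>

// Hypothetical stand-ins for Traits::MIN_YEAR / Traits::MAX_YEAR.
constexpr int MIN_YEAR = 1970;
constexpr int MAX_YEAR = 2149;

// Mirrors the per-row validation above: any out-of-range component or an
// overflowing day number yields 0 (the epoch) rather than an error.
int dayNumOrDefault(float year, float month, float day, int days_since_epoch, int max_days_since_epoch)
{
    if (year < MIN_YEAR || year > MAX_YEAR || month < 1 || month > 12 || day < 1 || day > 31)
        return 0;
    return days_since_epoch <= max_days_since_epoch ? days_since_epoch : 0;
}

int main()
{
    std::printf("%d\n", dayNumOrDefault(2150, 1, 1, 0, 65535)); // prints 0: year above MAX_YEAR
}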
/// makeDate(year, month, day)
|
||||
struct MakeDateTraits
|
||||
{
|
||||
static constexpr auto name = "makeDate";
|
||||
@ -154,10 +183,9 @@ struct MakeDateTraits
|
||||
static constexpr auto MIN_YEAR = 1970;
|
||||
static constexpr auto MAX_YEAR = 2149;
|
||||
/// This date has the maximum day number that fits in a 16-bit unsigned int.
|
||||
static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 6, 6);
|
||||
static constexpr std::array MAX_DATE = {MAX_YEAR, 6, 6};
|
||||
};
|
||||
|
||||
/// makeDate32(year, month, day)
|
||||
struct MakeDate32Traits
|
||||
{
|
||||
static constexpr auto name = "makeDate32";
|
||||
@ -165,30 +193,14 @@ struct MakeDate32Traits
|
||||
|
||||
static constexpr auto MIN_YEAR = 1900;
|
||||
static constexpr auto MAX_YEAR = 2299;
|
||||
static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 12, 31);
|
||||
static constexpr std::array MAX_DATE = {MAX_YEAR, 12, 31};
|
||||
};
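Both traits feed the same FunctionMakeDate template above. A self-contained sketch of this compile-time parameterization; the struct and function names here are hypothetical:

#include <array>
#include <cstdio>

struct NarrowTraits { static constexpr auto name = "narrow"; static constexpr std::array MAX_DATE = {2149, 6, 6}; };
struct WideTraits   { static constexpr auto name = "wide";   static constexpr std::array MAX_DATE = {2299, 12, 31}; };

// One template, two instantiations -- the same shape as FunctionMakeDate<Traits>.
template <typename Traits>
void describe()
{
    std::printf("%s caps at %d-%02d-%02d\n", Traits::name, Traits::MAX_DATE[0], Traits::MAX_DATE[1], Traits::MAX_DATE[2]);
}

int main()
{
    describe<NarrowTraits>();
    describe<WideTraits>();
}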
/// Common implementation for makeDateTime, makeDateTime64
|
||||
class FunctionMakeDateTimeBase : public FunctionWithNumericParamsBase
|
||||
{
|
||||
protected:
|
||||
static constexpr std::array argument_names = {"year", "month", "day", "hour", "minute", "second"};
|
||||
|
||||
public:
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
protected:
|
||||
void checkRequiredArguments(const ColumnsWithTypeAndName & arguments, const size_t optional_argument_count) const
|
||||
{
|
||||
FunctionWithNumericParamsBase::checkRequiredArguments(arguments, argument_names, optional_argument_count);
|
||||
}
|
||||
|
||||
void convertRequiredArguments(const ColumnsWithTypeAndName & arguments, Columns & converted_arguments) const
|
||||
{
|
||||
FunctionWithNumericParamsBase::convertRequiredArguments(arguments, argument_names, converted_arguments);
|
||||
}
|
||||
static constexpr std::array mandatory_argument_names = {"year", "month", "day", "hour", "minute", "second"};
|
||||
|
||||
template <typename T>
|
||||
static Int64 dateTime(T year, T month, T day_of_month, T hour, T minute, T second, const DateLUTImpl & lut)
|
||||
@ -235,7 +247,7 @@ protected:
|
||||
class FunctionMakeDateTime : public FunctionMakeDateTimeBase
|
||||
{
|
||||
private:
|
||||
static constexpr std::array<const char*, 1> optional_argument_names = {"timezone"};
|
||||
static constexpr std::array optional_argument_names = {"timezone"};
|
||||
|
||||
public:
|
||||
static constexpr auto name = "makeDateTime";
|
||||
@ -246,11 +258,24 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
checkRequiredArguments(arguments, optional_argument_names.size());
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{mandatory_argument_names[0], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[1], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[2], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[3], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[4], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[5], &isNumber<IDataType>, nullptr, "Number"}
|
||||
};
|
||||
|
||||
FunctionArgumentDescriptors optional_args{
|
||||
{optional_argument_names[0], &isString<IDataType>, nullptr, "String"}
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
|
||||
|
||||
/// Optional timezone argument
|
||||
std::string timezone;
|
||||
if (arguments.size() == argument_names.size() + 1)
|
||||
if (arguments.size() == mandatory_argument_names.size() + 1)
|
||||
timezone = extractTimezone(arguments.back());
|
||||
|
||||
return std::make_shared<DataTypeDateTime>(timezone);
|
||||
@ -260,11 +285,10 @@ public:
|
||||
{
|
||||
/// Optional timezone argument
|
||||
std::string timezone;
|
||||
if (arguments.size() == argument_names.size() + 1)
|
||||
if (arguments.size() == mandatory_argument_names.size() + 1)
|
||||
timezone = extractTimezone(arguments.back());
|
||||
|
||||
Columns converted_arguments;
|
||||
convertRequiredArguments(arguments, converted_arguments);
|
||||
Columns converted_arguments = convertMandatoryArguments(arguments, mandatory_argument_names);
|
||||
|
||||
auto res_column = ColumnDateTime::create(input_rows_count);
|
||||
auto & result_data = res_column->getData();
|
||||
@ -300,11 +324,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/// makeDateTime64(year, month, day, hour, minute, second, [fraction], [precision], [timezone])
|
||||
/// makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
|
||||
class FunctionMakeDateTime64 : public FunctionMakeDateTimeBase
|
||||
{
|
||||
private:
|
||||
static constexpr std::array<const char*, 3> optional_argument_names = {"fraction", "precision", "timezone"};
|
||||
static constexpr std::array optional_argument_names = {"fraction", "precision", "timezone"};
|
||||
static constexpr UInt8 DEFAULT_PRECISION = 3;
|
||||
|
||||
public:
|
||||
@ -316,11 +340,26 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
checkRequiredArguments(arguments, optional_argument_names.size());
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{mandatory_argument_names[0], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[1], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[2], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[3], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[4], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{mandatory_argument_names[5], &isNumber<IDataType>, nullptr, "Number"}
|
||||
};
|
||||
|
||||
if (arguments.size() >= argument_names.size() + 1)
|
||||
FunctionArgumentDescriptors optional_args{
|
||||
{optional_argument_names[0], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{optional_argument_names[1], &isNumber<IDataType>, nullptr, "Number"},
|
||||
{optional_argument_names[2], &isString<IDataType>, nullptr, "String"}
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
|
||||
|
||||
if (arguments.size() >= mandatory_argument_names.size() + 1)
|
||||
{
|
||||
const auto& fraction_argument = arguments[argument_names.size()];
|
||||
const auto& fraction_argument = arguments[mandatory_argument_names.size()];
|
||||
if (!isNumber(fraction_argument.type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument 'fraction' for function {} must be a number", getName());
|
||||
@ -328,12 +367,12 @@ public:
|
||||
|
||||
/// Optional precision argument
|
||||
Int64 precision = DEFAULT_PRECISION;
|
||||
if (arguments.size() >= argument_names.size() + 2)
|
||||
precision = extractPrecision(arguments[argument_names.size() + 1]);
|
||||
if (arguments.size() >= mandatory_argument_names.size() + 2)
|
||||
precision = extractPrecision(arguments[mandatory_argument_names.size() + 1]);
|
||||
|
||||
/// Optional timezone argument
|
||||
std::string timezone;
|
||||
if (arguments.size() == argument_names.size() + 3)
|
||||
if (arguments.size() == mandatory_argument_names.size() + 3)
|
||||
timezone = extractTimezone(arguments.back());
|
||||
|
||||
return std::make_shared<DataTypeDateTime64>(precision, timezone);
|
||||
@ -343,22 +382,21 @@ public:
|
||||
{
|
||||
/// Optional precision argument
|
||||
Int64 precision = DEFAULT_PRECISION;
|
||||
if (arguments.size() >= argument_names.size() + 2)
|
||||
precision = extractPrecision(arguments[argument_names.size() + 1]);
|
||||
if (arguments.size() >= mandatory_argument_names.size() + 2)
|
||||
precision = extractPrecision(arguments[mandatory_argument_names.size() + 1]);
|
||||
|
||||
/// Optional timezone argument
|
||||
std::string timezone;
|
||||
if (arguments.size() == argument_names.size() + 3)
|
||||
if (arguments.size() == mandatory_argument_names.size() + 3)
|
||||
timezone = extractTimezone(arguments.back());
|
||||
|
||||
Columns converted_arguments;
|
||||
convertRequiredArguments(arguments, converted_arguments);
|
||||
Columns converted_arguments = convertMandatoryArguments(arguments, mandatory_argument_names);
|
||||
|
||||
/// Optional fraction argument
|
||||
const ColumnVector<Float64>::Container * fraction_data = nullptr;
|
||||
if (arguments.size() >= argument_names.size() + 1)
|
||||
if (arguments.size() >= mandatory_argument_names.size() + 1)
|
||||
{
|
||||
ColumnPtr fraction_column = castColumn(arguments[argument_names.size()], std::make_shared<DataTypeFloat64>());
|
||||
ColumnPtr fraction_column = castColumn(arguments[mandatory_argument_names.size()], std::make_shared<DataTypeFloat64>());
|
||||
fraction_column = fraction_column->convertToFullColumnIfConst();
|
||||
converted_arguments.push_back(fraction_column);
|
||||
fraction_data = &typeid_cast<const ColumnFloat64 &>(*converted_arguments[6]).getData();
|
||||
@ -439,7 +477,7 @@ private:
|
||||
|
||||
REGISTER_FUNCTION(MakeDate)
|
||||
{
|
||||
factory.registerFunction<FunctionMakeDate<MakeDateTraits>>();
|
||||
factory.registerFunction<FunctionMakeDate<MakeDateTraits>>({}, FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionMakeDate<MakeDate32Traits>>();
|
||||
factory.registerFunction<FunctionMakeDateTime>();
|
||||
factory.registerFunction<FunctionMakeDateTime64>();
@ -4,9 +4,7 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Core/DecimalFunctions.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
@ -15,10 +13,7 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
#include <base/StringRef.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
#include <Common/FieldVisitorDump.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
@ -635,22 +630,27 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
static void checkAllowedType(const DataTypePtr & dt)
|
||||
static void checkAllowedType(const DataTypePtr & type)
|
||||
{
|
||||
if (dt->isNullable())
|
||||
checkAllowedTypeHelper(static_cast<const DataTypeNullable *>(dt.get())->getNestedType());
|
||||
if (type->isNullable())
|
||||
checkAllowedTypeHelper(static_cast<const DataTypeNullable *>(type.get())->getNestedType());
|
||||
else
|
||||
checkAllowedTypeHelper(dt);
|
||||
checkAllowedTypeHelper(type);
|
||||
}
|
||||
|
||||
static void checkAllowedTypeHelper(const DataTypePtr & dt)
|
||||
static void checkAllowedTypeHelper(const DataTypePtr & type)
|
||||
{
|
||||
if (isStringOrFixedString(dt))
|
||||
if (isStringOrFixedString(type))
|
||||
return;
|
||||
auto dtsize = dt->getMaximumSizeOfValueInMemory();
|
||||
if (dtsize <= sizeof(UInt64))
|
||||
return;
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} in function 'transform'", dt->getName());
|
||||
|
||||
if (type->haveMaximumSizeOfValue())
|
||||
{
|
||||
auto data_type_size = type->getMaximumSizeOfValueInMemory();
|
||||
if (data_type_size <= sizeof(UInt64))
|
||||
return;
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} in function 'transform'", type->getName());
|
||||
}
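A standalone sketch of the check above, assuming only that a type either reports a maximum in-memory value size or does not: strings always pass, and fixed-size types pass only if a value fits into one 64-bit hash-map cell. The function name is hypothetical.

#include <cstddef>
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <string>

void checkFitsFastPath(bool is_string, std::optional<size_t> max_value_size, const std::string & type_name)
{
    if (is_string)
        return;
    // A missing size corresponds to haveMaximumSizeOfValue() == false above.
    if (max_value_size && *max_value_size <= sizeof(uint64_t))
        return;
    throw std::runtime_error("Unexpected type " + type_name + " in function 'transform'");
}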
/// Can be called from different threads. It works only on the first call.
899
src/IO/ReadWriteBufferFromHTTP.cpp
Normal file
@ -0,0 +1,899 @@
|
||||
#include "ReadWriteBufferFromHTTP.h"
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event ReadBufferSeekCancelConnection;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_MANY_REDIRECTS;
|
||||
extern const int HTTP_RANGE_NOT_SATISFIABLE;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_SEEK_THROUGH_FILE;
|
||||
extern const int SEEK_POSITION_OUT_OF_BOUND;
|
||||
extern const int UNKNOWN_FILE_SIZE;
|
||||
}
|
||||
|
||||
template <typename TSessionFactory>
|
||||
UpdatableSession<TSessionFactory>::UpdatableSession(const Poco::URI & uri, UInt64 max_redirects_, std::shared_ptr<TSessionFactory> session_factory_)
|
||||
: max_redirects{max_redirects_}
|
||||
, initial_uri(uri)
|
||||
, session_factory(std::move(session_factory_))
|
||||
{
|
||||
session = session_factory->buildNewSession(uri);
|
||||
}
|
||||
|
||||
template <typename TSessionFactory>
|
||||
typename UpdatableSession<TSessionFactory>::SessionPtr UpdatableSession<TSessionFactory>::getSession() { return session; }
|
||||
|
||||
template <typename TSessionFactory>
|
||||
void UpdatableSession<TSessionFactory>::updateSession(const Poco::URI & uri)
|
||||
{
|
||||
++redirects;
|
||||
if (redirects <= max_redirects)
|
||||
session = session_factory->buildNewSession(uri);
|
||||
else
|
||||
throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects while trying to access {}", initial_uri.toString());
|
||||
}
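A self-contained mirror of updateSession's redirect accounting (class and member names hypothetical): the counter is bumped first and only then compared against the limit, so at most max_redirects fresh sessions are ever built, and the error names the initial URI.

#include <cstdint>
#include <stdexcept>
#include <string>

struct RedirectGuard
{
    uint64_t max_redirects;
    std::string initial_uri;
    uint64_t redirects = 0;

    void onRedirect()
    {
        if (++redirects > max_redirects)
            throw std::runtime_error("Too many redirects while trying to access " + initial_uri);
        // Here the real code rebuilds the HTTP session for the redirect URI.
    }
};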
template <typename TSessionFactory>
|
||||
std::shared_ptr<UpdatableSession<TSessionFactory>> UpdatableSession<TSessionFactory>::clone(const Poco::URI & uri)
|
||||
{
|
||||
return std::make_shared<UpdatableSession<TSessionFactory>>(uri, max_redirects, session_factory);
|
||||
}
|
||||
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept
|
||||
{
|
||||
static constexpr std::array non_retriable_errors{
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_IMPLEMENTED,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED};
|
||||
|
||||
return std::all_of(
|
||||
non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; });
|
||||
}
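A standalone mirror of isRetriableError: a status is retriable unless it is on a fixed deny-list. The Poco enum values are written as plain integers here (400, 401, 403, 404, 405, 501).

#include <algorithm>
#include <array>

bool isRetriable(int http_status)
{
    static constexpr std::array non_retriable = {400, 401, 403, 404, 405, 501};
    // Retriable means: not equal to any entry of the deny-list.
    return std::all_of(non_retriable.begin(), non_retriable.end(),
                       [&](int status) { return http_status != status; });
}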
static Poco::URI getUriAfterRedirect(const Poco::URI & prev_uri, Poco::Net::HTTPResponse & response)
|
||||
{
|
||||
auto location = response.get("Location");
|
||||
auto location_uri = Poco::URI(location);
|
||||
if (!location_uri.isRelative())
|
||||
return location_uri;
|
||||
/// The Location header contains a relative path, so we need to concatenate it
/// with the path from the original URI and normalize the result.
|
||||
auto path = std::filesystem::weakly_canonical(std::filesystem::path(prev_uri.getPath()) / location);
|
||||
location_uri = prev_uri;
|
||||
location_uri.setPath(path);
|
||||
return location_uri;
|
||||
}
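A runnable sketch of the relative-redirect handling above, using the same std::filesystem call: the Location value is appended to the previous request path and normalized lexically. The example paths are hypothetical.

#include <filesystem>
#include <iostream>

int main()
{
    // e.g. previous path "/db/query", Location header "../other"
    auto path = std::filesystem::weakly_canonical(std::filesystem::path("/db/query") / "../other");
    std::cout << path << '\n'; // prints "/db/other"
}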
template <typename UpdatableSessionPtr>
|
||||
bool ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::withPartialContent(const HTTPRange & range) const
|
||||
{
|
||||
/**
|
||||
* Add range header if we have some passed range
|
||||
* or if we want to retry GET request on purpose.
|
||||
*/
|
||||
return range.begin || range.end || retry_with_range_header;
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
size_t ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getRangeBegin() const { return read_range.begin.value_or(0); }
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
size_t ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getOffset() const { return getRangeBegin() + offset_from_begin_pos; }
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
std::istream * ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::callImpl(
|
||||
UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response,
|
||||
const std::string & method_, bool for_object_info)
|
||||
{
|
||||
// With an empty path Poco will send "POST HTTP/1.1" (a Poco bug).
|
||||
if (uri_.getPath().empty())
|
||||
uri_.setPath("/");
|
||||
|
||||
Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
|
||||
request.setHost(uri_.getHost()); // use original, not resolved host name in header
|
||||
|
||||
if (out_stream_callback)
|
||||
request.setChunkedTransferEncoding(true);
|
||||
else if (method == Poco::Net::HTTPRequest::HTTP_POST)
|
||||
request.setContentLength(0); /// No callback - no body
|
||||
|
||||
for (auto & [header, value] : http_header_entries)
|
||||
request.set(header, value);
|
||||
|
||||
std::optional<HTTPRange> range;
|
||||
if (!for_object_info)
|
||||
{
|
||||
if (withPartialContent(read_range))
|
||||
range = HTTPRange{getOffset(), read_range.end};
|
||||
}
|
||||
|
||||
if (range)
|
||||
{
|
||||
String range_header_value;
|
||||
if (range->end)
|
||||
range_header_value = fmt::format("bytes={}-{}", *range->begin, *range->end);
|
||||
else
|
||||
range_header_value = fmt::format("bytes={}-", *range->begin);
|
||||
LOG_TEST(log, "Adding header: Range: {}", range_header_value);
|
||||
request.set("Range", range_header_value);
|
||||
}
|
||||
|
||||
if (!credentials.getUsername().empty())
|
||||
credentials.authenticate(request);
|
||||
|
||||
LOG_TRACE(log, "Sending request to {}", uri_.toString());
|
||||
|
||||
auto sess = current_session->getSession();
|
||||
try
|
||||
{
|
||||
auto & stream_out = sess->sendRequest(request);
|
||||
|
||||
if (out_stream_callback)
|
||||
out_stream_callback(stream_out);
|
||||
|
||||
auto result_istr = receiveResponse(*sess, request, response, true);
|
||||
response.getCookies(cookies);
|
||||
|
||||
/// we can fetch object info while the request is being processed
|
||||
/// and we don't want to override any context used by it
|
||||
if (!for_object_info)
|
||||
content_encoding = response.get("Content-Encoding", "");
|
||||
|
||||
return result_istr;
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
/// We use the session's data storage to store the exception text.
/// Based on it, we can decide whether to reconnect the session or re-resolve the session host.
|
||||
sess->attachSessionData(e.message());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
size_t ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getFileSize()
|
||||
{
|
||||
if (!file_info)
|
||||
file_info = getFileInfo();
|
||||
|
||||
if (file_info->file_size)
|
||||
return *file_info->file_size;
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", uri.toString());
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
bool ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::checkIfActuallySeekable()
|
||||
{
|
||||
if (!file_info)
|
||||
file_info = getFileInfo();
|
||||
return file_info->seekable;
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
String ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getFileName() const { return uri.toString(); }
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getHeadResponse(Poco::Net::HTTPResponse & response)
|
||||
{
|
||||
for (size_t i = 0; i < settings.http_max_tries; ++i)
|
||||
{
|
||||
try
|
||||
{
|
||||
callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD, true, true);
|
||||
break;
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
if (i == settings.http_max_tries - 1 || !isRetriableError(response.getStatus()))
|
||||
throw;
|
||||
|
||||
LOG_ERROR(log, "Failed to make HTTP_HEAD request to {}. Error: {}", uri.toString(), e.displayText());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::setupExternalBuffer()
|
||||
{
|
||||
/**
|
||||
* use_external_buffer means we read into a buffer which
* was passed to us from somewhere else. We do not check whether the
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data, and
* each nextImpl() call can fill a different buffer.
|
||||
*/
|
||||
impl->set(internal_buffer.begin(), internal_buffer.size());
|
||||
assert(working_buffer.begin() != nullptr);
|
||||
assert(!internal_buffer.empty());
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::ReadWriteBufferFromHTTPBase(
|
||||
UpdatableSessionPtr session_,
|
||||
Poco::URI uri_,
|
||||
const Poco::Net::HTTPBasicCredentials & credentials_,
|
||||
const std::string & method_,
|
||||
OutStreamCallback out_stream_callback_,
|
||||
size_t buffer_size_,
|
||||
const ReadSettings & settings_,
|
||||
HTTPHeaderEntries http_header_entries_,
|
||||
const RemoteHostFilter * remote_host_filter_,
|
||||
bool delay_initialization,
|
||||
bool use_external_buffer_,
|
||||
bool http_skip_not_found_url_,
|
||||
std::optional<HTTPFileInfo> file_info_)
|
||||
: SeekableReadBuffer(nullptr, 0)
|
||||
, uri {uri_}
|
||||
, method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
|
||||
, session {session_}
|
||||
, out_stream_callback {out_stream_callback_}
|
||||
, credentials {credentials_}
|
||||
, http_header_entries {std::move(http_header_entries_)}
|
||||
, remote_host_filter {remote_host_filter_}
|
||||
, buffer_size {buffer_size_}
|
||||
, use_external_buffer {use_external_buffer_}
|
||||
, file_info(file_info_)
|
||||
, http_skip_not_found_url(http_skip_not_found_url_)
|
||||
, settings {settings_}
|
||||
, log(&Poco::Logger::get("ReadWriteBufferFromHTTP"))
|
||||
{
|
||||
if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0
|
||||
|| settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms)
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid setting for http backoff, "
|
||||
"must be http_max_tries >= 1 (current is {}) and "
|
||||
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
|
||||
settings.http_max_tries,
|
||||
settings.http_retry_initial_backoff_ms,
|
||||
settings.http_retry_max_backoff_ms);
|
||||
|
||||
// Configure the User-Agent header if it is not already set.
|
||||
const std::string user_agent = "User-Agent";
|
||||
auto iter = std::find_if(
|
||||
http_header_entries.begin(),
|
||||
http_header_entries.end(),
|
||||
[&user_agent](const HTTPHeaderEntry & entry) { return entry.name == user_agent; });
|
||||
|
||||
if (iter == http_header_entries.end())
|
||||
{
|
||||
http_header_entries.emplace_back("User-Agent", fmt::format("ClickHouse/{}", VERSION_STRING));
|
||||
}
|
||||
|
||||
if (!delay_initialization)
|
||||
{
|
||||
initialize();
|
||||
if (exception)
|
||||
std::rethrow_exception(exception);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors, bool for_object_info)
|
||||
{
|
||||
UpdatableSessionPtr current_session = nullptr;
|
||||
|
||||
/// we can fetch object info while the request is being processed
|
||||
/// and we don't want to override any context used by it
|
||||
if (for_object_info)
|
||||
current_session = session->clone(uri);
|
||||
else
|
||||
current_session = session;
|
||||
|
||||
call(current_session, response, method_, throw_on_all_errors, for_object_info);
|
||||
Poco::URI prev_uri = uri;
|
||||
|
||||
while (isRedirect(response.getStatus()))
|
||||
{
|
||||
Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response);
|
||||
prev_uri = uri_redirect;
|
||||
if (remote_host_filter)
|
||||
remote_host_filter->checkURL(uri_redirect);
|
||||
|
||||
current_session->updateSession(uri_redirect);
|
||||
|
||||
/// we can fetch object info while the request is being processed
|
||||
/// and we don't want to override any context used by it
|
||||
auto result_istr = callImpl(current_session, uri_redirect, response, method, for_object_info);
|
||||
if (!for_object_info)
|
||||
istr = result_istr;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::call(UpdatableSessionPtr & current_session, Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors, bool for_object_info)
|
||||
{
|
||||
try
|
||||
{
|
||||
/// we can fetch object info while the request is being processed
|
||||
/// and we don't want to override any context used by it
|
||||
auto result_istr = callImpl(current_session, saved_uri_redirect ? *saved_uri_redirect : uri, response, method_, for_object_info);
|
||||
if (!for_object_info)
|
||||
istr = result_istr;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// we can fetch object info while the request is being processed
|
||||
/// and we don't want to override any context used by it
|
||||
if (for_object_info)
|
||||
throw;
|
||||
|
||||
if (throw_on_all_errors)
|
||||
throw;
|
||||
|
||||
auto http_status = response.getStatus();
|
||||
|
||||
if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url)
|
||||
{
|
||||
initialization_error = InitializeError::SKIP_NOT_FOUND_URL;
|
||||
}
|
||||
else if (!isRetriableError(http_status))
|
||||
{
|
||||
initialization_error = InitializeError::NON_RETRYABLE_ERROR;
|
||||
exception = std::current_exception();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::initialize()
|
||||
{
|
||||
Poco::Net::HTTPResponse response;
|
||||
|
||||
call(session, response, method);
|
||||
if (initialization_error != InitializeError::NONE)
|
||||
return;
|
||||
|
||||
while (isRedirect(response.getStatus()))
|
||||
{
|
||||
Poco::URI uri_redirect = getUriAfterRedirect(saved_uri_redirect.value_or(uri), response);
|
||||
if (remote_host_filter)
|
||||
remote_host_filter->checkURL(uri_redirect);
|
||||
|
||||
session->updateSession(uri_redirect);
|
||||
|
||||
istr = callImpl(session, uri_redirect, response, method);
|
||||
saved_uri_redirect = uri_redirect;
|
||||
}
|
||||
|
||||
if (response.hasContentLength())
|
||||
LOG_DEBUG(log, "Received response with content length: {}", response.getContentLength());
|
||||
|
||||
if (withPartialContent(read_range) && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
|
||||
{
|
||||
/// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0.
|
||||
if (getOffset() != 0)
|
||||
{
|
||||
if (!exception)
|
||||
{
|
||||
exception = std::make_exception_ptr(Exception(
|
||||
ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
|
||||
"Cannot read with range: [{}, {}] (response status: {}, reason: {})",
|
||||
*read_range.begin,
|
||||
read_range.end ? toString(*read_range.end) : "-",
|
||||
toString(response.getStatus()), response.getReason()));
|
||||
}
|
||||
|
||||
/// Retry on 200 OK
|
||||
if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK)
|
||||
initialization_error = InitializeError::RETRYABLE_ERROR;
|
||||
else
|
||||
initialization_error = InitializeError::NON_RETRYABLE_ERROR;
|
||||
|
||||
return;
|
||||
}
|
||||
else if (read_range.end)
|
||||
{
|
||||
/// We could have range.begin == 0 and range.end != 0 in the case of DiskWeb; failing to read with partial content
/// will only affect performance, so a warning is enough.
|
||||
LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end);
|
||||
}
|
||||
}
|
||||
|
||||
// Remember the file size. It'll be used to report EOF on the next nextImpl() call.
|
||||
if (!read_range.end && response.hasContentLength())
|
||||
file_info = parseFileInfo(response, withPartialContent(read_range) ? getOffset() : 0);
|
||||
|
||||
try
|
||||
{
|
||||
impl = std::make_unique<ReadBufferFromIStream>(*istr, buffer_size);
|
||||
|
||||
if (use_external_buffer)
|
||||
{
|
||||
setupExternalBuffer();
|
||||
}
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
/// We use the session's data storage to store the exception text.
/// Based on it, we can decide whether to reconnect the session or re-resolve the session host.
|
||||
auto sess = session->getSession();
|
||||
sess->attachSessionData(e.message());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
bool ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::nextImpl()
|
||||
{
|
||||
if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL)
|
||||
return false;
|
||||
assert(initialization_error == InitializeError::NONE);
|
||||
|
||||
if (next_callback)
|
||||
next_callback(count());
|
||||
|
||||
if ((read_range.end && getOffset() > read_range.end.value()) ||
|
||||
(file_info && file_info->file_size && getOffset() >= file_info->file_size.value()))
|
||||
return false;
|
||||
|
||||
if (impl)
|
||||
{
|
||||
if (use_external_buffer)
|
||||
{
|
||||
setupExternalBuffer();
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* impl was initialized before, pass position() to it to make
|
||||
* sure there is no pending data which was not read.
|
||||
*/
|
||||
if (!working_buffer.empty())
|
||||
impl->position() = position();
|
||||
}
|
||||
}
|
||||
|
||||
bool result = false;
|
||||
size_t milliseconds_to_wait = settings.http_retry_initial_backoff_ms;
|
||||
bool last_attempt = false;
|
||||
|
||||
auto on_retriable_error = [&]()
|
||||
{
|
||||
retry_with_range_header = true;
|
||||
impl.reset();
|
||||
auto http_session = session->getSession();
|
||||
http_session->reset();
|
||||
if (!last_attempt)
|
||||
{
|
||||
sleepForMilliseconds(milliseconds_to_wait);
|
||||
milliseconds_to_wait = std::min(milliseconds_to_wait * 2, settings.http_retry_max_backoff_ms);
|
||||
}
|
||||
};
|
||||
|
||||
for (size_t i = 0;; ++i)
|
||||
{
|
||||
if (last_attempt)
|
||||
break;
|
||||
last_attempt = i + 1 >= settings.http_max_tries;
|
||||
|
||||
exception = nullptr;
|
||||
initialization_error = InitializeError::NONE;
|
||||
|
||||
try
|
||||
{
|
||||
if (!impl)
|
||||
{
|
||||
initialize();
|
||||
|
||||
if (initialization_error == InitializeError::NON_RETRYABLE_ERROR)
|
||||
{
|
||||
assert(exception);
|
||||
break;
|
||||
}
|
||||
else if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
else if (initialization_error == InitializeError::RETRYABLE_ERROR)
|
||||
{
|
||||
LOG_ERROR(
|
||||
log,
|
||||
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
|
||||
"(Current backoff wait is {}/{} ms)",
|
||||
uri.toString(), i + 1, settings.http_max_tries, getOffset(),
|
||||
read_range.end ? toString(*read_range.end) : "unknown",
|
||||
milliseconds_to_wait, settings.http_retry_max_backoff_ms);
|
||||
|
||||
assert(exception);
|
||||
on_retriable_error();
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(!exception);
|
||||
|
||||
if (use_external_buffer)
|
||||
{
|
||||
setupExternalBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
result = impl->next();
|
||||
exception = nullptr;
|
||||
break;
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
/// Too many open files - non-retryable.
|
||||
if (e.code() == POCO_EMFILE)
|
||||
throw;
|
||||
|
||||
/** Retry request unconditionally if nothing has been read yet.
|
||||
* Otherwise, if it is a GET request, retry with a Range header.
|
||||
*/
|
||||
bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET;
|
||||
if (!can_retry_request)
|
||||
throw;
|
||||
|
||||
LOG_ERROR(
|
||||
log,
|
||||
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
|
||||
"Error: {}. (Current backoff wait is {}/{} ms)",
|
||||
uri.toString(),
|
||||
i + 1,
|
||||
settings.http_max_tries,
|
||||
getOffset(),
|
||||
read_range.end ? toString(*read_range.end) : "unknown",
|
||||
e.displayText(),
|
||||
milliseconds_to_wait,
|
||||
settings.http_retry_max_backoff_ms);
|
||||
|
||||
on_retriable_error();
|
||||
exception = std::current_exception();
|
||||
}
|
||||
}
|
||||
|
||||
if (exception)
|
||||
std::rethrow_exception(exception);
|
||||
|
||||
if (!result)
|
||||
return false;
|
||||
|
||||
internal_buffer = impl->buffer();
|
||||
working_buffer = internal_buffer;
|
||||
offset_from_begin_pos += working_buffer.size();
|
||||
return true;
|
||||
}
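The retry loop above uses capped exponential backoff. A standalone sketch with hypothetical values standing in for the http_retry_* settings:

#include <algorithm>
#include <cstdio>

int main()
{
    unsigned wait_ms = 100;            // http_retry_initial_backoff_ms (hypothetical)
    const unsigned max_wait_ms = 1600; // http_retry_max_backoff_ms (hypothetical)

    for (unsigned attempt = 1; attempt <= 6; ++attempt)
    {
        std::printf("attempt %u: sleep %u ms before retrying\n", attempt, wait_ms);
        wait_ms = std::min(wait_ms * 2, max_wait_ms); // doubles, then saturates at the cap
    }
    // prints 100, 200, 400, 800, 1600, 1600
}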
template <typename UpdatableSessionPtr>
|
||||
off_t ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getPosition() { return getOffset() - available(); }
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
off_t ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::seek(off_t offset_, int whence)
|
||||
{
|
||||
if (whence != SEEK_SET)
|
||||
throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed.");
|
||||
|
||||
if (offset_ < 0)
|
||||
throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}",
|
||||
offset_);
|
||||
|
||||
off_t current_offset = getOffset();
|
||||
if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset)
|
||||
{
|
||||
pos = working_buffer.end() - (current_offset - offset_);
|
||||
assert(pos >= working_buffer.begin());
|
||||
assert(pos < working_buffer.end());
|
||||
|
||||
return getPosition();
|
||||
}
|
||||
|
||||
if (impl)
|
||||
{
|
||||
auto position = getPosition();
|
||||
if (offset_ > position)
|
||||
{
|
||||
size_t diff = offset_ - position;
|
||||
if (diff < settings.remote_read_min_bytes_for_seek)
|
||||
{
|
||||
ignore(diff);
|
||||
return offset_;
|
||||
}
|
||||
}
|
||||
|
||||
if (!atEndOfRequestedRangeGuess())
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
|
||||
impl.reset();
|
||||
}
|
||||
|
||||
resetWorkingBuffer();
|
||||
read_range.begin = offset_;
|
||||
offset_from_begin_pos = 0;
|
||||
|
||||
return offset_;
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::setReadUntilPosition(size_t until)
|
||||
{
|
||||
until = std::max(until, 1ul);
|
||||
if (read_range.end && *read_range.end + 1 == until)
|
||||
return;
|
||||
read_range.end = until - 1;
|
||||
read_range.begin = getPosition();
|
||||
resetWorkingBuffer();
|
||||
if (impl)
|
||||
{
|
||||
if (!atEndOfRequestedRangeGuess())
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
|
||||
impl.reset();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::setReadUntilEnd()
|
||||
{
|
||||
if (!read_range.end)
|
||||
return;
|
||||
read_range.end.reset();
|
||||
read_range.begin = getPosition();
|
||||
resetWorkingBuffer();
|
||||
if (impl)
|
||||
{
|
||||
if (!atEndOfRequestedRangeGuess())
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
|
||||
impl.reset();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
bool ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::supportsRightBoundedReads() const { return true; }
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
bool ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::atEndOfRequestedRangeGuess()
|
||||
{
|
||||
if (!impl)
|
||||
return true;
|
||||
if (read_range.end)
|
||||
return getPosition() > static_cast<off_t>(*read_range.end);
|
||||
if (file_info && file_info->file_size)
|
||||
return getPosition() >= static_cast<off_t>(*file_info->file_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
std::string ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getResponseCookie(const std::string & name, const std::string & def) const
|
||||
{
|
||||
for (const auto & cookie : cookies)
|
||||
if (cookie.getName() == name)
|
||||
return cookie.getValue();
|
||||
return def;
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::setNextCallback(NextCallback next_callback_)
|
||||
{
|
||||
next_callback = next_callback_;
|
||||
/// Some data may already have been read.
|
||||
next_callback(count());
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
const std::string & ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getCompressionMethod() const { return content_encoding; }
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
std::optional<time_t> ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getLastModificationTime()
|
||||
{
|
||||
return getFileInfo().last_modified;
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
HTTPFileInfo ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getFileInfo()
|
||||
{
|
||||
Poco::Net::HTTPResponse response;
|
||||
try
|
||||
{
|
||||
getHeadResponse(response);
|
||||
}
|
||||
catch (HTTPException & e)
|
||||
{
|
||||
/// Maybe the web server doesn't support HEAD requests.
|
||||
/// E.g. webhdfs reports status 400.
|
||||
/// We should proceed in hopes that the actual GET request will succeed.
|
||||
/// (Unless the error is transient. We don't want to nondeterministically sometimes
|
||||
/// fall back to slow whole-file reads when HEAD is actually supported; that sounds
|
||||
/// like a nightmare to debug.)
|
||||
if (e.getHTTPStatus() >= 400 && e.getHTTPStatus() <= 499 &&
|
||||
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS)
|
||||
return HTTPFileInfo{};
|
||||
|
||||
throw;
|
||||
}
|
||||
return parseFileInfo(response, 0);
|
||||
}
|
||||
|
||||
template <typename UpdatableSessionPtr>
|
||||
HTTPFileInfo ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin)
|
||||
{
|
||||
HTTPFileInfo res;
|
||||
|
||||
if (response.hasContentLength())
|
||||
{
|
||||
res.file_size = response.getContentLength();
|
||||
|
||||
if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
|
||||
{
|
||||
*res.file_size += requested_range_begin;
|
||||
res.seekable = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
res.seekable = response.has("Accept-Ranges") && response.get("Accept-Ranges") == "bytes";
|
||||
}
|
||||
}
|
||||
|
||||
if (response.has("Last-Modified"))
|
||||
{
|
||||
String date_str = response.get("Last-Modified");
|
||||
struct tm info;
|
||||
char * end = strptime(date_str.data(), "%a, %d %b %Y %H:%M:%S %Z", &info);
|
||||
if (end == date_str.data() + date_str.size())
|
||||
res.last_modified = timegm(&info);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
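The Last-Modified parse above relies on strptime()/timegm(), which are POSIX/glibc extensions rather than standard C++. A runnable sketch of the same whole-string-consumed acceptance check:

#include <cstdio>
#include <ctime>

int main()
{
    const char * date_str = "Wed, 21 Oct 2015 07:28:00 GMT";
    struct tm info {};
    char * end = strptime(date_str, "%a, %d %b %Y %H:%M:%S %Z", &info);
    if (end != nullptr && *end == '\0') // accepted only if the whole header value parsed
        std::printf("unix time: %lld\n", static_cast<long long>(timegm(&info)));
}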
}
|
||||
|
||||
SessionFactory::SessionFactory(const ConnectionTimeouts & timeouts_)
|
||||
: timeouts(timeouts_) {}
|
||||
|
||||
SessionFactory::SessionType SessionFactory::buildNewSession(const Poco::URI & uri)
|
||||
{
|
||||
return makeHTTPSession(uri, timeouts);
|
||||
}
|
||||
|
||||
ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(
|
||||
Poco::URI uri_,
|
||||
const std::string & method_,
|
||||
OutStreamCallback out_stream_callback_,
|
||||
const ConnectionTimeouts & timeouts,
|
||||
const Poco::Net::HTTPBasicCredentials & credentials_,
|
||||
const UInt64 max_redirects,
|
||||
size_t buffer_size_,
|
||||
const ReadSettings & settings_,
|
||||
const HTTPHeaderEntries & http_header_entries_,
|
||||
const RemoteHostFilter * remote_host_filter_,
|
||||
bool delay_initialization_,
|
||||
bool use_external_buffer_,
|
||||
bool skip_not_found_url_,
|
||||
std::optional<HTTPFileInfo> file_info_)
|
||||
: Parent(
|
||||
std::make_shared<SessionType>(uri_, max_redirects, std::make_shared<SessionFactory>(timeouts)),
|
||||
uri_,
|
||||
credentials_,
|
||||
method_,
|
||||
out_stream_callback_,
|
||||
buffer_size_,
|
||||
settings_,
|
||||
http_header_entries_,
|
||||
remote_host_filter_,
|
||||
delay_initialization_,
|
||||
use_external_buffer_,
|
||||
skip_not_found_url_,
|
||||
file_info_) {}
|
||||
|
||||
RangedReadWriteBufferFromHTTPFactory::RangedReadWriteBufferFromHTTPFactory(
|
||||
Poco::URI uri_,
|
||||
std::string method_,
|
||||
OutStreamCallback out_stream_callback_,
|
||||
ConnectionTimeouts timeouts_,
|
||||
const Poco::Net::HTTPBasicCredentials & credentials_,
|
||||
UInt64 max_redirects_,
|
||||
size_t buffer_size_,
|
||||
ReadSettings settings_,
|
||||
HTTPHeaderEntries http_header_entries_,
|
||||
const RemoteHostFilter * remote_host_filter_,
|
||||
bool delay_initialization_,
|
||||
bool use_external_buffer_,
|
||||
bool skip_not_found_url_)
|
||||
: uri(uri_)
|
||||
, method(std::move(method_))
|
||||
, out_stream_callback(out_stream_callback_)
|
||||
, timeouts(std::move(timeouts_))
|
||||
, credentials(credentials_)
|
||||
, max_redirects(max_redirects_)
|
||||
, buffer_size(buffer_size_)
|
||||
, settings(std::move(settings_))
|
||||
, http_header_entries(std::move(http_header_entries_))
|
||||
, remote_host_filter(remote_host_filter_)
|
||||
, delay_initialization(delay_initialization_)
|
||||
, use_external_buffer(use_external_buffer_)
|
||||
, skip_not_found_url(skip_not_found_url_) {}
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> RangedReadWriteBufferFromHTTPFactory::getReader()
|
||||
{
|
||||
return std::make_unique<ReadWriteBufferFromHTTP>(
|
||||
uri,
|
||||
method,
|
||||
out_stream_callback,
|
||||
timeouts,
|
||||
credentials,
|
||||
max_redirects,
|
||||
buffer_size,
|
||||
settings,
|
||||
http_header_entries,
|
||||
remote_host_filter,
|
||||
delay_initialization,
|
||||
use_external_buffer,
|
||||
skip_not_found_url,
|
||||
file_info);
|
||||
}
|
||||
|
||||
size_t RangedReadWriteBufferFromHTTPFactory::getFileSize()
|
||||
{
|
||||
auto s = getFileInfo().file_size;
|
||||
if (!s)
|
||||
throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", uri.toString());
|
||||
return *s;
|
||||
}
|
||||
|
||||
bool RangedReadWriteBufferFromHTTPFactory::checkIfActuallySeekable()
|
||||
{
|
||||
return getFileInfo().seekable;
|
||||
}
|
||||
|
||||
HTTPFileInfo RangedReadWriteBufferFromHTTPFactory::getFileInfo()
|
||||
{
|
||||
if (!file_info)
|
||||
file_info = static_cast<ReadWriteBufferFromHTTP*>(getReader().get())->getFileInfo();
|
||||
return *file_info;
|
||||
}
|
||||
|
||||
String RangedReadWriteBufferFromHTTPFactory::getFileName() const { return uri.toString(); }
|
||||
|
||||
|
||||
PooledSessionFactory::PooledSessionFactory(
|
||||
const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_)
|
||||
: timeouts(timeouts_)
|
||||
, per_endpoint_pool_size(per_endpoint_pool_size_) {}
|
||||
|
||||
PooledSessionFactory::SessionType PooledSessionFactory::buildNewSession(const Poco::URI & uri)
|
||||
{
|
||||
return makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size);
|
||||
}
|
||||
|
||||
|
||||
PooledReadWriteBufferFromHTTP::PooledReadWriteBufferFromHTTP(
|
||||
Poco::URI uri_,
|
||||
const std::string & method_,
|
||||
OutStreamCallback out_stream_callback_,
|
||||
const ConnectionTimeouts & timeouts_,
|
||||
const Poco::Net::HTTPBasicCredentials & credentials_,
|
||||
size_t buffer_size_,
|
||||
const UInt64 max_redirects,
|
||||
size_t max_connections_per_endpoint)
|
||||
: Parent(
|
||||
std::make_shared<SessionType>(uri_, max_redirects, std::make_shared<PooledSessionFactory>(timeouts_, max_connections_per_endpoint)),
|
||||
uri_,
|
||||
credentials_,
|
||||
method_,
|
||||
out_stream_callback_,
|
||||
buffer_size_) {}
|
||||
|
||||
template class UpdatableSession<SessionFactory>;
|
||||
template class UpdatableSession<PooledSessionFactory>;
|
||||
template class detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<SessionFactory>>>;
|
||||
template class detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<PooledSessionFactory>>>;
|
||||
|
||||
}
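The explicit instantiations above are what allow the template definitions to live in this .cpp file while other translation units see only the declarations. A minimal self-contained illustration of the pattern, with hypothetical names:

#include <cstdio>

template <typename T>
struct Session { T id; T get() const; };

// Definition kept away from the class body (in real code: in the .cpp file).
template <typename T>
T Session<T>::get() const { return id; }

// Explicit instantiation: emits Session<int>::get for other translation units.
template struct Session<int>;

int main()
{
    Session<int> s{42};
    std::printf("%d\n", s.get());
}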
@ -27,25 +27,8 @@
#include <filesystem>
namespace ProfileEvents
|
||||
{
|
||||
extern const Event ReadBufferSeekCancelConnection;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Performs an HTTP POST request and provides the response to read.
|
||||
*/
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_MANY_REDIRECTS;
|
||||
extern const int HTTP_RANGE_NOT_SATISFIABLE;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_SEEK_THROUGH_FILE;
|
||||
extern const int SEEK_POSITION_OUT_OF_BOUND;
|
||||
extern const int UNKNOWN_FILE_SIZE;
|
||||
}
|
||||
|
||||
template <typename TSessionFactory>
|
||||
class UpdatableSession
|
||||
@ -53,29 +36,14 @@ class UpdatableSession
|
||||
public:
|
||||
using SessionPtr = typename TSessionFactory::SessionType;
|
||||
|
||||
explicit UpdatableSession(const Poco::URI & uri, UInt64 max_redirects_, std::shared_ptr<TSessionFactory> session_factory_)
|
||||
: max_redirects{max_redirects_}
|
||||
, initial_uri(uri)
|
||||
, session_factory(std::move(session_factory_))
|
||||
{
|
||||
session = session_factory->buildNewSession(uri);
|
||||
}
|
||||
explicit UpdatableSession(const Poco::URI & uri, UInt64 max_redirects_, std::shared_ptr<TSessionFactory> session_factory_);
|
||||
|
||||
SessionPtr getSession() { return session; }
|
||||
SessionPtr getSession();
|
||||
|
||||
void updateSession(const Poco::URI & uri)
|
||||
{
|
||||
++redirects;
|
||||
if (redirects <= max_redirects)
|
||||
session = session_factory->buildNewSession(uri);
|
||||
else
|
||||
throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects while trying to access {}", initial_uri.toString());
|
||||
}
|
||||
void updateSession(const Poco::URI & uri);
|
||||
|
||||
std::shared_ptr<UpdatableSession<TSessionFactory>> clone(const Poco::URI & uri);
|
||||
|
||||
std::shared_ptr<UpdatableSession<TSessionFactory>> clone(const Poco::URI & uri)
|
||||
{
|
||||
return std::make_shared<UpdatableSession<TSessionFactory>>(uri, max_redirects, session_factory);
|
||||
}
|
||||
private:
|
||||
SessionPtr session;
|
||||
UInt64 redirects{0};
|
||||
@ -85,29 +53,29 @@ private:
|
||||
};
|
||||
|
||||
|
||||
/// Information from HTTP response header.
|
||||
struct HTTPFileInfo
|
||||
{
|
||||
// nullopt if the server doesn't report it.
|
||||
std::optional<size_t> file_size;
|
||||
std::optional<time_t> last_modified;
|
||||
bool seekable = false;
|
||||
};
|
||||
|
||||
|
||||
namespace detail
|
||||
{
|
||||
/// Byte range, including right bound [begin, end].
|
||||
struct HTTPRange
|
||||
{
|
||||
std::optional<size_t> begin;
|
||||
std::optional<size_t> end;
|
||||
};
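Note that HTTPRange is inclusive on both ends, which matches the HTTP Range header. A standalone sketch of the mapping; the helper name is hypothetical:

#include <cstddef>
#include <cstdio>
#include <optional>
#include <string>

std::string rangeHeaderValue(size_t begin, std::optional<size_t> end)
{
    if (end)
        return "bytes=" + std::to_string(begin) + "-" + std::to_string(*end); // inclusive right bound
    return "bytes=" + std::to_string(begin) + "-";                            // open-ended
}

int main()
{
    std::printf("%s\n", rangeHeaderValue(100, 199).c_str());          // bytes=100-199
    std::printf("%s\n", rangeHeaderValue(100, std::nullopt).c_str()); // bytes=100-
}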
template <typename UpdatableSessionPtr>
|
||||
class ReadWriteBufferFromHTTPBase : public SeekableReadBuffer, public WithFileName, public WithFileSize
|
||||
{
|
||||
public:
|
||||
/// Information from HTTP response header.
|
||||
struct FileInfo
|
||||
{
|
||||
// nullopt if the server doesn't report it.
|
||||
std::optional<size_t> file_size;
|
||||
std::optional<time_t> last_modified;
|
||||
bool seekable = false;
|
||||
};
|
||||
|
||||
protected:
|
||||
/// HTTP range, including right bound [begin, end].
|
||||
struct Range
|
||||
{
|
||||
std::optional<size_t> begin;
|
||||
std::optional<size_t> end;
|
||||
};
|
||||
|
||||
Poco::URI uri;
|
||||
std::string method;
|
||||
std::string content_encoding;
|
||||
@ -126,8 +94,8 @@ namespace detail
|
||||
bool use_external_buffer;
|
||||
|
||||
size_t offset_from_begin_pos = 0;
|
||||
Range read_range;
|
||||
std::optional<FileInfo> file_info;
|
||||
HTTPRange read_range;
|
||||
std::optional<HTTPFileInfo> file_info;
|
||||
|
||||
/// Delayed exception in case retries with partial content are not satisfiable.
|
||||
std::exception_ptr exception;
|
||||
@ -140,106 +108,19 @@ namespace detail
|
||||
ReadSettings settings;
|
||||
Poco::Logger * log;
|
||||
|
||||
bool withPartialContent(const Range & range) const
|
||||
{
|
||||
/**
|
||||
* Add range header if we have some passed range
|
||||
* or if we want to retry GET request on purpose.
|
||||
*/
|
||||
return range.begin || range.end || retry_with_range_header;
|
||||
}
|
||||
bool withPartialContent(const HTTPRange & range) const;
|
||||
|
||||
size_t getRangeBegin() const { return read_range.begin.value_or(0); }
|
||||
size_t getRangeBegin() const;
|
||||
|
||||
size_t getOffset() const { return getRangeBegin() + offset_from_begin_pos; }
|
||||
size_t getOffset() const;
|
||||
|
||||
template <bool for_object_info = false>
|
||||
std::istream * callImpl(UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
|
||||
{
|
||||
// With an empty path Poco will send "POST HTTP/1.1" (a Poco bug).
|
||||
if (uri_.getPath().empty())
|
||||
uri_.setPath("/");
|
||||
std::istream * callImpl(UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_, bool for_object_info = false);
|
||||
|
||||
Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
|
||||
request.setHost(uri_.getHost()); // use original, not resolved host name in header
|
||||
size_t getFileSize() override;
|
||||
|
||||
if (out_stream_callback)
|
||||
request.setChunkedTransferEncoding(true);
|
||||
else if (method == Poco::Net::HTTPRequest::HTTP_POST)
|
||||
request.setContentLength(0); /// No callback - no body
|
||||
bool checkIfActuallySeekable() override;
|
||||
|
||||
for (auto & [header, value] : http_header_entries)
|
||||
request.set(header, value);
|
||||
|
||||
std::optional<Range> range;
|
||||
if constexpr (!for_object_info)
|
||||
{
|
||||
if (withPartialContent(read_range))
|
||||
range = Range{getOffset(), read_range.end};
|
||||
}
|
||||
|
||||
if (range)
|
||||
{
|
||||
String range_header_value;
|
||||
if (range->end)
|
||||
range_header_value = fmt::format("bytes={}-{}", *range->begin, *range->end);
|
||||
else
|
||||
range_header_value = fmt::format("bytes={}-", *range->begin);
|
||||
LOG_TEST(log, "Adding header: Range: {}", range_header_value);
|
||||
request.set("Range", range_header_value);
|
||||
}
|
||||
|
||||
if (!credentials.getUsername().empty())
|
||||
credentials.authenticate(request);
|
||||
|
||||
LOG_TRACE(log, "Sending request to {}", uri_.toString());
|
||||
|
||||
auto sess = current_session->getSession();
|
||||
try
|
||||
{
|
||||
auto & stream_out = sess->sendRequest(request);
|
||||
|
||||
if (out_stream_callback)
|
||||
out_stream_callback(stream_out);
|
||||
|
||||
auto result_istr = receiveResponse(*sess, request, response, true);
|
||||
response.getCookies(cookies);
|
||||
|
||||
/// we can fetch object info while the request is being processed
|
||||
/// and we don't want to override any context used by it
|
||||
if constexpr (!for_object_info)
|
||||
content_encoding = response.get("Content-Encoding", "");
|
||||
|
||||
return result_istr;
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
/// We use the session's data storage to store the exception text.
/// Based on it, we can decide whether to reconnect the session or re-resolve the session host.
|
||||
sess->attachSessionData(e.message());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
size_t getFileSize() override
|
||||
{
|
||||
if (!file_info)
|
||||
file_info = getFileInfo();
|
||||
|
||||
if (file_info->file_size)
|
||||
return *file_info->file_size;
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", uri.toString());
|
||||
}
|
||||
|
||||
bool checkIfActuallySeekable() override
|
||||
{
|
||||
if (!file_info)
|
||||
file_info = getFileInfo();
|
||||
return file_info->seekable;
|
||||
}
|
||||
|
||||
String getFileName() const override { return uri.toString(); }
|
||||
String getFileName() const override;
|
||||
|
||||
enum class InitializeError
|
||||
{
|
||||
@ -254,38 +135,9 @@ namespace detail
|
||||
InitializeError initialization_error = InitializeError::NONE;
|
||||
|
||||
private:
|
||||
void getHeadResponse(Poco::Net::HTTPResponse & response)
|
||||
{
|
||||
for (size_t i = 0; i < settings.http_max_tries; ++i)
|
||||
{
|
||||
try
|
||||
{
|
||||
callWithRedirects<true>(response, Poco::Net::HTTPRequest::HTTP_HEAD, true);
|
||||
break;
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
if (i == settings.http_max_tries - 1 || !isRetriableError(response.getStatus()))
|
||||
throw;
|
||||
void getHeadResponse(Poco::Net::HTTPResponse & response);
|
||||
|
||||
LOG_ERROR(log, "Failed to make HTTP_HEAD request to {}. Error: {}", uri.toString(), e.displayText());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void setupExternalBuffer()
|
||||
{
|
||||
/**
|
||||
* use_external_buffer means we read into a buffer which
* was passed to us from somewhere else. We do not check whether the
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data, and
* each nextImpl() call can fill a different buffer.
|
||||
*/
|
||||
impl->set(internal_buffer.begin(), internal_buffer.size());
|
||||
assert(working_buffer.begin() != nullptr);
|
||||
assert(!internal_buffer.empty());
|
||||
}
|
||||
void setupExternalBuffer();
|
||||
|
||||
public:
|
||||
using NextCallback = std::function<void(size_t)>;
|
||||
@ -304,563 +156,60 @@ namespace detail
|
||||
bool delay_initialization = false,
|
||||
bool use_external_buffer_ = false,
|
||||
bool http_skip_not_found_url_ = false,
|
||||
std::optional<FileInfo> file_info_ = std::nullopt)
|
||||
: SeekableReadBuffer(nullptr, 0)
|
||||
, uri {uri_}
|
||||
, method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
|
||||
, session {session_}
|
||||
, out_stream_callback {out_stream_callback_}
|
||||
, credentials {credentials_}
|
||||
, http_header_entries {std::move(http_header_entries_)}
|
||||
, remote_host_filter {remote_host_filter_}
|
||||
, buffer_size {buffer_size_}
|
||||
, use_external_buffer {use_external_buffer_}
|
||||
, file_info(file_info_)
|
||||
, http_skip_not_found_url(http_skip_not_found_url_)
|
||||
, settings {settings_}
|
||||
, log(&Poco::Logger::get("ReadWriteBufferFromHTTP"))
|
||||
{
|
||||
if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0
|
||||
|| settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms)
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid setting for http backoff, "
|
||||
"must be http_max_tries >= 1 (current is {}) and "
|
||||
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
|
||||
settings.http_max_tries,
|
||||
settings.http_retry_initial_backoff_ms,
|
||||
settings.http_retry_max_backoff_ms);
|
||||
std::optional<HTTPFileInfo> file_info_ = std::nullopt);
|
||||
|
||||
// Configure User-Agent if it not already set.
|
||||
const std::string user_agent = "User-Agent";
|
||||
auto iter = std::find_if(
|
||||
http_header_entries.begin(),
|
||||
http_header_entries.end(),
|
||||
[&user_agent](const HTTPHeaderEntry & entry) { return entry.name == user_agent; });
|
||||
void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false);
|
||||
|
||||
if (iter == http_header_entries.end())
|
||||
{
|
||||
http_header_entries.emplace_back("User-Agent", fmt::format("ClickHouse/{}", VERSION_STRING));
|
||||
}
|
||||
|
||||
if (!delay_initialization)
|
||||
{
|
||||
initialize();
|
||||
if (exception)
|
||||
std::rethrow_exception(exception);
|
||||
}
|
||||
}
|
||||
|
||||
static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept
|
||||
{
|
||||
static constexpr std::array non_retriable_errors{
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_IMPLEMENTED,
|
||||
Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED};
|
||||
|
||||
return std::all_of(
|
||||
non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; });
|
||||
}
|
||||
|
||||
static Poco::URI getUriAfterRedirect(const Poco::URI & prev_uri, Poco::Net::HTTPResponse & response)
|
||||
{
|
||||
auto location = response.get("Location");
|
||||
auto location_uri = Poco::URI(location);
|
||||
if (!location_uri.isRelative())
|
||||
return location_uri;
|
||||
/// Location header contains relative path. So we need to concatenate it
|
||||
/// with path from the original URI and normalize it.
|
||||
auto path = std::filesystem::weakly_canonical(std::filesystem::path(prev_uri.getPath()) / location);
|
||||
location_uri = prev_uri;
|
||||
location_uri.setPath(path);
|
||||
return location_uri;
|
||||
}
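
For illustration, the relative-redirect handling above boils down to std::filesystem path normalization. A minimal standalone sketch (the function name and sample paths are illustrative, not part of the patch):

#include <filesystem>
#include <iostream>
#include <string>

/// Standalone model of the relative "Location" handling in getUriAfterRedirect():
/// append the relative location to the previous request path and normalize it.
std::string resolveRelativeLocation(const std::string & prev_path, const std::string & location)
{
    /// weakly_canonical() collapses "." and ".." components; for components that
    /// don't exist on disk it falls back to pure lexical normalization.
    return std::filesystem::weakly_canonical(std::filesystem::path(prev_path) / location).string();
}

int main()
{
    std::cout << resolveRelativeLocation("/data/a/b", "../c") << '\n';  /// prints "/data/a/c"
}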

    template <bool for_object_info = false>
    void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
    {
        UpdatableSessionPtr current_session = nullptr;

        /// We can fetch object info while the request is being processed,
        /// and we don't want to override any context used by it.
        if constexpr (for_object_info)
            current_session = session->clone(uri);
        else
            current_session = session;

        call<for_object_info>(current_session, response, method_, throw_on_all_errors);
        Poco::URI prev_uri = uri;

        while (isRedirect(response.getStatus()))
        {
            Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response);
            prev_uri = uri_redirect;
            if (remote_host_filter)
                remote_host_filter->checkURL(uri_redirect);

            current_session->updateSession(uri_redirect);

            /// We can fetch object info while the request is being processed,
            /// and we don't want to override any context used by it.
            auto result_istr = callImpl<for_object_info>(current_session, uri_redirect, response, method);
            if constexpr (!for_object_info)
                istr = result_istr;
        }
    }

    template <bool for_object_info = false>
    void call(UpdatableSessionPtr & current_session, Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
    {
        try
        {
            /// We can fetch object info while the request is being processed,
            /// and we don't want to override any context used by it.
            auto result_istr = callImpl<for_object_info>(current_session, saved_uri_redirect ? *saved_uri_redirect : uri, response, method_);
            if constexpr (!for_object_info)
                istr = result_istr;
        }
        catch (...)
        {
            /// We can fetch object info while the request is being processed,
            /// and we don't want to override any context used by it.
            if constexpr (for_object_info)
            {
                throw;
            }
            else
            {
                if (throw_on_all_errors)
                    throw;

                auto http_status = response.getStatus();

                if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url)
                {
                    initialization_error = InitializeError::SKIP_NOT_FOUND_URL;
                }
                else if (!isRetriableError(http_status))
                {
                    initialization_error = InitializeError::NON_RETRYABLE_ERROR;
                    exception = std::current_exception();
                }
                else
                {
                    throw;
                }
            }
        }
    }
    void call(UpdatableSessionPtr & current_session, Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false);

    /**
     * Throws if the error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and
     * saves the exception into the `exception` variable. In case the url is not found and skip_not_found_url == true,
     * sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws.
     */
    void initialize()
    {
        Poco::Net::HTTPResponse response;
    void initialize();

        call(session, response, method);
        if (initialization_error != InitializeError::NONE)
            return;
    bool nextImpl() override;

        while (isRedirect(response.getStatus()))
        {
            Poco::URI uri_redirect = getUriAfterRedirect(saved_uri_redirect.value_or(uri), response);
            if (remote_host_filter)
                remote_host_filter->checkURL(uri_redirect);
    off_t getPosition() override;

            session->updateSession(uri_redirect);
    off_t seek(off_t offset_, int whence) override;

            istr = callImpl(session, uri_redirect, response, method);
            saved_uri_redirect = uri_redirect;
        }
    void setReadUntilPosition(size_t until) override;

        if (response.hasContentLength())
            LOG_DEBUG(log, "Received response with content length: {}", response.getContentLength());
    void setReadUntilEnd() override;

        if (withPartialContent(read_range) && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
        {
            /// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0.
            if (getOffset() != 0)
            {
                if (!exception)
                {
                    exception = std::make_exception_ptr(Exception(
                        ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
                        "Cannot read with range: [{}, {}] (response status: {}, reason: {})",
                        *read_range.begin,
                        read_range.end ? toString(*read_range.end) : "-",
                        toString(response.getStatus()), response.getReason()));
                }

                /// Retry on `200 OK`
                if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK)
                    initialization_error = InitializeError::RETRYABLE_ERROR;
                else
                    initialization_error = InitializeError::NON_RETRYABLE_ERROR;

                return;
            }
            else if (read_range.end)
            {
                /// We could have range.begin == 0 and range.end != 0 in the case of DiskWeb; failing to read with partial content
                /// will affect only performance, so a warning is enough.
                LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end);
            }
        }

        // Remember the file size. It'll be used to report eof in the next nextImpl() call.
        if (!read_range.end && response.hasContentLength())
            file_info = parseFileInfo(response, withPartialContent(read_range) ? getOffset() : 0);

        try
        {
            impl = std::make_unique<ReadBufferFromIStream>(*istr, buffer_size);

            if (use_external_buffer)
            {
                setupExternalBuffer();
            }
        }
        catch (const Poco::Exception & e)
        {
            /// We use the session data storage as storage for the exception text.
            /// Depending on it, we can decide whether to reconnect the session or re-resolve the session host.
            auto sess = session->getSession();
            sess->attachSessionData(e.message());
            throw;
        }
    }

    bool nextImpl() override
    {
        if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL)
            return false;
        assert(initialization_error == InitializeError::NONE);

        if (next_callback)
            next_callback(count());

        if ((read_range.end && getOffset() > read_range.end.value()) ||
            (file_info && file_info->file_size && getOffset() >= file_info->file_size.value()))
            return false;

        if (impl)
        {
            if (use_external_buffer)
            {
                setupExternalBuffer();
            }
            else
            {
                /**
                 * impl was initialized before, pass position() to it to make
                 * sure there is no pending data which was not read.
                 */
                if (!working_buffer.empty())
                    impl->position() = position();
            }
        }

        bool result = false;
        size_t milliseconds_to_wait = settings.http_retry_initial_backoff_ms;
        bool last_attempt = false;

        auto on_retriable_error = [&]()
        {
            retry_with_range_header = true;
            impl.reset();
            auto http_session = session->getSession();
            http_session->reset();
            if (!last_attempt)
            {
                sleepForMilliseconds(milliseconds_to_wait);
                milliseconds_to_wait = std::min(milliseconds_to_wait * 2, settings.http_retry_max_backoff_ms);
            }
        };

        for (size_t i = 0;; ++i)
        {
            if (last_attempt)
                break;
            last_attempt = i + 1 >= settings.http_max_tries;

            exception = nullptr;
            initialization_error = InitializeError::NONE;

            try
            {
                if (!impl)
                {
                    initialize();

                    if (initialization_error == InitializeError::NON_RETRYABLE_ERROR)
                    {
                        assert(exception);
                        break;
                    }
                    else if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL)
                    {
                        return false;
                    }
                    else if (initialization_error == InitializeError::RETRYABLE_ERROR)
                    {
                        LOG_ERROR(
                            log,
                            "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
                            "(Current backoff wait is {}/{} ms)",
                            uri.toString(), i + 1, settings.http_max_tries, getOffset(),
                            read_range.end ? toString(*read_range.end) : "unknown",
                            milliseconds_to_wait, settings.http_retry_max_backoff_ms);

                        assert(exception);
                        on_retriable_error();
                        continue;
                    }

                    assert(!exception);

                    if (use_external_buffer)
                    {
                        setupExternalBuffer();
                    }
                }

                result = impl->next();
                exception = nullptr;
                break;
            }
            catch (const Poco::Exception & e)
            {
                /// Too many open files - non-retryable.
                if (e.code() == POCO_EMFILE)
                    throw;

                /** Retry the request unconditionally if nothing has been read yet.
                  * Otherwise, if it is a GET request, retry with a range header.
                  */
                bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET;
                if (!can_retry_request)
                    throw;

                LOG_ERROR(
                    log,
                    "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
                    "Error: {}. (Current backoff wait is {}/{} ms)",
                    uri.toString(),
                    i + 1,
                    settings.http_max_tries,
                    getOffset(),
                    read_range.end ? toString(*read_range.end) : "unknown",
                    e.displayText(),
                    milliseconds_to_wait,
                    settings.http_retry_max_backoff_ms);

                on_retriable_error();
                exception = std::current_exception();
            }
        }

        if (exception)
            std::rethrow_exception(exception);

        if (!result)
            return false;

        internal_buffer = impl->buffer();
        working_buffer = internal_buffer;
        offset_from_begin_pos += working_buffer.size();
        return true;
    }
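
A minimal standalone model of the exponential backoff used by the retry loop above (the constants stand in for the http_* settings from ReadSettings; this is a sketch, not the production code path):

#include <algorithm>
#include <cstdio>

int main()
{
    const size_t http_max_tries = 5;
    const size_t http_retry_initial_backoff_ms = 100;
    const size_t http_retry_max_backoff_ms = 1600;

    size_t milliseconds_to_wait = http_retry_initial_backoff_ms;
    for (size_t i = 0; i < http_max_tries; ++i)
    {
        const bool last_attempt = (i + 1 == http_max_tries);
        std::printf("try %zu/%zu: backoff %zu ms%s\n",
                    i + 1, http_max_tries, milliseconds_to_wait,
                    last_attempt ? " (last attempt, no sleep)" : "");
        if (!last_attempt)
            milliseconds_to_wait = std::min(milliseconds_to_wait * 2, http_retry_max_backoff_ms);  /// double the wait, capped at the maximum
    }
}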

    off_t getPosition() override { return getOffset() - available(); }

    off_t seek(off_t offset_, int whence) override
    {
        if (whence != SEEK_SET)
            throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed.");

        if (offset_ < 0)
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}",
                offset_);

        off_t current_offset = getOffset();
        if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset)
        {
            pos = working_buffer.end() - (current_offset - offset_);
            assert(pos >= working_buffer.begin());
            assert(pos < working_buffer.end());

            return getPosition();
        }

        if (impl)
        {
            auto position = getPosition();
            if (offset_ > position)
            {
                size_t diff = offset_ - position;
                if (diff < settings.remote_read_min_bytes_for_seek)
                {
                    ignore(diff);
                    return offset_;
                }
            }

            if (!atEndOfRequestedRangeGuess())
                ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
            impl.reset();
        }

        resetWorkingBuffer();
        read_range.begin = offset_;
        offset_from_begin_pos = 0;

        return offset_;
    }
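
The in-buffer fast path of seek() above is plain offset arithmetic; a small self-contained model of that check (names and sample sizes are illustrative):

#include <cassert>
#include <cstddef>

/// The working buffer covers [current_offset - buffer_size, current_offset).
/// A seek target inside that window can be served without a new HTTP request.
bool seekServedFromBuffer(size_t target, size_t current_offset, size_t buffer_size)
{
    return buffer_size != 0
        && target >= current_offset - buffer_size
        && target < current_offset;
}

int main()
{
    /// 1 MiB downloaded so far, buffer holds the last 64 KiB.
    assert(seekServedFromBuffer(1048576 - 1024, 1048576, 65536));  /// inside the buffer
    assert(!seekServedFromBuffer(1048576 + 1, 1048576, 65536));    /// ahead of the buffer
    assert(!seekServedFromBuffer(0, 1048576, 65536));              /// behind the buffer
}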

    void setReadUntilPosition(size_t until) override
    {
        until = std::max(until, 1ul);
        if (read_range.end && *read_range.end + 1 == until)
            return;
        read_range.end = until - 1;
        read_range.begin = getPosition();
        resetWorkingBuffer();
        if (impl)
        {
            if (!atEndOfRequestedRangeGuess())
                ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
            impl.reset();
        }
    }

    void setReadUntilEnd() override
    {
        if (!read_range.end)
            return;
        read_range.end.reset();
        read_range.begin = getPosition();
        resetWorkingBuffer();
        if (impl)
        {
            if (!atEndOfRequestedRangeGuess())
                ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
            impl.reset();
        }
    }

    bool supportsRightBoundedReads() const override { return true; }
    bool supportsRightBoundedReads() const override;

    // If true, destroying impl now wastes no work. Just for metrics.
    bool atEndOfRequestedRangeGuess()
    {
        if (!impl)
            return true;
        if (read_range.end)
            return getPosition() > static_cast<off_t>(*read_range.end);
        if (file_info && file_info->file_size)
            return getPosition() >= static_cast<off_t>(*file_info->file_size);
        return false;
    }
    bool atEndOfRequestedRangeGuess();

    std::string getResponseCookie(const std::string & name, const std::string & def) const
    {
        for (const auto & cookie : cookies)
            if (cookie.getName() == name)
                return cookie.getValue();
        return def;
    }
    std::string getResponseCookie(const std::string & name, const std::string & def) const;

    /// Set a function to call on each nextImpl, useful when you need to track
    /// progress.
    /// NOTE: the parameter on each call is not incremental -- it is the total
    /// byte count passed through the buffer.
    void setNextCallback(NextCallback next_callback_)
    {
        next_callback = next_callback_;
        /// Some data may already have been read.
        next_callback(count());
    }
    void setNextCallback(NextCallback next_callback_);

    const std::string & getCompressionMethod() const { return content_encoding; }
    const std::string & getCompressionMethod() const;

    std::optional<time_t> getLastModificationTime()
    {
        return getFileInfo().last_modified;
    }
    std::optional<time_t> getLastModificationTime();

    FileInfo getFileInfo()
    {
        Poco::Net::HTTPResponse response;
        try
        {
            getHeadResponse(response);
        }
        catch (HTTPException & e)
        {
            /// Maybe the web server doesn't support HEAD requests.
            /// E.g. webhdfs reports status 400.
            /// We should proceed in hopes that the actual GET request will succeed.
            /// (Unless the error is transient. We don't want to nondeterministically sometimes
            /// fall back to slow whole-file reads when HEAD is actually supported; that sounds
            /// like a nightmare to debug.)
            if (e.getHTTPStatus() >= 400 && e.getHTTPStatus() <= 499 &&
                e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS)
                return FileInfo{};
    HTTPFileInfo getFileInfo();

            throw;
        }
        return parseFileInfo(response, 0);
    }

    FileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin)
    {
        FileInfo res;

        if (response.hasContentLength())
        {
            res.file_size = response.getContentLength();

            if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
            {
                *res.file_size += requested_range_begin;
                res.seekable = true;
            }
            else
            {
                res.seekable = response.has("Accept-Ranges") && response.get("Accept-Ranges") == "bytes";
            }
        }

        if (response.has("Last-Modified"))
        {
            String date_str = response.get("Last-Modified");
            struct tm info;
            char * end = strptime(date_str.data(), "%a, %d %b %Y %H:%M:%S %Z", &info);
            if (end == date_str.data() + date_str.size())
                res.last_modified = timegm(&info);
        }

        return res;
    }
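
The Last-Modified handling in parseFileInfo() relies on strptime()/timegm() as used above; a tiny standalone check of that exact format string (the sample date is illustrative):

#include <ctime>
#include <iostream>
#include <string>

int main()
{
    /// Same format as parseFileInfo() above; the whole string must be consumed.
    std::string date_str = "Wed, 01 Mar 2023 12:00:00 GMT";
    struct tm info{};
    char * end = strptime(date_str.data(), "%a, %d %b %Y %H:%M:%S %Z", &info);
    if (end == date_str.data() + date_str.size())
        std::cout << "epoch: " << timegm(&info) << '\n';  /// timegm() interprets the fields as UTC
}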

    HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin);
};
}

class SessionFactory
{
public:
    explicit SessionFactory(const ConnectionTimeouts & timeouts_)
        : timeouts(timeouts_)
    {}
    explicit SessionFactory(const ConnectionTimeouts & timeouts_);

    using SessionType = HTTPSessionPtr;

    SessionType buildNewSession(const Poco::URI & uri) { return makeHTTPSession(uri, timeouts); }
    SessionType buildNewSession(const Poco::URI & uri);
private:
    ConnectionTimeouts timeouts;
};
@@ -885,23 +234,7 @@ public:
        bool delay_initialization_ = true,
        bool use_external_buffer_ = false,
        bool skip_not_found_url_ = false,
        std::optional<FileInfo> file_info_ = std::nullopt)
        : Parent(
            std::make_shared<SessionType>(uri_, max_redirects, std::make_shared<SessionFactory>(timeouts)),
            uri_,
            credentials_,
            method_,
            out_stream_callback_,
            buffer_size_,
            settings_,
            http_header_entries_,
            remote_host_filter_,
            delay_initialization_,
            use_external_buffer_,
            skip_not_found_url_,
            file_info_)
    {
    }
        std::optional<HTTPFileInfo> file_info_ = std::nullopt);
};

class RangedReadWriteBufferFromHTTPFactory : public SeekableReadBufferFactory, public WithFileName
@@ -922,63 +255,17 @@ public:
        const RemoteHostFilter * remote_host_filter_ = nullptr,
        bool delay_initialization_ = true,
        bool use_external_buffer_ = false,
        bool skip_not_found_url_ = false)
        : uri(uri_)
        , method(std::move(method_))
        , out_stream_callback(out_stream_callback_)
        , timeouts(std::move(timeouts_))
        , credentials(credentials_)
        , max_redirects(max_redirects_)
        , buffer_size(buffer_size_)
        , settings(std::move(settings_))
        , http_header_entries(std::move(http_header_entries_))
        , remote_host_filter(remote_host_filter_)
        , delay_initialization(delay_initialization_)
        , use_external_buffer(use_external_buffer_)
        , skip_not_found_url(skip_not_found_url_)
    {
    }
        bool skip_not_found_url_ = false);

    std::unique_ptr<SeekableReadBuffer> getReader() override
    {
        return std::make_unique<ReadWriteBufferFromHTTP>(
            uri,
            method,
            out_stream_callback,
            timeouts,
            credentials,
            max_redirects,
            buffer_size,
            settings,
            http_header_entries,
            remote_host_filter,
            delay_initialization,
            use_external_buffer,
            skip_not_found_url,
            file_info);
    }
    std::unique_ptr<SeekableReadBuffer> getReader() override;

    size_t getFileSize() override
    {
        auto s = getFileInfo().file_size;
        if (!s)
            throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", uri.toString());
        return *s;
    }
    size_t getFileSize() override;

    bool checkIfActuallySeekable() override
    {
        return getFileInfo().seekable;
    }
    bool checkIfActuallySeekable() override;

    ReadWriteBufferFromHTTP::FileInfo getFileInfo()
    {
        if (!file_info)
            file_info = static_cast<ReadWriteBufferFromHTTP*>(getReader().get())->getFileInfo();
        return *file_info;
    }
    HTTPFileInfo getFileInfo();

    String getFileName() const override { return uri.toString(); }
    String getFileName() const override;

private:
    Poco::URI uri;
@@ -991,7 +278,7 @@ private:
    ReadSettings settings;
    HTTPHeaderEntries http_header_entries;
    const RemoteHostFilter * remote_host_filter;
    std::optional<ReadWriteBufferFromHTTP::FileInfo> file_info;
    std::optional<HTTPFileInfo> file_info;
    bool delay_initialization;
    bool use_external_buffer;
    bool skip_not_found_url;
@@ -1001,14 +288,11 @@ class PooledSessionFactory
{
public:
    explicit PooledSessionFactory(
        const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_)
        : timeouts(timeouts_)
        , per_endpoint_pool_size(per_endpoint_pool_size_)
    {}
        const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_);

    using SessionType = PooledHTTPSessionPtr;

    SessionType buildNewSession(const Poco::URI & uri) { return makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); }
    SessionType buildNewSession(const Poco::URI & uri);
private:
    ConnectionTimeouts timeouts;
    size_t per_endpoint_pool_size;
@@ -1028,16 +312,12 @@ public:
        const Poco::Net::HTTPBasicCredentials & credentials_ = {},
        size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
        const UInt64 max_redirects = 0,
        size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT)
        : Parent(
            std::make_shared<SessionType>(uri_, max_redirects, std::make_shared<PooledSessionFactory>(timeouts_, max_connections_per_endpoint)),
            uri_,
            credentials_,
            method_,
            out_stream_callback_,
            buffer_size_)
    {
    }
        size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT);
};

extern template class UpdatableSession<SessionFactory>;
extern template class UpdatableSession<PooledSessionFactory>;
extern template class detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<SessionFactory>>>;
extern template class detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<PooledSessionFactory>>>;

}

@@ -112,6 +112,22 @@ std::unique_ptr<Client> Client::create(const Client & other)
    return std::unique_ptr<Client>(new Client(other));
}

namespace
{

ProviderType deduceProviderType(const std::string & url)
{
    if (url.find(".amazonaws.com") != std::string::npos)
        return ProviderType::AWS;

    if (url.find("storage.googleapis.com") != std::string::npos)
        return ProviderType::GCS;

    return ProviderType::UNKNOWN;
}

}
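
deduceProviderType() above is a plain substring check; an equivalent standalone sketch, with a few illustrative endpoint URLs (the self-hosted one is a made-up example):

#include <cassert>
#include <string>

enum class ProviderType { AWS, GCS, UNKNOWN };

/// Mirror of deduceProviderType() above, for illustration only.
ProviderType deduce(const std::string & url)
{
    if (url.find(".amazonaws.com") != std::string::npos)
        return ProviderType::AWS;
    if (url.find("storage.googleapis.com") != std::string::npos)
        return ProviderType::GCS;
    return ProviderType::UNKNOWN;
}

int main()
{
    assert(deduce("https://bucket.s3.us-east-1.amazonaws.com") == ProviderType::AWS);
    assert(deduce("https://storage.googleapis.com/bucket") == ProviderType::GCS);
    assert(deduce("https://minio.example.local:9000") == ProviderType::UNKNOWN);  /// e.g. a self-hosted S3-compatible store
}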

Client::Client(
    size_t max_redirects_,
    ServerSideEncryptionKMSConfig sse_kms_config_,
@@ -128,9 +144,28 @@ Client::Client(
    endpoint_provider->GetBuiltInParameters().GetParameter("Region").GetString(explicit_region);
    endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(initial_endpoint);

    provider_type = getProviderTypeFromURL(initial_endpoint);
    provider_type = deduceProviderType(initial_endpoint);
    LOG_TRACE(log, "Provider type: {}", toString(provider_type));

    if (provider_type == ProviderType::GCS)
    {
        /// GCS can operate in 2 modes for header and query param names:
        /// - with both the x-amz and x-goog prefixes allowed (but different prefixes cannot be mixed in the same request)
        /// - only with the x-goog prefix
        /// The first mode is allowed only with HMAC (or unsigned requests), so when we
        /// find credential keys we can simply behave as if the underlying storage is S3;
        /// otherwise, we need to be aware that we are making requests to GCS
        /// and replace all headers with a valid prefix when needed.
        if (credentials_provider)
        {
            auto credentials = credentials_provider->GetAWSCredentials();
            if (credentials.IsEmpty())
                api_mode = ApiMode::GCS;
        }
    }

    LOG_TRACE(log, "API mode: {}", toString(api_mode));

    detect_region = provider_type == ProviderType::AWS && explicit_region == Aws::Region::AWS_GLOBAL;

    cache = std::make_shared<ClientCache>();
@@ -208,7 +243,7 @@ Model::HeadObjectOutcome Client::HeadObject(const HeadObjectRequest & request) c
{
    const auto & bucket = request.GetBucket();

    request.setProviderType(provider_type);
    request.setApiMode(api_mode);

    if (auto region = getRegionForBucket(bucket); !region.empty())
    {
@@ -348,7 +383,7 @@ std::invoke_result_t<RequestFn, RequestType>
Client::doRequest(const RequestType & request, RequestFn request_fn) const
{
    const auto & bucket = request.GetBucket();
    request.setProviderType(provider_type);
    request.setApiMode(api_mode);

    if (auto region = getRegionForBucket(bucket); !region.empty())
    {
@@ -421,9 +456,23 @@ Client::doRequest(const RequestType & request, RequestFn request_fn) const
    throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects");
}

ProviderType Client::getProviderType() const
bool Client::supportsMultiPartCopy() const
{
    return provider_type;
    return provider_type != ProviderType::GCS;
}

void Client::BuildHttpRequest(const Aws::AmazonWebServiceRequest& request,
                              const std::shared_ptr<Aws::Http::HttpRequest>& httpRequest) const
{
    Aws::S3::S3Client::BuildHttpRequest(request, httpRequest);

    if (api_mode == ApiMode::GCS)
    {
        /// Some GCS requests don't like the S3-specific headers that the client sets.
        httpRequest->DeleteHeader("x-amz-api-version");
        httpRequest->DeleteHeader("amz-sdk-invocation-id");
        httpRequest->DeleteHeader("amz-sdk-request");
    }
}

std::string Client::getRegionForBucket(const std::string & bucket, bool force_detect) const

@@ -190,7 +190,10 @@ public:
    using Aws::S3::S3Client::EnableRequestProcessing;
    using Aws::S3::S3Client::DisableRequestProcessing;

    ProviderType getProviderType() const;
    void BuildHttpRequest(const Aws::AmazonWebServiceRequest& request,
                          const std::shared_ptr<Aws::Http::HttpRequest>& httpRequest) const override;

    bool supportsMultiPartCopy() const;
private:
    Client(size_t max_redirects_,
           ServerSideEncryptionKMSConfig sse_kms_config_,
@@ -238,7 +241,12 @@ private:
    std::string explicit_region;
    mutable bool detect_region = true;

    /// The provider type can determine whether some functionality is supported,
    /// but for the same provider we may need to generate different headers depending on the mode.
    /// E.g. GCS can work in AWS mode in some cases and accept headers with the x-amz prefix.
    ProviderType provider_type{ProviderType::UNKNOWN};
    ApiMode api_mode{ApiMode::AWS};

    mutable std::shared_ptr<ClientCache> cache;

@@ -260,17 +260,6 @@ void PocoHTTPClient::makeRequestInternal(
    Poco::Logger * log = &Poco::Logger::get("AWSClient");

    auto uri = request.GetUri().GetURIString();
#if 0
    auto provider_type = getProviderTypeFromURL(uri);

    if (provider_type == ProviderType::GCS)
    {
        /// Some GCS requests don't like the S3-specific headers that the client sets.
        request.DeleteHeader("x-amz-api-version");
        request.DeleteHeader("amz-sdk-invocation-id");
        request.DeleteHeader("amz-sdk-request");
    }
#endif

    if (enable_s3_requests_logging)
        LOG_TEST(log, "Make request to: {}", uri);

@@ -22,20 +22,17 @@ std::string_view toString(ProviderType provider_type)
    }
}

bool supportsMultiPartCopy(ProviderType provider_type)
std::string_view toString(ApiMode api_mode)
{
    return provider_type != ProviderType::GCS;
}
    using enum ApiMode;

ProviderType getProviderTypeFromURL(const std::string & url)
{
    if (url.find(".amazonaws.com") != std::string::npos)
        return ProviderType::AWS;

    if (url.find("storage.googleapis.com") != std::string::npos)
        return ProviderType::GCS;

    return ProviderType::UNKNOWN;
    switch (api_mode)
    {
        case AWS:
            return "AWS";
        case GCS:
            return "GCS";
    }
}

}

@@ -10,6 +10,11 @@
namespace DB::S3
{

/// The provider type defines the platform containing the object
/// we are trying to access.
/// This information is useful for determining general support for
/// some feature, like multipart copy, which is currently supported by AWS
/// but not by GCS.
enum class ProviderType : uint8_t
{
    AWS,
@@ -19,9 +24,20 @@ enum class ProviderType : uint8_t

std::string_view toString(ProviderType provider_type);

bool supportsMultiPartCopy(ProviderType provider_type);
/// Mode in which we can use the XML API.
/// This value can be the same as the provider type, but there can be a difference.
/// For example, GCS can work in both
/// AWS-compatible mode (accepting headers starting with x-amz)
/// and GCS mode (accepting only headers starting with x-goog).
/// Because GCS mode is enforced when some features are used, we
/// need to have support for both.
enum class ApiMode : uint8_t
{
    AWS,
    GCS
};

ProviderType getProviderTypeFromURL(const std::string & url);
std::string_view toString(ApiMode api_mode);

}

@@ -10,7 +10,7 @@ namespace DB::S3
Aws::Http::HeaderValueCollection CopyObjectRequest::GetRequestSpecificHeaders() const
{
    auto headers = Model::CopyObjectRequest::GetRequestSpecificHeaders();
    if (provider_type != ProviderType::GCS)
    if (api_mode != ApiMode::GCS)
        return headers;

    /// GCS supports the same headers as S3 but with the prefix x-goog instead of x-amz.

@@ -62,15 +62,15 @@ public:
        return uri_override;
    }

    void setProviderType(ProviderType provider_type_) const
    void setApiMode(ApiMode api_mode_) const
    {
        provider_type = provider_type_;
        api_mode = api_mode_;
    }

protected:
    mutable std::string region_override;
    mutable std::optional<S3::URI> uri_override;
    mutable ProviderType provider_type{ProviderType::UNKNOWN};
    mutable ApiMode api_mode{ApiMode::AWS};
};

class CopyObjectRequest : public ExtendedRequest<Model::CopyObjectRequest>

@@ -595,7 +595,7 @@ namespace
    , src_key(src_key_)
    , offset(src_offset_)
    , size(src_size_)
    , supports_multipart_copy(S3::supportsMultiPartCopy(client_ptr_->getProviderType()))
    , supports_multipart_copy(client_ptr_->supportsMultiPartCopy())
{
}

@@ -316,6 +315,15 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b
        pos = next_pos;
        switch (*pos)
        {
            case quote_character:
            {
                if constexpr (escape_quote_with_quote)
                    writeChar(quote_character, buf);
                else
                    writeChar('\\', buf);
                writeChar(quote_character, buf);
                break;
            }
            case '\b':
                writeChar('\\', buf);
                writeChar('b', buf);
@@ -344,15 +353,6 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b
                writeChar('\\', buf);
                writeChar('\\', buf);
                break;
            case quote_character:
            {
                if constexpr (escape_quote_with_quote)
                    writeChar(quote_character, buf);
                else
                    writeChar('\\', buf);
                writeChar(quote_character, buf);
                break;
            }
            default:
                writeChar(*pos, buf);
        }
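
The hunks above move the quote_character case to the front of the switch. The two escaping styles it implements can be modeled standalone like this (a sketch, not the ClickHouse implementation):

#include <iostream>
#include <string>

/// escape_quote_with_quote == true doubles the quote (SQL style: don''t),
/// otherwise the quote is prefixed with a backslash (don\'t).
template <char quote_character, bool escape_quote_with_quote>
std::string escapeQuotes(const std::string & s)
{
    std::string out;
    for (char c : s)
    {
        if (c == quote_character)
        {
            out += escape_quote_with_quote ? quote_character : '\\';
            out += quote_character;
        }
        else
            out += c;
    }
    return out;
}

int main()
{
    std::cout << escapeQuotes<'\'', true>("don't") << '\n';   /// don''t
    std::cout << escapeQuotes<'\'', false>("don't") << '\n';  /// don\'t
}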

@@ -3,7 +3,6 @@
#include <array>

#include <Common/SipHash.h>
#include <Common/HashTable/Hash.h>
#include <Common/memcpySmall.h>
#include <Common/assert_cast.h>
#include <Core/Defines.h>

@@ -92,4 +92,4 @@ private:
    const size_t max_elements = 0;
};

};
}

@@ -170,4 +170,4 @@ size_t LRUFileCachePriority::LRUFileCacheIterator::use(const CacheGuard::Lock &)
    return ++queue_iter->hits;
}

};
}

@@ -65,4 +65,4 @@ private:
    mutable LRUFileCachePriority::LRUQueueIterator queue_iter;
};

};
}

@@ -299,7 +299,7 @@ LockedKey::LockedKey(std::shared_ptr<KeyMetadata> key_metadata_)

LockedKey::~LockedKey()
{
    if (!key_metadata->empty())
    if (!key_metadata->empty() || getKeyState() != KeyMetadata::KeyState::ACTIVE)
        return;

    key_metadata->key_state = KeyMetadata::KeyState::REMOVING;

@@ -6,7 +6,6 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/IAST.h>
#include <Processors/Sources/SourceFromChunks.h>
#include <Common/logger_useful.h>
#include <Common/ProfileEvents.h>
#include <Common/SipHash.h>
@@ -80,9 +79,7 @@ public:

    auto is_query_cache_related_setting = [](const auto & change)
    {
        return change.name == "allow_experimental_query_cache"
            || change.name.starts_with("query_cache")
            || change.name.ends_with("query_cache");
        return change.name.starts_with("query_cache_") || change.name.ends_with("_query_cache");
    };

    std::erase_if(set_clause->changes, is_query_cache_related_setting);
@@ -157,11 +154,13 @@ size_t QueryCache::KeyHasher::operator()(const Key & key) const
    return res;
}

size_t QueryCache::QueryResultWeight::operator()(const Chunks & chunks) const
size_t QueryCache::QueryCacheEntryWeight::operator()(const Entry & entry) const
{
    size_t res = 0;
    for (const auto & chunk : chunks)
    for (const auto & chunk : entry.chunks)
        res += chunk.allocatedBytes();
    res += entry.totals.has_value() ? entry.totals->allocatedBytes() : 0;
    res += entry.extremes.has_value() ? entry.extremes->allocatedBytes() : 0;
    return res;
}
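
QueryCacheEntryWeight above now counts the optional totals/extremes chunks in addition to the result chunks. The same accounting in a reduced, self-contained form (the Fake* types are simplified stand-ins for illustration):

#include <cstddef>
#include <iostream>
#include <optional>
#include <vector>

struct FakeChunk { size_t allocated_bytes; };  /// stand-in for DB::Chunk

struct FakeEntry
{
    std::vector<FakeChunk> chunks;
    std::optional<FakeChunk> totals;
    std::optional<FakeChunk> extremes;
};

/// Same accounting as QueryCacheEntryWeight::operator() above.
size_t weight(const FakeEntry & entry)
{
    size_t res = 0;
    for (const auto & chunk : entry.chunks)
        res += chunk.allocated_bytes;
    res += entry.totals.has_value() ? entry.totals->allocated_bytes : 0;
    res += entry.extremes.has_value() ? entry.extremes->allocated_bytes : 0;
    return res;
}

int main()
{
    FakeEntry entry{{{100}, {200}}, FakeChunk{10}, std::nullopt};
    std::cout << weight(entry) << '\n';  /// 310
}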

@@ -191,25 +190,57 @@ QueryCache::Writer::Writer(
    }
}

void QueryCache::Writer::buffer(Chunk && partial_query_result)
QueryCache::Writer::Writer(const Writer & other)
    : cache(other.cache)
    , key(other.key)
    , max_entry_size_in_bytes(other.max_entry_size_in_bytes)
    , max_entry_size_in_rows(other.max_entry_size_in_rows)
    , min_query_runtime(other.min_query_runtime)
    , squash_partial_results(other.squash_partial_results)
    , max_block_size(other.max_block_size)
{
}

void QueryCache::Writer::buffer(Chunk && chunk, ChunkType chunk_type)
{
    if (skip_insert)
        return;

    /// Reading from the query cache is implemented using processor `SourceFromChunks` which inherits from `ISource`.
    /// The latter has logic which finishes processing (= calls `.finish()` on the output port + returns `Status::Finished`)
    /// when the derived class returns an empty chunk. If this empty chunk is not the last chunk,
    /// i.e. if it is followed by non-empty chunks, the query result will be incorrect.
    /// This situation should theoretically never occur in practice but who knows...
    /// To be on the safe side, writing into the query cache now rejects empty chunks and thereby avoids this scenario.
    if (chunk.empty())
        return;

    std::lock_guard lock(mutex);

    auto & chunks = *query_result;

    chunks.emplace_back(std::move(partial_query_result));

    new_entry_size_in_bytes += chunks.back().allocatedBytes();
    new_entry_size_in_rows += chunks.back().getNumRows();

    if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows))
    switch (chunk_type)
    {
        chunks.clear(); /// eagerly free some space
        skip_insert = true;
        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.queryStringFromAst());
        case ChunkType::Result:
        {
            /// Normal query result chunks are simply buffered. They are squashed and compressed later in finalizeWrite().
            query_result->chunks.emplace_back(std::move(chunk));
            break;
        }
        case ChunkType::Totals:
        case ChunkType::Extremes:
        {
            /// For simplicity, totals and extremes chunks are immediately squashed (totals/extremes are obscure and even if enabled, few
            /// such chunks are expected).
            auto & buffered_chunk = (chunk_type == ChunkType::Totals) ? query_result->totals : query_result->extremes;

            convertToFullIfSparse(chunk);

            if (!buffered_chunk.has_value())
                buffered_chunk = std::move(chunk);
            else
                buffered_chunk->append(chunk);

            break;
        }
    }
}

@@ -222,6 +253,8 @@ void QueryCache::Writer::finalizeWrite()

    chassert(!was_finalized);

    /// Check some reasons why the entry must not be cached:

    if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - query_start_time) < min_query_runtime)
    {
        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.queryStringFromAst());
@@ -244,7 +277,7 @@ void QueryCache::Writer::finalizeWrite()
    Chunks squashed_chunks;
    size_t rows_remaining_in_squashed = 0; /// how many further rows can the last squashed chunk consume until it reaches max_block_size

    for (auto & chunk : *query_result)
    for (auto & chunk : query_result->chunks)
    {
        convertToFullIfSparse(chunk);

@@ -272,26 +305,49 @@ void QueryCache::Writer::finalizeWrite()
        }
    }

    *query_result = std::move(squashed_chunks);
    query_result->chunks = std::move(squashed_chunks);
}

if (key.is_compressed)
{
    /// Compress result chunks. Reduces the space consumption of the cache but means reading from it will be slower due to decompression.

    Chunks compressed_chunks;
    const Chunks & decompressed_chunks = *query_result;
    for (const auto & decompressed_chunk : decompressed_chunks)

    for (const auto & chunk : query_result->chunks)
    {
        const Columns & decompressed_columns = decompressed_chunk.getColumns();
        const Columns & columns = chunk.getColumns();
        Columns compressed_columns;
        for (const auto & decompressed_column : decompressed_columns)
        for (const auto & column : columns)
        {
            auto compressed_column = decompressed_column->compress();
            auto compressed_column = column->compress();
            compressed_columns.push_back(compressed_column);
        }
        Chunk compressed_chunk(compressed_columns, decompressed_chunk.getNumRows());
        Chunk compressed_chunk(compressed_columns, chunk.getNumRows());
        compressed_chunks.push_back(std::move(compressed_chunk));
    }
    *query_result = std::move(compressed_chunks);
    query_result->chunks = std::move(compressed_chunks);
}

/// Check more reasons why the entry must not be cached.

auto count_rows_in_chunks = [](const Entry & entry)
{
    size_t res = 0;
    for (const auto & chunk : entry.chunks)
        res += chunk.getNumRows();
    res += entry.totals.has_value() ? entry.totals->getNumRows() : 0;
    res += entry.extremes.has_value() ? entry.extremes->getNumRows() : 0;
    return res;
};

size_t new_entry_size_in_bytes = QueryCacheEntryWeight()(*query_result);
size_t new_entry_size_in_rows = count_rows_in_chunks(*query_result);

if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows))
{
    LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.queryStringFromAst());
    return;
}

cache.set(key, query_result);
@@ -299,6 +355,26 @@ void QueryCache::Writer::finalizeWrite()
    was_finalized = true;
}
|
||||
/// Creates a source processor which serves result chunks stored in the query cache, and separate sources for optional totals/extremes.
|
||||
void QueryCache::Reader::buildSourceFromChunks(Block header, Chunks && chunks, const std::optional<Chunk> & totals, const std::optional<Chunk> & extremes)
|
||||
{
|
||||
source_from_chunks = std::make_unique<SourceFromChunks>(header, std::move(chunks));
|
||||
|
||||
if (totals.has_value())
|
||||
{
|
||||
Chunks chunks_totals;
|
||||
chunks_totals.emplace_back(totals->clone());
|
||||
source_from_chunks_totals = std::make_unique<SourceFromChunks>(header, std::move(chunks_totals));
|
||||
}
|
||||
|
||||
if (extremes.has_value())
|
||||
{
|
||||
Chunks chunks_extremes;
|
||||
chunks_extremes.emplace_back(extremes->clone());
|
||||
source_from_chunks_extremes = std::make_unique<SourceFromChunks>(header, std::move(chunks_extremes));
|
||||
}
|
||||
}
|
||||
|
||||
QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guard<std::mutex> &)
|
||||
{
|
||||
auto entry = cache_.getWithKey(key);
|
||||
@ -322,25 +398,37 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
|
||||
}
|
||||
|
||||
if (!entry->key.is_compressed)
|
||||
pipe = Pipe(std::make_shared<SourceFromChunks>(entry->key.header, entry->mapped));
|
||||
{
|
||||
// Cloning chunks isn't exactly great. It could be avoided by another indirection, i.e. wrapping Entry's members chunks, totals and
|
||||
// extremes into shared_ptrs and assuming that the lifecycle of these shared_ptrs coincides with the lifecycle of the Entry
|
||||
// shared_ptr. This is not done 1. to keep things simple 2. this case (uncompressed chunks) is the exceptional case, in the other
|
||||
// case (the default case aka. compressed chunks) we need to decompress the entry anyways and couldn't apply the potential
|
||||
// optimization.
|
||||
|
||||
Chunks cloned_chunks;
|
||||
for (const auto & chunk : entry->mapped->chunks)
|
||||
cloned_chunks.push_back(chunk.clone());
|
||||
|
||||
buildSourceFromChunks(entry->key.header, std::move(cloned_chunks), entry->mapped->totals, entry->mapped->extremes);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto decompressed_chunks = std::make_shared<Chunks>();
|
||||
const Chunks & compressed_chunks = *entry->mapped;
|
||||
for (const auto & compressed_chunk : compressed_chunks)
|
||||
Chunks decompressed_chunks;
|
||||
const Chunks & chunks = entry->mapped->chunks;
|
||||
for (const auto & chunk : chunks)
|
||||
{
|
||||
const Columns & compressed_chunk_columns = compressed_chunk.getColumns();
|
||||
const Columns & columns = chunk.getColumns();
|
||||
Columns decompressed_columns;
|
||||
for (const auto & compressed_column : compressed_chunk_columns)
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
auto column = compressed_column->decompress();
|
||||
decompressed_columns.push_back(column);
|
||||
auto decompressed_column = column->decompress();
|
||||
decompressed_columns.push_back(decompressed_column);
|
||||
}
|
||||
Chunk decompressed_chunk(decompressed_columns, compressed_chunk.getNumRows());
|
||||
decompressed_chunks->push_back(std::move(decompressed_chunk));
|
||||
Chunk decompressed_chunk(decompressed_columns, chunk.getNumRows());
|
||||
decompressed_chunks.push_back(std::move(decompressed_chunk));
|
||||
}
|
||||
|
||||
pipe = Pipe(std::make_shared<SourceFromChunks>(entry->key.header, decompressed_chunks));
|
||||
buildSourceFromChunks(entry->key.header, std::move(decompressed_chunks), entry->mapped->totals, entry->mapped->extremes);
|
||||
}
|
||||
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst());
|
||||
@ -348,20 +436,29 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
|
||||
|
||||
bool QueryCache::Reader::hasCacheEntryForKey() const
|
||||
{
|
||||
bool res = !pipe.empty();
|
||||
bool has_entry = (source_from_chunks != nullptr);
|
||||
|
||||
if (res)
|
||||
if (has_entry)
|
||||
ProfileEvents::increment(ProfileEvents::QueryCacheHits);
|
||||
else
|
||||
ProfileEvents::increment(ProfileEvents::QueryCacheMisses);
|
||||
|
||||
return res;
|
||||
return has_entry;
|
||||
}
|
||||
|
||||
Pipe && QueryCache::Reader::getPipe()
|
||||
std::unique_ptr<SourceFromChunks> QueryCache::Reader::getSource()
|
||||
{
|
||||
chassert(!pipe.empty()); // cf. hasCacheEntryForKey()
|
||||
return std::move(pipe);
|
||||
return std::move(source_from_chunks);
|
||||
}
|
||||
|
||||
std::unique_ptr<SourceFromChunks> QueryCache::Reader::getSourceTotals()
|
||||
{
|
||||
return std::move(source_from_chunks_totals);
|
||||
}
|
||||
|
||||
std::unique_ptr<SourceFromChunks> QueryCache::Reader::getSourceExtremes()
|
||||
{
|
||||
return std::move(source_from_chunks_extremes);
|
||||
}
|
||||
|
||||
QueryCache::Reader QueryCache::createReader(const Key & key)
|
||||
@ -406,7 +503,7 @@ std::vector<QueryCache::Cache::KeyMapped> QueryCache::dump() const
|
||||
}
|
||||
|
||||
QueryCache::QueryCache()
|
||||
: cache(std::make_unique<TTLCachePolicy<Key, Chunks, KeyHasher, QueryResultWeight, IsStale>>(std::make_unique<PerUserTTLCachePolicyUserQuota>()))
|
||||
: cache(std::make_unique<TTLCachePolicy<Key, Entry, KeyHasher, QueryCacheEntryWeight, IsStale>>(std::make_unique<PerUserTTLCachePolicyUserQuota>()))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Common/CacheBase.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Processors/Sources/SourceFromChunks.h>
|
||||
#include <Poco/Util/LayeredConfiguration.h>
|
||||
#include <Processors/Chunk.h>
|
||||
#include <QueryPipeline/Pipe.h>
|
||||
@ -53,7 +54,8 @@ public:
|
||||
/// When does the entry expire?
|
||||
const std::chrono::time_point<std::chrono::system_clock> expires_at;
|
||||
|
||||
/// Is the entry compressed?
|
||||
/// Are the chunks in the entry compressed?
|
||||
/// (we could theoretically apply compression also to the totals and extremes but it's an obscure use case)
|
||||
const bool is_compressed;
|
||||
|
||||
Key(ASTPtr ast_,
|
||||
@ -66,15 +68,22 @@ public:
|
||||
String queryStringFromAst() const;
|
||||
};
|
||||
|
||||
struct Entry
|
||||
{
|
||||
Chunks chunks;
|
||||
std::optional<Chunk> totals = std::nullopt;
|
||||
std::optional<Chunk> extremes = std::nullopt;
|
||||
};
|
||||
|
||||
private:
|
||||
struct KeyHasher
|
||||
{
|
||||
size_t operator()(const Key & key) const;
|
||||
};
|
||||
|
||||
struct QueryResultWeight
|
||||
struct QueryCacheEntryWeight
|
||||
{
|
||||
size_t operator()(const Chunks & chunks) const;
|
||||
size_t operator()(const Entry & entry) const;
|
||||
};
|
||||
|
||||
struct IsStale
|
||||
@ -83,7 +92,7 @@ private:
|
||||
};
|
||||
|
||||
/// query --> query result
|
||||
using Cache = CacheBase<Key, Chunks, KeyHasher, QueryResultWeight>;
|
||||
using Cache = CacheBase<Key, Entry, KeyHasher, QueryCacheEntryWeight>;
|
||||
|
||||
/// query --> query execution count
|
||||
using TimesExecuted = std::unordered_map<Key, size_t, KeyHasher>;
|
||||
@ -103,21 +112,24 @@ public:
|
||||
class Writer
|
||||
{
|
||||
public:
|
||||
void buffer(Chunk && partial_query_result);
|
||||
|
||||
Writer(const Writer & other);
|
||||
|
||||
enum class ChunkType {Result, Totals, Extremes};
|
||||
void buffer(Chunk && chunk, ChunkType chunk_type);
|
||||
|
||||
void finalizeWrite();
|
||||
private:
|
||||
std::mutex mutex;
|
||||
Cache & cache;
|
||||
const Key key;
|
||||
size_t new_entry_size_in_bytes TSA_GUARDED_BY(mutex) = 0;
|
||||
const size_t max_entry_size_in_bytes;
|
||||
size_t new_entry_size_in_rows TSA_GUARDED_BY(mutex) = 0;
|
||||
const size_t max_entry_size_in_rows;
|
||||
const std::chrono::time_point<std::chrono::system_clock> query_start_time = std::chrono::system_clock::now(); /// Writer construction and finalizeWrite() coincide with query start/end
|
||||
const std::chrono::milliseconds min_query_runtime;
|
||||
const bool squash_partial_results;
|
||||
const size_t max_block_size;
|
||||
std::shared_ptr<Chunks> query_result TSA_GUARDED_BY(mutex) = std::make_shared<Chunks>();
|
||||
Cache::MappedPtr query_result TSA_GUARDED_BY(mutex) = std::make_shared<Entry>();
|
||||
std::atomic<bool> skip_insert = false;
|
||||
bool was_finalized = false;
|
||||
|
||||
@ -130,15 +142,22 @@ public:
|
||||
friend class QueryCache; /// for createWriter()
|
||||
};
|
||||
|
||||
/// Looks up a query result for a key in the cache and (if found) constructs a pipe with the query result chunks as source.
|
||||
/// Reader's constructor looks up a query result for a key in the cache. If found, it constructs source processors (that generate the
|
||||
/// cached result) for use in a pipe or query pipeline.
|
||||
class Reader
|
||||
{
|
||||
public:
|
||||
bool hasCacheEntryForKey() const;
|
||||
Pipe && getPipe(); /// must be called only if hasCacheEntryForKey() returns true
|
||||
/// getSource*() moves source processors out of the Reader. Call each of these method just once.
|
||||
std::unique_ptr<SourceFromChunks> getSource();
|
||||
std::unique_ptr<SourceFromChunks> getSourceTotals();
|
||||
std::unique_ptr<SourceFromChunks> getSourceExtremes();
|
||||
private:
|
||||
Reader(Cache & cache_, const Key & key, const std::lock_guard<std::mutex> &);
|
||||
Pipe pipe;
|
||||
void buildSourceFromChunks(Block header, Chunks && chunks, const std::optional<Chunk> & totals, const std::optional<Chunk> & extremes);
|
||||
std::unique_ptr<SourceFromChunks> source_from_chunks;
|
||||
std::unique_ptr<SourceFromChunks> source_from_chunks_totals;
|
||||
std::unique_ptr<SourceFromChunks> source_from_chunks_extremes;
|
||||
friend class QueryCache; /// for createReader()
|
||||
};
|
||||
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <Common/RemoteHostFilter.h>
|
||||
#include <Common/ThreadPool_fwd.h>
|
||||
#include <Common/Throttler_fwd.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/UUID.h>
|
||||
@ -25,12 +24,10 @@
|
||||
#include "config.h"
|
||||
|
||||
#include <boost/container/flat_set.hpp>
|
||||
#include <exception>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
|
||||
|
||||
namespace Poco::Net { class IPAddress; }
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <Parsers/ASTShowProcesslistQuery.h>
|
||||
#include <Parsers/ASTShowTablesQuery.h>
|
||||
#include <Parsers/ASTShowColumnsQuery.h>
|
||||
#include <Parsers/ASTShowIndexesQuery.h>
|
||||
#include <Parsers/ASTUseQuery.h>
|
||||
#include <Parsers/ASTWatchQuery.h>
|
||||
#include <Parsers/ASTCreateNamedCollectionQuery.h>
|
||||
@ -81,6 +82,7 @@
|
||||
#include <Interpreters/InterpreterShowProcesslistQuery.h>
|
||||
#include <Interpreters/InterpreterShowTablesQuery.h>
|
||||
#include <Interpreters/InterpreterShowColumnsQuery.h>
|
||||
#include <Interpreters/InterpreterShowIndexesQuery.h>
|
||||
#include <Interpreters/InterpreterSystemQuery.h>
|
||||
#include <Interpreters/InterpreterUseQuery.h>
|
||||
#include <Interpreters/InterpreterWatchQuery.h>
|
||||
@ -181,6 +183,10 @@ std::unique_ptr<IInterpreter> InterpreterFactory::get(ASTPtr & query, ContextMut
|
||||
{
|
||||
return std::make_unique<InterpreterShowColumnsQuery>(query, context);
|
||||
}
|
||||
else if (query->as<ASTShowIndexesQuery>())
|
||||
{
|
||||
return std::make_unique<InterpreterShowIndexesQuery>(query, context);
|
||||
}
|
||||
else if (query->as<ASTShowEnginesQuery>())
|
||||
{
|
||||
return std::make_unique<InterpreterShowEnginesQuery>(query, context);
|
||||
|
@ -1,12 +1,12 @@
#include <Interpreters/InterpreterShowColumnsQuery.h>

#include <Common/quoteString.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Parsers/ASTShowColumnsQuery.h>
#include <Parsers/formatAST.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeQuery.h>
#include <IO/Operators.h>
#include <boost/algorithm/string.hpp>


namespace DB
@ -24,9 +24,23 @@ String InterpreterShowColumnsQuery::getRewrittenQuery()
{
    const auto & query = query_ptr->as<ASTShowColumnsQuery &>();

    WriteBufferFromOwnString rewritten_query;
    WriteBufferFromOwnString buf_database;
    String resolved_database = getContext()->resolveDatabase(query.database);
    writeEscapedString(resolved_database, buf_database);
    String database = buf_database.str();

    rewritten_query << "SELECT name AS field, type AS type, startsWith(type, 'Nullable') AS null, trim(concatWithSeparator(' ', if(is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, if(default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, '' AS extra ";
    WriteBufferFromOwnString buf_table;
    writeEscapedString(query.table, buf_table);
    String table = buf_table.str();

    String rewritten_query = R"(
SELECT
    name AS field,
    type AS type,
    startsWith(type, 'Nullable') AS null,
    trim(concatWithSeparator(' ', if (is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key,
    if (default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default,
    '' AS extra )";

    // TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see
    // IStorage::getVirtuals(). We can't easily do that via SQL.
@ -37,60 +51,41 @@ String InterpreterShowColumnsQuery::getRewrittenQuery()
        /// - collation: no such thing in ClickHouse
        /// - comment
        /// - privileges: <not implemented, TODO ask system.grants>
        rewritten_query << ", NULL AS collation, comment, '' AS privileges ";
        rewritten_query += R"(,
    NULL AS collation,
    comment,
    '' AS privileges )";
    }

    rewritten_query << "FROM system.columns WHERE ";

    String database;
    String table;
    if (query.from_table.contains("."))
    {
        /// FROM <db>.<table> (abbreviated form)
        chassert(query.from_database.empty());
        std::vector<String> split;
        boost::split(split, query.from_table, boost::is_any_of("."));
        chassert(split.size() == 2);
        database = split[0];
        table = split[1];
    }
    else if (query.from_database.empty())
    {
        /// FROM <table>
        chassert(!query.from_table.empty());
        database = getContext()->getCurrentDatabase();
        table = query.from_table;
    }
    else
    {
        /// FROM <database> FROM <table>
        chassert(!query.from_database.empty());
        chassert(!query.from_table.empty());
        database = query.from_database;
        table = query.from_table;
    }
    rewritten_query << "database = " << DB::quote << database;
    rewritten_query << " AND table = " << DB::quote << table;
    rewritten_query += fmt::format(R"(
FROM system.columns
WHERE
    database = '{}'
    AND table = '{}' )", database, table);

    if (!query.like.empty())
        rewritten_query
            << " AND name "
            << (query.not_like ? "NOT " : "")
            << (query.case_insensitive_like ? "ILIKE " : "LIKE ")
            << DB::quote << query.like;
    {
        rewritten_query += " AND name ";
        if (query.not_like)
            rewritten_query += "NOT ";
        if (query.case_insensitive_like)
            rewritten_query += "ILIKE ";
        else
            rewritten_query += "LIKE ";
        rewritten_query += fmt::format("'{}'", query.like);
    }
    else if (query.where_expression)
        rewritten_query << " AND (" << query.where_expression << ")";
        rewritten_query += fmt::format(" AND ({})", query.where_expression);

    /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to
    /// sort the output of SHOW COLUMNS otherwise (SELECT * FROM (SHOW COLUMNS ...) ORDER BY ... is rejected) and 3. some
    /// SQL tests can take advantage of this.
    rewritten_query << " ORDER BY field, type, null, key, default, extra";
    rewritten_query += " ORDER BY field, type, null, key, default, extra";

    if (query.limit_length)
        rewritten_query << " LIMIT " << query.limit_length;

    return rewritten_query.str();
        rewritten_query += fmt::format(" LIMIT {}", query.limit_length);

    return rewritten_query;
}
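The net effect of the rewrite above is that SHOW COLUMNS becomes a plain SELECT against system.columns, now assembled with String concatenation and fmt::format instead of a WriteBuffer. A standalone sketch of the fmt::format piece; the db/tbl values are illustrative, and in the interpreter the names have already been escaped (writeEscapedString) before being spliced between the single quotes:

#include <fmt/format.h>
#include <string>

int main()
{
    std::string database = "db";   // illustrative; escaped upstream in the interpreter
    std::string table = "tbl";     // illustrative

    // Sequential '{}' placeholders are filled left to right.
    std::string where_clause = fmt::format(R"(
FROM system.columns
WHERE
    database = '{}'
    AND table = '{}' )", database, table);

    fmt::print("{}\n", where_clause);
}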
100
src/Interpreters/InterpreterShowIndexesQuery.cpp
Normal file
@ -0,0 +1,100 @@
#include <Interpreters/InterpreterShowIndexesQuery.h>

#include <Common/quoteString.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Parsers/ASTShowIndexesQuery.h>
#include <Parsers/formatAST.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeQuery.h>


namespace DB
{


InterpreterShowIndexesQuery::InterpreterShowIndexesQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_)
    : WithMutableContext(context_)
    , query_ptr(query_ptr_)
{
}


String InterpreterShowIndexesQuery::getRewrittenQuery()
{
    const auto & query = query_ptr->as<ASTShowIndexesQuery &>();

    WriteBufferFromOwnString buf_table;
    writeEscapedString(query.table, buf_table);
    String table = buf_table.str();

    WriteBufferFromOwnString buf_database;
    String resolved_database = getContext()->resolveDatabase(query.database);
    writeEscapedString(resolved_database, buf_database);
    String database = buf_database.str();

    String where_expression = query.where_expression ? fmt::format("WHERE ({})", query.where_expression) : "";

    String rewritten_query = fmt::format(R"(
SELECT *
FROM (
        (SELECT
            name AS table,
            0 AS non_unique,
            'PRIMARY' AS key_name,
            NULL AS seq_in_index,
            NULL AS column_name,
            'A' AS collation,
            NULL AS cardinality,
            NULL AS sub_part,
            NULL AS packed,
            NULL AS null,
            'primary' AS index_type,
            NULL AS comment,
            NULL AS index_comment,
            'YES' AS visible,
            primary_key AS expression
        FROM system.tables
        WHERE
            database = '{0}'
            AND name = '{1}')
    UNION ALL (
        SELECT
            table AS table,
            0 AS non_unique,
            name AS key_name,
            NULL AS seq_in_index,
            NULL AS column_name,
            NULL AS collation,
            NULL AS cardinality,
            NULL AS sub_part,
            NULL AS packed,
            NULL AS null,
            type AS index_type,
            NULL AS comment,
            NULL AS index_comment,
            'YES' AS visible,
            expr AS expression
        FROM system.data_skipping_indices
        WHERE
            database = '{0}'
            AND table = '{1}'))
{2}
ORDER BY index_type, expression;)", database, table, where_expression);

    /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to
    /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ... is rejected) and 3. some
    /// SQL tests can take advantage of this.

    return rewritten_query;
}


BlockIO InterpreterShowIndexesQuery::execute()
{
    return executeQuery(getRewrittenQuery(), getContext(), true);
}


}
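One detail of the format string above: it uses numbered placeholders ({0}, {1}, {2}) rather than sequential {} because the database and table names each have to be spliced into both branches of the UNION ALL. A minimal illustration of that fmt behavior (the argument values are made up):

#include <fmt/format.h>

int main()
{
    // '{0}' and '{1}' are each reused; '{2}' is consumed once.
    fmt::print(R"(database = '{0}' AND name = '{1}'
database = '{0}' AND table = '{1}'
{2}
)", "db", "tbl", "WHERE (non_unique = 0)");
}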
33
src/Interpreters/InterpreterShowIndexesQuery.h
Normal file
@ -0,0 +1,33 @@
#pragma once

#include <Interpreters/IInterpreter.h>
#include <Parsers/IAST_fwd.h>


namespace DB
{

class Context;


/// Returns a list of indexes which meet some conditions.
class InterpreterShowIndexesQuery : public IInterpreter, WithMutableContext
{
public:
    InterpreterShowIndexesQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_);

    BlockIO execute() override;

    /// Ignore quota and limits here because execute() produces a SELECT query which checks quotas/limits by itself.
    bool ignoreQuota() const override { return true; }
    bool ignoreLimits() const override { return true; }

private:
    ASTPtr query_ptr;

    String getRewrittenQuery();
};


}
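The ignoreQuota()/ignoreLimits() overrides encode that this interpreter is only a thin rewriter: the SELECT it hands to executeQuery() goes through the normal pipeline, where quotas and limits are enforced, so checking them on the wrapper as well would account for one user query twice. A rough sketch of that shape; every name here is a stand-in, not ClickHouse code:

#include <string>

struct RewritingInterpreterSketch
{
    /// The rewritten SELECT is checked by the normal pipeline; checking the
    /// wrapper too would count the same query twice.
    bool ignoreQuota() const { return true; }
    bool ignoreLimits() const { return true; }

    std::string execute() const
    {
        return runThroughNormalPipeline("SELECT ... FROM system.data_skipping_indices ...");
    }

    /// Stand-in for executeQuery(): the layer that applies quotas/limits.
    static std::string runThroughNormalPipeline(std::string query) { return query; }
};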
@ -41,6 +41,9 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
        {"ExceptionWhileProcessing", static_cast<Int8>(EXCEPTION_WHILE_PROCESSING)}
    });

    auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
    auto array_low_cardinality_string = std::make_shared<DataTypeArray>(low_cardinality_string);

    return
    {
        {"type", std::move(query_status_datatype)},
@ -59,31 +62,26 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
        {"result_bytes", std::make_shared<DataTypeUInt64>()},
        {"memory_usage", std::make_shared<DataTypeUInt64>()},

        {"current_database", std::make_shared<DataTypeString>()},
        {"current_database", low_cardinality_string},
        {"query", std::make_shared<DataTypeString>()},
        {"formatted_query", std::make_shared<DataTypeString>()},
        {"normalized_query_hash", std::make_shared<DataTypeUInt64>()},
        {"query_kind", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
        {"databases", std::make_shared<DataTypeArray>(
            std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
        {"tables", std::make_shared<DataTypeArray>(
            std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
        {"columns", std::make_shared<DataTypeArray>(
            std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
        {"projections", std::make_shared<DataTypeArray>(
            std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
        {"views", std::make_shared<DataTypeArray>(
            std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
        {"query_kind", low_cardinality_string},
        {"databases", array_low_cardinality_string},
        {"tables", array_low_cardinality_string},
        {"columns", array_low_cardinality_string},
        {"projections", array_low_cardinality_string},
        {"views", array_low_cardinality_string},
        {"exception_code", std::make_shared<DataTypeInt32>()},
        {"exception", std::make_shared<DataTypeString>()},
        {"stack_trace", std::make_shared<DataTypeString>()},

        {"is_initial_query", std::make_shared<DataTypeUInt8>()},
        {"user", std::make_shared<DataTypeString>()},
        {"user", low_cardinality_string},
        {"query_id", std::make_shared<DataTypeString>()},
        {"address", DataTypeFactory::instance().get("IPv6")},
        {"port", std::make_shared<DataTypeUInt16>()},
        {"initial_user", std::make_shared<DataTypeString>()},
        {"initial_user", low_cardinality_string},
        {"initial_query_id", std::make_shared<DataTypeString>()},
        {"initial_address", DataTypeFactory::instance().get("IPv6")},
        {"initial_port", std::make_shared<DataTypeUInt16>()},
@ -91,15 +89,15 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
        {"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
        {"interface", std::make_shared<DataTypeUInt8>()},
        {"is_secure", std::make_shared<DataTypeUInt8>()},
        {"os_user", std::make_shared<DataTypeString>()},
        {"client_hostname", std::make_shared<DataTypeString>()},
        {"client_name", std::make_shared<DataTypeString>()},
        {"os_user", low_cardinality_string},
        {"client_hostname", low_cardinality_string},
        {"client_name", low_cardinality_string},
        {"client_revision", std::make_shared<DataTypeUInt32>()},
        {"client_version_major", std::make_shared<DataTypeUInt32>()},
        {"client_version_minor", std::make_shared<DataTypeUInt32>()},
        {"client_version_patch", std::make_shared<DataTypeUInt32>()},
        {"http_method", std::make_shared<DataTypeUInt8>()},
        {"http_user_agent", std::make_shared<DataTypeString>()},
        {"http_user_agent", low_cardinality_string},
        {"http_referer", std::make_shared<DataTypeString>()},
        {"forwarded_for", std::make_shared<DataTypeString>()},
        {"quota_key", std::make_shared<DataTypeString>()},
@ -110,36 +108,38 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
        {"log_comment", std::make_shared<DataTypeString>()},

        {"thread_ids", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
        {"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
        {"Settings", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>())},
        {"ProfileEvents", std::make_shared<DataTypeMap>(low_cardinality_string, std::make_shared<DataTypeUInt64>())},
        {"Settings", std::make_shared<DataTypeMap>(low_cardinality_string, low_cardinality_string)},

        {"used_aggregate_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_aggregate_function_combinators", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_database_engines", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_data_type_families", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_dictionaries", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_formats", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_storages", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_table_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"used_aggregate_functions", array_low_cardinality_string},
        {"used_aggregate_function_combinators", array_low_cardinality_string},
        {"used_database_engines", array_low_cardinality_string},
        {"used_data_type_families", array_low_cardinality_string},
        {"used_dictionaries", array_low_cardinality_string},
        {"used_formats", array_low_cardinality_string},
        {"used_functions", array_low_cardinality_string},
        {"used_storages", array_low_cardinality_string},
        {"used_table_functions", array_low_cardinality_string},

        {"used_row_policies", std::make_shared<DataTypeArray>(std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
        {"used_row_policies", array_low_cardinality_string},

        {"transaction_id", getTransactionIDDataType()},

        {"AsyncReadCounters", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
        {"asynchronous_read_counters", std::make_shared<DataTypeMap>(low_cardinality_string, std::make_shared<DataTypeUInt64>())},
    };

}

NamesAndAliases QueryLogElement::getNamesAndAliases()
{
    auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
    auto array_low_cardinality_string = std::make_shared<DataTypeArray>(low_cardinality_string);

    return
    {
        {"ProfileEvents.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapKeys(ProfileEvents)"},
        {"ProfileEvents.Names", array_low_cardinality_string, "mapKeys(ProfileEvents)"},
        {"ProfileEvents.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())}, "mapValues(ProfileEvents)"},
        {"Settings.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapKeys(Settings)" },
        {"Settings.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapValues(Settings)"}
        {"Settings.Names", array_low_cardinality_string, "mapKeys(Settings)" },
        {"Settings.Values", array_low_cardinality_string, "mapValues(Settings)"}
    };
}
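A side note on the refactoring above: a single low_cardinality_string instance is created once and then referenced by every column that needs the type. That is safe because the data type objects are held through shared_ptr and treated as immutable, so the entries share one allocation instead of each rebuilding its own DataTypeLowCardinality chain. A sketch of the pattern with a stand-in type (DataTypeSketch is not ClickHouse code):

#include <memory>
#include <string>
#include <utility>
#include <vector>

struct DataTypeSketch { std::string name; };  // stand-in for IDataType

int main()
{
    auto low_cardinality_string = std::make_shared<DataTypeSketch>(DataTypeSketch{"LowCardinality(String)"});

    // Every entry references the same object; no per-column construction.
    std::vector<std::pair<std::string, std::shared_ptr<DataTypeSketch>>> columns =
    {
        {"current_database", low_cardinality_string},
        {"user", low_cardinality_string},
        {"os_user", low_cardinality_string},
    };

    // use_count() is 4 here: the local variable plus the three entries.
    return low_cardinality_string.use_count() == 4 ? 0 : 1;
}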
@ -10,6 +10,7 @@
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/ProfileEventsExt.h>
#include <Interpreters/QueryLog.h>
@ -22,7 +23,10 @@ namespace DB

NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
{
    return {
    auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());

    return
    {
        {"event_date", std::make_shared<DataTypeDate>()},
        {"event_time", std::make_shared<DataTypeDateTime>()},
        {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
@ -37,19 +41,19 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
        {"memory_usage", std::make_shared<DataTypeInt64>()},
        {"peak_memory_usage", std::make_shared<DataTypeInt64>()},

        {"thread_name", std::make_shared<DataTypeString>()},
        {"thread_name", low_cardinality_string},
        {"thread_id", std::make_shared<DataTypeUInt64>()},
        {"master_thread_id", std::make_shared<DataTypeUInt64>()},
        {"current_database", std::make_shared<DataTypeString>()},
        {"current_database", low_cardinality_string},
        {"query", std::make_shared<DataTypeString>()},
        {"normalized_query_hash", std::make_shared<DataTypeUInt64>()},

        {"is_initial_query", std::make_shared<DataTypeUInt8>()},
        {"user", std::make_shared<DataTypeString>()},
        {"user", low_cardinality_string},
        {"query_id", std::make_shared<DataTypeString>()},
        {"address", DataTypeFactory::instance().get("IPv6")},
        {"port", std::make_shared<DataTypeUInt16>()},
        {"initial_user", std::make_shared<DataTypeString>()},
        {"initial_user", low_cardinality_string},
        {"initial_query_id", std::make_shared<DataTypeString>()},
        {"initial_address", DataTypeFactory::instance().get("IPv6")},
        {"initial_port", std::make_shared<DataTypeUInt16>()},
@ -57,15 +61,15 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
        {"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
        {"interface", std::make_shared<DataTypeUInt8>()},
        {"is_secure", std::make_shared<DataTypeUInt8>()},
        {"os_user", std::make_shared<DataTypeString>()},
        {"client_hostname", std::make_shared<DataTypeString>()},
        {"client_name", std::make_shared<DataTypeString>()},
        {"os_user", low_cardinality_string},
        {"client_hostname", low_cardinality_string},
        {"client_name", low_cardinality_string},
        {"client_revision", std::make_shared<DataTypeUInt32>()},
        {"client_version_major", std::make_shared<DataTypeUInt32>()},
        {"client_version_minor", std::make_shared<DataTypeUInt32>()},
        {"client_version_patch", std::make_shared<DataTypeUInt32>()},
        {"http_method", std::make_shared<DataTypeUInt8>()},
        {"http_user_agent", std::make_shared<DataTypeString>()},
        {"http_user_agent", low_cardinality_string},
        {"http_referer", std::make_shared<DataTypeString>()},
        {"forwarded_for", std::make_shared<DataTypeString>()},
        {"quota_key", std::make_shared<DataTypeString>()},
@ -73,7 +77,7 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()

        {"revision", std::make_shared<DataTypeUInt32>()},

        {"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
        {"ProfileEvents", std::make_shared<DataTypeMap>(low_cardinality_string, std::make_shared<DataTypeUInt64>())},
    };
}

@ -81,7 +85,7 @@ NamesAndAliases QueryThreadLogElement::getNamesAndAliases()
{
    return
    {
        {"ProfileEvents.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapKeys(ProfileEvents)"},
        {"ProfileEvents.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))}, "mapKeys(ProfileEvents)"},
        {"ProfileEvents.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())}, "mapValues(ProfileEvents)"}
    };
}
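For context on the ProfileEvents.Names and ProfileEvents.Values aliases kept above: they expose the keys and the values of the ProfileEvents map as two parallel, index-aligned arrays, which is what mapKeys() and mapValues() compute. A stand-in sketch of that relationship (not the ClickHouse implementation):

#include <cstdint>
#include <map>
#include <string>
#include <vector>

int main()
{
    std::map<std::string, uint64_t> profile_events{{"ReadBytes", 1024}, {"SelectedRows", 42}};

    std::vector<std::string> names;   // plays the role of mapKeys(ProfileEvents)
    std::vector<uint64_t> values;     // plays the role of mapValues(ProfileEvents)
    for (const auto & [name, value] : profile_events)
    {
        names.push_back(name);
        values.push_back(value);
    }

    // The arrays stay index-aligned: names[i] corresponds to values[i].
    return names.size() == values.size() ? 0 : 1;
}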
Some files were not shown because too many files have changed in this diff