Merge branch 'master' into mvcc_prototype

Alexander Tokmakov 2022-02-11 15:53:32 +03:00
commit 07e66e690d
379 changed files with 8016 additions and 2369 deletions

View File

@ -142,6 +142,7 @@ Checks: '-*,
clang-analyzer-cplusplus.PlacementNewChecker,
clang-analyzer-cplusplus.SelfAssignment,
clang-analyzer-deadcode.DeadStores,
clang-analyzer-cplusplus.Move,
clang-analyzer-optin.cplusplus.VirtualCall,
clang-analyzer-security.insecureAPI.UncheckedReturn,
clang-analyzer-security.insecureAPI.bcmp,

.gitmodules vendored
View File

@ -259,3 +259,6 @@
[submodule "contrib/azure"]
path = contrib/azure
url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng

View File

@ -79,18 +79,14 @@ static void call_default_signal_handler(int sig)
raise(sig);
}
static constexpr size_t max_query_id_size = 127;
static const size_t signal_pipe_buf_size =
sizeof(int)
+ sizeof(siginfo_t)
+ sizeof(ucontext_t)
+ sizeof(ucontext_t*)
+ sizeof(StackTrace)
+ sizeof(UInt32)
+ max_query_id_size + 1 /// query_id + varint encoded length
+ sizeof(void*);
using signal_function = void(int, siginfo_t*, void*);
static void writeSignalIDtoSignalPipe(int sig)
@ -129,18 +125,14 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
char buf[signal_pipe_buf_size];
DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
const ucontext_t signal_context = *reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(signal_context);
StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe.
query_id.size = std::min(query_id.size, max_query_id_size);
const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(*signal_context);
DB::writeBinary(sig, out);
DB::writePODBinary(*info, out);
DB::writePODBinary(signal_context, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
@ -184,6 +176,8 @@ public:
void run() override
{
static_assert(PIPE_BUF >= 512);
static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512");
char buf[signal_pipe_buf_size];
DB::ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);
@ -227,10 +221,9 @@ public:
else
{
siginfo_t info{};
ucontext_t context{};
ucontext_t * context{};
StackTrace stack_trace(NoCapture{});
UInt32 thread_num{};
std::string query_id;
DB::ThreadStatus * thread_ptr{};
if (sig != SanitizerTrap)
@ -241,12 +234,11 @@ public:
DB::readPODBinary(stack_trace, in);
DB::readBinary(thread_num, in);
DB::readBinary(query_id, in);
DB::readPODBinary(thread_ptr, in);
/// This allows receiving more signals if a failure happens inside the onFault function.
/// Example: segfault while symbolizing stack trace.
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach();
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, thread_ptr); }).detach();
}
}
}
@ -279,18 +271,27 @@ private:
void onFault(
int sig,
const siginfo_t & info,
const ucontext_t & context,
ucontext_t * context,
const StackTrace & stack_trace,
UInt32 thread_num,
const std::string & query_id,
DB::ThreadStatus * thread_ptr) const
{
DB::ThreadStatus thread_status;
String query_id;
String query;
/// Send logs from this thread to client if possible.
/// It will allow client to see failure messages directly.
if (thread_ptr)
{
query_id = thread_ptr->getQueryId().toString();
if (auto thread_group = thread_ptr->getThreadGroup())
{
query = thread_group->query;
}
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
}
@ -305,15 +306,15 @@ private:
}
else
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, query_id, strsignal(sig), sig);
thread_num, query_id, query, strsignal(sig), sig);
}
String error_message;
if (sig != SanitizerTrap)
error_message = signalToErrorMessage(sig, info, context);
error_message = signalToErrorMessage(sig, info, *context);
else
error_message = "Sanitizer trap.";
@ -389,20 +390,16 @@ static void sanitizerDeathCallback()
const StackTrace stack_trace;
StringRef query_id = DB::CurrentThread::getQueryId();
query_id.size = std::min(query_id.size, max_query_id_size);
int sig = SignalListener::SanitizerTrap;
DB::writeBinary(sig, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
/// The time that is usually enough for a separate thread to print info into the log.
sleepForSeconds(10);
sleepForSeconds(20);
}
#endif

View File

@ -78,6 +78,7 @@ add_contrib (croaring-cmake croaring)
add_contrib (zstd-cmake zstd)
add_contrib (zlib-ng-cmake zlib-ng)
add_contrib (bzip2-cmake bzip2)
add_contrib (minizip-ng-cmake minizip-ng)
add_contrib (snappy-cmake snappy)
add_contrib (rocksdb-cmake rocksdb)
add_contrib (thrift-cmake thrift)

View File

@ -56,19 +56,11 @@ list(APPEND SOURCES ${CASS_SRC_DIR}/atomic/atomic_std.hpp)
add_library(_curl_hostcheck OBJECT ${CASS_SRC_DIR}/third_party/curl/hostcheck.cpp)
add_library(_hdr_histogram OBJECT ${CASS_SRC_DIR}/third_party/hdr_histogram/hdr_histogram.cpp)
add_library(_http-parser OBJECT ${CASS_SRC_DIR}/third_party/http-parser/http_parser.c)
add_library(_minizip OBJECT
${CASS_SRC_DIR}/third_party/minizip/ioapi.c
${CASS_SRC_DIR}/third_party/minizip/zip.c
${CASS_SRC_DIR}/third_party/minizip/unzip.c)
target_link_libraries(_minizip ch_contrib::zlib)
target_compile_definitions(_minizip PRIVATE "-Dz_crc_t=unsigned long")
list(APPEND INCLUDE_DIRS
${CASS_SRC_DIR}/third_party/curl
${CASS_SRC_DIR}/third_party/hdr_histogram
${CASS_SRC_DIR}/third_party/http-parser
${CASS_SRC_DIR}/third_party/minizip
${CASS_SRC_DIR}/third_party/mt19937_64
${CASS_SRC_DIR}/third_party/rapidjson/rapidjson
${CASS_SRC_DIR}/third_party/sparsehash/src)
@ -123,10 +115,9 @@ add_library(_cassandra
${SOURCES}
$<TARGET_OBJECTS:_curl_hostcheck>
$<TARGET_OBJECTS:_hdr_histogram>
$<TARGET_OBJECTS:_http-parser>
$<TARGET_OBJECTS:_minizip>)
$<TARGET_OBJECTS:_http-parser>)
target_link_libraries(_cassandra ch_contrib::zlib)
target_link_libraries(_cassandra ch_contrib::zlib ch_contrib::minizip)
target_include_directories(_cassandra PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${INCLUDE_DIRS})
target_include_directories(_cassandra SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR})
target_compile_definitions(_cassandra PRIVATE CASS_BUILDING)

contrib/minizip-ng vendored Submodule

@ -0,0 +1 @@
Subproject commit 6cffc951851620e0fac1993be75e4713c334de03

View File

@ -0,0 +1,168 @@
option(ENABLE_MINIZIP "Enable minizip-ng, the zip manipulation library" ${ENABLE_LIBRARIES})
if (NOT ENABLE_MINIZIP)
message (STATUS "minizip-ng disabled")
return()
endif()
set(_MINIZIP_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/minizip-ng")
# Initial source files
set(MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_crypt.c
${_MINIZIP_SOURCE_DIR}/mz_os.c
${_MINIZIP_SOURCE_DIR}/mz_strm.c
${_MINIZIP_SOURCE_DIR}/mz_strm_buf.c
${_MINIZIP_SOURCE_DIR}/mz_strm_mem.c
${_MINIZIP_SOURCE_DIR}/mz_strm_split.c
${_MINIZIP_SOURCE_DIR}/mz_zip.c
${_MINIZIP_SOURCE_DIR}/mz_zip_rw.c)
# Initial header files
set(MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz.h
${_MINIZIP_SOURCE_DIR}/mz_os.h
${_MINIZIP_SOURCE_DIR}/mz_crypt.h
${_MINIZIP_SOURCE_DIR}/mz_strm.h
${_MINIZIP_SOURCE_DIR}/mz_strm_buf.h
${_MINIZIP_SOURCE_DIR}/mz_strm_mem.h
${_MINIZIP_SOURCE_DIR}/mz_strm_split.h
${_MINIZIP_SOURCE_DIR}/mz_strm_os.h
${_MINIZIP_SOURCE_DIR}/mz_zip.h
${_MINIZIP_SOURCE_DIR}/mz_zip_rw.h)
set(MINIZIP_INC ${_MINIZIP_SOURCE_DIR})
set(MINIZIP_DEF)
set(MINIZIP_PUBLIC_DEF)
set(MINIZIP_LIB)
# Check if zlib is present
set(MZ_ZLIB ON)
if(MZ_ZLIB)
# Use zlib from ClickHouse contrib
list(APPEND MINIZIP_LIB ch_contrib::zlib)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.h)
list(APPEND MINIZIP_DEF "-DHAVE_ZLIB")
endif()
# Check if bzip2 is present
set(MZ_BZIP2 ${ENABLE_BZIP2})
if(MZ_BZIP2)
# Use bzip2 from ClickHouse contrib
list(APPEND MINIZIP_LIB ch_contrib::bzip2)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.h)
list(APPEND MINIZIP_DEF "-DHAVE_BZIP2")
endif()
# Check if liblzma is present
set(MZ_LZMA ON)
if(MZ_LZMA)
# Use liblzma from ClickHouse contrib
list(APPEND MINIZIP_LIB ch_contrib::xz)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.h)
list(APPEND MINIZIP_DEF "-DHAVE_LZMA")
endif()
# Check if zstd is present
set(MZ_ZSTD ON)
if(MZ_ZSTD)
# Use zstd from ClickHouse contrib
list(APPEND MINIZIP_LIB ch_contrib::zstd)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.h)
list(APPEND MINIZIP_DEF "-DHAVE_ZSTD")
endif()
if(NOT MZ_ZLIB AND NOT MZ_ZSTD AND NOT MZ_BZIP2 AND NOT MZ_LZMA)
message(STATUS "Compression not supported due to missing libraries")
list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_DECOMPRESSION)
list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_COMPRESSION)
endif()
# Check to see if openssl installation is present
set(MZ_OPENSSL ${ENABLE_SSL})
if(MZ_OPENSSL)
# Use openssl from ClickHouse contrib
list(APPEND MINIZIP_LIB OpenSSL::SSL OpenSSL::Crypto)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_crypt_openssl.c)
endif()
# Include WinZIP AES encryption
set(MZ_WZAES ${ENABLE_SSL})
if(MZ_WZAES)
list(APPEND MINIZIP_DEF -DHAVE_WZAES)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.h)
endif()
# Include traditional PKWare encryption
set(MZ_PKCRYPT ON)
if(MZ_PKCRYPT)
list(APPEND MINIZIP_DEF -DHAVE_PKCRYPT)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.h)
endif()
# Unix specific
if(UNIX)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_os_posix.c
${_MINIZIP_SOURCE_DIR}/mz_strm_os_posix.c)
endif()
# Include compatibility layer
set(MZ_COMPAT ON)
if(MZ_COMPAT)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_compat.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_compat.h
zip.h
unzip.h)
list(APPEND MINIZIP_INC "${CMAKE_CURRENT_SOURCE_DIR}")
list(APPEND MINIZIP_PUBLIC_DEF "-DMZ_COMPAT_VERSION=110")
endif()
add_library(_minizip ${MINIZIP_SRC} ${MINIZIP_HDR})
target_include_directories(_minizip PUBLIC ${MINIZIP_INC})
target_compile_definitions(_minizip PUBLIC ${MINIZIP_PUBLIC_DEF})
target_compile_definitions(_minizip PRIVATE ${MINIZIP_DEF})
target_link_libraries(_minizip PRIVATE ${MINIZIP_LIB})
add_library(ch_contrib::minizip ALIAS _minizip)

View File

@ -0,0 +1,13 @@
/* unzip.h -- Compatibility layer shim
part of the minizip-ng project
This program is distributed under the terms of the same license as zlib.
See the accompanying LICENSE file for the full text of the license.
*/
#ifndef MZ_COMPAT_UNZIP
#define MZ_COMPAT_UNZIP
#include "mz_compat.h"
#endif

View File

@ -0,0 +1,13 @@
/* zip.h -- Compatibility layer shim
part of the minizip-ng project
This program is distributed under the terms of the same license as zlib.
See the accompanying LICENSE file for the full text of the license.
*/
#ifndef MZ_COMPAT_ZIP
#define MZ_COMPAT_ZIP
#include "mz_compat.h"
#endif

contrib/replxx vendored

@ -1 +1 @@
Subproject commit c745b3fb012ee5ae762fbc8cd7a40c4dc3fe15df
Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d

View File

@ -127,11 +127,6 @@ endif()
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_definitions(-DOS_MACOSX)
if(CMAKE_SYSTEM_PROCESSOR MATCHES arm)
add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE)
# no debug info for IOS, that will make our library big
add_definitions(-DNDEBUG)
endif()
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_definitions(-DOS_LINUX)
elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")

View File

@ -16,6 +16,8 @@ Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
# The minus sign means that this file is optional.
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

View File

@ -886,3 +886,12 @@ S3 disk can be configured as `main` or `cold` storage:
```
In the case of the `cold` option, data can be moved to S3 if the free space on the local disk becomes smaller than `move_factor * disk_size`, or by a TTL move rule.
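For reference, a TTL move rule can be declared directly on a table. A minimal sketch (the table and column names are hypothetical, and the disk name `s3` is assumed to match the storage configuration above):

``` sql
ALTER TABLE hits
    MODIFY TTL event_date + INTERVAL 1 MONTH TO DISK 's3';
```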
## Virtual Columns {#virtual-columns}
- `_part` — Name of a part.
- `_part_index` — Sequential index of the part in the query result.
- `_partition_id` — Name of a partition.
- `_part_uuid` — Unique part identifier (if the MergeTree setting `assign_part_uuids` is enabled).
- `_partition_value` — Values (a tuple) of the `partition by` expression.
- `_sample_factor` — Sample factor (from the query).
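A quick way to see these virtual columns in action (a sketch; `hits` is a hypothetical MergeTree table):

``` sql
SELECT _part, _partition_id, count() AS rows
FROM hits
GROUP BY _part, _partition_id
ORDER BY _part;
```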

View File

@ -108,7 +108,13 @@ Examples of configuration for quorum with three nodes can be found in [integrati
ClickHouse Keeper is bundled into the ClickHouse server package; just add the configuration of `<keeper_server>` and start the ClickHouse server as usual. If you want to run a standalone ClickHouse Keeper, you can start it in a similar way with:
```bash
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
clickhouse-keeper --config /etc/your_path_to_config/config.xml
```
If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as an argument:
```bash
clickhouse keeper --config /etc/your_path_to_config/config.xml
```
## Four Letter Word Commands {#four-letter-word-commands}

View File

@ -27,7 +27,7 @@ To analyze the `trace_log` system table:
For security reasons, introspection functions are disabled by default.
- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces.
- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in the ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by whole stack traces.
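For example, stacks can be aggregated with a query along these lines (a sketch: it assumes `allow_introspection_functions` is enabled and `'<query-id>'` is replaced with a real query ID):

``` sql
SELECT
    count() AS samples,
    arrayStringConcat(arrayMap(addr -> demangle(addressToSymbol(addr)), trace), '\n') AS stack
FROM system.trace_log
WHERE query_id = '<query-id>'
GROUP BY trace
ORDER BY samples DESC
LIMIT 5;
```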
If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope).

View File

@ -2,7 +2,7 @@
Contains stack traces of all server threads. Allows developers to introspect the server state.
To analyze stack frames, use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md).
To analyze stack frames, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md).
Columns:

View File

@ -4,7 +4,7 @@ Contains stack traces collected by the sampling query profiler.
ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` introspection functions.
Columns:

View File

@ -1,9 +1,9 @@
---
toc_priority: 40
toc_title: UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
---
# UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 {#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256}
# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
Fixed-length integers, with or without a sign.
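For example, the newly documented wide types can be checked with `toTypeName` (a minimal illustration):

``` sql
SELECT toTypeName(toUInt128(1)) AS u128, toTypeName(toInt256(-1)) AS i256;
```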

View File

@ -120,7 +120,7 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32
Get the first available contact method for the customer from the contact list:
``` sql
SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
```
``` text

View File

@ -113,6 +113,111 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
/build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97
```
## addressToLineWithInlines {#addresstolinewithinlines}
Similar to `addressToLine`, but returns an Array with all inline functions; as a result, it is much slower.
If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.
**Syntax**
``` sql
addressToLineWithInlines(address_of_binary_instruction)
```
**Arguments**
- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of an instruction in a running process.
**Returned value**
- An array whose first element is the source code filename and the line number in this file, delimited by a colon. Starting from the second element, each inline function's source code filename, line number, and function name are listed.
- An array with a single element which is the name of the binary, if the function couldn't find the debug information.
- An empty array, if the address is not valid.
Type: [Array(String)](../../sql-reference/data-types/array.md).
**Example**
Enabling introspection functions:
``` sql
SET allow_introspection_functions=1;
```
Applying the function to an address:
```sql
SELECT addressToLineWithInlines(531055181::UInt64);
```
``` text
┌─addressToLineWithInlines(CAST('531055181', 'UInt64'))────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Applying the function to the whole stack trace:
``` sql
SELECT
ta, addressToLineWithInlines(arrayJoin(trace) as ta)
FROM system.trace_log
WHERE
query_id = '5e173544-2020-45de-b645-5deebe2aae54';
```
The [arrayJoin](../../sql-reference/functions/array-functions.md#array-functions-join) function splits the array into rows.
``` text
┌────────ta─┬─addressToLineWithInlines(arrayJoin(trace))───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 365497529 │ ['./build_normal_debug/./contrib/libcxx/include/string_view:252'] │
│ 365593602 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:191'] │
│ 365593866 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365592528 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365591003 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:477'] │
│ 365590479 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:442'] │
│ 365590600 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:457'] │
│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365597289 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:807'] │
│ 365599840 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:1118'] │
│ 531058145 │ ['./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:152'] │
│ 531055181 │ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │
│ 422333613 │ ['./build_normal_debug/./src/Functions/IFunctionAdaptors.h:21'] │
│ 586866022 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:216'] │
│ 586869053 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:264'] │
│ 586873237 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:334'] │
│ 597901620 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:601'] │
│ 597898534 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:718'] │
│ 630442912 │ ['./build_normal_debug/./src/Processors/Transforms/ExpressionTransform.cpp:23'] │
│ 546354050 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.h:38'] │
│ 626026993 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.cpp:89'] │
│ 626294022 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:45'] │
│ 626293730 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:63'] │
│ 626169525 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:213'] │
│ 626170308 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:178'] │
│ 626166348 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:329'] │
│ 626163461 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:84'] │
│ 626323536 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:85'] │
│ 626323277 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:112'] │
│ 626323133 │ ['./build_normal_debug/./contrib/libcxx/include/type_traits:3682'] │
│ 626323041 │ ['./build_normal_debug/./contrib/libcxx/include/tuple:1415'] │
└───────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## addressToSymbol {#addresstosymbol}
Converts a virtual memory address inside the ClickHouse server process to a symbol from the ClickHouse object files.

View File

@ -22,7 +22,7 @@ tuple(x, y, …)
## tupleElement {#tupleelement}
A function that allows getting a column from a tuple.
N is the column index, starting from 1. N must be a constant. N must be a constant. N must be a strict postive integer no greater than the size of the tuple.
N is the column index, starting from 1. N must be a constant. N must be a strictly positive integer no greater than the size of the tuple.
There is no cost to execute the function.
The function implements the operator `x.N`.
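For example (a minimal illustration):

``` sql
SELECT tupleElement(('a', 'b', 'c'), 2) AS elem; -- returns 'b'
```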

View File

@ -172,6 +172,7 @@ Hierarchy of privileges:
- `SYSTEM FLUSH LOGS`
- [INTROSPECTION](#grant-introspection)
- `addressToLine`
- `addressToLineWithInlines`
- `addressToSymbol`
- `demangle`
- [SOURCES](#grant-sources)
@ -430,6 +431,7 @@ Allows using [introspection](../../operations/optimizing-performance/sampling-qu
- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS`
- `addressToLine`. Level: `GLOBAL`
- `addressToLineWithInlines`. Level: `GLOBAL`
- `addressToSymbol`. Level: `GLOBAL`
- `demangle`. Level: `GLOBAL`
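For example, the whole group can be granted at once (a sketch; `analyst` is a hypothetical user):

``` sql
GRANT INTROSPECTION ON *.* TO analyst;
```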

View File

@ -285,7 +285,7 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_
`WITH FILL` can be applied to fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied to `String` fields, missing values are filled with empty strings.
When `FROM const_expr` is not defined, the sequence of filling uses the minimal `expr` field value from `ORDER BY`.
When `TO const_expr` is not defined, the sequence of filling uses the maximal `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types as `days` for Date type and as `seconds` for DateTime type.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted as-is for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type. It also supports the [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` is omitted, the sequence of filling uses `1.0` for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.
Example of a query without `WITH FILL`:
@ -402,4 +402,85 @@ Result:
└────────────┴────────────┴──────────┘
```
The following query uses an `INTERVAL` of 1 day as the step for filling data in column `d1`:
``` sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d1 WITH FILL STEP INTERVAL 1 DAY,
d2 WITH FILL;
```
Result:
```
┌─────────d1─┬─────────d2─┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 1970-01-12 │ 1970-01-01 │ │
│ 1970-01-13 │ 1970-01-01 │ │
│ 1970-01-14 │ 1970-01-01 │ │
│ 1970-01-15 │ 1970-01-01 │ │
│ 1970-01-16 │ 1970-01-01 │ │
│ 1970-01-17 │ 1970-01-01 │ │
│ 1970-01-18 │ 1970-01-01 │ │
│ 1970-01-19 │ 1970-01-01 │ │
│ 1970-01-20 │ 1970-01-01 │ │
│ 1970-01-21 │ 1970-01-01 │ │
│ 1970-01-22 │ 1970-01-01 │ │
│ 1970-01-23 │ 1970-01-01 │ │
│ 1970-01-24 │ 1970-01-01 │ │
│ 1970-01-25 │ 1970-01-01 │ │
│ 1970-01-26 │ 1970-01-01 │ │
│ 1970-01-27 │ 1970-01-01 │ │
│ 1970-01-28 │ 1970-01-01 │ │
│ 1970-01-29 │ 1970-01-01 │ │
│ 1970-01-30 │ 1970-01-01 │ │
│ 1970-01-31 │ 1970-01-01 │ │
│ 1970-02-01 │ 1970-01-01 │ │
│ 1970-02-02 │ 1970-01-01 │ │
│ 1970-02-03 │ 1970-01-01 │ │
│ 1970-02-04 │ 1970-01-01 │ │
│ 1970-02-05 │ 1970-01-01 │ │
│ 1970-02-06 │ 1970-01-01 │ │
│ 1970-02-07 │ 1970-01-01 │ │
│ 1970-02-08 │ 1970-01-01 │ │
│ 1970-02-09 │ 1970-01-01 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 1970-02-11 │ 1970-01-01 │ │
│ 1970-02-12 │ 1970-01-01 │ │
│ 1970-02-13 │ 1970-01-01 │ │
│ 1970-02-14 │ 1970-01-01 │ │
│ 1970-02-15 │ 1970-01-01 │ │
│ 1970-02-16 │ 1970-01-01 │ │
│ 1970-02-17 │ 1970-01-01 │ │
│ 1970-02-18 │ 1970-01-01 │ │
│ 1970-02-19 │ 1970-01-01 │ │
│ 1970-02-20 │ 1970-01-01 │ │
│ 1970-02-21 │ 1970-01-01 │ │
│ 1970-02-22 │ 1970-01-01 │ │
│ 1970-02-23 │ 1970-01-01 │ │
│ 1970-02-24 │ 1970-01-01 │ │
│ 1970-02-25 │ 1970-01-01 │ │
│ 1970-02-26 │ 1970-01-01 │ │
│ 1970-02-27 │ 1970-01-01 │ │
│ 1970-02-28 │ 1970-01-01 │ │
│ 1970-03-01 │ 1970-01-01 │ │
│ 1970-03-02 │ 1970-01-01 │ │
│ 1970-03-03 │ 1970-01-01 │ │
│ 1970-03-04 │ 1970-01-01 │ │
│ 1970-03-05 │ 1970-01-01 │ │
│ 1970-03-06 │ 1970-01-01 │ │
│ 1970-03-07 │ 1970-01-01 │ │
│ 1970-03-08 │ 1970-01-01 │ │
│ 1970-03-09 │ 1970-01-01 │ │
│ 1970-03-10 │ 1970-01-01 │ │
│ 1970-03-11 │ 1970-01-01 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/) <!--hide-->

View File

@ -5,6 +5,6 @@ toc_title: Roadmap
# Roadmap {#roadmap}
The roadmap for the year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623).
The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513).
{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##}

View File

@ -26,4 +26,6 @@ toc_title: Introduction
- [Replicated](../../engines/database-engines/replicated.md)
- [SQLite](../../engines/database-engines/sqlite.md)
[Original article](https://clickhouse.com/docs/en/database_engines/) <!--hide-->

View File

@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
- `database` — Remote database name.
- `user` — PostgreSQL user name.
- `password` — PostgreSQL user password.
- `schema` — PostgreSQL schema.
- `use_table_cache` — Defines whether the database table structure is cached or not. Optional. Default value: `0`.
## Supported Data Types {#data_types-support}

View File

@ -31,6 +31,7 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na
When a new replica of the database is created, the replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log, it checks its local metadata against the current metadata in ZooKeeper, moves extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, and updates the table names if they have been renamed. Data is replicated at the `ReplicatedMergeTree` level, i.e. if a table is not replicated, its data will not be replicated (the database is only responsible for metadata).
[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove partitions and parts on the current replica. However, if the table itself uses a Replicated table engine, the data will be replicated after `ATTACH` is used.
## Usage Example {#usage-example}
Create a cluster of three hosts:

View File

@ -1 +0,0 @@
../../../en/engines/database-engines/sqlite.md

View File

@ -0,0 +1,80 @@
---
toc_priority: 32
toc_title: SQLite
---
# SQLite {#sqlite}
Allows connecting to an [SQLite](https://www.sqlite.org/index.html) database and supports exchanging data between ClickHouse and SQLite by executing `INSERT` and `SELECT` queries.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE sqlite_database
ENGINE = SQLite('db_path')
```
**Engine Parameters**
- `db_path` — Path to the SQLite database file.
## Data Type Support {#data_types-support}
| SQLite | ClickHouse |
|---------------|---------------------------------------------------------|
| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
| REAL | [Float32](../../sql-reference/data-types/float.md) |
| TEXT | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
## Specifics and Recommendations {#specifics-and-recommendations}
SQLite stores the entire database (definitions, tables, indexes, and the data itself) as a single cross-platform file on a host machine. During writes, SQLite locks the entire database file, so write operations are performed sequentially. Read operations can be multitasked.
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
## Usage Example {#usage-example}
A database in ClickHouse, connected to SQLite:
``` sql
CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
SHOW TABLES FROM sqlite_db;
```
``` text
┌──name───┐
│ table1 │
│ table2 │
└─────────┘
```
Showing the contents of the table:
``` sql
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
Inserting data into an SQLite table from a ClickHouse table:
``` sql
CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
INSERT INTO clickhouse_table VALUES ('text',10);
INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
│ text │ 10 │
└───────┴──────┘
```

View File

@ -0,0 +1,416 @@
---
toc_priority: 4
toc_title: Hive
---
# Hive {#hive}
The Hive engine allows you to perform `SELECT` queries on HDFS Hive tables. Currently it supports the following input formats:
- Text: supports only simple scalar column types except `Binary`
- ORC: supports simple scalar column types except `char`; complex types are supported only for `array`
- Parquet: supports all simple scalar column types; complex types are supported only for `array`
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [ALIAS expr1],
name2 [type2] [ALIAS expr2],
...
) ENGINE = Hive('thrift://host:port', 'database', 'table');
PARTITION BY expr
```
See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the original Hive table structure:
- Column names should be the same as in the original Hive table, but you can use just some of these columns and in any order; you can also use alias columns computed from other columns.
- Column types should be the same as in the original Hive table.
- The "partition by expression" should be consistent with the original Hive table, and the columns of the "partition by expression" should be in the table structure.
**Engine Parameters**
- `thrift://host:port` — Hive Metastore address.
- `database` — Remote database name.
- `table` — Remote table name.
## Usage Example {#usage-example}
### How to Use a Local Cache for the HDFS Filesystem
We strongly recommend enabling a local cache for remote filesystems. Benchmarks show that it is about twice as fast with a cache.
Before using the cache, add it to `config.xml`:
``` xml
<local_cache_for_remote_fs>
<enable>true</enable>
<root_dir>local_cache</root_dir>
<limit_size>559096952</limit_size>
<bytes_read_before_flush>1048576</bytes_read_before_flush>
</local_cache_for_remote_fs>
```
- enable: when enabled, ClickHouse maintains a local cache for HDFS (the remote filesystem).
- root_dir: required. The root directory for storing local cache files for the remote filesystem.
- limit_size: required. The maximum size (in bytes) of the local cache files.
- bytes_read_before_flush: the number of bytes to read from the remote filesystem before flushing to the local filesystem when downloading a file. The default value is 1MB.
When ClickHouse has the local cache enabled for the remote filesystem, users can still choose not to use the cache by setting `use_local_cache_for_remote_fs = 0` in a query; `use_local_cache_for_remote_fs` defaults to `false`.
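For instance, to bypass the cache for a single query (a sketch; `hive_table` is a hypothetical table using the Hive engine):

``` sql
SELECT * FROM hive_table SETTINGS use_local_cache_for_remote_fs = 0;
```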
### Querying a Hive Table with ORC Input Format
#### Create a Table in Hive
``` text
hive > CREATE TABLE `test`.`test_orc`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_orc'
OK
Time taken: 0.51 seconds
hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_orc;
OK
1 2 3 4 5 6.11 7.22 8 2021-11-05 12:38:16.314 2021-11-05 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.295 seconds, Fetched: 1 row(s)
```
#### Create a Table in ClickHouse
A table in ClickHouse, retrieving data from the Hive table created above:
``` sql
CREATE TABLE test.test_orc
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_bool` Bool,
`f_binary` String,
`f_array_int` Array(Int32),
`f_array_string` Array(String),
`f_array_float` Array(Float32),
`f_array_array_int` Array(Array(Int32)),
`f_array_array_string` Array(Array(String)),
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G
```
``` text
SELECT *
FROM test.test_orc
SETTINGS input_format_orc_allow_missing_columns = 1
Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-04 04:00:44
f_date: 2021-12-03
f_string: hello world
f_varchar: hello world
f_bool: true
f_binary: hello world
f_array_int: [1,2,3]
f_array_string: ['hello world','hello world']
f_array_float: [1.1,1.2]
f_array_array_int: [[1,2],[3,4]]
f_array_array_string: [['a','b'],['c','d']]
f_array_array_float: [[1.11,2.22],[3.33,4.44]]
day: 2021-09-18
1 rows in set. Elapsed: 0.078 sec.
```
### Querying a Hive Table with Parquet Input Format
#### Create a Table in Hive
``` text
hive >
CREATE TABLE `test`.`test_parquet`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_char` char(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_parquet'
OK
Time taken: 0.51 seconds
hive > insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_parquet;
OK
1 2 3 4 5 6.11 7.22 8 2021-12-14 17:54:56.743 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.766 seconds, Fetched: 1 row(s)
```
#### Create a Table in ClickHouse
A table in ClickHouse, retrieving data from the Hive table created above:
``` sql
CREATE TABLE test.test_parquet
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_char` String,
`f_bool` Bool,
`f_binary` String,
`f_array_int` Array(Int32),
`f_array_string` Array(String),
`f_array_float` Array(Float32),
`f_array_array_int` Array(Array(Int32)),
`f_array_array_string` Array(Array(String)),
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_parquet settings input_format_parquet_allow_missing_columns = 1\G
```
``` text
SELECT *
FROM test_parquet
SETTINGS input_format_parquet_allow_missing_columns = 1
Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-14 17:54:56
f_date: 2021-12-14
f_string: hello world
f_varchar: hello world
f_char: hello world
f_bool: true
f_binary: hello world
f_array_int: [1,2,3]
f_array_string: ['hello world','hello world']
f_array_float: [1.1,1.2]
f_array_array_int: [[1,2],[3,4]]
f_array_array_string: [['a','b'],['c','d']]
f_array_array_float: [[1.11,2.22],[3.33,4.44]]
day: 2021-09-18
1 rows in set. Elapsed: 0.357 sec.
```
### Querying a Hive Table with Text Input Format
#### Create a Table in Hive
``` text
hive >
CREATE TABLE `test`.`test_text`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_char` char(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_text'
Time taken: 0.1 seconds, Fetched: 34 row(s)
hive > insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_text;
OK
1 2 3 4 5 6.11 7.22 8 2021-12-14 18:11:17.239 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.624 seconds, Fetched: 1 row(s)
```
#### Create a Table in ClickHouse
A table in ClickHouse, retrieving data from the Hive table created above:
``` sql
CREATE TABLE test.test_text
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_char` String,
`f_bool` Bool,
`day` String
)
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_text settings input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G
```
``` text
SELECT *
FROM test.test_text
SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'
Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-14 18:11:17
f_date: 2021-12-14
f_string: hello world
f_varchar: hello world
f_char: hello world
f_bool: true
day: 2021-09-18
```

View File

@ -19,3 +19,5 @@ ClickHouse provides a variety of ways to integrate with external systems, including table engines. Like
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
- [SQLite](../../../engines/table-engines/integrations/sqlite.md)
- [Hive](../../../engines/table-engines/integrations/hive.md)

View File

@ -1,67 +1,62 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# system.query_thread_log {#system_tables-query_thread_log}
Contains information about the threads that execute queries, for example, thread name, thread start time, and duration of query processing.
To start logging:
To enable logging:
1. In the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) configuration parameter
2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1.
1. In the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section of the configuration parameters
2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1.
The flushing period of data is set in the `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
The period for writing data from the buffer into the table is set by the `flush_interval_milliseconds` parameter in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force writing from the buffer into the table, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse does not delete data from the table automatically. See the [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
ClickHouse does not delete data from the table automatically. For more details, see the [Introduction](../../operations/system-tables/index.md#system-tables-introduction).
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in the context of this thread.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread.
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread.
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — Thread ID.
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of the initial thread.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
- 1 — Query was initiated by the client.
- 0 — Query was initiated by another query for distributed query execution.
- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query.
- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query.
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread finished executing the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread finished executing the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The start time of the query.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The duration of query execution.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The number of rows read.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The number of bytes read.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of rows written. For other queries, 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of bytes written. For other queries, 0.
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in the context of this thread.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread.
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — The thread name.
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The internal thread ID.
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — The thread ID.
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The OS initial ID of the initial thread.
- `query` ([String](../../sql-reference/data-types/string.md)) — The query string.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The query type. Possible values:
- 1 — The query was initiated by the user.
- 0 — The query was initiated by another query for distributed query execution.
- `user` ([String](../../sql-reference/data-types/string.md)) — The name of the user who initiated the query.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — The ID of the query.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — The IP address the query was made from.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The port the query was made from.
- `initial_user` ([String](../../sql-reference/data-types/string.md)) — The name of the user who ran the initial query (for distributed query execution).
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — The ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — The IP address of the parent query.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The port of the parent query.
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The interface the query was initiated from. Possible values:
- 1 — TCP.
- 2 — HTTP.
- `os_user` ([String](../../sql-reference/data-types/string.md)) — The OS username of the user who runs [clickhouse-client](../../interfaces/cli.md).
- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name.
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version.
- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
- 0 — The query was launched from the TCP interface.
- `os_user` ([String](../../sql-reference/data-types/string.md)) — The OS username of the user running [clickhouse-client](../../interfaces/cli.md).
- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — The hostname of the machine running [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_name` ([String](../../sql-reference/data-types/string.md)) — The name of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The revision of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The major version of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The minor version of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The patch version of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The HTTP method that initiated the query. Possible values:
- 0 — The query was launched from the TCP interface.
- 1 — The `GET` method was used.
- 2 — The `POST` method was used.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The "quota key" specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents` ([Array(String, UInt64)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. Their description can be found in the [system.events](#system_tables-events) table.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The "quota key" specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The ClickHouse revision number.
- `ProfileEvents` ([Array(String, UInt64)](../../sql-reference/data-types/array.md)) — Multiple metric counters for this thread; see the [system.events](#system_tables-events) table for their descriptions.
**Example**
@ -113,4 +108,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr
**See Also**
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table, which contains common information about query execution.
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table, containing common information about query execution.
- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — This table contains information about each view executed within a query.

View File

@ -1,17 +1,41 @@
# UInt8,UInt16,UInt32,UInt64,Int8,Int16,Int32,Int64 {#uint8-uint16-uint32-uint64-int8-int16-int32-int64}
---
toc_priority: 40
toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
---
# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
Fixed-length integers, with or without a sign.
When creating tables, numeric parameters for integers can be set (e.g. `TINYINT(8)`, `SMALLINT(16)`, `INT(32)`, `BIGINT(64)`), but ClickHouse ignores them.
## Int Ranges {#int-ranges}
- Int8-\[-128:127\]
- Int16-\[-32768:32767\]
- Int32-\[-2147483648:2147483647\]
- Int64-\[-9223372036854775808:9223372036854775807\]
- `Int8` — \[-128 : 127\]
- `Int16` — \[-32768 : 32767\]
- `Int32` — \[-2147483648 : 2147483647\]
- `Int64` — \[-9223372036854775808 : 9223372036854775807\]
- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\]
- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\]
Aliases:
- `Int8``TINYINT`, `BOOL`, `BOOLEAN`, `INT1`.
- `Int16``SMALLINT`, `INT2`.
- `Int32``INT`, `INT4`, `INTEGER`.
- `Int64``BIGINT`.
## UInt Ranges {#uint-ranges}
- UInt8-\[0:255\]
- UInt16-\[0:65535\]
- UInt32-\[0:4294967295\]
- UInt64-\[0:18446744073709551615\]
- `UInt8` — \[0 : 255\]
- `UInt16` — \[0 : 65535\]
- `UInt32` — \[0 : 4294967295\]
- `UInt64` — \[0 : 18446744073709551615\]
- `UInt128` — \[0 : 340282366920938463463374607431768211455\]
- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
[Original article](https://clickhouse.com/docs/en/data_types/int_uint/) <!--hide-->

View File

@ -5,6 +5,6 @@ toc_title: Roadmap
# Roadmap {#roadmap}
The roadmap for 2021 has been published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623).
The roadmap for 2022 has been published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513).
{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##}

View File

@ -481,7 +481,19 @@ catch (...)
void Client::connect()
{
connection_parameters = ConnectionParameters(config());
UInt16 default_port = ConnectionParameters::getPortFromConfig(config());
connection_parameters = ConnectionParameters(config(), hosts_ports[0].host,
hosts_ports[0].port.value_or(default_port));
String server_name;
UInt64 server_version_major = 0;
UInt64 server_version_minor = 0;
UInt64 server_version_patch = 0;
for (size_t attempted_address_index = 0; attempted_address_index < hosts_ports.size(); ++attempted_address_index)
{
connection_parameters.host = hosts_ports[attempted_address_index].host;
connection_parameters.port = hosts_ports[attempted_address_index].port.value_or(default_port);
if (is_interactive)
std::cout << "Connecting to "
@ -490,11 +502,6 @@ void Client::connect()
<< connection_parameters.host << ":" << connection_parameters.port
<< (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl;
String server_name;
UInt64 server_version_major = 0;
UInt64 server_version_minor = 0;
UInt64 server_version_patch = 0;
try
{
connection = Connection::createConnection(connection_parameters, global_context);
@ -507,10 +514,14 @@ void Client::connect()
connection->getServerVersion(
connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision);
config().setString("host", connection_parameters.host);
config().setInt("port", connection_parameters.port);
break;
}
catch (const Exception & e)
{
/// It is typical when users install ClickHouse, type some password and instantly forget it.
/// This problem can't be fixed by reconnecting, so reconnection is not attempted.
if ((connection_parameters.user.empty() || connection_parameters.user == "default")
&& e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED)
{
@ -520,10 +531,27 @@ void Client::connect()
<< "and deleting this file will reset the password." << std::endl
<< "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl
<< std::endl;
}
throw;
}
else
{
if (attempted_address_index == hosts_ports.size() - 1)
throw;
if (is_interactive)
{
std::cerr << "Connection attempt to database at "
<< connection_parameters.host << ":" << connection_parameters.port
<< " resulted in failure"
<< std::endl
<< getExceptionMessage(e, false)
<< std::endl
<< "Attempting connection to the next provided address"
<< std::endl;
}
}
}
}
server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch);
load_suggestions = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false));
@ -966,8 +994,11 @@ void Client::addOptions(OptionsDescription & options_description)
/// Main commandline options related to client functionality and all parameters from Settings.
options_description.main_description->add_options()
("config,c", po::value<std::string>(), "config-file path (another shorthand)")
("host,h", po::value<std::string>()->default_value("localhost"), "server host")
("port", po::value<int>()->default_value(9000), "server port")
("host,h", po::value<std::vector<HostPort>>()->multitoken()->default_value({{"localhost"}}, "localhost"),
"list of server hosts with optionally assigned port to connect. List elements are separated by a space."
"Every list element looks like '<host>[:<port>]'. If port isn't assigned, connection is made by port from '--port' param"
"Example of usage: '-h host1:1 host2 host3:3'")
("port", po::value<int>()->default_value(9000), "server port, which is default port for every host from '--host' param")
("secure,s", "Use TLS connection")
("user,u", po::value<std::string>()->default_value("default"), "user")
/** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
@ -1074,8 +1105,8 @@ void Client::processOptions(const OptionsDescription & options_description,
if (options.count("config"))
config().setString("config-file", options["config"].as<std::string>());
if (options.count("host") && !options["host"].defaulted())
config().setString("host", options["host"].as<std::string>());
if (options.count("host"))
hosts_ports = options["host"].as<std::vector<HostPort>>();
if (options.count("interleave-queries-file"))
interleave_queries_files = options["interleave-queries-file"].as<std::vector<std::string>>();
if (options.count("port") && !options["port"].defaulted())

View File

@ -57,8 +57,16 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
Settings cmd_settings;
for (const auto & field : cmd_settings.all())
{
if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size")
cmd_settings.addProgramOption(desc, field);
}
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
po::notify(options);
if (options.count("help"))
{
@ -149,7 +157,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
ParserQuery parser(end);
do
{
ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
ASTPtr res = parseQueryAndMovePosition(
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
/// For insert query with data(INSERT INTO ... VALUES ...), will lead to format fail,
/// should throw exception early and make exception message more readable.
if (const auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
@ -222,6 +231,5 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
std::cerr << getCurrentExceptionMessage(true) << '\n';
return getCurrentExceptionCode();
}
return 0;
}

View File

@ -315,7 +315,7 @@ void LocalServer::cleanup()
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure") && !config().has("table-file"))
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format"))
return {};
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));

View File

@ -217,13 +217,12 @@
<!-- The following file is used only if ssl_require_client_auth=1 -->
<ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file>
<!-- Default compression algorithm (applied if client doesn't specify another algorithm, see result_compression in QueryInfo).
<!-- Default transport compression type (can be overridden by client, see the transport_compression_type field in QueryInfo).
Supported algorithms: none, deflate, gzip, stream_gzip -->
<compression>deflate</compression>
<transport_compression_type>none</transport_compression_type>
<!-- Default compression level (applied if client doesn't specify another level, see result_compression in QueryInfo).
Supported levels: none, low, medium, high -->
<compression_level>medium</compression_level>
<!-- Default transport compression level. Supported levels: 0..3 -->
<transport_compression_level>0</transport_compression_level>
<!-- Send/receive message size limits in bytes. -1 means unlimited -->
<max_send_message_size>-1</max_send_message_size>

View File

@ -1,8 +1,8 @@
#include <Access/AccessRights.h>
#include <base/logger_useful.h>
#include <base/sort.h>
#include <boost/container/small_vector.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/sort.hpp>
#include <unordered_map>
namespace DB
@ -101,7 +101,7 @@ namespace
AccessRightsElements getResult() const
{
ProtoElements sorted = *this;
boost::range::sort(sorted);
::sort(sorted.begin(), sorted.end());
AccessRightsElements res;
res.reserve(sorted.size());

View File

@ -86,7 +86,7 @@ enum class AccessType
M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\
M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables;
implicitly enabled by the grant CREATE_TABLE on any table */ \
M(CREATE_FUNCTION, "", DATABASE, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
\
M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\
@ -94,7 +94,7 @@ enum class AccessType
M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views;
implicitly enabled by the grant DROP_TABLE */\
M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
M(DROP_FUNCTION, "", DATABASE, DROP) /* allows to execute DROP FUNCTION */\
M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\
M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\
\
M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \
@ -166,6 +166,7 @@ enum class AccessType
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\
\
M(addressToLine, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLine() */\
M(addressToLineWithInlines, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLineWithInlines() */\
M(addressToSymbol, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToSymbol() */\
M(demangle, "", GLOBAL, INTROSPECTION) /* allows to execute function demangle() */\
M(INTROSPECTION, "INTROSPECTION FUNCTIONS", GROUP, ALL) /* allows to execute functions addressToLine(), addressToSymbol(), demangle()*/\

View File

@ -425,6 +425,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args
| AccessType::TRUNCATE;
const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY;
const AccessFlags function_ddl = AccessType::CREATE_FUNCTION | AccessType::DROP_FUNCTION;
const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl;
const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE;
const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS;
@ -432,7 +433,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args
const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY;
const AccessFlags not_readonly_1_flags = AccessType::CREATE_TEMPORARY_TABLE;
const AccessFlags ddl_flags = table_ddl | dictionary_ddl;
const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl;
const AccessFlags introspection_flags = AccessType::INTROSPECTION;
};
static const PrecalculatedFlags precalc;

View File

@ -7,8 +7,8 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <boost/range/algorithm/set_algorithm.hpp>
#include <boost/range/algorithm/sort.hpp>
#include <boost/range/algorithm_ext/push_back.hpp>
#include <base/sort.h>
namespace DB
@ -132,7 +132,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toAST() const
ast->names.reserve(ids.size());
for (const UUID & id : ids)
ast->names.emplace_back(::DB::toString(id));
boost::range::sort(ast->names);
::sort(ast->names.begin(), ast->names.end());
}
if (!except_ids.empty())
@ -140,7 +140,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toAST() const
ast->except_names.reserve(except_ids.size());
for (const UUID & except_id : except_ids)
ast->except_names.emplace_back(::DB::toString(except_id));
boost::range::sort(ast->except_names);
::sort(ast->except_names.begin(), ast->except_names.end());
}
return ast;
@ -161,7 +161,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toASTWithNames(const Access
if (name)
ast->names.emplace_back(std::move(*name));
}
boost::range::sort(ast->names);
::sort(ast->names.begin(), ast->names.end());
}
if (!except_ids.empty())
@ -173,7 +173,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toASTWithNames(const Access
if (except_name)
ast->except_names.emplace_back(std::move(*except_name));
}
boost::range::sort(ast->except_names);
::sort(ast->except_names.begin(), ast->except_names.end());
}
return ast;

View File

@ -45,7 +45,7 @@ TEST(AccessRights, Union)
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, CREATE FUNCTION, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}

View File

@ -90,12 +90,12 @@ struct AggregateFunctionIntervalLengthSumData
void sort()
{
if (!sorted)
{
if (sorted)
return;
::sort(std::begin(segments), std::end(segments));
sorted = true;
}
}
void serialize(WriteBuffer & buf) const
{

View File

@ -75,12 +75,12 @@ struct AggregateFunctionSequenceMatchData final
void sort()
{
if (!sorted)
{
if (sorted)
return;
::sort(std::begin(events_list), std::end(events_list), Comparator{});
sorted = true;
}
}
void serialize(WriteBuffer & buf) const
{

View File

@ -239,6 +239,7 @@ private:
UInt64 genRandom(size_t lim)
{
assert(lim > 0);
/// With a large number of values, generating random numbers becomes several times slower.
if (lim <= static_cast<UInt64>(rng.max()))
return static_cast<UInt32>(rng()) % static_cast<UInt32>(lim);

View File

@ -260,7 +260,8 @@ private:
if (sorted)
return;
::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; });
/// In order to provide a deterministic result, we must sort by value and hash
::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs < rhs; });
sorted = true;
}

View File

@ -79,6 +79,7 @@ set(dbms_sources)
add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/S3)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
@ -508,6 +509,10 @@ if (TARGET ch_contrib::bzip2)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::bzip2)
endif()
if (TARGET ch_contrib::minizip)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::minizip)
endif ()
if (TARGET ch_contrib::simdjson)
dbms_target_link_libraries(PRIVATE ch_contrib::simdjson)
endif()

View File

@ -1317,7 +1317,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (insert && insert->select)
insert->tryFindInputFunction(input_function);
bool is_async_insert = global_context->getSettings().async_insert && insert && insert->hasInlinedData();
bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
/// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately.
if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert)
@ -1501,6 +1501,25 @@ String ClientBase::prompt() const
}
void ClientBase::initQueryIdFormats()
{
if (!query_id_formats.empty())
return;
/// Initialize query_id_formats if any
if (config().has("query_id_formats"))
{
Poco::Util::AbstractConfiguration::Keys keys;
config().keys("query_id_formats", keys);
for (const auto & name : keys)
query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name));
}
if (query_id_formats.empty())
query_id_formats.emplace_back("Query id:", " {query_id}\n");
}
void ClientBase::runInteractive()
{
if (config().has("query_id"))
@ -1508,6 +1527,8 @@ void ClientBase::runInteractive()
if (print_time_to_stderr)
throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS);
initQueryIdFormats();
/// Initialize DateLUT here to avoid counting time spent here as query execution time.
const auto local_tz = DateLUT::instance().getTimeZone();
@ -1528,18 +1549,6 @@ void ClientBase::runInteractive()
home_path = home_path_cstr;
}
/// Initialize query_id_formats if any
if (config().has("query_id_formats"))
{
Poco::Util::AbstractConfiguration::Keys keys;
config().keys("query_id_formats", keys);
for (const auto & name : keys)
query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name));
}
if (query_id_formats.empty())
query_id_formats.emplace_back("Query id:", " {query_id}\n");
/// Load command history if present.
if (config().has("history_file"))
history_file = config().getString("history_file");
@ -1648,6 +1657,9 @@ void ClientBase::runInteractive()
void ClientBase::runNonInteractive()
{
if (delayed_interactive)
initQueryIdFormats();
if (!queries_files.empty())
{
auto process_multi_query_from_file = [&](const String & file)
@ -1917,7 +1929,7 @@ void ClientBase::init(int argc, char ** argv)
/// Output of help message.
if (options.count("help")
|| (options.count("host") && options["host"].as<std::string>() == "elp")) /// If user writes -help instead of --help.
|| (options.count("host") && options["host"].as<std::vector<HostPort>>()[0].host == "elp")) /// If user writes -help instead of --help.
{
printHelpMessage(options_description);
exit(0);

View File

@ -5,6 +5,7 @@
#include <Common/InterruptListener.h>
#include <Common/ShellCommand.h>
#include <Common/Stopwatch.h>
#include <Common/DNSResolver.h>
#include <Core/ExternalTable.h>
#include <Poco/Util/Application.h>
#include <Interpreters/Context.h>
@ -138,6 +139,8 @@ private:
void updateSuggest(const ASTCreateQuery & ast_create);
void initQueryIdFormats();
protected:
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
@ -241,6 +244,25 @@ protected:
} profile_events;
QueryProcessingStage::Enum query_processing_stage;
struct HostPort
{
String host;
std::optional<UInt16> port{};
friend std::istream & operator>>(std::istream & in, HostPort & hostPort)
{
String host_with_port;
in >> host_with_port;
DB::DNSResolver & resolver = DB::DNSResolver::instance();
std::pair<Poco::Net::IPAddress, std::optional<UInt16>>
host_and_port = resolver.resolveHostOrAddress(host_with_port);
hostPort.host = host_and_port.first.toString();
hostPort.port = host_and_port.second;
return in;
}
};
std::vector<HostPort> hosts_ports{};
};
}
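
The custom `operator>>` on `HostPort` is what lets Boost.Program_options parse each `--host` token into a struct. A standalone sketch of the same mechanism (assuming Boost.Program_options is available; DNS resolution is deliberately replaced by plain string splitting, so bracketless IPv6 is not handled here):

```cpp
#include <boost/program_options.hpp>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

namespace po = boost::program_options;

struct HostPort
{
    std::string host;
    std::optional<uint16_t> port;

    // Boost validates multitoken values through operator>> by default.
    friend std::istream & operator>>(std::istream & in, HostPort & hp)
    {
        std::string token;
        in >> token;
        if (auto colon = token.rfind(':'); colon != std::string::npos)
        {
            hp.host = token.substr(0, colon);
            hp.port = static_cast<uint16_t>(std::stoul(token.substr(colon + 1)));
        }
        else
            hp.host = token;
        return in;
    }
};

int main(int argc, char ** argv)
{
    po::options_description desc("options");
    desc.add_options()
        ("host,h", po::value<std::vector<HostPort>>()->multitoken(), "hosts");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);

    // e.g. ./a.out -h host1:1 host2 host3:3
    if (vm.count("host"))
        for (const auto & hp : vm["host"].as<std::vector<HostPort>>())
            std::cout << hp.host << " port="
                      << (hp.port ? std::to_string(*hp.port) : std::string("default")) << '\n';
}
```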

View File

@ -23,15 +23,13 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config)
ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config,
std::string connection_host,
int connection_port) : host(connection_host), port(connection_port)
{
bool is_secure = config.getBool("secure", false);
security = is_secure ? Protocol::Secure::Enable : Protocol::Secure::Disable;
host = config.getString("host", "localhost");
port = config.getInt(
"port", config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT));
default_database = config.getString("database", "");
/// changed the default value to "default" to fix the issue when the user in the prompt is blank
@ -69,4 +67,17 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0));
}
ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config)
: ConnectionParameters(config, config.getString("host", "localhost"), getPortFromConfig(config))
{
}
int ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config)
{
bool is_secure = config.getBool("secure", false);
return config.getInt("port",
config.getInt(is_secure ? "tcp_port_secure" : "tcp_port",
is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT));
}
}

View File

@ -24,6 +24,9 @@ struct ConnectionParameters
ConnectionParameters() {}
ConnectionParameters(const Poco::Util::AbstractConfiguration & config);
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, int port);
static int getPortFromConfig(const Poco::Util::AbstractConfiguration & config);
};
}

View File

@ -74,6 +74,8 @@ void LocalConnection::sendQuery(
query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); });
query_context->setFileProgressCallback([this](const FileProgress & value) { this->updateProgress(Progress(value)); });
}
if (!current_database.empty())
query_context->setCurrentDatabase(current_database);
CurrentThread::QueryScope query_scope_holder(query_context);
@ -427,9 +429,9 @@ void LocalConnection::getServerVersion(
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented");
}
void LocalConnection::setDefaultDatabase(const String &)
void LocalConnection::setDefaultDatabase(const String & database)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented");
current_database = database;
}
UInt64 LocalConnection::getServerRevision(const ConnectionTimeouts &)

View File

@ -142,5 +142,7 @@ private:
/// Last "server" packet.
std::optional<UInt64> next_packet_type;
String current_database;
};
}

View File

@ -50,12 +50,12 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr &&
if (!offsets_concrete)
throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::LOGICAL_ERROR);
if (!offsets_concrete->empty() && nested_column)
if (!offsets_concrete->empty() && data)
{
Offset last_offset = offsets_concrete->getData().back();
/// This will also prevent possible overflow in offset.
if (nested_column->size() != last_offset)
if (data->size() != last_offset)
throw Exception("offsets_column has data inconsistent with nested_column", ErrorCodes::LOGICAL_ERROR);
}

View File

@ -202,6 +202,45 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U
return Poco::Net::SocketAddress(impl->cache_host(host).front(), port);
}
std::pair<Poco::Net::IPAddress, std::optional<UInt16>> DNSResolver::resolveHostOrAddress(const std::string & host_and_port)
{
Poco::Net::IPAddress ip;
size_t number_of_colons = std::count(host_and_port.begin(), host_and_port.end(), ':');
if (number_of_colons > 1)
{
/// IPv6 host
if (host_and_port.starts_with('['))
{
size_t close_bracket_pos = host_and_port.find(']');
assert(close_bracket_pos != std::string::npos);
ip = resolveHost(host_and_port.substr(0, close_bracket_pos));
if (close_bracket_pos == host_and_port.size() - 1)
return {ip, std::nullopt};
if (host_and_port[close_bracket_pos + 1] != ':')
throw Exception("Missing delimiter between host and port", ErrorCodes::BAD_ARGUMENTS);
unsigned int port;
if (!Poco::NumberParser::tryParseUnsigned(host_and_port.substr(close_bracket_pos + 2), port))
throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS);
if (port > 0xFFFF)
throw Exception("Port must be less 0xFFFF", ErrorCodes::BAD_ARGUMENTS);
return {ip, port};
}
return {resolveHost(host_and_port), std::nullopt};
}
else if (number_of_colons == 1)
{
/// IPv4 host with port
Poco::Net::SocketAddress socket = resolveAddress(host_and_port);
return {socket.host(), socket.port()};
}
/// IPv4 host
return {resolveHost(host_and_port), std::nullopt};
}
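
The dispatch above keys on the number of colons and on a leading `[`. Below is a simplified standalone re-implementation of just that classification step (assuming C++20 for `starts_with`; no DNS lookups, and the port-range check is omitted), useful for seeing which branch each input form takes:

```cpp
// >1 colon  -> IPv6; a port is present only in the '[...]:port' form
// ==1 colon -> IPv4 address or hostname with a port
// 0 colons  -> bare host, no port
#include <algorithm>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

struct Parsed { std::string host; std::optional<unsigned> port; };

Parsed parseHostOrAddress(const std::string & s)
{
    size_t colons = std::count(s.begin(), s.end(), ':');
    if (colons > 1)
    {
        if (s.starts_with('['))
        {
            size_t close = s.find(']');
            if (close == std::string::npos)
                throw std::invalid_argument("Missing ']'");
            if (close == s.size() - 1)
                return {s.substr(1, close - 1), std::nullopt};
            if (s[close + 1] != ':')
                throw std::invalid_argument("Missing delimiter between host and port");
            return {s.substr(1, close - 1), static_cast<unsigned>(std::stoul(s.substr(close + 2)))};
        }
        return {s, std::nullopt};   // bare IPv6 address, no port
    }
    if (colons == 1)
    {
        size_t pos = s.find(':');
        return {s.substr(0, pos), static_cast<unsigned>(std::stoul(s.substr(pos + 1)))};
    }
    return {s, std::nullopt};
}

int main()
{
    for (const char * ex : {"example.com", "127.0.0.1:9000", "::1", "[::1]:9440"})
    {
        auto p = parseHostOrAddress(ex);
        std::cout << ex << " -> host=" << p.host << " port="
                  << (p.port ? std::to_string(*p.port) : std::string("(none)")) << '\n';
    }
}
```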
String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address)
{
if (impl->disable_cache)

View File

@ -34,6 +34,10 @@ public:
Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
/// Accepts host names like 'example.com'/'example.com:port' or '127.0.0.1'/'127.0.0.1:port' or '::1'/'[::1]:port'
/// and resolves its IP and port, if port is set
std::pair<Poco::Net::IPAddress, std::optional<UInt16>> resolveHostOrAddress(const std::string & host_and_port);
/// Accepts host IP and resolves its host name
String reverseResolve(const Poco::Net::IPAddress & address);

View File

@ -610,6 +610,8 @@
M(639, SNAPPY_COMPRESS_FAILED) \
M(640, NO_HIVEMETASTORE) \
M(641, CANNOT_APPEND_TO_FILE) \
M(642, CANNOT_PACK_ARCHIVE) \
M(643, CANNOT_UNPACK_ARCHIVE) \
\
M(700, INVALID_TRANSACTION) \
M(701, SERIALIZATION_ERROR) \

View File

@ -281,6 +281,10 @@
M(ExternalDataSourceLocalCacheReadBytes, "Bytes read from local cache buffer in RemoteReadBufferCache")\
\
M(MainConfigLoads, "Number of times the main configuration was reloaded.") \
\
M(ScalarSubqueriesGlobalCacheHit, "Number of times a read from a scalar subquery was done using the global cache") \
M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \
M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely")
namespace ProfileEvents
{

View File

@ -17,6 +17,7 @@
#cmakedefine01 USE_YAML_CPP
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 USE_BZIP2
#cmakedefine01 USE_MINIZIP
#cmakedefine01 USE_SNAPPY
#cmakedefine01 USE_HIVE
#cmakedefine01 USE_ODBC

View File

@ -105,7 +105,7 @@ void CachedCompressedReadBuffer::seek(size_t offset_in_compressed_file, size_t o
/// We will discard our working_buffer, but have to account rest bytes
bytes += offset();
/// No data, everything discarded
pos = working_buffer.end();
resetWorkingBuffer();
owned_cell.reset();
/// Remember required offset in decompressed block which will be set in

View File

@ -80,7 +80,7 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t
/// We will discard our working_buffer, but have to account rest bytes
bytes += offset();
/// No data, everything discarded
pos = working_buffer.end();
resetWorkingBuffer();
size_compressed = 0;
/// Remember required offset in decompressed block which will be set in
/// the next ReadBuffer::next() call
@ -113,7 +113,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
/// need to skip some bytes in decompressed data (seek happened before readBig call).
if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
{
decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
bytes_read += size_decompressed;
bytes += size_decompressed;

View File

@ -36,6 +36,8 @@ private:
/// Allows to avoid additional copies in updateValue function
size_t snapshot_up_to_size = 0;
ArenaWithFreeLists arena;
/// Collect invalid iterators to avoid traversing the whole list
std::vector<Mapped> snapshot_invalid_iters;
uint64_t approximate_data_size{0};
@ -175,6 +177,7 @@ public:
list_itr->active_in_map = false;
auto new_list_itr = list.insert(list.end(), elem);
it->getMapped() = new_list_itr;
snapshot_invalid_iters.push_back(list_itr);
}
else
{
@ -195,6 +198,7 @@ public:
if (snapshot_mode)
{
list_itr->active_in_map = false;
snapshot_invalid_iters.push_back(list_itr);
list_itr->free_key = true;
map.erase(it->getKey());
}
@ -235,6 +239,7 @@ public:
{
auto elem_copy = *(list_itr);
list_itr->active_in_map = false;
snapshot_invalid_iters.push_back(list_itr);
updater(elem_copy.value);
auto itr = list.insert(list.end(), elem_copy);
it->getMapped() = itr;
@ -274,23 +279,15 @@ public:
void clearOutdatedNodes()
{
auto start = list.begin();
auto end = list.end();
for (auto itr = start; itr != end;)
{
if (!itr->active_in_map)
for (auto & itr: snapshot_invalid_iters)
{
assert(!itr->active_in_map);
updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0);
if (itr->free_key)
arena.free(const_cast<char *>(itr->key.data), itr->key.size);
itr = list.erase(itr);
}
else
{
assert(!itr->free_key);
itr++;
}
list.erase(itr);
}
snapshot_invalid_iters.clear();
}
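
The change above replaces a full scan of the list with a side vector of known-dead iterators, so cleanup costs O(invalid nodes) rather than O(list size). A self-contained sketch of the same pattern on `std::list` (whose iterators stay valid until the element itself is erased, which is what makes stashing them safe):

```cpp
#include <iostream>
#include <list>
#include <string>
#include <vector>

struct Node { std::string key; bool active = true; };

int main()
{
    std::list<Node> list;
    std::vector<std::list<Node>::iterator> invalid;

    for (int i = 0; i < 5; ++i)
        list.push_back({"key" + std::to_string(i)});

    // "Update" key1 and key3 in snapshot mode: the old nodes are kept
    // (a snapshot may still read them) but remembered as invalid.
    for (auto it = list.begin(); it != list.end(); ++it)
        if (it->key == "key1" || it->key == "key3")
        {
            it->active = false;
            invalid.push_back(it);
        }

    // Later, cleanup touches only the stashed iterators instead of
    // rescanning the whole list.
    for (auto it : invalid)
        list.erase(it);
    invalid.clear();

    for (const auto & n : list)
        std::cout << n.key << '\n';   // key0 key2 key4
}
```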
void clear()
@ -310,7 +307,6 @@ public:
void disableSnapshotMode()
{
snapshot_mode = false;
snapshot_up_to_size = 0;
}

View File

@ -755,4 +755,30 @@ void materializeBlockInplace(Block & block)
block.getByPosition(i).column = recursiveRemoveSparse(block.getByPosition(i).column->convertToFullColumnIfConst());
}
Block concatenateBlocks(const std::vector<Block> & blocks)
{
if (blocks.empty())
return {};
size_t num_rows = 0;
for (const auto & block : blocks)
num_rows += block.rows();
Block out = blocks[0].cloneEmpty();
MutableColumns columns = out.mutateColumns();
for (size_t i = 0; i < columns.size(); ++i)
{
columns[i]->reserve(num_rows);
for (const auto & block : blocks)
{
const auto & tmp_column = *block.getByPosition(i).column;
columns[i]->insertRangeFrom(tmp_column, 0, block.rows());
}
}
out.setColumns(std::move(columns));
return out;
}
}
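
`concatenateBlocks` pre-counts the total rows and reserves each destination column once before appending, avoiding repeated reallocation as blocks are copied in. The same reserve-then-append pattern in plain C++, purely for illustration:

```cpp
#include <iostream>
#include <vector>

using Column = std::vector<int>;
using Block = std::vector<Column>;  // one "block" = columns of equal row count

Block concatenate(const std::vector<Block> & blocks)
{
    if (blocks.empty())
        return {};

    size_t num_rows = 0;
    for (const auto & b : blocks)
        num_rows += b[0].size();

    Block out(blocks[0].size());
    for (size_t col = 0; col < out.size(); ++col)
    {
        out[col].reserve(num_rows);          // single allocation per column
        for (const auto & b : blocks)
            out[col].insert(out[col].end(), b[col].begin(), b[col].end());
    }
    return out;
}

int main()
{
    std::vector<Block> blocks = {{{1, 2}, {10, 20}}, {{3}, {30}}};
    Block joined = concatenate(blocks);
    std::cout << joined[0].size() << " rows, " << joined.size() << " columns\n";
}
```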

View File

@ -203,4 +203,6 @@ ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column
Block materializeBlock(const Block & block);
void materializeBlockInplace(Block & block);
Block concatenateBlocks(const std::vector<Block> & blocks);
}

View File

@ -85,16 +85,18 @@ void Settings::addProgramOptions(boost::program_options::options_description & o
{
for (const auto & field : all())
{
const std::string_view name = field.getName();
auto on_program_option
= boost::function1<void, const std::string &>([this, name](const std::string & value) { set(name, value); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(),
boost::program_options::value<std::string>()->composing()->notifier(on_program_option),
field.getDescription())));
addProgramOption(options, field);
}
}
void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field)
{
const std::string_view name = field.getName();
auto on_program_option = boost::function1<void, const std::string &>([this, name](const std::string & value) { set(name, value); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(), boost::program_options::value<std::string>()->composing()->notifier(on_program_option), field.getDescription())));
}
void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path)
{
if (config.getBool("skip_check_for_incorrect_settings", false))

View File

@ -70,7 +70,9 @@ class IColumn;
M(UInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \
M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \
M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
M(UInt64, s3_min_upload_part_size, 32*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_upload_part_size_multiply_parts_count_threshold parts have been uploaded from a single write to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 1000, "Each time this number of parts has been uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
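With the new pair of settings, the upload part size starts at `s3_min_upload_part_size` and grows by `s3_upload_part_size_multiply_factor` after every `s3_upload_part_size_multiply_parts_count_threshold` parts, keeping very large uploads under S3's 10000-part multipart limit without inflating small ones. A worked sketch of the resulting progression (the exact point at which the multiplication is applied is an assumption of this sketch):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    uint64_t part_size = 16ULL * 1024 * 1024;   // s3_min_upload_part_size (16 MiB)
    const uint64_t factor = 2;                  // s3_upload_part_size_multiply_factor
    const uint64_t threshold = 1000;            // ..._multiply_parts_count_threshold

    uint64_t uploaded_bytes = 0;
    for (uint64_t part = 1; part <= 4000; ++part)
    {
        uploaded_bytes += part_size;
        if (part % threshold == 0)
        {
            std::cout << "after part " << part << ": part_size = "
                      << part_size / (1024 * 1024) << " MiB, total = "
                      << uploaded_bytes / (1024ULL * 1024 * 1024) << " GiB\n";
            part_size *= factor;                // grow for the next batch of parts
        }
    }
}
```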
@ -262,6 +264,7 @@ class IColumn;
M(UInt64, http_max_fields, 1000000, "Maximum number of fields in HTTP header", 0) \
M(UInt64, http_max_field_name_size, 1048576, "Maximum length of field name in HTTP header", 0) \
M(UInt64, http_max_field_value_size, 1048576, "Maximum length of field value in HTTP header", 0) \
M(Bool, http_skip_not_found_url_for_globs, true, "Skip URLs for globs that respond with HTTP_NOT_FOUND error", 0) \
M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \
M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \
M(Bool, joined_subquery_requires_alias, true, "Force joined subqueries and table functions to have aliases for correct name qualification.", 0) \
@ -611,6 +614,7 @@ class IColumn;
M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \
M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \
M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
\
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
@ -718,6 +722,8 @@ struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path);
std::vector<String> getAllRegisteredNames() const override;
void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field);
};
/*

View File

@ -130,4 +130,10 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS,
{"JSON", FormatSettings::EscapingRule::JSON},
{"XML", FormatSettings::EscapingRule::XML},
{"Raw", FormatSettings::EscapingRule::Raw}})
IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
{{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN},
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
}

View File

@ -172,4 +172,6 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparin
DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)
DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
}

View File

@ -13,7 +13,7 @@ private:
DataTypePtr dictionary_type;
public:
DataTypeLowCardinality(DataTypePtr dictionary_type_);
explicit DataTypeLowCardinality(DataTypePtr dictionary_type_);
const DataTypePtr & getDictionaryType() const { return dictionary_type; }

View File

@ -167,8 +167,10 @@ String getNameForSubstreamPath(
/// Because nested data may be represented not by Array of Tuple,
/// but by separate Array columns with names in a form of a.b,
/// and name is encoded as a whole.
stream_name += (escape_tuple_delimiter && it->escape_tuple_delimiter ?
escapeForFileName(".") : ".") + escapeForFileName(it->tuple_element_name);
if (escape_tuple_delimiter && it->escape_tuple_delimiter)
stream_name += escapeForFileName("." + it->tuple_element_name);
else
stream_name += "." + it->tuple_element_name;
}
}

View File

@ -78,7 +78,9 @@ ASTPtr DatabaseMemory::getCreateDatabaseQuery() const
auto create_query = std::make_shared<ASTCreateQuery>();
create_query->setDatabase(getDatabaseName());
create_query->set(create_query->storage, std::make_shared<ASTStorage>());
create_query->storage->set(create_query->storage->engine, makeASTFunction(getEngineName()));
auto engine = makeASTFunction(getEngineName());
engine->no_empty_args = true;
create_query->storage->set(create_query->storage->engine, engine);
if (const auto comment_value = getDatabaseComment(); !comment_value.empty())
create_query->set(create_query->comment, std::make_shared<ASTLiteral>(comment_value));

View File

@ -316,7 +316,7 @@ getTableOutput(const String & database_name, const String & table_name, ContextM
return std::move(res.pipeline);
}
static inline String reWriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings)
static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings)
{
Block tables_columns_sample_block
{
@ -376,7 +376,7 @@ static inline void dumpDataForTables(
auto pipeline = getTableOutput(database_name, table_name, query_context);
StreamSettings mysql_input_stream_settings(context->getSettingsRef());
String mysql_select_all_query = "SELECT " + reWriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettings()) + " FROM "
String mysql_select_all_query = "SELECT " + rewriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettingsRef()) + " FROM "
+ backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name);
LOG_INFO(&Poco::Logger::get("MaterializedMySQLSyncThread(" + database_name + ")"), "mysql_select_all_query is {}", mysql_select_all_query);
auto input = std::make_unique<MySQLSource>(connection, mysql_select_all_query, pipeline.getHeader(), mysql_input_stream_settings);

View File

@ -197,7 +197,7 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
size_t max_command_execution_time = config.getUInt64(settings_config_prefix + ".max_command_execution_time", 10);
size_t max_execution_time_seconds = static_cast<size_t>(context->getSettings().max_execution_time.totalSeconds());
size_t max_execution_time_seconds = static_cast<size_t>(context->getSettingsRef().max_execution_time.totalSeconds());
if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds)
max_command_execution_time = max_execution_time_seconds;

View File

@ -243,7 +243,7 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence
prefetch_future = {};
}
pos = working_buffer.end();
resetWorkingBuffer();
/**
* Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer.

View File

@ -64,7 +64,7 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence)
throw Exception("Only SEEK_SET or SEEK_CUR modes are allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
impl->reset();
pos = working_buffer.end();
resetWorkingBuffer();
return impl->file_offset_of_buffer_end;
}

View File

@ -283,6 +283,8 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
bucket,
metadata.remote_fs_root_path + s3_path,
settings->s3_min_upload_part_size,
settings->s3_upload_part_size_multiply_factor,
settings->s3_upload_part_size_multiply_parts_count_threshold,
settings->s3_max_single_part_upload_size,
std::move(object_metadata),
buf_size,
@ -338,6 +340,8 @@ void DiskS3::createFileOperationObject(const String & operation_name, UInt64 rev
bucket,
remote_fs_root_path + key,
settings->s3_min_upload_part_size,
settings->s3_upload_part_size_multiply_factor,
settings->s3_upload_part_size_multiply_parts_count_threshold,
settings->s3_max_single_part_upload_size,
metadata);
@ -417,6 +421,8 @@ void DiskS3::saveSchemaVersion(const int & version)
bucket,
remote_fs_root_path + SCHEMA_VERSION_OBJECT,
settings->s3_min_upload_part_size,
settings->s3_upload_part_size_multiply_factor,
settings->s3_upload_part_size_multiply_parts_count_threshold,
settings->s3_max_single_part_upload_size);
writeIntText(version, buffer);
@ -1076,6 +1082,8 @@ DiskS3Settings::DiskS3Settings(
const std::shared_ptr<Aws::S3::S3Client> & client_,
size_t s3_max_single_read_retries_,
size_t s3_min_upload_part_size_,
size_t s3_upload_part_size_multiply_factor_,
size_t s3_upload_part_size_multiply_parts_count_threshold_,
size_t s3_max_single_part_upload_size_,
size_t min_bytes_for_seek_,
bool send_metadata_,
@ -1085,6 +1093,8 @@ DiskS3Settings::DiskS3Settings(
: client(client_)
, s3_max_single_read_retries(s3_max_single_read_retries_)
, s3_min_upload_part_size(s3_min_upload_part_size_)
, s3_upload_part_size_multiply_factor(s3_upload_part_size_multiply_factor_)
, s3_upload_part_size_multiply_parts_count_threshold(s3_upload_part_size_multiply_parts_count_threshold_)
, s3_max_single_part_upload_size(s3_max_single_part_upload_size_)
, min_bytes_for_seek(min_bytes_for_seek_)
, send_metadata(send_metadata_)

View File

@ -29,6 +29,8 @@ struct DiskS3Settings
const std::shared_ptr<Aws::S3::S3Client> & client_,
size_t s3_max_single_read_retries_,
size_t s3_min_upload_part_size_,
size_t s3_upload_part_size_multiply_factor_,
size_t s3_upload_part_size_multiply_parts_count_threshold_,
size_t s3_max_single_part_upload_size_,
size_t min_bytes_for_seek_,
bool send_metadata_,
@ -39,6 +41,8 @@ struct DiskS3Settings
std::shared_ptr<Aws::S3::S3Client> client;
size_t s3_max_single_read_retries;
size_t s3_min_upload_part_size;
size_t s3_upload_part_size_multiply_factor;
size_t s3_upload_part_size_multiply_parts_count_threshold;
size_t s3_max_single_part_upload_size;
size_t min_bytes_for_seek;
bool send_metadata;

View File

@ -155,6 +155,8 @@ std::unique_ptr<DiskS3Settings> getSettings(const Poco::Util::AbstractConfigurat
getClient(config, config_prefix, context),
config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries),
config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size),
config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor),
config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold),
config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size),
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
config.getBool(config_prefix + ".send_metadata", false),

View File

@ -129,6 +129,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.seekable_read = settings.input_format_allow_seeks;
format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns;
format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation;
format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference;
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context

View File

@ -231,9 +231,17 @@ struct FormatSettings
EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES;
} capn_proto;
enum class MsgPackUUIDRepresentation
{
STR, // Output UUID as a string of 36 characters.
BIN, // Output UUID as 16-bytes binary.
EXT, // Output UUID as ExtType = 2
};
struct
{
UInt64 number_of_columns = 0;
MsgPackUUIDRepresentation output_uuid_representation = MsgPackUUIDRepresentation::EXT;
} msgpack;
};

View File

@ -0,0 +1,11 @@
#pragma once
namespace DB
{
enum class MsgPackExtensionTypes
{
UUID = 0x02,
};
}

View File

@ -26,6 +26,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -69,6 +70,8 @@ public:
static constexpr auto name = "alphaTokens";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
/// Check the type of the function's arguments.
@ -127,6 +130,7 @@ public:
static constexpr auto name = "splitByNonAlpha";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
/// Check the type of the function's arguments.
@ -185,6 +189,7 @@ public:
static constexpr auto name = "splitByWhitespace";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
/// Check the type of the function's arguments.
@ -239,14 +244,23 @@ private:
Pos end;
char sep;
std::optional<UInt64> max_split;
UInt64 curr_split = 0;
public:
static constexpr auto name = "splitByChar";
static String getName() { return name; }
static size_t getNumberOfArguments() { return 2; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const DataTypes & arguments)
{
if (arguments.size() < 2 || arguments.size() > 3)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
arguments.size());
if (!isString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -254,6 +268,13 @@ public:
if (!isString(arguments[1]))
throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 3 && !isNativeInteger(arguments[2]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument for function '{}' must be integer, got '{}' instead",
getName(),
arguments[2]->getName());
}
void init(const ColumnsWithTypeAndName & arguments)
@ -271,6 +292,39 @@ public:
throw Exception("Illegal separator for function " + getName() + ". Must be exactly one byte.", ErrorCodes::BAD_ARGUMENTS);
sep = sep_str[0];
if (arguments.size() > 2)
{
if (!((max_split = getMaxSplit<UInt8>(arguments[2]))
|| (max_split = getMaxSplit<Int8>(arguments[2]))
|| (max_split = getMaxSplit<UInt16>(arguments[2]))
|| (max_split = getMaxSplit<Int16>(arguments[2]))
|| (max_split = getMaxSplit<UInt32>(arguments[2]))
|| (max_split = getMaxSplit<Int32>(arguments[2]))
|| (max_split = getMaxSplit<UInt64>(arguments[2]))
|| (max_split = getMaxSplit<Int64>(arguments[2]))))
{
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of third argument of function {}",
arguments[2].column->getName(),
getName());
}
}
}
template <typename DataType>
std::optional<UInt64> getMaxSplit(const ColumnWithTypeAndName & argument)
{
const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
if (!col)
return std::nullopt;
auto value = col->template getValue<DataType>();
if (value < 0)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of third argument of function {}", argument.column->getName(), getName());
return value;
}
/// Returns the position of the argument, that is the column of strings
@ -291,12 +345,19 @@ public:
return false;
token_begin = pos;
pos = reinterpret_cast<Pos>(memchr(pos, sep, end - pos));
if (unlikely(max_split && curr_split >= *max_split))
{
token_end = end;
pos = nullptr;
return true;
}
pos = reinterpret_cast<Pos>(memchr(pos, sep, end - pos));
if (pos)
{
token_end = pos;
++pos;
++curr_split;
}
else
token_end = end;
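
The `max_split` logic above stops consuming separators once `curr_split` reaches the limit and returns the entire remaining input as the final token. A standalone sketch of the same split-with-limit loop, mirroring the `memchr`-based `get()` above:

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

std::vector<std::string> splitByChar(const std::string & s, char sep,
                                     std::optional<uint64_t> max_split)
{
    std::vector<std::string> tokens;
    const char * pos = s.data();
    const char * end = s.data() + s.size();
    uint64_t curr_split = 0;

    while (pos)
    {
        const char * token_begin = pos;
        // Once the limit is hit, the rest of the string (separators included)
        // becomes one final token.
        if (max_split && curr_split >= *max_split)
        {
            tokens.emplace_back(token_begin, end);
            break;
        }
        const char * found = static_cast<const char *>(std::memchr(pos, sep, end - pos));
        if (found)
        {
            tokens.emplace_back(token_begin, found);
            pos = found + 1;
            ++curr_split;
        }
        else
        {
            tokens.emplace_back(token_begin, end);
            pos = nullptr;
        }
    }
    return tokens;
}

int main()
{
    for (const auto & t : splitByChar("a,b,c,d", ',', 2))
        std::cout << t << '\n';   // "a", "b", "c,d"
}
```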
@ -317,6 +378,7 @@ private:
public:
static constexpr auto name = "splitByString";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 2; }
static void checkArguments(const DataTypes & arguments)
@ -394,6 +456,8 @@ private:
public:
static constexpr auto name = "splitByRegexp";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 2; }
/// Check the type of function arguments.
@ -477,6 +541,7 @@ private:
public:
static constexpr auto name = "extractAll";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 2; }
/// Check the type of function arguments.
@ -556,6 +621,8 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool isVariadic() const override { return Generator::isVariadic(); }
size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override

View File

@ -20,6 +20,7 @@ public:
static constexpr auto name = "URLPathHierarchy";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
static void checkArguments(const DataTypes & arguments)

View File

@ -19,6 +19,7 @@ public:
static constexpr auto name = "URLHierarchy";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
static void checkArguments(const DataTypes & arguments)

View File

@ -19,6 +19,7 @@ public:
static constexpr auto name = "extractURLParameterNames";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
static void checkArguments(const DataTypes & arguments)

View File

@ -19,6 +19,7 @@ public:
static constexpr auto name = "extractURLParameters";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
static void checkArguments(const DataTypes & arguments)

View File

@ -1,125 +1,49 @@
#if defined(__ELF__) && !defined(__FreeBSD__)
#include <Common/Dwarf.h>
#include <Common/SymbolIndex.h>
#include <Common/HashTable/HashMap.h>
#include <Common/Arena.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteBufferFromArena.h>
#include <IO/WriteHelpers.h>
#include <Access/Common/AccessFlags.h>
#include <Interpreters/Context.h>
#include <mutex>
#include <filesystem>
#include <unordered_map>
#include <Functions/addressToLine.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
class FunctionAddressToLine : public IFunction
class FunctionAddressToLine: public FunctionAddressToLineBase<StringRef, Dwarf::LocationInfoMode::FAST>
{
public:
static constexpr auto name = "addressToLine";
String getName() const override { return name; }
static FunctionPtr create(ContextPtr context)
{
context->checkAccess(AccessType::addressToLine);
return std::make_shared<FunctionAddressToLine>();
}
String getName() const override
protected:
DataTypePtr getDataType() const override
{
return name;
}
size_t getNumberOfArguments() const override
{
return 1;
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 1)
throw Exception("Function " + getName() + " needs exactly one argument; passed "
+ toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const auto & type = arguments[0].type;
if (!WhichDataType(type.get()).isUInt64())
throw Exception("The only argument for function " + getName() + " must be UInt64. Found "
+ type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeString>();
}
bool useDefaultImplementationForConstants() const override
ColumnPtr getResultColumn(const typename ColumnVector<UInt64>::Container & data, size_t input_rows_count) const override
{
return true;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
const ColumnUInt64 * column_concrete = checkAndGetColumn<ColumnUInt64>(column.get());
if (!column_concrete)
throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
const typename ColumnVector<UInt64>::Container & data = column_concrete->getData();
auto result_column = ColumnString::create();
for (size_t i = 0; i < input_rows_count; ++i)
{
StringRef res_str = implCached(data[i]);
result_column->insertData(res_str.data, res_str.size);
}
return result_column;
}
private:
struct Cache
{
std::mutex mutex;
Arena arena;
using Map = HashMap<uintptr_t, StringRef>;
Map map;
std::unordered_map<std::string, Dwarf> dwarfs;
};
mutable Cache cache;
StringRef impl(uintptr_t addr) const
{
auto symbol_index_ptr = SymbolIndex::instance();
const SymbolIndex & symbol_index = *symbol_index_ptr;
if (const auto * object = symbol_index.findObject(reinterpret_cast<const void *>(addr)))
{
auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first;
if (!std::filesystem::exists(object->name))
return {};
Dwarf::LocationInfo location;
std::vector<Dwarf::SymbolizedFrame> frames; // NOTE: not used in FAST mode.
if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FAST, frames))
void setResult(StringRef & result, const Dwarf::LocationInfo & location, const std::vector<Dwarf::SymbolizedFrame> &) const override
{
const char * arena_begin = nullptr;
WriteBufferFromArena out(cache.arena, arena_begin);
@ -128,26 +52,7 @@ private:
writeChar(':', out);
writeIntText(location.line, out);
return out.complete();
}
else
{
return object->name;
}
}
else
return {};
}
StringRef implCached(uintptr_t addr) const
{
Cache::Map::LookupResult it;
bool inserted;
std::lock_guard lock(cache.mutex);
cache.map.emplace(addr, it, inserted);
if (inserted)
it->getMapped() = impl(addr);
return it->getMapped();
result = out.complete();
}
};

View File

@ -0,0 +1,133 @@
#pragma once
#if defined(__ELF__) && !defined(__FreeBSD__)
#include <Common/Dwarf.h>
#include <Common/SymbolIndex.h>
#include <Common/HashTable/HashMap.h>
#include <Common/Arena.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteBufferFromArena.h>
#include <IO/WriteHelpers.h>
#include <Access/Common/AccessFlags.h>
#include <Interpreters/Context.h>
#include <mutex>
#include <filesystem>
#include <unordered_map>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <typename ResultT, Dwarf::LocationInfoMode locationInfoMode>
class FunctionAddressToLineBase : public IFunction
{
public:
size_t getNumberOfArguments() const override { return 1; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 1)
throw Exception(
"Function " + getName() + " needs exactly one argument; passed " + toString(arguments.size()) + ".",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const auto & type = arguments[0].type;
if (!WhichDataType(type.get()).isUInt64())
throw Exception(
"The only argument for function " + getName() + " must be UInt64. Found " + type->getName() + " instead.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return getDataType();
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
const ColumnUInt64 * column_concrete = checkAndGetColumn<ColumnUInt64>(column.get());
if (!column_concrete)
throw Exception(
"Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
const typename ColumnVector<UInt64>::Container & data = column_concrete->getData();
return getResultColumn(data, input_rows_count);
}
protected:
virtual DataTypePtr getDataType() const = 0;
virtual ColumnPtr getResultColumn(const typename ColumnVector<UInt64>::Container & data, size_t input_rows_count) const = 0;
virtual void
setResult(ResultT & result, const Dwarf::LocationInfo & location, const std::vector<Dwarf::SymbolizedFrame> & frames) const = 0;
struct Cache
{
std::mutex mutex;
Arena arena;
using Map = HashMap<uintptr_t, ResultT>;
Map map;
std::unordered_map<std::string, Dwarf> dwarfs;
};
mutable Cache cache;
ResultT impl(uintptr_t addr) const
{
auto symbol_index_ptr = SymbolIndex::instance();
const SymbolIndex & symbol_index = *symbol_index_ptr;
if (const auto * object = symbol_index.findObject(reinterpret_cast<const void *>(addr)))
{
auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first;
if (!std::filesystem::exists(object->name))
return {};
Dwarf::LocationInfo location;
std::vector<Dwarf::SymbolizedFrame> frames; // NOTE: not used in FAST mode.
ResultT result;
if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, locationInfoMode, frames))
{
setResult(result, location, frames);
return result;
}
else
return {object->name};
}
else
return {};
}
ResultT implCached(uintptr_t addr) const
{
typename Cache::Map::LookupResult it;
bool inserted;
std::lock_guard lock(cache.mutex);
cache.map.emplace(addr, it, inserted);
if (inserted)
it->getMapped() = impl(addr);
return it->getMapped();
}
};
}
#endif
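As an aside, the lock-then-emplace memoization used by implCached above can be modeled in a few lines. This is a hedged standalone sketch, not the ClickHouse implementation: std::unordered_map stands in for ClickHouse's HashMap, and MemoCache/getOrCompute are illustrative names.

#include <cstdint>
#include <iostream>
#include <mutex>
#include <string>
#include <unordered_map>

// Simplified model of FunctionAddressToLineBase::implCached: the map is
// guarded by a mutex, and the expensive computation runs only for the
// caller that inserts the key; later lookups return the cached value.
struct MemoCache
{
    std::mutex mutex;
    std::unordered_map<uintptr_t, std::string> map;

    template <typename F>
    std::string getOrCompute(uintptr_t addr, F && compute)
    {
        std::lock_guard lock(mutex);
        auto [it, inserted] = map.try_emplace(addr);
        if (inserted)
            it->second = compute(addr); // computed once per distinct address
        return it->second;
    }
};

int main()
{
    MemoCache cache;
    auto resolve = [](uintptr_t addr) { return "line for " + std::to_string(addr); };
    std::cout << cache.getOrCompute(0x42, resolve) << '\n';
    std::cout << cache.getOrCompute(0x42, resolve) << '\n'; // served from the cache
}

Note that, as in the real code, the computation happens while the lock is held, so concurrent resolutions of different addresses are serialized; that is an acceptable trade-off for a debugging function.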

View File

@ -0,0 +1,99 @@
#if defined(__ELF__) && !defined(__FreeBSD__)
#include <Common/Dwarf.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteBufferFromArena.h>
#include <IO/WriteHelpers.h>
#include <Access/Common/AccessFlags.h>
#include <Functions/addressToLine.h>
#include <vector>
namespace DB
{
namespace
{
class FunctionAddressToLineWithInlines: public FunctionAddressToLineBase<StringRefs, Dwarf::LocationInfoMode::FULL_WITH_INLINE>
{
public:
static constexpr auto name = "addressToLineWithInlines";
String getName() const override { return name; }
static FunctionPtr create(ContextPtr context)
{
context->checkAccess(AccessType::addressToLineWithInlines);
return std::make_shared<FunctionAddressToLineWithInlines>();
}
protected:
DataTypePtr getDataType() const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
ColumnPtr getResultColumn(const typename ColumnVector<UInt64>::Container & data, size_t input_rows_count) const override
{
auto result_column = ColumnArray::create(ColumnString::create());
ColumnString & result_strings = typeid_cast<ColumnString &>(result_column->getData());
ColumnArray::Offsets & result_offsets = result_column->getOffsets();
ColumnArray::Offset current_offset = 0;
for (size_t i = 0; i < input_rows_count; ++i)
{
StringRefs res = implCached(data[i]);
for (auto & r : res)
result_strings.insertData(r.data, r.size);
current_offset += res.size();
result_offsets.push_back(current_offset);
}
return result_column;
}
void setResult(StringRefs & result, const Dwarf::LocationInfo & location, const std::vector<Dwarf::SymbolizedFrame> & inline_frames) const override
{
appendLocationToResult(result, location, nullptr);
for (const auto & inline_frame : inline_frames)
appendLocationToResult(result, inline_frame.location, &inline_frame);
}
private:
inline ALWAYS_INLINE void appendLocationToResult(StringRefs & result, const Dwarf::LocationInfo & location, const Dwarf::SymbolizedFrame * frame) const
{
const char * arena_begin = nullptr;
WriteBufferFromArena out(cache.arena, arena_begin);
writeString(location.file.toString(), out);
writeChar(':', out);
writeIntText(location.line, out);
if (frame)
{
writeChar(':', out);
int status = 0;
writeString(demangle(frame->name, status), out);
}
result.emplace_back(out.complete());
}
};
}
void registerFunctionAddressToLineWithInlines(FunctionFactory & factory)
{
factory.registerFunction<FunctionAddressToLineWithInlines>();
}
}
#endif
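To see the shape of the rows this function emits, here is a hedged sketch of what appendLocationToResult produces for one address: a "file:line" entry for the outer frame, then one "file:line:symbol" entry per inlined frame. Location and formatLocation are illustrative names, the sample data is invented, and plain std::string stands in for the arena-backed StringRef the real function returns.

#include <iostream>
#include <string>
#include <vector>

// Simplified model of appendLocationToResult: only inline frames carry a
// demangled symbol, so the outer frame is formatted without one.
struct Location { std::string file; int line; };

static std::string formatLocation(const Location & loc, const char * inlined_symbol)
{
    std::string out = loc.file + ":" + std::to_string(loc.line);
    if (inlined_symbol) // only inline frames append ":symbol"
        out += std::string(":") + inlined_symbol;
    return out;
}

int main()
{
    std::vector<std::string> result;
    result.push_back(formatLocation({"main.cpp", 42}, nullptr));
    result.push_back(formatLocation({"vector.h", 100}, "std::vector<int>::size()"));
    for (const auto & r : result)
        std::cout << r << '\n';
}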

View File

@ -1,6 +1,5 @@
#include <algorithm>
#include <vector>
#include <base/sort.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "arrayScalarProduct.h"
@ -113,7 +112,8 @@ public:
sorted_labels[i].label = label;
}
::sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; });
/// Stable sort is required for labels to be applied in the same order if scores are equal
std::stable_sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; });
/// We will first calculate non-normalized area.
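The switch to std::stable_sort matters exactly when scores tie. A hedged toy example (data invented for illustration): tied rows keep their input order under a stable sort, which keeps the area accumulation deterministic, whereas an unstable sort may reorder them between runs.

#include <algorithm>
#include <iostream>
#include <vector>

struct ScoredLabel { double score; int label; };

int main()
{
    // Two rows tie on score 0.5; stable_sort keeps label 1 before label 0,
    // matching their input order. An unstable sort is free to swap them.
    std::vector<ScoredLabel> rows{{0.9, 1}, {0.5, 1}, {0.5, 0}, {0.1, 0}};
    std::stable_sort(rows.begin(), rows.end(),
        [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; });
    for (const auto & row : rows)
        std::cout << row.score << " -> " << row.label << '\n';
}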

View File

@ -1,15 +1,20 @@
#include <base/sort.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeMap.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include "Core/ColumnWithTypeAndName.h"
#include "DataTypes/DataTypeMap.h"
#include "DataTypes/IDataType.h"
#include <Interpreters/castColumn.h>
namespace DB
{
@ -19,6 +24,8 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int TOO_LARGE_ARRAY_SIZE;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
class FunctionMapPopulateSeries : public IFunction
@ -35,415 +42,458 @@ private:
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
void checkTypes(const DataTypePtr & key_type, const DataTypePtr max_key_type) const
void checkTypes(const DataTypePtr & key_type, const DataTypePtr & value_type, const DataTypePtr & max_key_type) const
{
WhichDataType which_key(key_type);
if (!(which_key.isInt() || which_key.isUInt()))
{
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Keys for {} function should be of integer type (signed or unsigned)", getName());
}
WhichDataType key_data_type(key_type);
WhichDataType value_data_type(value_type);
if (max_key_type)
if (!(key_data_type.isInt() || key_data_type.isUInt()))
{
WhichDataType which_max_key(max_key_type);
if (which_max_key.isNullable())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Max key argument in arguments of function " + getName() + " can not be Nullable");
if (key_type->getTypeId() != max_key_type->getTypeId())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Max key type in {} should be same as keys type", getName());
}
"Function {} key argument should be of signed or unsigned integer type. Actual type {}",
getName(),
key_type->getName());
}
DataTypePtr getReturnTypeForTuple(const DataTypes & arguments) const
if (!(value_data_type.isInt() || value_data_type.isUInt()))
{
if (arguments.size() < 2)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} accepts at least two arrays for key and value", getName());
if (arguments.size() > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
const DataTypeArray * key_array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
const DataTypeArray * val_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
if (!key_array_type || !val_array_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} accepts two arrays for key and value", getName());
const auto & key_type = key_array_type->getNestedType();
if (arguments.size() == 3)
this->checkTypes(key_type, arguments[2]);
else
this->checkTypes(key_type, nullptr);
return std::make_shared<DataTypeTuple>(DataTypes{arguments[0], arguments[1]});
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} key argument should be of signed or unsigned integer type. Actual type {}",
getName(),
key_type->getName());
}
DataTypePtr getReturnTypeForMap(const DataTypes & arguments) const
{
const auto * map = assert_cast<const DataTypeMap *>(arguments[0].get());
if (arguments.size() == 1)
this->checkTypes(map->getKeyType(), nullptr);
else if (arguments.size() == 2)
this->checkTypes(map->getKeyType(), arguments[1]);
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
if (!max_key_type)
return;
return std::make_shared<DataTypeMap>(map->getKeyType(), map->getValueType());
WhichDataType max_key_data_type(max_key_type);
if (max_key_data_type.isNullable())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} max key argument can not be Nullable. Actual type {}",
getName(),
max_key_type->getName());
if (!(max_key_data_type.isInt() || max_key_data_type.isUInt()))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} max key should be of signed or unsigned integer type. Actual type {}.",
getName(),
max_key_type->getName());
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, getName() + " accepts at least one map or two arrays");
if (arguments.empty() || arguments.size() > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} accepts at least one map or two arrays arguments, and optional max key argument",
getName());
if (arguments[0]->getTypeId() == TypeIndex::Array)
return getReturnTypeForTuple(arguments);
else if (arguments[0]->getTypeId() == TypeIndex::Map)
return getReturnTypeForMap(arguments);
WhichDataType key_argument_data_type(arguments[0]);
DataTypePtr key_argument_series_type;
DataTypePtr value_argument_series_type;
size_t max_key_argument_index = 0;
if (key_argument_data_type.isArray())
{
DataTypePtr value_type;
if (1 < arguments.size())
value_type = arguments[1];
if (arguments.size() < 2 || (value_type && !isArray(value_type)))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} if array argument is passed as key, additional array argument as value must be passed",
getName());
const auto & key_array_type = assert_cast<const DataTypeArray &>(*arguments[0]);
const auto & value_array_type = assert_cast<const DataTypeArray &>(*value_type);
key_argument_series_type = key_array_type.getNestedType();
value_argument_series_type = value_array_type.getNestedType();
max_key_argument_index = 2;
}
else if (key_argument_data_type.isMap())
{
const auto & map_data_type = assert_cast<const DataTypeMap &>(*arguments[0]);
key_argument_series_type = map_data_type.getKeyType();
value_argument_series_type = map_data_type.getValueType();
max_key_argument_index = 1;
}
else
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} only accepts one map or arrays, but got {}",
getName(),
arguments[0]->getName());
DataTypePtr max_key_argument_type;
if (max_key_argument_index < arguments.size())
max_key_argument_type = arguments[max_key_argument_index];
checkTypes(key_argument_series_type, value_argument_series_type, max_key_argument_type);
if (key_argument_data_type.isArray())
return std::make_shared<DataTypeTuple>(DataTypes{arguments[0], arguments[1]});
else
return arguments[0];
}
// Struct holds input and output column references.
// Both arrays and maps have similar columns to work with, but they are extracted differently.
template <typename KeyType, typename ValType>
struct ColumnsInOut
template <typename KeyType, typename ValueType>
void executeImplTyped(
const ColumnPtr & key_column,
const ColumnPtr & value_column,
const ColumnPtr & offsets_column,
const ColumnPtr & max_key_column,
MutableColumnPtr result_key_column,
MutableColumnPtr result_value_column,
MutableColumnPtr result_offset_column) const
{
// inputs
const PaddedPODArray<KeyType> & in_keys_data;
const PaddedPODArray<ValType> & in_vals_data;
const IColumn::Offsets & in_key_offsets;
const IColumn::Offsets & in_val_offsets;
size_t row_count;
bool key_is_const;
bool val_is_const;
const auto & key_column_typed = assert_cast<const ColumnVector<KeyType> &>(*key_column);
const auto & key_column_data = key_column_typed.getData();
// outputs
PaddedPODArray<KeyType> & out_keys_data;
PaddedPODArray<ValType> & out_vals_data;
const auto & offsets_column_typed = assert_cast<const ColumnVector<ColumnArray::Offset> &>(*offsets_column);
const auto & offsets = offsets_column_typed.getData();
IColumn::Offsets & out_keys_offsets;
// with map argument this field will not be used
IColumn::Offsets * out_vals_offsets;
};
const auto & value_column_typed = assert_cast<const ColumnVector<ValueType> &>(*value_column);
const auto & value_column_data = value_column_typed.getData();
template <typename KeyType, typename ValType>
ColumnsInOut<KeyType, ValType> getInOutDataFromArrays(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
auto & result_key_column_typed = assert_cast<ColumnVector<KeyType> &>(*result_key_column);
auto & result_key_data = result_key_column_typed.getData();
auto & result_value_column_typed = assert_cast<ColumnVector<ValueType> &>(*result_value_column);
auto & result_value_data = result_value_column_typed.getData();
auto & result_offsets_column_typed = assert_cast<ColumnVector<ColumnArray::Offset> &>(*result_offset_column);
auto & result_offsets_data = result_offsets_column_typed.getData();
const PaddedPODArray<KeyType> * max_key_data = max_key_column ? &assert_cast<const ColumnVector<KeyType> &>(*max_key_column).getData() : nullptr;
PaddedPODArray<std::pair<KeyType, ValueType>> sorted_keys_values;
size_t key_offsets_size = offsets.size();
result_key_data.reserve(key_offsets_size);
result_value_data.reserve(key_offsets_size);
for (size_t offset_index = 0; offset_index < key_offsets_size; ++offset_index)
{
auto * out_tuple = assert_cast<ColumnTuple *>(res_column.get());
auto & out_keys_array = assert_cast<ColumnArray &>(out_tuple->getColumn(0));
auto & out_vals_array = assert_cast<ColumnArray &>(out_tuple->getColumn(1));
size_t start_offset = offsets[offset_index - 1];
size_t end_offset = offsets[offset_index];
const auto * key_column = arg_columns[0].get();
const auto * in_keys_array = checkAndGetColumn<ColumnArray>(key_column);
sorted_keys_values.clear();
bool key_is_const = false, val_is_const = false;
for (; start_offset < end_offset; ++start_offset)
sorted_keys_values.emplace_back(key_column_data[start_offset], value_column_data[start_offset]);
if (!in_keys_array)
if (unlikely(sorted_keys_values.empty()))
{
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column);
if (!const_array)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), key_column->getName());
in_keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
key_is_const = true;
}
const auto * val_column = arg_columns[1].get();
const auto * in_values_array = checkAndGetColumn<ColumnArray>(val_column);
if (!in_values_array)
{
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column);
if (!const_array)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), val_column->getName());
in_values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
val_is_const = true;
}
if (!in_keys_array || !in_values_array)
/* something went wrong */
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_keys_array->getData()).getData();
const auto & in_values_data = assert_cast<const ColumnVector<ValType> &>(in_values_array->getData()).getData();
const auto & in_keys_offsets = in_keys_array->getOffsets();
const auto & in_vals_offsets = in_values_array->getOffsets();
auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_keys_array.getData()).getData();
auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_vals_array.getData()).getData();
auto & out_keys_offsets = out_keys_array.getOffsets();
size_t row_count = key_is_const ? in_values_array->size() : in_keys_array->size();
IColumn::Offsets * out_vals_offsets = &out_vals_array.getOffsets();
return {
in_keys_data,
in_values_data,
in_keys_offsets,
in_vals_offsets,
row_count,
key_is_const,
val_is_const,
out_keys_data,
out_vals_data,
out_keys_offsets,
out_vals_offsets};
}
template <typename KeyType, typename ValType>
ColumnsInOut<KeyType, ValType> getInOutDataFromMap(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
{
const auto * in_map = assert_cast<const ColumnMap *>(arg_columns[0].get());
const auto & in_nested_array = in_map->getNestedColumn();
const auto & in_nested_tuple = in_map->getNestedData();
const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_nested_tuple.getColumn(0)).getData();
const auto & in_vals_data = assert_cast<const ColumnVector<ValType> &>(in_nested_tuple.getColumn(1)).getData();
const auto & in_keys_offsets = in_nested_array.getOffsets();
auto * out_map = assert_cast<ColumnMap *>(res_column.get());
auto & out_nested_array = out_map->getNestedColumn();
auto & out_nested_tuple = out_map->getNestedData();
auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_nested_tuple.getColumn(0)).getData();
auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_nested_tuple.getColumn(1)).getData();
auto & out_keys_offsets = out_nested_array.getOffsets();
return {
in_keys_data,
in_vals_data,
in_keys_offsets,
in_keys_offsets,
in_nested_array.size(),
false,
false,
out_keys_data,
out_vals_data,
out_keys_offsets,
nullptr};
}
template <typename KeyType, typename ValType>
ColumnPtr execute2(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type) const
{
MutableColumnPtr res_column = res_type->createColumn();
bool max_key_is_const = false;
auto columns = res_column->getDataType() == TypeIndex::Tuple ? getInOutDataFromArrays<KeyType, ValType>(res_column, arg_columns)
: getInOutDataFromMap<KeyType, ValType>(res_column, arg_columns);
KeyType max_key_const{0};
if (max_key_column && isColumnConst(*max_key_column))
{
const auto * column_const = static_cast<const ColumnConst *>(&*max_key_column);
max_key_const = column_const->template getValue<KeyType>();
max_key_is_const = true;
}
IColumn::Offset offset{0};
std::map<KeyType, ValType> res_map;
// Iterate through two arrays and fill result values.
for (size_t row = 0; row < columns.row_count; ++row)
{
size_t key_offset = 0, val_offset = 0, items_count = columns.in_key_offsets[0], val_array_size = columns.in_val_offsets[0];
res_map.clear();
if (!columns.key_is_const)
{
key_offset = row > 0 ? columns.in_key_offsets[row - 1] : 0;
items_count = columns.in_key_offsets[row] - key_offset;
}
if (!columns.val_is_const)
{
val_offset = row > 0 ? columns.in_val_offsets[row - 1] : 0;
val_array_size = columns.in_val_offsets[row] - val_offset;
}
if (items_count != val_array_size)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Key and value array should have same amount of elements in function {}",
getName());
if (items_count == 0)
{
columns.out_keys_offsets.push_back(offset);
result_offsets_data.emplace_back(result_value_data.size());
continue;
}
for (size_t i = 0; i < items_count; ++i)
::sort(sorted_keys_values.begin(), sorted_keys_values.end());
KeyType min_key = sorted_keys_values.front().first;
KeyType max_key = sorted_keys_values.back().first;
if (max_key_data)
{
res_map.insert({columns.in_keys_data[key_offset + i], columns.in_vals_data[val_offset + i]});
max_key = (*max_key_data)[offset_index];
if (unlikely(max_key < min_key))
{
result_offsets_data.emplace_back(result_value_data.size());
continue;
}
}
auto min_key = res_map.begin()->first;
auto max_key = res_map.rbegin()->first;
using KeyTypeUnsigned = ::make_unsigned_t<KeyType>;
KeyTypeUnsigned max_min_key_difference = 0;
if (max_key_column)
if constexpr (::is_unsigned_v<KeyType>)
{
/* update the current max key if it's not constant */
if (max_key_is_const)
{
max_key = max_key_const;
max_min_key_difference = max_key - min_key;
}
else
{
max_key = (static_cast<const ColumnVector<KeyType> *>(max_key_column.get()))->getData()[row];
}
bool is_max_key_positive = max_key >= 0;
bool is_min_key_positive = min_key >= 0;
/* no need to add anything, max key is less than the first key */
if (max_key < min_key)
if (is_max_key_positive && is_min_key_positive)
{
columns.out_keys_offsets.push_back(offset);
continue;
max_min_key_difference = static_cast<KeyTypeUnsigned>(max_key - min_key);
}
else if (is_max_key_positive && !is_min_key_positive)
{
KeyTypeUnsigned min_key_unsigned = -static_cast<KeyTypeUnsigned>(min_key);
max_min_key_difference = static_cast<KeyTypeUnsigned>(max_key) + min_key_unsigned;
}
else
{
/// Both max and min key are negative
KeyTypeUnsigned min_key_unsigned = -static_cast<KeyTypeUnsigned>(min_key);
KeyTypeUnsigned max_key_unsigned = -static_cast<KeyTypeUnsigned>(max_key);
max_min_key_difference = min_key_unsigned - max_key_unsigned;
}
}
static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
if (static_cast<size_t>(max_key) - static_cast<size_t>(min_key) > MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName());
if (max_min_key_difference > MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Function {} too large array size in the result",
getName());
/* fill the result arrays */
KeyType key;
for (key = min_key;; ++key)
{
columns.out_keys_data.push_back(key);
size_t length = static_cast<size_t>(max_min_key_difference);
size_t result_key_data_size = result_key_data.size();
size_t result_value_data_size = result_value_data.size();
size_t sorted_keys_values_size = sorted_keys_values.size();
auto it = res_map.find(key);
if (it != res_map.end())
result_key_data.resize_fill(result_key_data_size + length + 1);
result_value_data.resize_fill(result_value_data_size + length + 1);
size_t sorted_values_index = 0;
for (KeyType current_key = min_key; current_key <= max_key; ++current_key)
{
columns.out_vals_data.push_back(it->second);
}
else
size_t key_offset_index = current_key - min_key;
size_t insert_index = result_value_data_size + key_offset_index;
result_key_data[insert_index] = current_key;
if (sorted_values_index < sorted_keys_values_size &&
sorted_keys_values[sorted_values_index].first == current_key)
{
columns.out_vals_data.push_back(0);
auto & sorted_key_value = sorted_keys_values[sorted_values_index];
if (current_key == sorted_key_value.first)
{
result_value_data[insert_index] = sorted_key_value.second;
}
++offset;
if (key == max_key)
++sorted_values_index;
while (sorted_values_index < sorted_keys_values_size &&
current_key == sorted_keys_values[sorted_values_index].first)
{
++sorted_values_index;
}
}
if (current_key == max_key)
break;
}
columns.out_keys_offsets.push_back(offset);
}
if (columns.out_vals_offsets)
columns.out_vals_offsets->insert(columns.out_keys_offsets.begin(), columns.out_keys_offsets.end());
return res_column;
}
template <typename KeyType>
ColumnPtr execute1(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type, const DataTypePtr & val_type) const
{
switch (val_type->getTypeId())
{
case TypeIndex::Int8:
return execute2<KeyType, Int8>(arg_columns, max_key_column, res_type);
case TypeIndex::Int16:
return execute2<KeyType, Int16>(arg_columns, max_key_column, res_type);
case TypeIndex::Int32:
return execute2<KeyType, Int32>(arg_columns, max_key_column, res_type);
case TypeIndex::Int64:
return execute2<KeyType, Int64>(arg_columns, max_key_column, res_type);
case TypeIndex::Int128:
return execute2<KeyType, Int128>(arg_columns, max_key_column, res_type);
case TypeIndex::Int256:
return execute2<KeyType, Int256>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt8:
return execute2<KeyType, UInt8>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt16:
return execute2<KeyType, UInt16>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt32:
return execute2<KeyType, UInt32>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt64:
return execute2<KeyType, UInt64>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt128:
return execute2<KeyType, UInt128>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt256:
return execute2<KeyType, UInt256>(arg_columns, max_key_column, res_type);
default:
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
result_offsets_data.emplace_back(result_value_data.size());
}
}
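The three-way branch on key signs above exists to compute the distance between max_key and min_key without signed overflow (for Int8, 127 - (-128) does not fit the signed type, and signed overflow is undefined behavior). A hedged standalone sketch of the same computation; keyRange is an illustrative name, and std::make_unsigned_t plays the role of ::make_unsigned_t.

#include <cstdint>
#include <iostream>
#include <type_traits>

// Compute max_key - min_key in the unsigned counterpart type, mirroring
// the sign analysis in executeImplTyped.
template <typename Key>
std::make_unsigned_t<Key> keyRange(Key min_key, Key max_key)
{
    using UKey = std::make_unsigned_t<Key>;
    if constexpr (std::is_unsigned_v<Key>)
    {
        return static_cast<UKey>(max_key - min_key);
    }
    else
    {
        if (min_key >= 0) // both non-negative: plain subtraction is safe
            return static_cast<UKey>(max_key - min_key);
        UKey min_key_unsigned = -static_cast<UKey>(min_key); // |min_key| modulo 2^N
        if (max_key >= 0) // min negative, max non-negative
            return static_cast<UKey>(max_key) + min_key_unsigned;
        UKey max_key_unsigned = -static_cast<UKey>(max_key); // both negative: |min| - |max|
        return min_key_unsigned - max_key_unsigned;
    }
}

int main()
{
    std::cout << static_cast<unsigned>(keyRange<int8_t>(-128, 127)) << '\n'; // 255
    std::cout << keyRange<int64_t>(-5, 10) << '\n';                          // 15
}

The resulting unsigned difference is what gets compared against MAX_ARRAY_SIZE before the result arrays are resized.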
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
struct KeyAndValueInput
{
DataTypePtr res_type, key_type, val_type;
ColumnPtr max_key_column = nullptr;
ColumnPtr arg_columns[] = {arguments[0].column, nullptr};
DataTypePtr key_series_type;
DataTypePtr value_series_type;
if (arguments[0].type->getTypeId() == TypeIndex::Array)
{
key_type = assert_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();
val_type = assert_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();
res_type = getReturnTypeImpl(DataTypes{arguments[0].type, arguments[1].type});
ColumnPtr key_column;
ColumnPtr value_column;
ColumnPtr offsets_column;
arg_columns[1] = arguments[1].column;
if (arguments.size() == 3)
/// Optional max key column
ColumnPtr max_key_column;
};
KeyAndValueInput extractKeyAndValueInput(const ColumnsWithTypeAndName & arguments) const
{
/* max key provided */
max_key_column = arguments[2].column;
KeyAndValueInput input;
size_t max_key_argument_index = 0;
auto first_argument_column = arguments[0].column->convertToFullColumnIfConst();
ColumnPtr second_argument_array_column;
if (const auto * key_argument_array_column = typeid_cast<const ColumnArray *>(first_argument_column.get()))
{
const ColumnArray * value_argument_array_column = nullptr;
if (1 < arguments.size())
{
second_argument_array_column = arguments[1].column->convertToFullColumnIfConst();
value_argument_array_column = typeid_cast<const ColumnArray *>(second_argument_array_column.get());
}
if (!value_argument_array_column)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} if array argument is passed as key, additional array argument as value must be passed",
getName());
input.key_series_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
input.key_column = key_argument_array_column->getDataPtr();
const auto & key_offsets = key_argument_array_column->getOffsets();
input.value_series_type = assert_cast<const DataTypeArray &>(*arguments[1].type).getNestedType();
input.value_column = value_argument_array_column->getDataPtr();
const auto & value_offsets = value_argument_array_column->getOffsets();
if (key_offsets != value_offsets)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Function {} key and value array should have same amount of elements",
getName());
input.offsets_column = key_argument_array_column->getOffsetsPtr();
max_key_argument_index = 2;
}
else if (const auto * key_argument_map_column = typeid_cast<const ColumnMap *>(first_argument_column.get()))
{
const auto & nested_array = key_argument_map_column->getNestedColumn();
const auto & nested_data_column = key_argument_map_column->getNestedData();
const auto & map_argument_type = assert_cast<const DataTypeMap &>(*arguments[0].type);
input.key_series_type = map_argument_type.getKeyType();
input.value_series_type = map_argument_type.getValueType();
input.key_column = nested_data_column.getColumnPtr(0);
input.value_column = nested_data_column.getColumnPtr(1);
input.offsets_column = nested_array.getOffsetsPtr();
max_key_argument_index = 1;
}
else
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Function {} only accepts one map or arrays, but got {}",
getName(),
arguments[0].type->getName());
ColumnPtr max_key_column;
if (max_key_argument_index < arguments.size())
{
max_key_column = arguments[max_key_argument_index].column->convertToFullColumnIfConst();
auto max_key_column_type = arguments[max_key_argument_index].type;
if (!max_key_column_type->equals(*input.key_series_type))
{
ColumnWithTypeAndName column_to_cast = {max_key_column, max_key_column_type, ""};
auto casted_column = castColumnAccurate(std::move(column_to_cast), input.key_series_type);
max_key_column = std::move(casted_column);
}
}
input.max_key_column = std::move(max_key_column);
return input;
}
struct ResultColumns
{
MutableColumnPtr result_key_column;
MutableColumnPtr result_value_column;
MutableColumnPtr result_offset_column;
IColumn * result_offset_column_raw;
/// If we return a tuple of two arrays, this offset needs to be the same as result_offset_column
MutableColumnPtr result_array_additional_offset_column;
};
ResultColumns extractResultColumns(MutableColumnPtr & result_column, const DataTypePtr & result_type) const
{
ResultColumns result;
auto * tuple_column = typeid_cast<ColumnTuple *>(result_column.get());
if (tuple_column && tuple_column->tupleSize() == 2)
{
auto key_array_column = tuple_column->getColumnPtr(0)->assumeMutable();
auto value_array_column = tuple_column->getColumnPtr(1)->assumeMutable();
auto * key_array_column_typed = typeid_cast<ColumnArray *>(key_array_column.get());
auto * value_array_column_typed = typeid_cast<ColumnArray *>(value_array_column.get());
if (!key_array_column_typed || !value_array_column_typed)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Function {} result type should be Tuple with two nested Array columns or Map. Actual {}",
getName(),
result_type->getName());
result.result_key_column = key_array_column_typed->getDataPtr()->assumeMutable();
result.result_value_column = value_array_column_typed->getDataPtr()->assumeMutable();
result.result_offset_column = key_array_column_typed->getOffsetsPtr()->assumeMutable();
result.result_offset_column_raw = result.result_offset_column.get();
result.result_array_additional_offset_column = value_array_column_typed->getOffsetsPtr()->assumeMutable();
}
else if (const auto * map_column = typeid_cast<ColumnMap *>(result_column.get()))
{
result.result_key_column = map_column->getNestedData().getColumnPtr(0)->assumeMutable();
result.result_value_column = map_column->getNestedData().getColumnPtr(1)->assumeMutable();
result.result_offset_column = map_column->getNestedColumn().getOffsetsPtr()->assumeMutable();
result.result_offset_column_raw = result.result_offset_column.get();
result.result_array_additional_offset_column = nullptr;
}
else
{
assert(arguments[0].type->getTypeId() == TypeIndex::Map);
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Function {} result type should be Tuple with two nested Array columns or Map. Actual {}",
getName(),
result_type->getName());
}
const auto * map_type = assert_cast<const DataTypeMap *>(arguments[0].type.get());
res_type = getReturnTypeImpl(DataTypes{arguments[0].type});
key_type = map_type->getKeyType();
val_type = map_type->getValueType();
return result;
}
if (arguments.size() == 2)
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
{
/* max key provided */
max_key_column = arguments[1].column;
}
auto input = extractKeyAndValueInput(arguments);
auto result_column = result_type->createColumn();
auto result_columns = extractResultColumns(result_column, result_type);
auto call = [&](const auto & types)
{
using Types = std::decay_t<decltype(types)>;
using KeyType = typename Types::LeftType;
using ValueType = typename Types::RightType;
static constexpr bool key_and_value_are_numbers = IsDataTypeNumber<KeyType> && IsDataTypeNumber<ValueType>;
static constexpr bool key_is_float = std::is_same_v<KeyType, DataTypeFloat32> || std::is_same_v<KeyType, DataTypeFloat64>;
if constexpr (key_and_value_are_numbers && !key_is_float)
{
using KeyFieldType = typename KeyType::FieldType;
using ValueFieldType = typename ValueType::FieldType;
executeImplTyped<KeyFieldType, ValueFieldType>(
input.key_column,
input.value_column,
input.offsets_column,
input.max_key_column,
std::move(result_columns.result_key_column),
std::move(result_columns.result_value_column),
std::move(result_columns.result_offset_column));
return true;
}
switch (key_type->getTypeId())
return false;
};
if (!callOnTwoTypeIndexes(input.key_series_type->getTypeId(), input.value_series_type->getTypeId(), call))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Function {} illegal columns passed as arguments",
getName());
if (result_columns.result_array_additional_offset_column)
{
case TypeIndex::Int8:
return execute1<Int8>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int16:
return execute1<Int16>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int32:
return execute1<Int32>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int64:
return execute1<Int64>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int128:
return execute1<Int128>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int256:
return execute1<Int256>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt8:
return execute1<UInt8>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt16:
return execute1<UInt16>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt32:
return execute1<UInt32>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt64:
return execute1<UInt64>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt128:
return execute1<UInt128>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt256:
return execute1<UInt256>(arg_columns, max_key_column, res_type, val_type);
default:
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
result_columns.result_array_additional_offset_column->insertRangeFrom(
*result_columns.result_offset_column_raw,
0,
result_columns.result_offset_column_raw->size());
}
return result_column;
}
};
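For readers unfamiliar with callOnTwoTypeIndexes, the idea is to turn two runtime type tags into a call where both types are concrete compile-time types. A hedged miniature model follows; Tag, dispatchOne and dispatchTwo are invented names, and only two types are wired up, unlike ClickHouse's full TypeIndex set.

#include <cstdint>
#include <iostream>

// Minimal model of double type dispatch: map two runtime tags to one
// template instantiation by nesting two single dispatches.
enum class Tag { Int8, UInt64 };

template <typename F>
bool dispatchOne(Tag tag, F && f)
{
    switch (tag)
    {
        case Tag::Int8:   return f(int8_t{});
        case Tag::UInt64: return f(uint64_t{});
    }
    return false;
}

template <typename F>
bool dispatchTwo(Tag key_tag, Tag value_tag, F && f)
{
    return dispatchOne(key_tag, [&](auto key) {
        return dispatchOne(value_tag, [&](auto value) {
            return f(key, value); // both types are concrete here
        });
    });
}

int main()
{
    dispatchTwo(Tag::Int8, Tag::UInt64, [](auto key, auto value) {
        std::cout << sizeof(key) << " " << sizeof(value) << '\n'; // 1 8
        return true;
    });
}

Returning false from the callback, as executeImpl does for float keys, is how unsupported type combinations are reported back to the caller.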
@ -451,4 +501,5 @@ void registerFunctionMapPopulateSeries(FunctionFactory & factory)
{
factory.registerFunction<FunctionMapPopulateSeries>();
}
}

View File

@ -19,6 +19,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
}
namespace
@ -66,8 +67,25 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * col_lon = arguments[0].column.get();
const auto * col_lat = arguments[1].column.get();
const auto * col_lon = checkAndGetColumn<ColumnFloat64>(arguments[0].column.get());
if (!col_lon)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[0].type->getName(),
1,
getName());
const auto & data_col_lon = col_lon->getData();
const auto * col_lat = checkAndGetColumn<ColumnFloat64>(arguments[1].column.get());
if (!col_lat)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[1].type->getName(),
2,
getName());
const auto & data_col_lat = col_lat->getData();
auto dst = ColumnVector<UInt64>::create();
auto & dst_data = dst->getData();
@ -75,16 +93,14 @@ public:
for (size_t row = 0; row < input_rows_count; ++row)
{
const Float64 lon = col_lon->getFloat64(row);
const Float64 lat = col_lat->getFloat64(row);
const Float64 lon = data_col_lon[row];
const Float64 lat = data_col_lat[row];
if (isNaN(lon) || isNaN(lat))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Arguments must not be NaN");
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments must not be NaN");
if (!(isFinite(lon) && isFinite(lat)))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Arguments must not be infinite");
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments must not be infinite");
/// S2 accepts a point as (latitude, longitude)
S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon);
@ -95,7 +111,6 @@ public:
return dst;
}
};
}
@ -105,7 +120,6 @@ void registerFunctionGeoToS2(FunctionFactory & factory)
factory.registerFunction<FunctionGeoToS2>();
}
}
#endif
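The per-row checks above reject NaN and infinite coordinates before the (lat, lon) swap that S2 expects. A hedged standalone sketch of just that validation step; validate is an illustrative name, and plain doubles stand in for the ColumnFloat64 data arrays.

#include <cmath>
#include <iostream>
#include <stdexcept>

// Reject non-finite coordinates, matching the row loop in FunctionGeoToS2.
static void validate(double lon, double lat)
{
    if (std::isnan(lon) || std::isnan(lat))
        throw std::invalid_argument("Arguments must not be NaN");
    if (!(std::isfinite(lon) && std::isfinite(lat)))
        throw std::invalid_argument("Arguments must not be infinite");
}

int main()
{
    validate(37.62, 55.75); // fine: (lon, lat) as the function takes them
    try { validate(0.0, NAN); }
    catch (const std::exception & e) { std::cout << e.what() << '\n'; }
}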

View File

@ -20,6 +20,7 @@ using FunctionMatch = FunctionsStringSearch<MatchImpl<NameMatch, false>>;
void registerFunctionMatch(FunctionFactory & factory)
{
factory.registerFunction<FunctionMatch>();
factory.registerAlias("REGEXP_MATCHES", NameMatch::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -6,6 +6,7 @@ class FunctionFactory;
#if defined(OS_LINUX)
void registerFunctionAddressToSymbol(FunctionFactory & factory);
void registerFunctionAddressToLine(FunctionFactory & factory);
void registerFunctionAddressToLineWithInlines(FunctionFactory & factory);
#endif
void registerFunctionDemangle(FunctionFactory & factory);
@ -17,6 +18,7 @@ void registerFunctionsIntrospection(FunctionFactory & factory)
#if defined(OS_LINUX)
registerFunctionAddressToSymbol(factory);
registerFunctionAddressToLine(factory);
registerFunctionAddressToLineWithInlines(factory);
#endif
registerFunctionDemangle(factory);
registerFunctionTrap(factory);

View File

@ -21,6 +21,7 @@ void registerFunctionReplaceAll(FunctionFactory & factory)
{
factory.registerFunction<FunctionReplaceAll>();
factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("REGEXP_REPLACE", NameReplaceAll::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
namespace
@ -83,19 +84,47 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * col_center = arguments[0].column.get();
const auto * col_degrees = arguments[1].column.get();
const auto * col_point = arguments[2].column.get();
const auto * col_center = checkAndGetColumn<ColumnUInt64>(arguments[0].column.get());
if (!col_center)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt64",
arguments[0].type->getName(),
1,
getName());
const auto & data_center = col_center->getData();
const auto * col_degrees = checkAndGetColumn<ColumnFloat64>(arguments[1].column.get());
if (!col_degrees)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[1].type->getName(),
2,
getName());
const auto & data_degrees = col_degrees->getData();
const auto * col_point = checkAndGetColumn<ColumnUInt64>(arguments[2].column.get());
if (!col_point)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt64",
arguments[2].type->getName(),
3,
getName());
const auto & data_point = col_point->getData();
auto dst = ColumnUInt8::create();
auto & dst_data = dst->getData();
dst_data.reserve(input_rows_count);
for (size_t row=0 ; row < input_rows_count; ++row)
for (size_t row = 0; row < input_rows_count; ++row)
{
const auto center = S2CellId(col_center->getUInt(row));
const Float64 degrees = col_degrees->getFloat64(row);
const auto point = S2CellId(col_point->getUInt(row));
const auto center = S2CellId(data_center[row]);
const Float64 degrees = data_degrees[row];
const auto point = S2CellId(data_point[row]);
if (isNaN(degrees))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan");

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
namespace
@ -81,10 +82,45 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * col_center1 = arguments[0].column.get();
const auto * col_radius1 = arguments[1].column.get();
const auto * col_center2 = arguments[2].column.get();
const auto * col_radius2 = arguments[3].column.get();
const auto * col_center1 = checkAndGetColumn<ColumnUInt64>(arguments[0].column.get());
if (!col_center1)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt64",
arguments[0].type->getName(),
1,
getName());
const auto & data_center1 = col_center1->getData();
const auto * col_radius1 = checkAndGetColumn<ColumnFloat64>(arguments[1].column.get());
if (!col_radius1)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[1].type->getName(),
2,
getName());
const auto & data_radius1 = col_radius1->getData();
const auto * col_center2 = checkAndGetColumn<ColumnUInt64>(arguments[2].column.get());
if (!col_center2)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt64",
arguments[2].type->getName(),
3,
getName());
const auto & data_center2 = col_center2->getData();
const auto * col_radius2 = checkAndGetColumn<ColumnFloat64>(arguments[3].column.get());
if (!col_radius2)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[3].type->getName(),
4,
getName());
const auto & data_radius2 = col_radius2->getData();
auto col_res_center = ColumnUInt64::create();
auto col_res_radius = ColumnFloat64::create();
@ -97,10 +133,10 @@ public:
for (size_t row = 0; row < input_rows_count; ++row)
{
const UInt64 first_center = col_center1->getUInt(row);
const Float64 first_radius = col_radius1->getFloat64(row);
const UInt64 second_center = col_center2->getUInt(row);
const Float64 second_radius = col_radius2->getFloat64(row);
const UInt64 first_center = data_center1[row];
const Float64 first_radius = data_radius1[row];
const UInt64 second_center = data_center2[row];
const Float64 second_radius = data_radius2[row];
if (isNaN(first_radius) || isNaN(second_radius))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan");
@ -125,7 +161,6 @@ public:
return ColumnTuple::create(Columns{std::move(col_res_center), std::move(col_res_radius)});
}
};
}
@ -135,7 +170,6 @@ void registerFunctionS2CapUnion(FunctionFactory & factory)
factory.registerFunction<FunctionS2CapUnion>();
}
}
#endif

View File

@ -19,6 +19,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
namespace
@ -65,8 +66,25 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * col_id_first = arguments[0].column.get();
const auto * col_id_second = arguments[1].column.get();
const auto * col_id_first = checkAndGetColumn<ColumnUInt64>(arguments[0].column.get());
if (!col_id_first)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt64",
arguments[0].type->getName(),
1,
getName());
const auto & data_id_first = col_id_first->getData();
const auto * col_id_second = checkAndGetColumn<ColumnUInt64>(arguments[1].column.get());
if (!col_id_second)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt64",
arguments[1].type->getName(),
2,
getName());
const auto & data_id_second = col_id_second->getData();
auto dst = ColumnUInt8::create();
auto & dst_data = dst->getData();
@ -74,8 +92,8 @@ public:
for (size_t row = 0; row < input_rows_count; ++row)
{
const UInt64 id_first = col_id_first->getInt(row);
const UInt64 id_second = col_id_second->getInt(row);
const UInt64 id_first = data_id_first[row];
const UInt64 id_second = data_id_second[row];
auto first_cell = S2CellId(id_first);
auto second_cell = S2CellId(id_second);

View File

@ -19,6 +19,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
namespace
@ -56,15 +57,25 @@ public:
if (!WhichDataType(arg).isUInt64())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument {} of function {}. Must be Float64",
arg->getName(), 1, getName());
"Illegal type {} of argument {} of function {}. Must be UInt64",
arg->getName(),
1,
getName());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * col_id = arguments[0].column.get();
const auto * col_id = checkAndGetColumn<ColumnUInt64>(arguments[0].column.get());
if (!col_id)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt64",
arguments[0].type->getName(),
1,
getName());
const auto & data_id = col_id->getData();
auto dst = ColumnArray::create(ColumnUInt64::create());
auto & dst_data = dst->getData();
@ -74,7 +85,7 @@ public:
for (size_t row = 0; row < input_rows_count; ++row)
{
const UInt64 id = col_id->getUInt(row);
const UInt64 id = data_id[row];
S2CellId cell_id(id);

Some files were not shown because too many files have changed in this diff