mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge branch 'master' of github.com:yandex/ClickHouse
This commit is contained in:
commit
6eeca80355
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -15,7 +15,7 @@
|
||||
url = https://github.com/google/cctz.git
|
||||
[submodule "contrib/zlib-ng"]
|
||||
path = contrib/zlib-ng
|
||||
url = https://github.com/Dead2/zlib-ng.git
|
||||
url = https://github.com/ClickHouse-Extras/zlib-ng.git
|
||||
[submodule "contrib/googletest"]
|
||||
path = contrib/googletest
|
||||
url = https://github.com/google/googletest.git
|
||||
|
@ -1 +1,31 @@
|
||||
## RU
|
||||
|
||||
## ClickHouse release 18.10.3, 2018-08-13
|
||||
|
||||
### Новые возможности:
|
||||
* поддержка межсерверной репликации по HTTPS
|
||||
* MurmurHash
|
||||
* ODBCDriver2 с поддержкой NULL-ов
|
||||
* поддержка UUID в ключевых колонках (экспериментально)
|
||||
|
||||
### Улучшения:
|
||||
* добавлена поддержка SETTINGS для движка Kafka
|
||||
* поддежка пустых кусков после мержей в движках Summing, Collapsing and VersionedCollapsing
|
||||
* удаление старых записей о полностью выполнившихся мутациях
|
||||
* исправлена логика REPLACE PARTITION для движка RplicatedMergeTree
|
||||
* добавлена системная таблица system.merge_tree_settings
|
||||
* в системную таблицу system.tables добавлены столбцы зависимостей: dependencies_database и dependencies_table
|
||||
* заменен аллокатор, теперь используется jemalloc вместо tcmalloc
|
||||
* улучшена валидация connection string ODBC
|
||||
* удалена поддержка CHECK TABLE для распределенных таблиц
|
||||
* добавлены stateful тесты (пока без данных)
|
||||
* добавлена опция конфига max_partition_size_to_drop
|
||||
* добавлена настройка output_format_json_escape_slashes
|
||||
* добавлена настройка max_fetch_partition_retries_count
|
||||
* добавлена настройка prefer_localhost_replica
|
||||
* добавлены libressl, unixodbc и mariadb-connector-c как сабмодули
|
||||
|
||||
### Исправление ошибок:
|
||||
* #2786
|
||||
* #2777
|
||||
* #2795
|
||||
|
@ -1,5 +1,5 @@
|
||||
project (ClickHouse)
|
||||
cmake_minimum_required (VERSION 2.8)
|
||||
cmake_minimum_required (VERSION 3.3)
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${ClickHouse_SOURCE_DIR}/cmake/Modules/")
|
||||
|
||||
@ -218,7 +218,7 @@ else ()
|
||||
set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc")
|
||||
endif ()
|
||||
|
||||
option (UNBUNDLED "Try find all libraries in system (if fail - use bundled from contrib/)" OFF)
|
||||
option (UNBUNDLED "Try find all libraries in system. We recommend to avoid this mode for production builds, because we cannot guarantee exact versions and variants of libraries your system has installed. This mode exists for enthusiastic developers who search for trouble. Also it is useful for maintainers of OS packages." OFF)
|
||||
if (UNBUNDLED)
|
||||
set(NOT_UNBUNDLED 0)
|
||||
else ()
|
||||
|
@ -7,9 +7,9 @@ endif ()
|
||||
if (CMAKE_LIBRARY_ARCHITECTURE MATCHES "i386")
|
||||
set (ARCH_I386 1)
|
||||
endif ()
|
||||
if ( ( ARCH_ARM AND NOT ARCH_AARCH64 ) OR ARCH_I386)
|
||||
if ((ARCH_ARM AND NOT ARCH_AARCH64) OR ARCH_I386)
|
||||
set (ARCH_32 1)
|
||||
message (WARNING "Support for 32bit platforms is highly experimental")
|
||||
message (FATAL_ERROR "32bit platforms are not supported")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_SYSTEM MATCHES "Linux")
|
||||
|
@ -2,7 +2,9 @@ option (ENABLE_RDKAFKA "Enable kafka" ON)
|
||||
|
||||
if (ENABLE_RDKAFKA)
|
||||
|
||||
option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
|
||||
if (OS_LINUX)
|
||||
option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_RDKAFKA_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/CMakeLists.txt")
|
||||
message (WARNING "submodule contrib/librdkafka is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
|
@ -1,4 +1,6 @@
|
||||
option (USE_INTERNAL_ZLIB_LIBRARY "Set to FALSE to use system zlib library instead of bundled" ${NOT_UNBUNDLED})
|
||||
if (NOT OS_FREEBSD AND NOT APPLE)
|
||||
option (USE_INTERNAL_ZLIB_LIBRARY "Set to FALSE to use system zlib library instead of bundled" ${NOT_UNBUNDLED})
|
||||
endif ()
|
||||
|
||||
if (NOT USE_INTERNAL_ZLIB_LIBRARY)
|
||||
find_package (ZLIB)
|
||||
|
6
contrib/CMakeLists.txt
vendored
6
contrib/CMakeLists.txt
vendored
@ -1,10 +1,10 @@
|
||||
# Third-party libraries may have substandard code.
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -std=c++1z")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -Wno-stringop-overflow")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -Wno-implicit-fallthrough -std=c++1z")
|
||||
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-format")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-format -Wno-parentheses-equality")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-format -std=c++1z")
|
||||
endif ()
|
||||
|
||||
|
2
contrib/ssl
vendored
2
contrib/ssl
vendored
@ -1 +1 @@
|
||||
Subproject commit 994687ca6c7b5a2b7e4346bf835a54068b3530a4
|
||||
Subproject commit de02224a42c69e3d8c9112c82018816f821878d0
|
2
contrib/zlib-ng
vendored
2
contrib/zlib-ng
vendored
@ -1 +1 @@
|
||||
Subproject commit e07a52dbaa35d003f5659b221b29d220c091667b
|
||||
Subproject commit 9173b89d46799582d20a30578e0aa9788bc7d6e1
|
@ -1,11 +1,11 @@
|
||||
# This strings autochanged from release_lib.sh:
|
||||
set(VERSION_REVISION 54405 CACHE STRING "")
|
||||
set(VERSION_REVISION 54406 CACHE STRING "")
|
||||
set(VERSION_MAJOR 18 CACHE STRING "")
|
||||
set(VERSION_MINOR 10 CACHE STRING "")
|
||||
set(VERSION_PATCH 1 CACHE STRING "")
|
||||
set(VERSION_GITHASH 419bc587c0079b51a906a65af9a10da3300ddaf2 CACHE STRING "")
|
||||
set(VERSION_DESCRIBE v18.10.1-testing CACHE STRING "")
|
||||
set(VERSION_STRING 18.10.1 CACHE STRING "")
|
||||
set(VERSION_MINOR 11 CACHE STRING "")
|
||||
set(VERSION_PATCH 0 CACHE STRING "")
|
||||
set(VERSION_GITHASH 76af46ed5d223b3a7af92e31eae291174da16355 CACHE STRING "")
|
||||
set(VERSION_DESCRIBE v18.11.0-testing CACHE STRING "")
|
||||
set(VERSION_STRING 18.11.0 CACHE STRING "")
|
||||
# end of autochange
|
||||
|
||||
set(VERSION_EXTRA "" CACHE STRING "")
|
||||
|
@ -13,6 +13,7 @@ option (ENABLE_CLICKHOUSE_COMPRESSOR "Enable clickhouse-compressor" ${ENABLE_CLI
|
||||
option (ENABLE_CLICKHOUSE_COPIER "Enable clickhouse-copier" ${ENABLE_CLICKHOUSE_ALL})
|
||||
option (ENABLE_CLICKHOUSE_FORMAT "Enable clickhouse-format" ${ENABLE_CLICKHOUSE_ALL})
|
||||
option (ENABLE_CLICKHOUSE_OBFUSCATOR "Enable clickhouse-obfuscator" ${ENABLE_CLICKHOUSE_ALL})
|
||||
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "Enable clickhouse-odbc-bridge" ${ENABLE_CLICKHOUSE_ALL})
|
||||
|
||||
configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h)
|
||||
|
||||
@ -27,10 +28,11 @@ add_subdirectory (copier)
|
||||
add_subdirectory (format)
|
||||
add_subdirectory (clang)
|
||||
add_subdirectory (obfuscator)
|
||||
add_subdirectory (odbc-bridge)
|
||||
|
||||
if (CLICKHOUSE_SPLIT_BINARY)
|
||||
set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test
|
||||
clickhouse-extract-from-config clickhouse-format clickhouse-copier)
|
||||
clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier clickhouse-odbc-bridge)
|
||||
|
||||
if (USE_EMBEDDED_COMPILER)
|
||||
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld)
|
||||
@ -83,6 +85,9 @@ else ()
|
||||
if (USE_EMBEDDED_COMPILER)
|
||||
target_link_libraries (clickhouse clickhouse-compiler-lib)
|
||||
endif ()
|
||||
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
|
||||
target_link_libraries (clickhouse clickhouse-odbc-bridge-lib)
|
||||
endif()
|
||||
|
||||
set (CLICKHOUSE_BUNDLE)
|
||||
if (ENABLE_CLICKHOUSE_SERVER)
|
||||
@ -135,6 +140,12 @@ else ()
|
||||
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator)
|
||||
endif ()
|
||||
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
|
||||
add_custom_target (clickhouse-odbc-bridge ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-odbc-bridge DEPENDS clickhouse)
|
||||
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-odbc-bridge DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge)
|
||||
endif ()
|
||||
|
||||
|
||||
# install always because depian package want this files:
|
||||
add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse)
|
||||
|
@ -13,10 +13,10 @@ if (CLICKHOUSE_SPLIT_BINARY)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
set(TMP_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/headers")
|
||||
set(TMP_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/${INTERNAL_COMPILER_HEADERS_RELATIVE}")
|
||||
# Make and install empty dir for debian package if compiler disabled
|
||||
add_custom_target(make-headers-directory ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${TMP_HEADERS_DIR})
|
||||
install(DIRECTORY ${TMP_HEADERS_DIR} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse COMPONENT clickhouse)
|
||||
install(DIRECTORY ${TMP_HEADERS_DIR} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/${INTERNAL_COMPILER_HEADERS_DIR} COMPONENT clickhouse)
|
||||
# TODO: fix on macos copy_headers.sh: sed --posix
|
||||
if (USE_EMBEDDED_COMPILER AND NOT APPLE)
|
||||
add_custom_target(copy-headers ALL env CLANG=${CMAKE_CURRENT_BINARY_DIR}/../clickhouse-clang BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS clickhouse-clang WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh)
|
||||
|
@ -1,3 +1,5 @@
|
||||
#include "TestHint.h"
|
||||
|
||||
#include <port/unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
@ -20,7 +22,6 @@
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ShellCommand.h>
|
||||
#include <Common/ExternalTable.h>
|
||||
#include <Common/UnicodeBar.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/NetException.h>
|
||||
@ -31,6 +32,7 @@
|
||||
#include <Common/config_version.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Core/QueryProcessingStage.h>
|
||||
#include <Core/ExternalTable.h>
|
||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
@ -39,6 +41,7 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <DataStreams/AsynchronousBlockInputStream.h>
|
||||
#include <DataStreams/InternalTextLogsRowOutputStream.h>
|
||||
#include <Parsers/ParserQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTUseQuery.h>
|
||||
@ -90,102 +93,6 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/// Checks expected server and client error codes in testmode.
|
||||
/// To enable it add special comment after the query: "-- { serverError 60 }" or "-- { clientError 20 }".
|
||||
class TestHint
|
||||
{
|
||||
public:
|
||||
TestHint(bool enabled_, const String & query)
|
||||
: enabled(enabled_),
|
||||
server_error(0),
|
||||
client_error(0)
|
||||
{
|
||||
if (!enabled_)
|
||||
return;
|
||||
|
||||
size_t pos = query.find("--");
|
||||
if (pos != String::npos && query.find("--", pos + 2) != String::npos)
|
||||
return; /// It's not last comment. Hint belongs to commented query.
|
||||
|
||||
if (pos != String::npos)
|
||||
{
|
||||
pos = query.find('{', pos + 2);
|
||||
if (pos != String::npos)
|
||||
{
|
||||
String hint = query.substr(pos + 1);
|
||||
pos = hint.find('}');
|
||||
hint.resize(pos);
|
||||
parse(hint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// @returns true if it's possible to continue without reconnect
|
||||
bool checkActual(int & actual_server_error, int & actual_client_error,
|
||||
bool & got_exception, std::unique_ptr<Exception> & last_exception) const
|
||||
{
|
||||
if (!enabled)
|
||||
return true;
|
||||
|
||||
if (allErrorsExpected(actual_server_error, actual_client_error))
|
||||
{
|
||||
got_exception = false;
|
||||
last_exception.reset();
|
||||
actual_server_error = 0;
|
||||
actual_client_error = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (lostExpectedError(actual_server_error, actual_client_error))
|
||||
{
|
||||
std::cerr << "Success when error expected. It expects server error "
|
||||
<< server_error << ", client error " << client_error << "." << std::endl;
|
||||
got_exception = true;
|
||||
last_exception = std::make_unique<Exception>("Success when error expected", ErrorCodes::LOGICAL_ERROR); /// return error to OS
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int serverError() const { return server_error; }
|
||||
int clientError() const { return client_error; }
|
||||
|
||||
private:
|
||||
bool enabled;
|
||||
int server_error;
|
||||
int client_error;
|
||||
|
||||
void parse(const String & hint)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << hint;
|
||||
while (!ss.eof())
|
||||
{
|
||||
String item;
|
||||
ss >> item;
|
||||
if (item.empty())
|
||||
break;
|
||||
|
||||
if (item == "serverError")
|
||||
ss >> server_error;
|
||||
else if (item == "clientError")
|
||||
ss >> client_error;
|
||||
}
|
||||
}
|
||||
|
||||
bool allErrorsExpected(int actual_server_error, int actual_client_error) const
|
||||
{
|
||||
return (server_error || client_error) && (server_error == actual_server_error) && (client_error == actual_client_error);
|
||||
}
|
||||
|
||||
bool lostExpectedError(int actual_server_error, int actual_client_error) const
|
||||
{
|
||||
return (server_error && !actual_server_error) || (client_error && !actual_client_error);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class Client : public Poco::Util::Application
|
||||
{
|
||||
public:
|
||||
@ -235,6 +142,11 @@ private:
|
||||
std::optional<WriteBufferFromFile> out_file_buf;
|
||||
BlockOutputStreamPtr block_out_stream;
|
||||
|
||||
/// The user could specify special file for server logs (stderr by default)
|
||||
std::unique_ptr<WriteBuffer> out_logs_buf;
|
||||
String server_logs_file;
|
||||
BlockOutputStreamPtr logs_out_stream;
|
||||
|
||||
String home_path;
|
||||
|
||||
String current_profile;
|
||||
@ -408,20 +320,10 @@ private:
|
||||
/// If exception code isn't zero, we should return non-zero return code anyway.
|
||||
return e.code() ? e.code() : -1;
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
std::cerr << "Poco::Exception: " << e.displayText() << std::endl;
|
||||
return ErrorCodes::POCO_EXCEPTION;
|
||||
}
|
||||
catch (const std::exception & e)
|
||||
{
|
||||
std::cerr << "std::exception: " << e.what() << std::endl;
|
||||
return ErrorCodes::STD_EXCEPTION;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << "Unknown exception" << std::endl;
|
||||
return ErrorCodes::UNKNOWN_EXCEPTION;
|
||||
std::cerr << getCurrentExceptionMessage(false) << std::endl;
|
||||
return getCurrentExceptionCode();
|
||||
}
|
||||
}
|
||||
|
||||
@ -469,7 +371,12 @@ private:
|
||||
format_max_block_size = config().getInt("format_max_block_size", context.getSettingsRef().max_block_size);
|
||||
|
||||
insert_format = "Values";
|
||||
insert_format_max_block_size = config().getInt("insert_format_max_block_size", context.getSettingsRef().max_insert_block_size);
|
||||
|
||||
/// Setting value from cmd arg overrides one from config
|
||||
if (context.getSettingsRef().max_insert_block_size.changed)
|
||||
insert_format_max_block_size = context.getSettingsRef().max_insert_block_size;
|
||||
else
|
||||
insert_format_max_block_size = config().getInt("insert_format_max_block_size", context.getSettingsRef().max_insert_block_size);
|
||||
|
||||
if (!is_interactive)
|
||||
{
|
||||
@ -782,6 +689,7 @@ private:
|
||||
{
|
||||
const char * pos = begin;
|
||||
ASTPtr ast = parseQuery(pos, end, true);
|
||||
|
||||
if (!ast)
|
||||
{
|
||||
if (ignore_error)
|
||||
@ -797,7 +705,7 @@ private:
|
||||
return true;
|
||||
}
|
||||
|
||||
ASTInsertQuery * insert = typeid_cast<ASTInsertQuery *>(&*ast);
|
||||
ASTInsertQuery * insert = typeid_cast<ASTInsertQuery *>(ast.get());
|
||||
|
||||
if (insert && insert->data)
|
||||
{
|
||||
@ -990,7 +898,7 @@ private:
|
||||
/// If structure was received (thus, server has not thrown an exception),
|
||||
/// send our data with that structure.
|
||||
sendData(sample);
|
||||
receivePacket();
|
||||
receiveEndOfQuery();
|
||||
}
|
||||
}
|
||||
|
||||
@ -1072,6 +980,11 @@ private:
|
||||
connection->sendData(block);
|
||||
processed_rows += block.rows();
|
||||
|
||||
/// Check if server send Log packet
|
||||
auto packet_type = connection->checkPacket();
|
||||
if (packet_type && *packet_type == Protocol::Server::Log)
|
||||
receiveAndProcessPacket();
|
||||
|
||||
if (!block)
|
||||
break;
|
||||
}
|
||||
@ -1083,18 +996,28 @@ private:
|
||||
/// Flush all buffers.
|
||||
void resetOutput()
|
||||
{
|
||||
block_out_stream = nullptr;
|
||||
block_out_stream.reset();
|
||||
logs_out_stream.reset();
|
||||
|
||||
if (pager_cmd)
|
||||
{
|
||||
pager_cmd->in.close();
|
||||
pager_cmd->wait();
|
||||
}
|
||||
pager_cmd = nullptr;
|
||||
|
||||
if (out_file_buf)
|
||||
{
|
||||
out_file_buf->next();
|
||||
out_file_buf.reset();
|
||||
}
|
||||
|
||||
if (out_logs_buf)
|
||||
{
|
||||
out_logs_buf->next();
|
||||
out_logs_buf.reset();
|
||||
}
|
||||
|
||||
std_out.next();
|
||||
}
|
||||
|
||||
@ -1127,7 +1050,7 @@ private:
|
||||
continue; /// If there is no new data, continue checking whether the query was cancelled after a timeout.
|
||||
}
|
||||
|
||||
if (!receivePacket())
|
||||
if (!receiveAndProcessPacket())
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1138,7 +1061,7 @@ private:
|
||||
|
||||
/// Receive a part of the result, or progress info or an exception and process it.
|
||||
/// Returns true if one should continue receiving packets.
|
||||
bool receivePacket()
|
||||
bool receiveAndProcessPacket()
|
||||
{
|
||||
Connection::Packet packet = connection->receivePacket();
|
||||
|
||||
@ -1169,6 +1092,10 @@ private:
|
||||
last_exception = std::move(packet.exception);
|
||||
return false;
|
||||
|
||||
case Protocol::Server::Log:
|
||||
onLogData(packet.block);
|
||||
return true;
|
||||
|
||||
case Protocol::Server::EndOfStream:
|
||||
onEndOfStream();
|
||||
return false;
|
||||
@ -1182,22 +1109,59 @@ private:
|
||||
/// Receive the block that serves as an example of the structure of table where data will be inserted.
|
||||
bool receiveSampleBlock(Block & out)
|
||||
{
|
||||
Connection::Packet packet = connection->receivePacket();
|
||||
|
||||
switch (packet.type)
|
||||
while (true)
|
||||
{
|
||||
case Protocol::Server::Data:
|
||||
out = packet.block;
|
||||
return true;
|
||||
Connection::Packet packet = connection->receivePacket();
|
||||
|
||||
case Protocol::Server::Exception:
|
||||
onException(*packet.exception);
|
||||
last_exception = std::move(packet.exception);
|
||||
return false;
|
||||
switch (packet.type)
|
||||
{
|
||||
case Protocol::Server::Data:
|
||||
out = packet.block;
|
||||
return true;
|
||||
|
||||
default:
|
||||
throw NetException("Unexpected packet from server (expected Data, got "
|
||||
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
|
||||
case Protocol::Server::Exception:
|
||||
onException(*packet.exception);
|
||||
last_exception = std::move(packet.exception);
|
||||
return false;
|
||||
|
||||
case Protocol::Server::Log:
|
||||
onLogData(packet.block);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw NetException("Unexpected packet from server (expected Data, Exception or Log, got "
|
||||
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Process Log packets, exit when recieve Exception or EndOfStream
|
||||
bool receiveEndOfQuery()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
Connection::Packet packet = connection->receivePacket();
|
||||
|
||||
switch (packet.type)
|
||||
{
|
||||
case Protocol::Server::EndOfStream:
|
||||
onEndOfStream();
|
||||
return true;
|
||||
|
||||
case Protocol::Server::Exception:
|
||||
onException(*packet.exception);
|
||||
last_exception = std::move(packet.exception);
|
||||
return false;
|
||||
|
||||
case Protocol::Server::Log:
|
||||
onLogData(packet.block);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw NetException("Unexpected packet from server (expected Exception, EndOfStream or Log, got "
|
||||
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1253,6 +1217,38 @@ private:
|
||||
}
|
||||
|
||||
|
||||
void initLogsOutputStream()
|
||||
{
|
||||
if (!logs_out_stream)
|
||||
{
|
||||
WriteBuffer * wb = out_logs_buf.get();
|
||||
|
||||
if (!out_logs_buf)
|
||||
{
|
||||
if (server_logs_file.empty())
|
||||
{
|
||||
/// Use stderr by default
|
||||
out_logs_buf = std::make_unique<WriteBufferFromFileDescriptor>(STDERR_FILENO);
|
||||
wb = out_logs_buf.get();
|
||||
}
|
||||
else if (server_logs_file == "-")
|
||||
{
|
||||
/// Use stdout if --server_logs_file=- specified
|
||||
wb = &std_out;
|
||||
}
|
||||
else
|
||||
{
|
||||
out_logs_buf = std::make_unique<WriteBufferFromFile>(server_logs_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT);
|
||||
wb = out_logs_buf.get();
|
||||
}
|
||||
}
|
||||
|
||||
logs_out_stream = std::make_shared<InternalTextLogsRowOutputStream>(*wb);
|
||||
logs_out_stream->writePrefix();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void onData(Block & block)
|
||||
{
|
||||
if (written_progress_chars)
|
||||
@ -1276,6 +1272,14 @@ private:
|
||||
}
|
||||
|
||||
|
||||
void onLogData(Block & block)
|
||||
{
|
||||
initLogsOutputStream();
|
||||
logs_out_stream->write(block);
|
||||
logs_out_stream->flush();
|
||||
}
|
||||
|
||||
|
||||
void onTotals(Block & block)
|
||||
{
|
||||
initBlockOutputStream(block);
|
||||
@ -1436,6 +1440,9 @@ private:
|
||||
if (block_out_stream)
|
||||
block_out_stream->writeSuffix();
|
||||
|
||||
if (logs_out_stream)
|
||||
logs_out_stream->writeSuffix();
|
||||
|
||||
resetOutput();
|
||||
|
||||
if (is_interactive && !written_first_block)
|
||||
@ -1511,7 +1518,9 @@ public:
|
||||
|
||||
ioctl(0, TIOCGWINSZ, &terminal_size);
|
||||
|
||||
unsigned line_length = boost::program_options::options_description::m_default_line_length;
|
||||
namespace po = boost::program_options;
|
||||
|
||||
unsigned line_length = po::options_description::m_default_line_length;
|
||||
unsigned min_description_length = line_length / 2;
|
||||
if (!stdin_is_not_tty)
|
||||
{
|
||||
@ -1519,55 +1528,58 @@ public:
|
||||
min_description_length = std::min(min_description_length, line_length - 2);
|
||||
}
|
||||
|
||||
#define DECLARE_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) (#NAME, boost::program_options::value<std::string> (), DESCRIPTION)
|
||||
#define DECLARE_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) (#NAME, po::value<std::string> (), DESCRIPTION)
|
||||
|
||||
/// Main commandline options related to client functionality and all parameters from Settings.
|
||||
boost::program_options::options_description main_description("Main options", line_length, min_description_length);
|
||||
po::options_description main_description("Main options", line_length, min_description_length);
|
||||
main_description.add_options()
|
||||
("help", "produce help message")
|
||||
("config-file,c", boost::program_options::value<std::string>(), "config-file path")
|
||||
("host,h", boost::program_options::value<std::string>()->default_value("localhost"), "server host")
|
||||
("port", boost::program_options::value<int>()->default_value(9000), "server port")
|
||||
("config-file,c", po::value<std::string>(), "config-file path")
|
||||
("host,h", po::value<std::string>()->default_value("localhost"), "server host")
|
||||
("port", po::value<int>()->default_value(9000), "server port")
|
||||
("secure,s", "secure")
|
||||
("user,u", boost::program_options::value<std::string>()->default_value("default"), "user")
|
||||
("password", boost::program_options::value<std::string>(), "password")
|
||||
("user,u", po::value<std::string>()->default_value("default"), "user")
|
||||
("password", po::value<std::string>(), "password")
|
||||
("ask-password", "ask-password")
|
||||
("query_id", boost::program_options::value<std::string>(), "query_id")
|
||||
("query,q", boost::program_options::value<std::string>(), "query")
|
||||
("database,d", boost::program_options::value<std::string>(), "database")
|
||||
("pager", boost::program_options::value<std::string>(), "pager")
|
||||
("query_id", po::value<std::string>(), "query_id")
|
||||
("query,q", po::value<std::string>(), "query")
|
||||
("database,d", po::value<std::string>(), "database")
|
||||
("pager", po::value<std::string>(), "pager")
|
||||
("multiline,m", "multiline")
|
||||
("multiquery,n", "multiquery")
|
||||
("format,f", po::value<std::string>(), "default output format")
|
||||
("testmode,T", "enable test hints in comments")
|
||||
("ignore-error", "do not stop processing in multiquery mode")
|
||||
("format,f", boost::program_options::value<std::string>(), "default output format")
|
||||
("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command")
|
||||
("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
|
||||
("stacktrace", "print stack traces of exceptions")
|
||||
("progress", "print progress even in non-interactive mode")
|
||||
("version,V", "print version information and exit")
|
||||
("version-clean", "print version in machine-readable format and exit")
|
||||
("echo", "in batch mode, print query before execution")
|
||||
("max_client_network_bandwidth", boost::program_options::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
|
||||
("compression", boost::program_options::value<bool>(), "enable or disable compression")
|
||||
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
|
||||
("compression", po::value<bool>(), "enable or disable compression")
|
||||
("log-level", po::value<std::string>(), "client log level")
|
||||
("server_logs_file", po::value<std::string>(), "put server logs into specified file")
|
||||
APPLY_FOR_SETTINGS(DECLARE_SETTING)
|
||||
;
|
||||
#undef DECLARE_SETTING
|
||||
|
||||
/// Commandline options related to external tables.
|
||||
boost::program_options::options_description external_description("External tables options");
|
||||
po::options_description external_description("External tables options");
|
||||
external_description.add_options()
|
||||
("file", boost::program_options::value<std::string>(), "data file or - for stdin")
|
||||
("name", boost::program_options::value<std::string>()->default_value("_data"), "name of the table")
|
||||
("format", boost::program_options::value<std::string>()->default_value("TabSeparated"), "data format")
|
||||
("structure", boost::program_options::value<std::string>(), "structure")
|
||||
("types", boost::program_options::value<std::string>(), "types")
|
||||
("file", po::value<std::string>(), "data file or - for stdin")
|
||||
("name", po::value<std::string>()->default_value("_data"), "name of the table")
|
||||
("format", po::value<std::string>()->default_value("TabSeparated"), "data format")
|
||||
("structure", po::value<std::string>(), "structure")
|
||||
("types", po::value<std::string>(), "types")
|
||||
;
|
||||
|
||||
/// Parse main commandline options.
|
||||
boost::program_options::parsed_options parsed = boost::program_options::command_line_parser(
|
||||
po::parsed_options parsed = po::command_line_parser(
|
||||
common_arguments.size(), common_arguments.data()).options(main_description).run();
|
||||
boost::program_options::variables_map options;
|
||||
boost::program_options::store(parsed, options);
|
||||
po::variables_map options;
|
||||
po::store(parsed, options);
|
||||
|
||||
if (options.count("version") || options.count("V"))
|
||||
{
|
||||
@ -1575,6 +1587,12 @@ public:
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (options.count("version-clean"))
|
||||
{
|
||||
std::cout << VERSION_STRING;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/// Output of help message.
|
||||
if (options.count("help")
|
||||
|| (options.count("host") && options["host"].as<std::string>() == "elp")) /// If user writes -help instead of --help.
|
||||
@ -1584,14 +1602,17 @@ public:
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (options.count("log-level"))
|
||||
Poco::Logger::root().setLevel(options["log-level"].as<std::string>());
|
||||
|
||||
size_t number_of_external_tables_with_stdin_source = 0;
|
||||
for (size_t i = 0; i < external_tables_arguments.size(); ++i)
|
||||
{
|
||||
/// Parse commandline options related to external tables.
|
||||
boost::program_options::parsed_options parsed = boost::program_options::command_line_parser(
|
||||
po::parsed_options parsed = po::command_line_parser(
|
||||
external_tables_arguments[i].size(), external_tables_arguments[i].data()).options(external_description).run();
|
||||
boost::program_options::variables_map external_options;
|
||||
boost::program_options::store(parsed, external_options);
|
||||
po::variables_map external_options;
|
||||
po::store(parsed, external_options);
|
||||
|
||||
try
|
||||
{
|
||||
@ -1665,6 +1686,8 @@ public:
|
||||
max_client_network_bandwidth = options["max_client_network_bandwidth"].as<int>();
|
||||
if (options.count("compression"))
|
||||
config().setBool("compression", options["compression"].as<bool>());
|
||||
if (options.count("server_logs_file"))
|
||||
server_logs_file = options["server_logs_file"].as<std::string>();
|
||||
}
|
||||
};
|
||||
|
||||
@ -1684,6 +1707,11 @@ int mainEntryClickHouseClient(int argc, char ** argv)
|
||||
std::cerr << "Bad arguments: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return client.run();
|
||||
}
|
||||
|
118
dbms/programs/client/TestHint.h
Normal file
118
dbms/programs/client/TestHint.h
Normal file
@ -0,0 +1,118 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <Core/Types.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/// Checks expected server and client error codes in testmode.
|
||||
/// To enable it add special comment after the query: "-- { serverError 60 }" or "-- { clientError 20 }".
|
||||
class TestHint
|
||||
{
|
||||
public:
|
||||
TestHint(bool enabled_, const String & query)
|
||||
: enabled(enabled_)
|
||||
{
|
||||
if (!enabled_)
|
||||
return;
|
||||
|
||||
/// TODO: This is absolutely wrong. Fragment may be contained inside string literal.
|
||||
size_t pos = query.find("--");
|
||||
|
||||
if (pos != String::npos && query.find("--", pos + 2) != String::npos)
|
||||
return; /// It's not last comment. Hint belongs to commented query. /// TODO Absolutely wrong: there maybe the following comment for the next query.
|
||||
|
||||
if (pos != String::npos)
|
||||
{
|
||||
/// TODO: This is also wrong. Comment may already have ended by line break.
|
||||
pos = query.find('{', pos + 2);
|
||||
|
||||
if (pos != String::npos)
|
||||
{
|
||||
String hint = query.substr(pos + 1);
|
||||
|
||||
/// TODO: And this is wrong for the same reason.
|
||||
pos = hint.find('}');
|
||||
hint.resize(pos);
|
||||
parse(hint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// @returns true if it's possible to continue without reconnect
|
||||
bool checkActual(int & actual_server_error, int & actual_client_error,
|
||||
bool & got_exception, std::unique_ptr<Exception> & last_exception) const
|
||||
{
|
||||
if (!enabled)
|
||||
return true;
|
||||
|
||||
if (allErrorsExpected(actual_server_error, actual_client_error))
|
||||
{
|
||||
got_exception = false;
|
||||
last_exception.reset();
|
||||
actual_server_error = 0;
|
||||
actual_client_error = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (lostExpectedError(actual_server_error, actual_client_error))
|
||||
{
|
||||
std::cerr << "Success when error expected. It expects server error "
|
||||
<< server_error << ", client error " << client_error << "." << std::endl;
|
||||
got_exception = true;
|
||||
last_exception = std::make_unique<Exception>("Success when error expected", ErrorCodes::LOGICAL_ERROR); /// return error to OS
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int serverError() const { return server_error; }
|
||||
int clientError() const { return client_error; }
|
||||
|
||||
private:
|
||||
bool enabled = false;
|
||||
int server_error = 0;
|
||||
int client_error = 0;
|
||||
|
||||
void parse(const String & hint)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << hint;
|
||||
while (!ss.eof())
|
||||
{
|
||||
String item;
|
||||
ss >> item;
|
||||
if (item.empty())
|
||||
break;
|
||||
|
||||
if (item == "serverError")
|
||||
ss >> server_error;
|
||||
else if (item == "clientError")
|
||||
ss >> client_error;
|
||||
}
|
||||
}
|
||||
|
||||
bool allErrorsExpected(int actual_server_error, int actual_client_error) const
|
||||
{
|
||||
return (server_error || client_error) && (server_error == actual_server_error) && (client_error == actual_client_error);
|
||||
}
|
||||
|
||||
bool lostExpectedError(int actual_server_error, int actual_client_error) const
|
||||
{
|
||||
return (server_error && !actual_server_error) || (client_error && !actual_client_error);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -30,6 +30,7 @@
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Client/Connection.h>
|
||||
@ -2143,6 +2144,9 @@ void ClusterCopierApp::mainImpl()
|
||||
context->addDatabase(default_database, std::make_shared<DatabaseMemory>(default_database));
|
||||
context->setCurrentDatabase(default_database);
|
||||
|
||||
/// Initialize query scope just in case.
|
||||
CurrentThread::QueryScope query_scope(*context);
|
||||
|
||||
auto copier = std::make_unique<ClusterCopier>(task_path, host_id, default_database, *context);
|
||||
copier->setSafeMode(is_safe_mode);
|
||||
copier->setCopyFaultProbability(copy_fault_probability);
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <Parsers/IAST.h>
|
||||
#include <common/ErrorHandlers.h>
|
||||
#include <Common/StatusFile.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Functions/registerFunctions.h>
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
#include <TableFunctions/registerTableFunctions.h>
|
||||
@ -270,6 +271,9 @@ void LocalServer::processQueries()
|
||||
context->setCurrentQueryId("");
|
||||
applyCmdSettings(*context);
|
||||
|
||||
/// Use the same query_id (and thread group) for all queries
|
||||
CurrentThread::QueryScope query_scope_holder(*context);
|
||||
|
||||
bool echo_query = config().hasOption("echo") || config().hasOption("verbose");
|
||||
std::exception_ptr exception;
|
||||
|
||||
|
@ -56,6 +56,10 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv);
|
||||
#if ENABLE_CLICKHOUSE_OBFUSCATOR
|
||||
int mainEntryClickHouseObfuscator(int argc, char ** argv);
|
||||
#endif
|
||||
#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
|
||||
int mainEntryClickHouseODBCBridge(int argc, char ** argv);
|
||||
#endif
|
||||
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
int mainEntryClickHouseClang(int argc, char ** argv);
|
||||
@ -101,6 +105,10 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
|
||||
#if ENABLE_CLICKHOUSE_OBFUSCATOR
|
||||
{"obfuscator", mainEntryClickHouseObfuscator},
|
||||
#endif
|
||||
#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
|
||||
{"odbc-bridge", mainEntryClickHouseODBCBridge},
|
||||
#endif
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
{"clang", mainEntryClickHouseClang},
|
||||
{"clang++", mainEntryClickHouseClang},
|
||||
|
31
dbms/programs/odbc-bridge/CMakeLists.txt
Normal file
31
dbms/programs/odbc-bridge/CMakeLists.txt
Normal file
@ -0,0 +1,31 @@
|
||||
add_library (clickhouse-odbc-bridge-lib
|
||||
PingHandler.cpp
|
||||
MainHandler.cpp
|
||||
ColumnInfoHandler.cpp
|
||||
HandlerFactory.cpp
|
||||
ODBCBridge.cpp
|
||||
validateODBCConnectionString.cpp
|
||||
)
|
||||
|
||||
target_link_libraries (clickhouse-odbc-bridge-lib clickhouse_common_io daemon dbms)
|
||||
target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include)
|
||||
|
||||
if (USE_POCO_SQLODBC)
|
||||
target_link_libraries (clickhouse-odbc-bridge-lib ${Poco_SQLODBC_LIBRARY})
|
||||
target_include_directories (clickhouse-odbc-bridge-lib SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQLODBC_INCLUDE_DIRS})
|
||||
endif ()
|
||||
|
||||
if (USE_POCO_DATAODBC)
|
||||
target_link_libraries (clickhouse-odbc-bridge-lib ${Poco_DataODBC_LIBRARY})
|
||||
target_include_directories (clickhouse-odbc-bridge-lib SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_DataODBC_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
add_subdirectory (tests)
|
||||
endif ()
|
||||
|
||||
if (CLICKHOUSE_SPLIT_BINARY)
|
||||
add_executable (clickhouse-odbc-bridge odbc-bridge.cpp)
|
||||
target_link_libraries (clickhouse-odbc-bridge clickhouse-odbc-bridge-lib)
|
||||
endif ()
|
123
dbms/programs/odbc-bridge/ColumnInfoHandler.cpp
Normal file
123
dbms/programs/odbc-bridge/ColumnInfoHandler.cpp
Normal file
@ -0,0 +1,123 @@
|
||||
#include "ColumnInfoHandler.h"
|
||||
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
|
||||
#include <Poco/Data/ODBC/ODBCException.h>
|
||||
#include <Poco/Data/ODBC/SessionImpl.h>
|
||||
#include <Poco/Data/ODBC/Utility.h>
|
||||
#include <Poco/Net/HTTPServerRequest.h>
|
||||
#include <Poco/Net/HTTPServerResponse.h>
|
||||
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <IO/WriteBufferFromHTTPServerResponse.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/HTMLForm.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include "validateODBCConnectionString.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
DataTypePtr getDataType(SQLSMALLINT type)
|
||||
{
|
||||
const auto & factory = DataTypeFactory::instance();
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case SQL_INTEGER:
|
||||
return factory.get("Int32");
|
||||
case SQL_SMALLINT:
|
||||
return factory.get("Int16");
|
||||
case SQL_FLOAT:
|
||||
return factory.get("Float32");
|
||||
case SQL_REAL:
|
||||
return factory.get("Float32");
|
||||
case SQL_DOUBLE:
|
||||
return factory.get("Float64");
|
||||
case SQL_DATETIME:
|
||||
return factory.get("DateTime");
|
||||
case SQL_TYPE_TIMESTAMP:
|
||||
return factory.get("DateTime");
|
||||
case SQL_TYPE_DATE:
|
||||
return factory.get("Date");
|
||||
default:
|
||||
return factory.get("String");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
|
||||
{
|
||||
Poco::Net::HTMLForm params(request, request.stream());
|
||||
LOG_TRACE(log, "Request URI: " + request.getURI());
|
||||
|
||||
auto process_error = [&response, this](const std::string & message) {
|
||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
|
||||
if (!response.sent())
|
||||
response.send() << message << std::endl;
|
||||
LOG_WARNING(log, message);
|
||||
};
|
||||
|
||||
if (!params.has("table"))
|
||||
{
|
||||
process_error("No 'table' param in request URL");
|
||||
return;
|
||||
}
|
||||
if (!params.has("connection_string"))
|
||||
{
|
||||
process_error("No 'connection_string' in request URL");
|
||||
return;
|
||||
}
|
||||
std::string table_name = params.get("table");
|
||||
std::string connection_string = params.get("connection_string");
|
||||
LOG_TRACE(log, "Will fetch info for table '" << table_name << "'");
|
||||
LOG_TRACE(log, "Got connection str '" << connection_string << "'");
|
||||
|
||||
try
|
||||
{
|
||||
Poco::Data::ODBC::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
|
||||
SQLHDBC hdbc = session.dbc().handle();
|
||||
|
||||
SQLHSTMT hstmt = nullptr;
|
||||
|
||||
if (Poco::Data::ODBC::Utility::isError(SQLAllocStmt(hdbc, &hstmt)))
|
||||
throw Poco::Data::ODBC::ODBCException("Could not allocate connection handle.");
|
||||
|
||||
SCOPE_EXIT(SQLFreeStmt(hstmt, SQL_DROP));
|
||||
|
||||
/// TODO Why not do SQLColumns instead?
|
||||
std::string query = "SELECT * FROM " + table_name + " WHERE 1 = 0";
|
||||
if (Poco::Data::ODBC::Utility::isError(Poco::Data::ODBC::SQLPrepare(hstmt, reinterpret_cast<SQLCHAR *>(&query[0]), query.size())))
|
||||
throw Poco::Data::ODBC::DescriptorException(session.dbc());
|
||||
|
||||
if (Poco::Data::ODBC::Utility::isError(SQLExecute(hstmt)))
|
||||
throw Poco::Data::ODBC::StatementException(hstmt);
|
||||
|
||||
SQLSMALLINT cols = 0;
|
||||
if (Poco::Data::ODBC::Utility::isError(SQLNumResultCols(hstmt, &cols)))
|
||||
throw Poco::Data::ODBC::StatementException(hstmt);
|
||||
|
||||
/// TODO cols not checked
|
||||
|
||||
NamesAndTypesList columns;
|
||||
for (SQLSMALLINT ncol = 1; ncol <= cols; ++ncol)
|
||||
{
|
||||
SQLSMALLINT type = 0;
|
||||
/// TODO Why 301?
|
||||
SQLCHAR column_name[301];
|
||||
/// TODO Result is not checked.
|
||||
Poco::Data::ODBC::SQLDescribeCol(hstmt, ncol, column_name, sizeof(column_name), NULL, &type, NULL, NULL, NULL);
|
||||
columns.emplace_back(reinterpret_cast<char *>(column_name), getDataType(type));
|
||||
}
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);
|
||||
writeStringBinary(columns.toString(), out);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
process_error("Error getting columns from ODBC '" + getCurrentExceptionMessage(false) + "'");
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
30
dbms/programs/odbc-bridge/ColumnInfoHandler.h
Normal file
30
dbms/programs/odbc-bridge/ColumnInfoHandler.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
#include <Common/config.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Net/HTTPRequestHandler.h>
|
||||
|
||||
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
|
||||
/** The structure of the table is taken from the query "SELECT * FROM table WHERE 1=0".
|
||||
* TODO: It would be much better to utilize ODBC methods dedicated for columns description.
|
||||
* If there is no such table, an exception is thrown.
|
||||
*/
|
||||
namespace DB
|
||||
{
|
||||
class ODBCColumnsInfoHandler : public Poco::Net::HTTPRequestHandler
|
||||
{
|
||||
public:
|
||||
ODBCColumnsInfoHandler(size_t keep_alive_timeout_, std::shared_ptr<Context> context_)
|
||||
: log(&Poco::Logger::get("ODBCColumnsInfoHandler")), keep_alive_timeout(keep_alive_timeout_), context(context_)
|
||||
{
|
||||
}
|
||||
|
||||
void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
size_t keep_alive_timeout;
|
||||
std::shared_ptr<Context> context;
|
||||
};
|
||||
}
|
||||
#endif
|
34
dbms/programs/odbc-bridge/HandlerFactory.cpp
Normal file
34
dbms/programs/odbc-bridge/HandlerFactory.cpp
Normal file
@ -0,0 +1,34 @@
|
||||
#include "HandlerFactory.h"
|
||||
#include "PingHandler.h"
|
||||
#include "ColumnInfoHandler.h"
|
||||
#include <Common/HTMLForm.h>
|
||||
|
||||
#include <Poco/Ext/SessionPoolHelpers.h>
|
||||
#include <Poco/Net/HTTPServerRequest.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
|
||||
{
|
||||
Poco::URI uri{request.getURI()};
|
||||
LOG_TRACE(log, "Request URI: " + uri.toString());
|
||||
|
||||
if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
|
||||
return new PingHandler(keep_alive_timeout);
|
||||
|
||||
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
|
||||
{
|
||||
|
||||
if (uri.getPath() == "/columns_info")
|
||||
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
|
||||
return new ODBCColumnsInfoHandler(keep_alive_timeout, context);
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
else
|
||||
return new ODBCHandler(pool_map, keep_alive_timeout, context);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
}
|
38
dbms/programs/odbc-bridge/HandlerFactory.h
Normal file
38
dbms/programs/odbc-bridge/HandlerFactory.h
Normal file
@ -0,0 +1,38 @@
|
||||
#pragma once
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Net/HTTPRequestHandler.h>
|
||||
#include <Poco/Net/HTTPRequestHandlerFactory.h>
|
||||
#include "MainHandler.h"
|
||||
#include "ColumnInfoHandler.h"
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#include <Poco/Data/SessionPool.h>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Factory for '/ping', '/' and '/columns_info' handlers.
|
||||
* Also stores Session pools for ODBC connections
|
||||
*/
|
||||
class HandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
|
||||
{
|
||||
public:
|
||||
HandlerFactory(const std::string & name_, size_t keep_alive_timeout_, std::shared_ptr<Context> context_)
|
||||
: log(&Poco::Logger::get(name_)), name(name_), keep_alive_timeout(keep_alive_timeout_), context(context_)
|
||||
{
|
||||
pool_map = std::make_shared<ODBCHandler::PoolMap>();
|
||||
}
|
||||
|
||||
Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override;
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
std::string name;
|
||||
size_t keep_alive_timeout;
|
||||
std::shared_ptr<Context> context;
|
||||
std::shared_ptr<ODBCHandler::PoolMap> pool_map;
|
||||
};
|
||||
}
|
126
dbms/programs/odbc-bridge/MainHandler.cpp
Normal file
126
dbms/programs/odbc-bridge/MainHandler.cpp
Normal file
@ -0,0 +1,126 @@
|
||||
#include "MainHandler.h"
|
||||
|
||||
#include "validateODBCConnectionString.h"
|
||||
|
||||
#include <memory>
|
||||
#include <DataStreams/copyData.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Dictionaries/ODBCBlockInputStream.h>
|
||||
#include <Formats/BinaryRowInputStream.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <IO/WriteBufferFromHTTPServerResponse.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Ext/SessionPoolHelpers.h>
|
||||
#include <Poco/Net/HTTPServerRequest.h>
|
||||
#include <Poco/Net/HTTPServerResponse.h>
|
||||
#include <Common/HTMLForm.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
std::unique_ptr<Block> parseColumns(std::string && column_string)
|
||||
{
|
||||
std::unique_ptr<Block> sample_block = std::make_unique<Block>();
|
||||
auto names_and_types = NamesAndTypesList::parse(column_string);
|
||||
for (const NameAndTypePair & column_data : names_and_types)
|
||||
sample_block->insert({column_data.type, column_data.name});
|
||||
return sample_block;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (!pool_map->count(connection_str))
|
||||
{
|
||||
pool_map->emplace(connection_str, createAndCheckResizePocoSessionPool([connection_str] {
|
||||
return std::make_shared<Poco::Data::SessionPool>("ODBC", validateODBCConnectionString(connection_str));
|
||||
}));
|
||||
}
|
||||
return pool_map->at(connection_str);
|
||||
}
|
||||
|
||||
void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
|
||||
{
|
||||
Poco::Net::HTMLForm params(request, request.stream());
|
||||
LOG_TRACE(log, "Request URI: " + request.getURI());
|
||||
|
||||
auto process_error = [&response, this](const std::string & message) {
|
||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
|
||||
if (!response.sent())
|
||||
response.send() << message << std::endl;
|
||||
LOG_WARNING(log, message);
|
||||
};
|
||||
|
||||
if (!params.has("query"))
|
||||
{
|
||||
process_error("No 'query' in request body");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!params.has("columns"))
|
||||
{
|
||||
process_error("No 'columns' in request URL");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!params.has("connection_string"))
|
||||
{
|
||||
process_error("No 'connection_string' in request URL");
|
||||
return;
|
||||
}
|
||||
|
||||
size_t max_block_size = DEFAULT_BLOCK_SIZE;
|
||||
if (params.has("max_block_size"))
|
||||
{
|
||||
std::string max_block_size_str = params.get("max_block_size", "");
|
||||
if (max_block_size_str.empty())
|
||||
{
|
||||
process_error("Empty max_block_size specified");
|
||||
return;
|
||||
}
|
||||
max_block_size = parse<size_t>(max_block_size_str);
|
||||
}
|
||||
|
||||
std::string columns = params.get("columns");
|
||||
std::unique_ptr<Block> sample_block;
|
||||
try
|
||||
{
|
||||
sample_block = parseColumns(std::move(columns));
|
||||
}
|
||||
catch (const Exception & ex)
|
||||
{
|
||||
process_error("Invalid 'columns' parameter in request body '" + ex.message() + "'");
|
||||
LOG_WARNING(log, ex.getStackTrace().toString());
|
||||
return;
|
||||
}
|
||||
|
||||
std::string format = params.get("format", "RowBinary");
|
||||
std::string query = params.get("query");
|
||||
LOG_TRACE(log, "Query: " << query);
|
||||
|
||||
std::string connection_string = params.get("connection_string");
|
||||
LOG_TRACE(log, "Connection string: '" << connection_string << "'");
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);
|
||||
try
|
||||
{
|
||||
BlockOutputStreamPtr writer = FormatFactory::instance().getOutput(format, out, *sample_block, *context);
|
||||
auto pool = getPool(connection_string);
|
||||
ODBCBlockInputStream inp(pool->get(), query, *sample_block, max_block_size);
|
||||
copyData(inp, *writer);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
auto message = getCurrentExceptionMessage(true);
|
||||
response.setStatusAndReason(
|
||||
Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, bacause of too soon response sending
|
||||
writeStringBinary(message, out);
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
}
|
49
dbms/programs/odbc-bridge/MainHandler.h
Normal file
49
dbms/programs/odbc-bridge/MainHandler.h
Normal file
@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Net/HTTPRequestHandler.h>
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#include <Poco/Data/SessionPool.h>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Main handler for requests to ODBC driver
|
||||
* requires connection_string and columns in request params
|
||||
* and also query in request body
|
||||
* response in RowBinary format
|
||||
*/
|
||||
class ODBCHandler : public Poco::Net::HTTPRequestHandler
|
||||
{
|
||||
public:
|
||||
using PoolPtr = std::shared_ptr<Poco::Data::SessionPool>;
|
||||
using PoolMap = std::unordered_map<std::string, PoolPtr>;
|
||||
|
||||
ODBCHandler(std::shared_ptr<PoolMap> pool_map_,
|
||||
size_t keep_alive_timeout_,
|
||||
std::shared_ptr<Context> context_)
|
||||
: log(&Poco::Logger::get("ODBCHandler"))
|
||||
, pool_map(pool_map_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, context(context_)
|
||||
{
|
||||
}
|
||||
|
||||
void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
|
||||
std::shared_ptr<PoolMap> pool_map;
|
||||
size_t keep_alive_timeout;
|
||||
std::shared_ptr<Context> context;
|
||||
|
||||
static inline std::mutex mutex;
|
||||
|
||||
PoolPtr getPool(const std::string & connection_str);
|
||||
};
|
||||
|
||||
}
|
205
dbms/programs/odbc-bridge/ODBCBridge.cpp
Normal file
205
dbms/programs/odbc-bridge/ODBCBridge.cpp
Normal file
@ -0,0 +1,205 @@
|
||||
#include "ODBCBridge.h"
|
||||
#include "HandlerFactory.h"
|
||||
|
||||
#include <string>
|
||||
#include <errno.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <Poco/Net/HTTPServer.h>
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Poco/String.h>
|
||||
#include <Poco/Util/HelpFormatter.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/config.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port, Poco::Logger * log)
|
||||
{
|
||||
Poco::Net::SocketAddress socket_address;
|
||||
try
|
||||
{
|
||||
socket_address = Poco::Net::SocketAddress(host, port);
|
||||
}
|
||||
catch (const Poco::Net::DNSException & e)
|
||||
{
|
||||
const auto code = e.code();
|
||||
if (code == EAI_FAMILY
|
||||
#if defined(EAI_ADDRFAMILY)
|
||||
|| code == EAI_ADDRFAMILY
|
||||
#endif
|
||||
)
|
||||
{
|
||||
LOG_ERROR(log,
|
||||
"Cannot resolve listen_host (" << host << "), error " << e.code() << ": " << e.message()
|
||||
<< ". "
|
||||
"If it is an IPv6 address and your host has disabled IPv6, then consider to "
|
||||
"specify IPv4 address to listen in <listen_host> element of configuration "
|
||||
"file. Example: <listen_host>0.0.0.0</listen_host>");
|
||||
}
|
||||
|
||||
throw;
|
||||
}
|
||||
return socket_address;
|
||||
}
|
||||
|
||||
Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, Poco::Logger * log)
|
||||
{
|
||||
auto address = makeSocketAddress(host, port, log);
|
||||
#if POCO_VERSION < 0x01080000
|
||||
socket.bind(address, /* reuseAddress = */ true);
|
||||
#else
|
||||
socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ false);
|
||||
#endif
|
||||
|
||||
socket.listen(/* backlog = */ 64);
|
||||
|
||||
return address;
|
||||
};
|
||||
}
|
||||
|
||||
void ODBCBridge::handleHelp(const std::string &, const std::string &)
|
||||
{
|
||||
Poco::Util::HelpFormatter helpFormatter(options());
|
||||
helpFormatter.setCommand(commandName());
|
||||
helpFormatter.setHeader("HTTP-proxy for odbc requests");
|
||||
helpFormatter.setUsage("--http-port <port>");
|
||||
helpFormatter.format(std::cerr);
|
||||
|
||||
stopOptionsProcessing();
|
||||
}
|
||||
|
||||
|
||||
void ODBCBridge::defineOptions(Poco::Util::OptionSet & options)
|
||||
{
|
||||
options.addOption(Poco::Util::Option("http-port", "", "port to listen").argument("http-port", true).binding("http-port"));
|
||||
options.addOption(
|
||||
Poco::Util::Option("listen-host", "", "hostname to listen, default localhost").argument("listen-host").binding("listen-host"));
|
||||
options.addOption(
|
||||
Poco::Util::Option("http-timeout", "", "http timout for socket, default 1800").argument("http-timeout").binding("http-timeout"));
|
||||
|
||||
options.addOption(Poco::Util::Option("max-server-connections", "", "max connections to server, default 1024")
|
||||
.argument("max-server-connections")
|
||||
.binding("max-server-connections"));
|
||||
options.addOption(Poco::Util::Option("keep-alive-timeout", "", "keepalive timeout, default 10")
|
||||
.argument("keep-alive-timeout")
|
||||
.binding("keep-alive-timeout"));
|
||||
|
||||
options.addOption(Poco::Util::Option("log-level", "", "sets log level, default info").argument("log-level").binding("logger.level"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("log-path", "", "log path for all logs, default console").argument("log-path").binding("logger.log"));
|
||||
|
||||
options.addOption(Poco::Util::Option("err-log-path", "", "err log path for all logs, default no")
|
||||
.argument("err-log-path")
|
||||
.binding("logger.errorlog"));
|
||||
|
||||
using Me = std::decay_t<decltype(*this)>;
|
||||
options.addOption(Poco::Util::Option("help", "", "produce this help message")
|
||||
.binding("help")
|
||||
.callback(Poco::Util::OptionCallback<Me>(this, &Me::handleHelp)));
|
||||
|
||||
ServerApplication::defineOptions(options); /// Don't need complex BaseDaemon's .xml config
|
||||
}
|
||||
|
||||
void ODBCBridge::initialize(Application & self)
|
||||
{
|
||||
BaseDaemon::closeFDs();
|
||||
is_help = config().has("help");
|
||||
|
||||
if (is_help)
|
||||
return;
|
||||
|
||||
if (!config().has("logger.log"))
|
||||
config().setBool("logger.console", true);
|
||||
|
||||
config().setString("logger", "ODBCBridge");
|
||||
|
||||
buildLoggers(config());
|
||||
log = &logger();
|
||||
hostname = config().getString("listen-host", "localhost");
|
||||
port = config().getUInt("http-port");
|
||||
if (port > 0xFFFF)
|
||||
throw Exception("Out of range 'http-port': " + std::to_string(port), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
http_timeout = config().getUInt("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT);
|
||||
max_server_connections = config().getUInt("max-server-connections", 1024);
|
||||
keep_alive_timeout = config().getUInt("keep-alive-timeout", 10);
|
||||
|
||||
initializeTerminationAndSignalProcessing();
|
||||
|
||||
ServerApplication::initialize(self);
|
||||
}
|
||||
|
||||
void ODBCBridge::uninitialize()
|
||||
{
|
||||
BaseDaemon::uninitialize();
|
||||
}
|
||||
|
||||
int ODBCBridge::main(const std::vector<std::string> & /*args*/)
|
||||
{
|
||||
if (is_help)
|
||||
return Application::EXIT_OK;
|
||||
|
||||
LOG_INFO(log, "Starting up");
|
||||
Poco::Net::ServerSocket socket;
|
||||
auto address = socketBindListen(socket, hostname, port, log);
|
||||
socket.setReceiveTimeout(http_timeout);
|
||||
socket.setSendTimeout(http_timeout);
|
||||
Poco::ThreadPool server_pool(3, max_server_connections);
|
||||
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
|
||||
http_params->setTimeout(http_timeout);
|
||||
http_params->setKeepAliveTimeout(keep_alive_timeout);
|
||||
|
||||
context = std::make_shared<Context>(Context::createGlobal());
|
||||
context->setGlobalContext(*context);
|
||||
|
||||
auto server = Poco::Net::HTTPServer(
|
||||
new HandlerFactory("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), server_pool, socket, http_params);
|
||||
server.start();
|
||||
|
||||
LOG_INFO(log, "Listening http://" + address.toString());
|
||||
|
||||
SCOPE_EXIT({
|
||||
LOG_DEBUG(log, "Received termination signal.");
|
||||
LOG_DEBUG(log, "Waiting for current connections to close.");
|
||||
server.stop();
|
||||
for (size_t count : ext::range(1, 6))
|
||||
{
|
||||
if (server.currentConnections() == 0)
|
||||
break;
|
||||
LOG_DEBUG(log, "Waiting for " << server.currentConnections() << " connections, try " << count);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
|
||||
}
|
||||
});
|
||||
|
||||
waitForTerminationRequest();
|
||||
return Application::EXIT_OK;
|
||||
}
|
||||
}
|
||||
|
||||
int mainEntryClickHouseODBCBridge(int argc, char ** argv)
|
||||
{
|
||||
DB::ODBCBridge app;
|
||||
try
|
||||
{
|
||||
return app.run(argc, argv);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
|
||||
auto code = DB::getCurrentExceptionCode();
|
||||
return code ? code : 1;
|
||||
}
|
||||
}
|
41
dbms/programs/odbc-bridge/ODBCBridge.h
Normal file
41
dbms/programs/odbc-bridge/ODBCBridge.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <daemon/BaseDaemon.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Class represents clickhouse-odbc-bridge server, which listen
|
||||
* incoming HTTP POST and GET requests on specified port and host.
|
||||
* Has two handlers '/' for all incoming POST requests to ODBC driver
|
||||
* and /ping for GET request about service status
|
||||
*/
|
||||
class ODBCBridge : public BaseDaemon
|
||||
{
|
||||
public:
|
||||
void defineOptions(Poco::Util::OptionSet & options) override;
|
||||
|
||||
protected:
|
||||
void initialize(Application & self) override;
|
||||
|
||||
void uninitialize() override;
|
||||
|
||||
int main(const std::vector<std::string> & args) override;
|
||||
|
||||
private:
|
||||
void handleHelp(const std::string &, const std::string &);
|
||||
|
||||
bool is_help;
|
||||
std::string hostname;
|
||||
size_t port;
|
||||
size_t http_timeout;
|
||||
std::string log_level;
|
||||
size_t max_server_connections;
|
||||
size_t keep_alive_timeout;
|
||||
|
||||
Poco::Logger * log;
|
||||
|
||||
std::shared_ptr<Context> context; /// need for settings only
|
||||
};
|
||||
}
|
22
dbms/programs/odbc-bridge/PingHandler.cpp
Normal file
22
dbms/programs/odbc-bridge/PingHandler.cpp
Normal file
@ -0,0 +1,22 @@
|
||||
#include "PingHandler.h"
|
||||
#include <Poco/Net/HTTPServerRequest.h>
|
||||
#include <Poco/Net/HTTPServerResponse.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <IO/HTTPCommon.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
void PingHandler::handleRequest(Poco::Net::HTTPServerRequest & /*request*/, Poco::Net::HTTPServerResponse & response)
|
||||
{
|
||||
try
|
||||
{
|
||||
setResponseDefaultHeaders(response, keep_alive_timeout);
|
||||
const char * data = "Ok.\n";
|
||||
response.sendBuffer(data, strlen(data));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("PingHandler");
|
||||
}
|
||||
}
|
||||
}
|
17
dbms/programs/odbc-bridge/PingHandler.h
Normal file
17
dbms/programs/odbc-bridge/PingHandler.h
Normal file
@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
#include <Poco/Net/HTTPRequestHandler.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Simple ping handler, answers "Ok." to GET request
|
||||
*/
|
||||
class PingHandler : public Poco::Net::HTTPRequestHandler
|
||||
{
|
||||
public:
|
||||
PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
|
||||
void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
|
||||
|
||||
private:
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
}
|
38
dbms/programs/odbc-bridge/README.md
Normal file
38
dbms/programs/odbc-bridge/README.md
Normal file
@ -0,0 +1,38 @@
|
||||
# clickhouse-odbc-bridge
|
||||
|
||||
Simple HTTP-server which works like a proxy for ODBC driver. The main motivation
|
||||
was possible segfaults or another faults in ODBC implementations, which can
|
||||
crash whole clickhouse-server process.
|
||||
|
||||
This tool works via HTTP, not via pipes, shared memory, or TCP because:
|
||||
- It's simplier to implement
|
||||
- It's simplier to debug
|
||||
- jdbc-bridge can be implemented in the same way
|
||||
|
||||
## Usage
|
||||
|
||||
`clickhouse-server` use this tool inside odbc table function and StorageODBC.
|
||||
However it can be used as standalone tool from command line with the following
|
||||
parameters in POST-request URL:
|
||||
- `connection_string` -- ODBC connection string.
|
||||
- `columns` -- columns in ClickHouse NamesAndTypesList format, name in backticks,
|
||||
type as string. Name and type are space separated, rows separated with
|
||||
newline.
|
||||
- `max_block_size` -- optional parameter, sets maximum size of single block.
|
||||
Query is send in post body. Response is returned in RowBinary format.
|
||||
|
||||
## Example:
|
||||
|
||||
```bash
|
||||
$ clickhouse-odbc-bridge --http-port 9018 --daemon
|
||||
|
||||
$ curl -d "query=SELECT PageID, ImpID, AdType FROM Keys ORDER BY PageID, ImpID" --data-urlencode "connection_string=DSN=ClickHouse;DATABASE=stat" --data-urlencode "columns=columns format version: 1
|
||||
3 columns:
|
||||
\`PageID\` String
|
||||
\`ImpID\` String
|
||||
\`AdType\` String
|
||||
" "http://localhost:9018/" > result.txt
|
||||
|
||||
$ cat result.txt
|
||||
12246623837185725195925621517
|
||||
```
|
2
dbms/programs/odbc-bridge/odbc-bridge.cpp
Normal file
2
dbms/programs/odbc-bridge/odbc-bridge.cpp
Normal file
@ -0,0 +1,2 @@
|
||||
int mainEntryClickHouseODBCBridge(int argc, char ** argv);
|
||||
int main(int argc_, char ** argv_) { return mainEntryClickHouseODBCBridge(argc_, argv_); }
|
2
dbms/programs/odbc-bridge/tests/CMakeLists.txt
Normal file
2
dbms/programs/odbc-bridge/tests/CMakeLists.txt
Normal file
@ -0,0 +1,2 @@
|
||||
add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp)
|
||||
target_link_libraries (validate-odbc-connection-string clickhouse-odbc-bridge-lib)
|
@ -1,6 +1,6 @@
|
||||
#include <iostream>
|
||||
#include <Common/Exception.h>
|
||||
#include <Dictionaries/validateODBCConnectionString.h>
|
||||
#include "../validateODBCConnectionString.h"
|
||||
|
||||
|
||||
using namespace DB;
|
@ -5,7 +5,7 @@
|
||||
#include <common/find_first_symbols.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Dictionaries/validateODBCConnectionString.h>
|
||||
#include "validateODBCConnectionString.h"
|
||||
|
||||
|
||||
namespace DB
|
@ -9,10 +9,11 @@
|
||||
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
#include <Common/ExternalTable.h>
|
||||
#include <Core/ExternalTable.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/getFQDNOrHostName.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <IO/ReadBufferFromIStream.h>
|
||||
#include <IO/ZlibInflatingReadBuffer.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
@ -208,6 +209,13 @@ void HTTPHandler::processQuery(
|
||||
Poco::Net::HTTPServerResponse & response,
|
||||
Output & used_output)
|
||||
{
|
||||
Context context = server.context();
|
||||
context.setGlobalContext(server.context());
|
||||
|
||||
/// It will forcibly detach query even if unexpected error ocurred and detachQuery() was not called
|
||||
/// Normal detaching is happen in BlockIO callbacks
|
||||
CurrentThread::QueryScope query_scope_holder(context);
|
||||
|
||||
LOG_TRACE(log, "Request URI: " << request.getURI());
|
||||
|
||||
std::istream & istr = request.stream();
|
||||
@ -257,14 +265,9 @@ void HTTPHandler::processQuery(
|
||||
}
|
||||
|
||||
std::string query_id = params.get("query_id", "");
|
||||
|
||||
const auto & config = server.config();
|
||||
|
||||
Context context = server.context();
|
||||
context.setGlobalContext(server.context());
|
||||
|
||||
context.setUser(user, password, request.clientAddress(), quota_key);
|
||||
context.setCurrentQueryId(query_id);
|
||||
CurrentThread::attachQueryContext(context);
|
||||
|
||||
/// The user could specify session identifier and session timeout.
|
||||
/// It allows to modify settings, create temporary tables and reuse them in subsequent requests.
|
||||
@ -273,6 +276,7 @@ void HTTPHandler::processQuery(
|
||||
String session_id;
|
||||
std::chrono::steady_clock::duration session_timeout;
|
||||
bool session_is_set = params.has("session_id");
|
||||
const auto & config = server.config();
|
||||
|
||||
if (session_is_set)
|
||||
{
|
||||
@ -421,34 +425,45 @@ void HTTPHandler::processQuery(
|
||||
|
||||
std::unique_ptr<ReadBuffer> in;
|
||||
|
||||
// Used in case of POST request with form-data, but it not to be expectd to be deleted after that scope
|
||||
static const NameSet reserved_param_names{"query", "compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace",
|
||||
"buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check"};
|
||||
|
||||
Names reserved_param_suffixes;
|
||||
|
||||
auto param_could_be_skipped = [&] (const String & name)
|
||||
{
|
||||
if (reserved_param_names.count(name))
|
||||
return true;
|
||||
|
||||
for (const String & suffix : reserved_param_suffixes)
|
||||
{
|
||||
if (endsWith(name, suffix))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
/// Used in case of POST request with form-data, but it isn't expected to be deleted after that scope.
|
||||
std::string full_query;
|
||||
|
||||
/// Support for "external data for query processing".
|
||||
if (startsWith(request.getContentType().data(), "multipart/form-data"))
|
||||
{
|
||||
ExternalTablesHandler handler(context, params);
|
||||
|
||||
/// Params are of both form params POST and uri (GET params)
|
||||
params.load(request, istr, handler);
|
||||
|
||||
for (const auto & it : params)
|
||||
{
|
||||
if (it.first == "query")
|
||||
{
|
||||
full_query += it.second;
|
||||
}
|
||||
}
|
||||
in = std::make_unique<ReadBufferFromString>(full_query);
|
||||
/// Skip unneeded parameters to avoid confusing them later with context settings or query parameters.
|
||||
reserved_param_suffixes.emplace_back("_format");
|
||||
reserved_param_suffixes.emplace_back("_types");
|
||||
reserved_param_suffixes.emplace_back("_structure");
|
||||
|
||||
/// Erase unneeded parameters to avoid confusing them later with context settings or query
|
||||
/// parameters.
|
||||
for (const auto & it : handler.names)
|
||||
{
|
||||
params.erase(it + "_format");
|
||||
params.erase(it + "_types");
|
||||
params.erase(it + "_structure");
|
||||
}
|
||||
/// Params are of both form params POST and uri (GET params)
|
||||
for (const auto & it : params)
|
||||
if (it.first == "query")
|
||||
full_query += it.second;
|
||||
|
||||
in = std::make_unique<ReadBufferFromString>(full_query);
|
||||
}
|
||||
else
|
||||
in = std::make_unique<ConcatReadBuffer>(*in_param, *in_post_maybe_compressed);
|
||||
@ -475,11 +490,6 @@ void HTTPHandler::processQuery(
|
||||
|
||||
auto readonly_before_query = settings.readonly;
|
||||
|
||||
NameSet reserved_param_names{"query", "compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace",
|
||||
"buffer_size", "wait_end_of_query",
|
||||
"session_id", "session_timeout", "session_check"
|
||||
};
|
||||
|
||||
for (auto it = params.begin(); it != params.end(); ++it)
|
||||
{
|
||||
if (it->first == "database")
|
||||
@ -490,7 +500,7 @@ void HTTPHandler::processQuery(
|
||||
{
|
||||
context.setDefaultFormat(it->second);
|
||||
}
|
||||
else if (reserved_param_names.find(it->first) != reserved_param_names.end())
|
||||
else if (param_could_be_skipped(it->first))
|
||||
{
|
||||
}
|
||||
else
|
||||
|
@ -81,7 +81,7 @@ void MetricsTransmitter::transmit(std::vector<ProfileEvents::Count> & prev_count
|
||||
{
|
||||
for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
|
||||
{
|
||||
const auto counter = ProfileEvents::counters[i].load(std::memory_order_relaxed);
|
||||
const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed);
|
||||
const auto counter_increment = counter - prev_counters[i];
|
||||
prev_counters[i] = counter;
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <Common/getFQDNOrHostName.h>
|
||||
#include <Common/getMultipleKeysFromConfig.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/TaskStatsInfoGetter.h>
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <Interpreters/AsynchronousMetrics.h>
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
@ -365,6 +366,13 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
dns_cache_updater = std::make_unique<DNSCacheUpdater>(*global_context);
|
||||
}
|
||||
|
||||
if (!TaskStatsInfoGetter::checkProcessHasRequiredPermissions())
|
||||
{
|
||||
LOG_INFO(log, "It looks like the process has not CAP_NET_ADMIN capability, some performance statistics will be disabled."
|
||||
" It could happen due to incorrect clickhouse package installation."
|
||||
" You could resolve the problem manually calling 'sudo setcap cap_net_admin=+ep /usr/bin/clickhouse'");
|
||||
}
|
||||
|
||||
{
|
||||
Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);
|
||||
|
||||
|
@ -1,7 +1,15 @@
|
||||
#include "TCPHandler.h"
|
||||
|
||||
#include <iomanip>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <daemon/OwnSplitChannel.h>
|
||||
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/NetException.h>
|
||||
#include <Common/config_version.h>
|
||||
#include <IO/Progress.h>
|
||||
#include <IO/CompressedReadBuffer.h>
|
||||
#include <IO/CompressedWriteBuffer.h>
|
||||
@ -15,14 +23,13 @@
|
||||
#include <Interpreters/executeQuery.h>
|
||||
#include <Interpreters/Quota.h>
|
||||
#include <Interpreters/TablesStatus.h>
|
||||
#include <Interpreters/InternalTextLogsQueue.h>
|
||||
#include <Storages/StorageMemory.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/ExternalTable.h>
|
||||
#include <Common/NetException.h>
|
||||
#include <Common/config_version.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <Core/ExternalTable.h>
|
||||
|
||||
#include "TCPHandler.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -140,13 +147,29 @@ void TCPHandler::runImpl()
|
||||
if (!receivePacket())
|
||||
continue;
|
||||
|
||||
/// Get blocks of temporary tables
|
||||
readData(global_settings);
|
||||
CurrentThread::initializeQuery();
|
||||
|
||||
/// Reset the input stream, as we received an empty block while receiving external table data.
|
||||
/// So, the stream has been marked as cancelled and we can't read from it anymore.
|
||||
state.block_in.reset();
|
||||
state.maybe_compressed_in.reset(); /// For more accurate accounting by MemoryTracker.
|
||||
/// Should we send internal logs to client?
|
||||
if (client_revision >= DBMS_MIN_REVISION_WITH_SERVER_LOGS
|
||||
&& query_context.getSettingsRef().send_logs_level.value != "none")
|
||||
{
|
||||
state.logs_queue = std::make_shared<InternalTextLogsQueue>();
|
||||
state.logs_queue->max_priority = Poco::Logger::parseLevel(query_context.getSettingsRef().send_logs_level.value);
|
||||
CurrentThread::attachInternalTextLogsQueue(state.logs_queue);
|
||||
}
|
||||
|
||||
query_context.setExternalTablesInitializer([&global_settings, this] (Context & context) {
|
||||
if (&context != &query_context)
|
||||
throw Exception("Unexpected context in external tables initializer", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// Get blocks of temporary tables
|
||||
readData(global_settings);
|
||||
|
||||
/// Reset the input stream, as we received an empty block while receiving external table data.
|
||||
/// So, the stream has been marked as cancelled and we can't read from it anymore.
|
||||
state.block_in.reset();
|
||||
state.maybe_compressed_in.reset(); /// For more accurate accounting by MemoryTracker.
|
||||
});
|
||||
|
||||
/// Processing Query
|
||||
state.io = executeQuery(state.query, query_context, false, state.stage);
|
||||
@ -163,8 +186,9 @@ void TCPHandler::runImpl()
|
||||
else
|
||||
processOrdinaryQuery();
|
||||
|
||||
sendEndOfStream();
|
||||
sendLogs();
|
||||
|
||||
sendEndOfStream();
|
||||
state.reset();
|
||||
}
|
||||
catch (const Exception & e)
|
||||
@ -209,7 +233,20 @@ void TCPHandler::runImpl()
|
||||
try
|
||||
{
|
||||
if (exception)
|
||||
{
|
||||
try
|
||||
{
|
||||
/// Try to send logs to client, but it could be risky too
|
||||
/// Assume that we can't break output here
|
||||
sendLogs();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, "Can't send logs to client");
|
||||
}
|
||||
|
||||
sendException(*exception);
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -220,6 +257,9 @@ void TCPHandler::runImpl()
|
||||
|
||||
try
|
||||
{
|
||||
/// It will forcibly detach query even if unexpected error ocсurred and detachQuery() was not called
|
||||
CurrentThread::detachQueryIfNotDetached();
|
||||
|
||||
state.reset();
|
||||
}
|
||||
catch (...)
|
||||
@ -252,12 +292,14 @@ void TCPHandler::readData(const Settings & global_settings)
|
||||
constexpr size_t min_poll_interval = 5000; // 5 ms
|
||||
size_t poll_interval = std::max(min_poll_interval, std::min(default_poll_interval, current_poll_interval));
|
||||
|
||||
while (1)
|
||||
sendLogs();
|
||||
|
||||
while (true)
|
||||
{
|
||||
Stopwatch watch(CLOCK_MONOTONIC_COARSE);
|
||||
|
||||
/// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down.
|
||||
while (1)
|
||||
while (true)
|
||||
{
|
||||
if (static_cast<ReadBufferFromPocoSocket &>(*in).poll(poll_interval))
|
||||
break;
|
||||
@ -289,6 +331,8 @@ void TCPHandler::readData(const Settings & global_settings)
|
||||
/// We accept and process data. And if they are over, then we leave.
|
||||
if (!receivePacket())
|
||||
break;
|
||||
|
||||
sendLogs();
|
||||
}
|
||||
}
|
||||
|
||||
@ -346,6 +390,8 @@ void TCPHandler::processOrdinaryQuery()
|
||||
sendProgress();
|
||||
}
|
||||
|
||||
sendLogs();
|
||||
|
||||
if (async_in.poll(query_context.getSettingsRef().interactive_delay / 1000))
|
||||
{
|
||||
/// There is the following result block.
|
||||
@ -368,6 +414,7 @@ void TCPHandler::processOrdinaryQuery()
|
||||
sendExtremes();
|
||||
sendProfileInfo();
|
||||
sendProgress();
|
||||
sendLogs();
|
||||
}
|
||||
|
||||
sendData(block);
|
||||
@ -692,11 +739,14 @@ void TCPHandler::initBlockOutput(const Block & block)
|
||||
{
|
||||
if (!state.block_out)
|
||||
{
|
||||
if (state.compression == Protocol::Compression::Enable)
|
||||
state.maybe_compressed_out = std::make_shared<CompressedWriteBuffer>(
|
||||
*out, CompressionSettings(query_context.getSettingsRef()));
|
||||
else
|
||||
state.maybe_compressed_out = out;
|
||||
if (!state.maybe_compressed_out)
|
||||
{
|
||||
if (state.compression == Protocol::Compression::Enable)
|
||||
state.maybe_compressed_out = std::make_shared<CompressedWriteBuffer>(
|
||||
*out, CompressionSettings(query_context.getSettingsRef()));
|
||||
else
|
||||
state.maybe_compressed_out = out;
|
||||
}
|
||||
|
||||
state.block_out = std::make_shared<NativeBlockOutputStream>(
|
||||
*state.maybe_compressed_out,
|
||||
@ -705,6 +755,18 @@ void TCPHandler::initBlockOutput(const Block & block)
|
||||
}
|
||||
}
|
||||
|
||||
void TCPHandler::initLogsBlockOutput(const Block & block)
|
||||
{
|
||||
if (!state.logs_block_out)
|
||||
{
|
||||
/// Use uncompressed stream since log blocks usually contain only one row
|
||||
state.logs_block_out = std::make_shared<NativeBlockOutputStream>(
|
||||
*out,
|
||||
client_revision,
|
||||
block.cloneEmpty());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool TCPHandler::isQueryCancelled()
|
||||
{
|
||||
@ -745,6 +807,7 @@ void TCPHandler::sendData(const Block & block)
|
||||
initBlockOutput(block);
|
||||
|
||||
writeVarUInt(Protocol::Server::Data, *out);
|
||||
/// Send external table name (empty name is the main table)
|
||||
writeStringBinary("", *out);
|
||||
|
||||
state.block_out->write(block);
|
||||
@ -753,6 +816,19 @@ void TCPHandler::sendData(const Block & block)
|
||||
}
|
||||
|
||||
|
||||
void TCPHandler::sendLogData(const Block & block)
|
||||
{
|
||||
initLogsBlockOutput(block);
|
||||
|
||||
writeVarUInt(Protocol::Server::Log, *out);
|
||||
/// Send log tag (empty tag is the default tag)
|
||||
writeStringBinary("", *out);
|
||||
|
||||
state.logs_block_out->write(block);
|
||||
out->next();
|
||||
}
|
||||
|
||||
|
||||
void TCPHandler::sendException(const Exception & e)
|
||||
{
|
||||
writeVarUInt(Protocol::Server::Exception, *out);
|
||||
@ -784,6 +860,37 @@ void TCPHandler::sendProgress()
|
||||
}
|
||||
|
||||
|
||||
void TCPHandler::sendLogs()
|
||||
{
|
||||
if (!state.logs_queue)
|
||||
return;
|
||||
|
||||
MutableColumns logs_columns;
|
||||
MutableColumns curr_logs_columns;
|
||||
size_t rows = 0;
|
||||
|
||||
for (; state.logs_queue->tryPop(curr_logs_columns); ++rows)
|
||||
{
|
||||
if (rows == 0)
|
||||
{
|
||||
logs_columns = std::move(curr_logs_columns);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t j = 0; j < logs_columns.size(); ++j)
|
||||
logs_columns[j]->insertRangeFrom(*curr_logs_columns[j], 0, curr_logs_columns[j]->size());
|
||||
}
|
||||
}
|
||||
|
||||
if (rows > 0)
|
||||
{
|
||||
Block block = InternalTextLogsQueue::getSampleBlock();
|
||||
block.setColumns(std::move(logs_columns));
|
||||
sendLogData(block);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void TCPHandler::run()
|
||||
{
|
||||
try
|
||||
|
@ -5,16 +5,18 @@
|
||||
#include <Common/getFQDNOrHostName.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <IO/Progress.h>
|
||||
#include <Core/Protocol.h>
|
||||
#include <Core/QueryProcessingStage.h>
|
||||
#include <DataStreams/BlockIO.h>
|
||||
#include <IO/Progress.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <DataStreams/BlockIO.h>
|
||||
#include <Interpreters/InternalTextLogsQueue.h>
|
||||
#include <Client/TimeoutSetter.h>
|
||||
|
||||
#include "IServer.h"
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric TCPConnection;
|
||||
@ -63,6 +65,9 @@ struct QueryState
|
||||
/// Timeouts setter for current query
|
||||
std::unique_ptr<TimeoutSetter> timeout_setter;
|
||||
|
||||
/// A queue with internal logs that will be passed to client
|
||||
InternalTextLogsQueuePtr logs_queue;
|
||||
BlockOutputStreamPtr logs_block_out;
|
||||
|
||||
void reset()
|
||||
{
|
||||
@ -140,8 +145,10 @@ private:
|
||||
|
||||
void sendHello();
|
||||
void sendData(const Block & block); /// Write a block to the network.
|
||||
void sendLogData(const Block & block);
|
||||
void sendException(const Exception & e);
|
||||
void sendProgress();
|
||||
void sendLogs();
|
||||
void sendEndOfStream();
|
||||
void sendProfileInfo();
|
||||
void sendTotals();
|
||||
@ -150,6 +157,7 @@ private:
|
||||
/// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled.
|
||||
void initBlockInput();
|
||||
void initBlockOutput(const Block & block);
|
||||
void initLogsBlockOutput(const Block & block);
|
||||
|
||||
bool isQueryCancelled();
|
||||
|
||||
|
1
dbms/programs/server/config.d/listen.xml
Normal file
1
dbms/programs/server/config.d/listen.xml
Normal file
@ -0,0 +1 @@
|
||||
<yandex><listen_host>0.0.0.0</listen_host></yandex>
|
@ -13,6 +13,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
#include <Poco/String.h>
|
||||
#include <DataTypes/DataTypeWithDictionary.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -41,6 +42,20 @@ void AggregateFunctionFactory::registerFunction(const String & name, Creator cre
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
static DataTypes convertTypesWithDictionaryToNested(const DataTypes & types)
|
||||
{
|
||||
DataTypes res_types;
|
||||
res_types.reserve(types.size());
|
||||
for (const auto & type : types)
|
||||
{
|
||||
if (auto * type_with_dict = typeid_cast<const DataTypeWithDictionary *>(type.get()))
|
||||
res_types.push_back(type_with_dict->getDictionaryType());
|
||||
else
|
||||
res_types.push_back(type);
|
||||
}
|
||||
|
||||
return res_types;
|
||||
}
|
||||
|
||||
AggregateFunctionPtr AggregateFunctionFactory::get(
|
||||
const String & name,
|
||||
@ -48,6 +63,8 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
|
||||
const Array & parameters,
|
||||
int recursion_level) const
|
||||
{
|
||||
auto type_without_dictionary = convertTypesWithDictionaryToNested(argument_types);
|
||||
|
||||
/// If one of types is Nullable, we apply aggregate function combinator "Null".
|
||||
|
||||
if (std::any_of(argument_types.begin(), argument_types.end(),
|
||||
@ -57,7 +74,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
|
||||
if (!combinator)
|
||||
throw Exception("Logical error: cannot find aggregate function combinator to apply a function to Nullable arguments.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
DataTypes nested_types = combinator->transformArguments(argument_types);
|
||||
DataTypes nested_types = combinator->transformArguments(type_without_dictionary);
|
||||
|
||||
AggregateFunctionPtr nested_function;
|
||||
|
||||
@ -70,7 +87,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
|
||||
return combinator->transformAggregateFunction(nested_function, argument_types, parameters);
|
||||
}
|
||||
|
||||
auto res = getImpl(name, argument_types, parameters, recursion_level);
|
||||
auto res = getImpl(name, type_without_dictionary, parameters, recursion_level);
|
||||
if (!res)
|
||||
throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR);
|
||||
return res;
|
||||
|
30
dbms/src/AggregateFunctions/AggregateFunctionRetention.cpp
Normal file
30
dbms/src/AggregateFunctions/AggregateFunctionRetention.cpp
Normal file
@ -0,0 +1,30 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionRetention.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionRetention(const std::string & name, const DataTypes & arguments, const Array & params)
|
||||
{
|
||||
assertNoParameters(name, params);
|
||||
|
||||
if (arguments.size() > AggregateFunctionRetentionData::max_events )
|
||||
throw Exception("Too many event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
return std::make_shared<AggregateFunctionRetention>(arguments);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionRetention(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("retention", createAggregateFunctionRetention, AggregateFunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
150
dbms/src/AggregateFunctions/AggregateFunctionRetention.h
Normal file
150
dbms/src/AggregateFunctions/AggregateFunctionRetention.h
Normal file
@ -0,0 +1,150 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <ext/range.h>
|
||||
#include <bitset>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
|
||||
}
|
||||
|
||||
struct AggregateFunctionRetentionData
|
||||
{
|
||||
static constexpr auto max_events = 32;
|
||||
|
||||
using Events = std::bitset<max_events>;
|
||||
|
||||
Events events;
|
||||
|
||||
void add(UInt8 event)
|
||||
{
|
||||
events.set(event);
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionRetentionData & other)
|
||||
{
|
||||
events |= other.events;
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
UInt32 event_value = events.to_ulong();
|
||||
writeBinary(event_value, buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
UInt32 event_value;
|
||||
readBinary(event_value, buf);
|
||||
events = event_value;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* The max size of events is 32, that's enough for retention analytics
|
||||
*
|
||||
* Usage:
|
||||
* - retention(cond1, cond2, cond3, ....)
|
||||
* - returns [cond1_flag, cond1_flag && cond2_flag, cond1_flag && cond3_flag, ...]
|
||||
*/
|
||||
class AggregateFunctionRetention final
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionRetentionData, AggregateFunctionRetention>
|
||||
{
|
||||
private:
|
||||
UInt8 events_size;
|
||||
|
||||
public:
|
||||
String getName() const override
|
||||
{
|
||||
return "retention";
|
||||
}
|
||||
|
||||
AggregateFunctionRetention(const DataTypes & arguments)
|
||||
{
|
||||
for (const auto i : ext::range(0, arguments.size()))
|
||||
{
|
||||
auto cond_arg = arguments[i].get();
|
||||
if (!typeid_cast<const DataTypeUInt8 *>(cond_arg))
|
||||
throw Exception{"Illegal type " + cond_arg->getName() + " of argument " + toString(i) + " of aggregate function "
|
||||
+ getName() + ", must be UInt8",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
events_size = arguments.size();
|
||||
}
|
||||
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt8>());
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
||||
{
|
||||
for (const auto i : ext::range(0, events_size))
|
||||
{
|
||||
auto event = static_cast<const ColumnVector<UInt8> *>(columns[i])->getData()[row_num];
|
||||
if (event)
|
||||
{
|
||||
this->data(place).add(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
this->data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
||||
{
|
||||
this->data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
||||
{
|
||||
auto & data_to = static_cast<ColumnArray &>(to).getData();
|
||||
auto & offsets_to = static_cast<ColumnArray &>(to).getOffsets();
|
||||
|
||||
const bool first_flag = this->data(place).events.test(0);
|
||||
data_to.insert(first_flag ? Field(static_cast<UInt64>(1)) : Field(static_cast<UInt64>(0)));
|
||||
for (const auto i : ext::range(1, events_size))
|
||||
{
|
||||
if (first_flag && this->data(place).events.test(i))
|
||||
data_to.insert(Field(static_cast<UInt64>(1)));
|
||||
else
|
||||
data_to.insert(Field(static_cast<UInt64>(0)));
|
||||
}
|
||||
offsets_to.push_back(offsets_to.size() == 0 ? events_size : offsets_to.back() + events_size);
|
||||
}
|
||||
|
||||
const char * getHeaderFilePath() const override
|
||||
{
|
||||
return __FILE__;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -77,7 +77,7 @@ struct QuantileExact
|
||||
return array[n];
|
||||
}
|
||||
|
||||
return Value();
|
||||
return std::numeric_limits<Value>::quiet_NaN();
|
||||
}
|
||||
|
||||
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
|
||||
|
@ -72,7 +72,7 @@ struct QuantileExactWeighted
|
||||
size_t size = map.size();
|
||||
|
||||
if (0 == size)
|
||||
return Value();
|
||||
return std::numeric_limits<Value>::quiet_NaN();
|
||||
|
||||
/// Copy the data to a temporary array to get the element you need in order.
|
||||
using Pair = typename Map::value_type;
|
||||
|
@ -34,6 +34,7 @@ void registerAggregateFunctionCombinatorMerge(AggregateFunctionCombinatorFactory
|
||||
void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory &);
|
||||
|
||||
void registerAggregateFunctionHistogram(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionRetention(AggregateFunctionFactory & factory);
|
||||
|
||||
void registerAggregateFunctions()
|
||||
{
|
||||
@ -59,6 +60,7 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionsBitwise(factory);
|
||||
registerAggregateFunctionsMaxIntersections(factory);
|
||||
registerAggregateFunctionHistogram(factory);
|
||||
registerAggregateFunctionRetention(factory);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -114,6 +114,7 @@ void Connection::disconnect()
|
||||
//LOG_TRACE(log_wrapper.get(), "Disconnecting");
|
||||
|
||||
in = nullptr;
|
||||
last_input_packet_type.reset();
|
||||
out = nullptr; // can write to socket
|
||||
if (socket)
|
||||
socket->close();
|
||||
@ -379,6 +380,7 @@ void Connection::sendQuery(
|
||||
maybe_compressed_in.reset();
|
||||
maybe_compressed_out.reset();
|
||||
block_in.reset();
|
||||
block_logs_in.reset();
|
||||
block_out.reset();
|
||||
|
||||
/// Send empty block which means end of data.
|
||||
@ -506,20 +508,50 @@ bool Connection::poll(size_t timeout_microseconds)
|
||||
}
|
||||
|
||||
|
||||
bool Connection::hasReadBufferPendingData() const
|
||||
bool Connection::hasReadPendingData() const
|
||||
{
|
||||
return static_cast<const ReadBufferFromPocoSocket &>(*in).hasPendingData();
|
||||
return last_input_packet_type.has_value() || static_cast<const ReadBufferFromPocoSocket &>(*in).hasPendingData();
|
||||
}
|
||||
|
||||
|
||||
std::optional<UInt64> Connection::checkPacket(size_t timeout_microseconds)
|
||||
{
|
||||
if (last_input_packet_type.has_value())
|
||||
return last_input_packet_type;
|
||||
|
||||
if (hasReadPendingData() || poll(timeout_microseconds))
|
||||
{
|
||||
// LOG_TRACE(log_wrapper.get(), "Receiving packet type");
|
||||
UInt64 packet_type;
|
||||
readVarUInt(packet_type, *in);
|
||||
|
||||
last_input_packet_type.emplace(packet_type);
|
||||
return last_input_packet_type;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
Connection::Packet Connection::receivePacket()
|
||||
{
|
||||
//LOG_TRACE(log_wrapper.get(), "Receiving packet");
|
||||
|
||||
try
|
||||
{
|
||||
Packet res;
|
||||
readVarUInt(res.type, *in);
|
||||
|
||||
/// Have we already read packet type?
|
||||
if (last_input_packet_type)
|
||||
{
|
||||
res.type = *last_input_packet_type;
|
||||
last_input_packet_type.reset();
|
||||
}
|
||||
else
|
||||
{
|
||||
//LOG_TRACE(log_wrapper.get(), "Receiving packet type");
|
||||
readVarUInt(res.type, *in);
|
||||
}
|
||||
|
||||
//LOG_TRACE(log_wrapper.get(), "Receiving packet " << res.type << " " << Protocol::Server::toString(res.type));
|
||||
|
||||
switch (res.type)
|
||||
{
|
||||
@ -549,6 +581,10 @@ Connection::Packet Connection::receivePacket()
|
||||
res.block = receiveData();
|
||||
return res;
|
||||
|
||||
case Protocol::Server::Log:
|
||||
res.block = receiveLogData();
|
||||
return res;
|
||||
|
||||
case Protocol::Server::EndOfStream:
|
||||
return res;
|
||||
|
||||
@ -576,14 +612,26 @@ Block Connection::receiveData()
|
||||
//LOG_TRACE(log_wrapper.get(), "Receiving data");
|
||||
|
||||
initBlockInput();
|
||||
return receiveDataImpl(block_in);
|
||||
}
|
||||
|
||||
|
||||
Block Connection::receiveLogData()
|
||||
{
|
||||
initBlockLogsInput();
|
||||
return receiveDataImpl(block_logs_in);
|
||||
}
|
||||
|
||||
|
||||
Block Connection::receiveDataImpl(BlockInputStreamPtr & stream)
|
||||
{
|
||||
String external_table_name;
|
||||
readStringBinary(external_table_name, *in);
|
||||
|
||||
size_t prev_bytes = in->count();
|
||||
|
||||
/// Read one block from network.
|
||||
Block res = block_in->read();
|
||||
Block res = stream->read();
|
||||
|
||||
if (throttler)
|
||||
throttler->add(in->count() - prev_bytes);
|
||||
@ -592,20 +640,39 @@ Block Connection::receiveData()
|
||||
}
|
||||
|
||||
|
||||
void Connection::initInputBuffers()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
|
||||
void Connection::initBlockInput()
|
||||
{
|
||||
if (!block_in)
|
||||
{
|
||||
if (compression == Protocol::Compression::Enable)
|
||||
maybe_compressed_in = std::make_shared<CompressedReadBuffer>(*in);
|
||||
else
|
||||
maybe_compressed_in = in;
|
||||
if (!maybe_compressed_in)
|
||||
{
|
||||
if (compression == Protocol::Compression::Enable)
|
||||
maybe_compressed_in = std::make_shared<CompressedReadBuffer>(*in);
|
||||
else
|
||||
maybe_compressed_in = in;
|
||||
}
|
||||
|
||||
block_in = std::make_shared<NativeBlockInputStream>(*maybe_compressed_in, server_revision);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Connection::initBlockLogsInput()
|
||||
{
|
||||
if (!block_logs_in)
|
||||
{
|
||||
/// Have to return superset of SystemLogsQueue::getSampleBlock() columns
|
||||
block_logs_in = std::make_shared<NativeBlockInputStream>(*in, server_revision);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Connection::setDescription()
|
||||
{
|
||||
auto resolved_address = getResolvedAddress();
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <Interpreters/TablesStatus.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <optional>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -138,7 +139,10 @@ public:
|
||||
bool poll(size_t timeout_microseconds = 0);
|
||||
|
||||
/// Check, if has data in read buffer.
|
||||
bool hasReadBufferPendingData() const;
|
||||
bool hasReadPendingData() const;
|
||||
|
||||
/// Checks if there is input data in connection and reads packet ID.
|
||||
std::optional<UInt64> checkPacket(size_t timeout_microseconds = 0);
|
||||
|
||||
/// Receive packet from server.
|
||||
Packet receivePacket();
|
||||
@ -195,6 +199,7 @@ private:
|
||||
std::unique_ptr<Poco::Net::StreamSocket> socket;
|
||||
std::shared_ptr<ReadBuffer> in;
|
||||
std::shared_ptr<WriteBuffer> out;
|
||||
std::optional<UInt64> last_input_packet_type;
|
||||
|
||||
String query_id;
|
||||
Protocol::Compression compression; /// Enable data compression for communication.
|
||||
@ -214,6 +219,7 @@ private:
|
||||
/// From where to read query execution result.
|
||||
std::shared_ptr<ReadBuffer> maybe_compressed_in;
|
||||
BlockInputStreamPtr block_in;
|
||||
BlockInputStreamPtr block_logs_in;
|
||||
|
||||
/// Where to write data for INSERT.
|
||||
std::shared_ptr<WriteBuffer> maybe_compressed_out;
|
||||
@ -249,11 +255,16 @@ private:
|
||||
bool ping();
|
||||
|
||||
Block receiveData();
|
||||
Block receiveLogData();
|
||||
Block receiveDataImpl(BlockInputStreamPtr & stream);
|
||||
|
||||
std::unique_ptr<Exception> receiveException();
|
||||
Progress receiveProgress();
|
||||
BlockStreamProfileInfo receiveProfileInfo();
|
||||
|
||||
void initInputBuffers();
|
||||
void initBlockInput();
|
||||
void initBlockLogsInput();
|
||||
|
||||
void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const;
|
||||
};
|
||||
|
@ -247,6 +247,7 @@ Connection::Packet MultiplexedConnections::receivePacketUnlocked()
|
||||
case Protocol::Server::ProfileInfo:
|
||||
case Protocol::Server::Totals:
|
||||
case Protocol::Server::Extremes:
|
||||
case Protocol::Server::Log:
|
||||
break;
|
||||
|
||||
case Protocol::Server::EndOfStream:
|
||||
@ -276,7 +277,7 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead
|
||||
for (const ReplicaState & state : replica_states)
|
||||
{
|
||||
Connection * connection = state.connection;
|
||||
if ((connection != nullptr) && connection->hasReadBufferPendingData())
|
||||
if ((connection != nullptr) && connection->hasReadPendingData())
|
||||
read_list.push_back(*connection->socket);
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <IO/WriteBufferFromArena.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -161,6 +162,25 @@ ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limi
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnAggregateFunction::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
return selectIndexImpl(*this, indexes, limit);
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
ColumnPtr ColumnAggregateFunction::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
||||
{
|
||||
auto res = createView();
|
||||
|
||||
res->getData().resize(limit);
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
res->getData()[i] = getData()[indexes[i]];
|
||||
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
INSTANTIATE_INDEX_IMPL(ColumnAggregateFunction);
|
||||
|
||||
/// Is required to support operations with Set
|
||||
void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
{
|
||||
|
@ -156,6 +156,11 @@ public:
|
||||
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
|
||||
template <typename Type>
|
||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
||||
|
||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||
|
||||
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
|
||||
|
@ -626,6 +626,44 @@ ColumnPtr ColumnArray::permute(const Permutation & perm, size_t limit) const
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnArray::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
return selectIndexImpl(*this, indexes, limit);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit) const
|
||||
{
|
||||
if (limit == 0)
|
||||
return ColumnArray::create(data);
|
||||
|
||||
/// Convert indexes to UInt64 in case of overflow.
|
||||
auto nested_indexes_column = ColumnUInt64::create();
|
||||
PaddedPODArray<UInt64> & nested_indexes = nested_indexes_column->getData();
|
||||
nested_indexes.reserve(getOffsets().back());
|
||||
|
||||
auto res = ColumnArray::create(data->cloneEmpty());
|
||||
|
||||
Offsets & res_offsets = res->getOffsets();
|
||||
res_offsets.resize(limit);
|
||||
size_t current_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizeAt(indexes[i]); ++j)
|
||||
nested_indexes.push_back(offsetAt(indexes[i]) + j);
|
||||
current_offset += sizeAt(indexes[i]);
|
||||
res_offsets[i] = current_offset;
|
||||
}
|
||||
|
||||
if (current_offset != 0)
|
||||
res->data = data->index(*nested_indexes_column, current_offset);
|
||||
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
INSTANTIATE_INDEX_IMPL(ColumnArray);
|
||||
|
||||
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
{
|
||||
size_t s = size();
|
||||
|
@ -71,6 +71,8 @@ public:
|
||||
void popBack(size_t n) override;
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
||||
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
void reserve(size_t n) override;
|
||||
|
@ -30,6 +30,11 @@ ColumnPtr ColumnConst::convertToFullColumn() const
|
||||
return data->replicate(Offsets(1, s));
|
||||
}
|
||||
|
||||
ColumnPtr ColumnConst::removeLowCardinality() const
|
||||
{
|
||||
return ColumnConst::create(data->convertToFullColumnIfWithDictionary(), s);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnConst::filter(const Filter & filt, ssize_t /*result_size_hint*/) const
|
||||
{
|
||||
if (s != filt.size())
|
||||
@ -63,6 +68,18 @@ ColumnPtr ColumnConst::permute(const Permutation & perm, size_t limit) const
|
||||
return ColumnConst::create(data, limit);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnConst::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
if (limit == 0)
|
||||
limit = indexes.size();
|
||||
|
||||
if (indexes.size() < limit)
|
||||
throw Exception("Size of indexes (" + toString(indexes.size()) + ") is less than required (" + toString(limit) + ")",
|
||||
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
||||
|
||||
return ColumnConst::create(data, limit);
|
||||
}
|
||||
|
||||
MutableColumns ColumnConst::scatter(ColumnIndex num_columns, const Selector & selector) const
|
||||
{
|
||||
if (s != selector.size())
|
||||
|
@ -36,6 +36,8 @@ public:
|
||||
return convertToFullColumn();
|
||||
}
|
||||
|
||||
ColumnPtr removeLowCardinality() const;
|
||||
|
||||
std::string getName() const override
|
||||
{
|
||||
return "Const(" + data->getName() + ")";
|
||||
@ -153,6 +155,7 @@ public:
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
|
||||
size_t byteSize() const override
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/SipHash.h>
|
||||
@ -258,6 +259,32 @@ ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) con
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
|
||||
ColumnPtr ColumnFixedString::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
return selectIndexImpl(*this, indexes, limit);
|
||||
}
|
||||
|
||||
|
||||
template <typename Type>
|
||||
ColumnPtr ColumnFixedString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
||||
{
|
||||
if (limit == 0)
|
||||
return ColumnFixedString::create(n);
|
||||
|
||||
auto res = ColumnFixedString::create(n);
|
||||
|
||||
Chars_t & res_chars = res->chars;
|
||||
|
||||
res_chars.resize(n * limit);
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < limit; ++i, offset += n)
|
||||
memcpySmallAllowReadWriteOverflow15(&res_chars[offset], &chars[indexes[i] * n], n);
|
||||
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnFixedString::replicate(const Offsets & offsets) const
|
||||
{
|
||||
size_t col_size = size();
|
||||
|
@ -108,6 +108,11 @@ public:
|
||||
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
|
||||
template <typename Type>
|
||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
||||
|
||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||
|
||||
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
|
||||
|
@ -88,6 +88,15 @@ ColumnPtr ColumnFunction::permute(const Permutation & perm, size_t limit) const
|
||||
return ColumnFunction::create(limit, function, capture);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnFunction::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
ColumnsWithTypeAndName capture = captured_columns;
|
||||
for (auto & column : capture)
|
||||
column.column = column.column->index(indexes, limit);
|
||||
|
||||
return ColumnFunction::create(limit, function, capture);
|
||||
}
|
||||
|
||||
std::vector<MutableColumnPtr> ColumnFunction::scatter(IColumn::ColumnIndex num_columns,
|
||||
const IColumn::Selector & selector) const
|
||||
{
|
||||
|
@ -33,6 +33,7 @@ public:
|
||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
void insertDefault() override;
|
||||
void popBack(size_t n) override;
|
||||
std::vector<MutableColumnPtr> scatter(IColumn::ColumnIndex num_columns,
|
||||
|
@ -166,6 +166,13 @@ ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const
|
||||
return ColumnNullable::create(permuted_data, permuted_null_map);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
ColumnPtr indexed_data = getNestedColumn().index(indexes, limit);
|
||||
ColumnPtr indexed_null_map = getNullMapColumn().index(indexes, limit);
|
||||
return ColumnNullable::create(indexed_data, indexed_null_map);
|
||||
}
|
||||
|
||||
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
|
||||
{
|
||||
/// NULL values share the properties of NaN values.
|
||||
|
@ -65,6 +65,7 @@ public:
|
||||
void popBack(size_t n) override;
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
|
||||
void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
|
||||
void reserve(size_t n) override;
|
||||
|
@ -159,6 +159,48 @@ ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
|
||||
}
|
||||
|
||||
|
||||
ColumnPtr ColumnString::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
return selectIndexImpl(*this, indexes, limit);
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
||||
{
|
||||
if (limit == 0)
|
||||
return ColumnString::create();
|
||||
|
||||
auto res = ColumnString::create();
|
||||
|
||||
Chars_t & res_chars = res->chars;
|
||||
Offsets & res_offsets = res->offsets;
|
||||
|
||||
|
||||
size_t new_chars_size = 0;
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
new_chars_size += sizeAt(indexes[i]);
|
||||
res_chars.resize(new_chars_size);
|
||||
|
||||
res_offsets.resize(limit);
|
||||
|
||||
Offset current_new_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
{
|
||||
size_t j = indexes[i];
|
||||
size_t string_offset = j == 0 ? 0 : offsets[j - 1];
|
||||
size_t string_size = offsets[j] - string_offset;
|
||||
|
||||
memcpySmallAllowReadWriteOverflow15(&res_chars[current_new_offset], &chars[string_offset], string_size);
|
||||
|
||||
current_new_offset += string_size;
|
||||
res_offsets[i] = current_new_offset;
|
||||
}
|
||||
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
|
||||
template <bool positive>
|
||||
struct ColumnString::less
|
||||
{
|
||||
|
@ -220,6 +220,11 @@ public:
|
||||
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
|
||||
template <typename Type>
|
||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
||||
|
||||
void insertDefault() override
|
||||
{
|
||||
chars.push_back(0);
|
||||
|
@ -181,6 +181,17 @@ ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
|
||||
return ColumnTuple::create(new_columns);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
|
||||
{
|
||||
const size_t tuple_size = columns.size();
|
||||
Columns new_columns(tuple_size);
|
||||
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
new_columns[i] = columns[i]->index(indexes, limit);
|
||||
|
||||
return ColumnTuple::create(new_columns);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnTuple::replicate(const Offsets & offsets) const
|
||||
{
|
||||
const size_t tuple_size = columns.size();
|
||||
|
@ -60,6 +60,7 @@ public:
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
|
||||
void gather(ColumnGathererStream & gatherer_stream) override;
|
||||
|
512
dbms/src/Columns/ColumnUnique.h
Normal file
512
dbms/src/Columns/ColumnUnique.h
Normal file
@ -0,0 +1,512 @@
|
||||
#pragma once
|
||||
#include <Columns/IColumnUnique.h>
|
||||
#include <Columns/ReverseIndex.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
template <typename ColumnType>
|
||||
class ColumnUnique final : public COWPtrHelper<IColumnUnique, ColumnUnique<ColumnType>>
|
||||
{
|
||||
friend class COWPtrHelper<IColumnUnique, ColumnUnique<ColumnType>>;
|
||||
|
||||
private:
|
||||
explicit ColumnUnique(MutableColumnPtr && holder, bool is_nullable);
|
||||
explicit ColumnUnique(const IDataType & type);
|
||||
ColumnUnique(const ColumnUnique & other);
|
||||
|
||||
public:
|
||||
MutableColumnPtr cloneEmpty() const override;
|
||||
|
||||
const ColumnPtr & getNestedColumn() const override;
|
||||
const ColumnPtr & getNestedNotNullableColumn() const override { return column_holder; }
|
||||
|
||||
size_t uniqueInsert(const Field & x) override;
|
||||
size_t uniqueInsertFrom(const IColumn & src, size_t n) override;
|
||||
MutableColumnPtr uniqueInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
IColumnUnique::IndexesWithOverflow uniqueInsertRangeWithOverflow(const IColumn & src, size_t start, size_t length,
|
||||
size_t max_dictionary_size) override;
|
||||
size_t uniqueInsertData(const char * pos, size_t length) override;
|
||||
size_t uniqueInsertDataWithTerminatingZero(const char * pos, size_t length) override;
|
||||
size_t uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos) override;
|
||||
|
||||
size_t getDefaultValueIndex() const override { return is_nullable ? 1 : 0; }
|
||||
size_t getNullValueIndex() const override;
|
||||
bool canContainNulls() const override { return is_nullable; }
|
||||
|
||||
Field operator[](size_t n) const override { return (*getNestedColumn())[n]; }
|
||||
void get(size_t n, Field & res) const override { getNestedColumn()->get(n, res); }
|
||||
StringRef getDataAt(size_t n) const override { return getNestedColumn()->getDataAt(n); }
|
||||
StringRef getDataAtWithTerminatingZero(size_t n) const override
|
||||
{
|
||||
return getNestedColumn()->getDataAtWithTerminatingZero(n);
|
||||
}
|
||||
UInt64 get64(size_t n) const override { return getNestedColumn()->get64(n); }
|
||||
UInt64 getUInt(size_t n) const override { return getNestedColumn()->getUInt(n); }
|
||||
Int64 getInt(size_t n) const override { return getNestedColumn()->getInt(n); }
|
||||
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
|
||||
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override
|
||||
{
|
||||
return column_holder->serializeValueIntoArena(n, arena, begin);
|
||||
}
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override
|
||||
{
|
||||
return getNestedColumn()->updateHashWithValue(n, hash);
|
||||
}
|
||||
|
||||
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override
|
||||
{
|
||||
auto & column_unique = static_cast<const IColumnUnique&>(rhs);
|
||||
return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint);
|
||||
}
|
||||
|
||||
void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); }
|
||||
bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); }
|
||||
bool isFixedAndContiguous() const override { return column_holder->isFixedAndContiguous(); }
|
||||
size_t sizeOfValueIfFixed() const override { return column_holder->sizeOfValueIfFixed(); }
|
||||
bool isNumeric() const override { return column_holder->isNumeric(); }
|
||||
|
||||
size_t byteSize() const override { return column_holder->byteSize(); }
|
||||
size_t allocatedBytes() const override
|
||||
{
|
||||
return column_holder->allocatedBytes()
|
||||
+ index.allocatedBytes()
|
||||
+ (cached_null_mask ? cached_null_mask->allocatedBytes() : 0);
|
||||
}
|
||||
void forEachSubcolumn(IColumn::ColumnCallback callback) override
|
||||
{
|
||||
callback(column_holder);
|
||||
index.setColumn(getRawColumnPtr());
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
ColumnPtr column_holder;
|
||||
bool is_nullable;
|
||||
ReverseIndex<UInt64, ColumnType> index;
|
||||
|
||||
/// For DataTypeNullable, stores null map.
|
||||
mutable ColumnPtr cached_null_mask;
|
||||
mutable ColumnPtr cached_column_nullable;
|
||||
|
||||
static size_t numSpecialValues(bool is_nullable) { return is_nullable ? 2 : 1; }
|
||||
size_t numSpecialValues() const { return numSpecialValues(is_nullable); }
|
||||
|
||||
ColumnType * getRawColumnPtr() { return static_cast<ColumnType *>(column_holder->assumeMutable().get()); }
|
||||
const ColumnType * getRawColumnPtr() const { return static_cast<const ColumnType *>(column_holder.get()); }
|
||||
|
||||
template <typename IndexType>
|
||||
MutableColumnPtr uniqueInsertRangeImpl(
|
||||
const IColumn & src,
|
||||
size_t start,
|
||||
size_t length,
|
||||
size_t num_added_rows,
|
||||
typename ColumnVector<IndexType>::MutablePtr && positions_column,
|
||||
ReverseIndex<UInt64, ColumnType> * secondary_index,
|
||||
size_t max_dictionary_size);
|
||||
};
|
||||
|
||||
template <typename ColumnType>
|
||||
MutableColumnPtr ColumnUnique<ColumnType>::cloneEmpty() const
|
||||
{
|
||||
return ColumnUnique<ColumnType>::create(column_holder->cloneResized(numSpecialValues()), is_nullable);
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
ColumnUnique<ColumnType>::ColumnUnique(const ColumnUnique & other)
|
||||
: column_holder(other.column_holder)
|
||||
, is_nullable(other.is_nullable)
|
||||
, index(numSpecialValues(is_nullable), 0)
|
||||
{
|
||||
index.setColumn(getRawColumnPtr());
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
ColumnUnique<ColumnType>::ColumnUnique(const IDataType & type)
|
||||
: is_nullable(type.isNullable())
|
||||
, index(numSpecialValues(is_nullable), 0)
|
||||
{
|
||||
const auto & holder_type = is_nullable ? *static_cast<const DataTypeNullable &>(type).getNestedType() : type;
|
||||
column_holder = holder_type.createColumn()->cloneResized(numSpecialValues());
|
||||
index.setColumn(getRawColumnPtr());
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
ColumnUnique<ColumnType>::ColumnUnique(MutableColumnPtr && holder, bool is_nullable)
|
||||
: column_holder(std::move(holder))
|
||||
, is_nullable(is_nullable)
|
||||
, index(numSpecialValues(is_nullable), 0)
|
||||
{
|
||||
if (column_holder->size() < numSpecialValues())
|
||||
throw Exception("Too small holder column for ColumnUnique.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
if (column_holder->isColumnNullable())
|
||||
throw Exception("Holder column for ColumnUnique can't be nullable.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
index.setColumn(getRawColumnPtr());
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
const ColumnPtr & ColumnUnique<ColumnType>::getNestedColumn() const
|
||||
{
|
||||
if (is_nullable)
|
||||
{
|
||||
size_t size = getRawColumnPtr()->size();
|
||||
if (!cached_null_mask)
|
||||
{
|
||||
ColumnUInt8::MutablePtr null_mask = ColumnUInt8::create(size, UInt8(0));
|
||||
null_mask->getData()[getNullValueIndex()] = 1;
|
||||
cached_null_mask = std::move(null_mask);
|
||||
cached_column_nullable = ColumnNullable::create(column_holder, cached_null_mask);
|
||||
}
|
||||
|
||||
if (cached_null_mask->size() != size)
|
||||
{
|
||||
MutableColumnPtr null_mask = (*std::move(cached_null_mask)).mutate();
|
||||
static_cast<ColumnUInt8 &>(*null_mask).getData().resize_fill(size);
|
||||
cached_null_mask = std::move(null_mask);
|
||||
cached_column_nullable = ColumnNullable::create(column_holder, cached_null_mask);
|
||||
}
|
||||
|
||||
return cached_column_nullable;
|
||||
}
|
||||
return column_holder;
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
size_t ColumnUnique<ColumnType>::getNullValueIndex() const
|
||||
{
|
||||
if (!is_nullable)
|
||||
throw Exception("ColumnUnique can't contain null values.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
size_t ColumnUnique<ColumnType>::uniqueInsert(const Field & x)
|
||||
{
|
||||
if (x.getType() == Field::Types::Null)
|
||||
return getNullValueIndex();
|
||||
|
||||
auto column = getRawColumnPtr();
|
||||
auto prev_size = static_cast<UInt64>(column->size());
|
||||
|
||||
if ((*column)[getDefaultValueIndex()] == x)
|
||||
return getDefaultValueIndex();
|
||||
|
||||
column->insert(x);
|
||||
auto pos = index.insert(prev_size);
|
||||
if (pos != prev_size)
|
||||
column->popBack(1);
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
size_t ColumnUnique<ColumnType>::uniqueInsertFrom(const IColumn & src, size_t n)
|
||||
{
|
||||
if (is_nullable && src.isNullAt(n))
|
||||
return getNullValueIndex();
|
||||
|
||||
if (auto * nullable = typeid_cast<const ColumnNullable *>(&src))
|
||||
return uniqueInsertFrom(nullable->getNestedColumn(), n);
|
||||
|
||||
auto ref = src.getDataAt(n);
|
||||
return uniqueInsertData(ref.data, ref.size);
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
size_t ColumnUnique<ColumnType>::uniqueInsertData(const char * pos, size_t length)
|
||||
{
|
||||
auto column = getRawColumnPtr();
|
||||
|
||||
if (column->getDataAt(getDefaultValueIndex()) == StringRef(pos, length))
|
||||
return getDefaultValueIndex();
|
||||
|
||||
UInt64 size = column->size();
|
||||
UInt64 insertion_point = index.getInsertionPoint(StringRef(pos, length));
|
||||
|
||||
if (insertion_point == size)
|
||||
{
|
||||
column->insertData(pos, length);
|
||||
index.insertFromLastRow();
|
||||
}
|
||||
|
||||
return insertion_point;
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
size_t ColumnUnique<ColumnType>::uniqueInsertDataWithTerminatingZero(const char * pos, size_t length)
|
||||
{
|
||||
if (std::is_same<ColumnType, ColumnString>::value)
|
||||
return uniqueInsertData(pos, length - 1);
|
||||
|
||||
if (column_holder->valuesHaveFixedSize())
|
||||
return uniqueInsertData(pos, length);
|
||||
|
||||
/// Don't know if data actually has terminating zero. So, insert it firstly.
|
||||
|
||||
auto column = getRawColumnPtr();
|
||||
size_t prev_size = column->size();
|
||||
column->insertDataWithTerminatingZero(pos, length);
|
||||
|
||||
if (column->compareAt(getDefaultValueIndex(), prev_size, *column, 1) == 0)
|
||||
{
|
||||
column->popBack(1);
|
||||
return getDefaultValueIndex();
|
||||
}
|
||||
|
||||
auto position = index.insert(prev_size);
|
||||
if (position != prev_size)
|
||||
column->popBack(1);
|
||||
|
||||
return static_cast<size_t>(position);
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
size_t ColumnUnique<ColumnType>::uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos)
|
||||
{
|
||||
auto column = getRawColumnPtr();
|
||||
size_t prev_size = column->size();
|
||||
new_pos = column->deserializeAndInsertFromArena(pos);
|
||||
|
||||
if (column->compareAt(getDefaultValueIndex(), prev_size, *column, 1) == 0)
|
||||
{
|
||||
column->popBack(1);
|
||||
return getDefaultValueIndex();
|
||||
}
|
||||
|
||||
auto index_pos = index.insert(prev_size);
|
||||
if (index_pos != prev_size)
|
||||
column->popBack(1);
|
||||
|
||||
return static_cast<size_t>(index_pos);
|
||||
}
|
||||
|
||||
template <typename IndexType>
|
||||
static void checkIndexes(const ColumnVector<IndexType> & indexes, size_t max_dictionary_size)
|
||||
{
|
||||
auto & data = indexes.getData();
|
||||
for (size_t i = 0; i < data.size(); ++i)
|
||||
{
|
||||
if (data[i] >= max_dictionary_size)
|
||||
{
|
||||
throw Exception("Found index " + toString(data[i]) + " at position " + toString(i)
|
||||
+ " which is grated or equal than dictionary size " + toString(max_dictionary_size),
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert src[start .. start + length) into the dictionary, writing the resulting
/// dictionary positions into positions_column.
/// num_added_rows rows at the front of positions_column are already filled (used when
/// the function re-invokes itself with a wider index type).
/// If secondary_index is given, keys that do not fit into max_dictionary_size go there
/// (overflow dictionary). Returns the filled positions column.
template <typename ColumnType>
template <typename IndexType>
MutableColumnPtr ColumnUnique<ColumnType>::uniqueInsertRangeImpl(
    const IColumn & src,
    size_t start,
    size_t length,
    size_t num_added_rows,
    typename ColumnVector<IndexType>::MutablePtr && positions_column,
    ReverseIndex<UInt64, ColumnType> * secondary_index,
    size_t max_dictionary_size)
{
    const ColumnType * src_column;
    const NullMap * null_map = nullptr;
    auto & positions = positions_column->getData();

    /// Advance next_position; if it no longer fits into IndexType, restart the whole
    /// insertion with the next wider index type, copying the positions filled so far.
    /// Returns nullptr when no widening was needed.
    auto update_position = [&](UInt64 & next_position) -> MutableColumnPtr
    {
        constexpr auto next_size = NumberTraits::nextSize(sizeof(IndexType));
        using SuperiorIndexType = typename NumberTraits::Construct<false, false, next_size>::Type;

        ++next_position;

        if (next_position > std::numeric_limits<IndexType>::max())
        {
            if (sizeof(SuperiorIndexType) == sizeof(IndexType))
                throw Exception("Can't find superior index type for type " + demangle(typeid(IndexType).name()),
                                ErrorCodes::LOGICAL_ERROR);

            auto expanded_column = ColumnVector<SuperiorIndexType>::create(length);
            auto & expanded_data = expanded_column->getData();
            /// Carry over positions that were already computed with the narrow type.
            for (size_t i = 0; i < num_added_rows; ++i)
                expanded_data[i] = positions[i];

            /// Tail-recurse with the wider index type; the caller returns this result directly.
            return uniqueInsertRangeImpl<SuperiorIndexType>(
                    src,
                    start,
                    length,
                    num_added_rows,
                    std::move(expanded_column),
                    secondary_index,
                    max_dictionary_size);
        }

        return nullptr;
    };

    /// For nullable input, work on the nested column and remember the null map.
    if (auto nullable_column = typeid_cast<const ColumnNullable *>(&src))
    {
        src_column = typeid_cast<const ColumnType *>(&nullable_column->getNestedColumn());
        null_map = &nullable_column->getNullMapData();
    }
    else
        src_column = typeid_cast<const ColumnType *>(&src);

    if (src_column == nullptr)
        throw Exception("Invalid column type for ColumnUnique::insertRangeFrom. Expected " + column_holder->getName() +
                        ", got " + src.getName(), ErrorCodes::ILLEGAL_COLUMN);

    auto column = getRawColumnPtr();

    /// Next free position spans both dictionaries: primary first, then overflow.
    UInt64 next_position = column->size();
    if (secondary_index)
        next_position += secondary_index->size();

    auto check_inserted_position = [&next_position](UInt64 inserted_position)
    {
        if (inserted_position != next_position)
            throw Exception("Inserted position " + toString(inserted_position)
                + " is not equal with expected " + toString(next_position), ErrorCodes::LOGICAL_ERROR);
    };

    /// Append a new key to cur_index's column, record its position for the current row
    /// and bump next_position (which may trigger index-type widening, see update_position).
    auto insert_key = [&](const StringRef & ref, ReverseIndex<UInt64, ColumnType> * cur_index)
    {
        positions[num_added_rows] = next_position;
        cur_index->getColumn()->insertData(ref.data, ref.size);
        auto inserted_pos = cur_index->insertFromLastRow();
        check_inserted_position(inserted_pos);
        return update_position(next_position);
    };

    for (; num_added_rows < length; ++num_added_rows)
    {
        auto row = start + num_added_rows;

        if (null_map && (*null_map)[row])
            positions[num_added_rows] = getNullValueIndex();
        else if (column->compareAt(getDefaultValueIndex(), row, *src_column, 1) == 0)
            positions[num_added_rows] = getDefaultValueIndex();
        else
        {
            auto ref = src_column->getDataAt(row);
            auto cur_index = &index;
            bool inserted = false;

            while (!inserted)
            {
                auto insertion_point = cur_index->getInsertionPoint(ref);

                /// lastInsertionPoint() apparently signals "key not found" — new key case.
                if (insertion_point == cur_index->lastInsertionPoint())
                {
                    /// Primary dictionary is full: retry the lookup in the overflow index.
                    if (secondary_index && cur_index != secondary_index && next_position >= max_dictionary_size)
                    {
                        cur_index = secondary_index;
                        continue;
                    }

                    /// Non-null result means the index type was widened; propagate it up.
                    if (auto res = insert_key(ref, cur_index))
                        return res;
                }
                else
                    positions[num_added_rows] = insertion_point;

                inserted = true;
            }
        }
    }

    // checkIndexes(*positions_column, column->size() + (overflowed_keys ? overflowed_keys->size() : 0));
    return std::move(positions_column);
}
|
||||
|
||||
template <typename ColumnType>
|
||||
MutableColumnPtr ColumnUnique<ColumnType>::uniqueInsertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
{
|
||||
auto callForType = [this, &src, start, length](auto x) -> MutableColumnPtr
|
||||
{
|
||||
size_t size = getRawColumnPtr()->size();
|
||||
|
||||
using IndexType = decltype(x);
|
||||
if (size <= std::numeric_limits<IndexType>::max())
|
||||
{
|
||||
auto positions = ColumnVector<IndexType>::create(length);
|
||||
return this->uniqueInsertRangeImpl<IndexType>(src, start, length, 0, std::move(positions), nullptr, 0);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
};
|
||||
|
||||
MutableColumnPtr positions_column;
|
||||
if (!positions_column)
|
||||
positions_column = callForType(UInt8());
|
||||
if (!positions_column)
|
||||
positions_column = callForType(UInt16());
|
||||
if (!positions_column)
|
||||
positions_column = callForType(UInt32());
|
||||
if (!positions_column)
|
||||
positions_column = callForType(UInt64());
|
||||
if (!positions_column)
|
||||
throw Exception("Can't find index type for ColumnUnique", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
return positions_column;
|
||||
}
|
||||
|
||||
/// Same as uniqueInsertRangeFrom, but keys that would grow the dictionary beyond
/// max_dictionary_size are diverted into a separate "overflowed keys" column.
/// Returns both the positions column and the overflow keys.
template <typename ColumnType>
IColumnUnique::IndexesWithOverflow ColumnUnique<ColumnType>::uniqueInsertRangeWithOverflow(
    const IColumn & src,
    size_t start,
    size_t length,
    size_t max_dictionary_size)
{
    /// Overflow keys are collected in a fresh empty column of the same concrete type.
    auto overflowed_keys = column_holder->cloneEmpty();
    auto overflowed_keys_ptr = typeid_cast<ColumnType *>(overflowed_keys.get());
    if (!overflowed_keys_ptr)
        throw Exception("Invalid keys type for ColumnUnique.", ErrorCodes::LOGICAL_ERROR);

    /// Try the insertion with a fixed index width; nullptr if the dictionary is too big for it.
    auto callForType = [this, &src, start, length, overflowed_keys_ptr, max_dictionary_size](auto x) -> MutableColumnPtr
    {
        size_t size = getRawColumnPtr()->size();

        using IndexType = decltype(x);
        if (size <= std::numeric_limits<IndexType>::max())
        {
            auto positions = ColumnVector<IndexType>::create(length);
            /// Secondary reverse index is backed by the overflow column.
            ReverseIndex<UInt64, ColumnType> secondary_index(0, max_dictionary_size);
            secondary_index.setColumn(overflowed_keys_ptr);
            return this->uniqueInsertRangeImpl<IndexType>(src, start, length, 0, std::move(positions),
                                                          &secondary_index, max_dictionary_size);
        }

        return nullptr;
    };

    /// Probe index widths from narrowest to widest.
    MutableColumnPtr positions_column;
    if (!positions_column)
        positions_column = callForType(UInt8());
    if (!positions_column)
        positions_column = callForType(UInt16());
    if (!positions_column)
        positions_column = callForType(UInt32());
    if (!positions_column)
        positions_column = callForType(UInt64());
    if (!positions_column)
        throw Exception("Can't find index type for ColumnUnique", ErrorCodes::LOGICAL_ERROR);

    IColumnUnique::IndexesWithOverflow indexes_with_overflow;
    indexes_with_overflow.indexes = std::move(positions_column);
    indexes_with_overflow.overflowed_keys = std::move(overflowed_keys);
    return indexes_with_overflow;
}
|
||||
|
||||
};
|
@ -17,6 +17,8 @@
|
||||
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -116,7 +118,7 @@ MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
|
||||
memcpy(&new_col.data[0], &data[0], count * sizeof(data[0]));
|
||||
|
||||
if (size > count)
|
||||
memset(&new_col.data[count], static_cast<int>(value_type()), (size - count) * sizeof(value_type));
|
||||
memset(static_cast<void *>(&new_col.data[count]), static_cast<int>(value_type()), (size - count) * sizeof(value_type));
|
||||
}
|
||||
|
||||
return std::move(res);
|
||||
@ -230,6 +232,12 @@ ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation & perm, size_t lim
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
/// Gather rows of this column by the given indexes column (see selectIndexImpl);
/// limit == 0 means "take all indexes".
template <typename T>
ColumnPtr ColumnVector<T>::index(const IColumn & indexes, size_t limit) const
{
    return selectIndexImpl(*this, indexes, limit);
}
|
||||
|
||||
template <typename T>
|
||||
ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
|
||||
{
|
||||
@ -319,6 +327,11 @@ template class ColumnVector<Int8>;
|
||||
template class ColumnVector<Int16>;
|
||||
template class ColumnVector<Int32>;
|
||||
template class ColumnVector<Int64>;
|
||||
template class ColumnVector<Int128>;
|
||||
template class ColumnVector<Float32>;
|
||||
template class ColumnVector<Float64>;
|
||||
|
||||
template class ColumnVector<Dec32>;
|
||||
template class ColumnVector<Dec64>;
|
||||
template class ColumnVector<Dec128>;
|
||||
}
|
||||
|
@ -8,6 +8,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/** Stuff for comparing numbers.
|
||||
* Integer values are compared as usual.
|
||||
* Floating-point numbers are compared this way that NaNs always end up at the end
|
||||
@ -117,22 +123,69 @@ template <> inline UInt64 unionCastToUInt64(Float32 x)
|
||||
}
|
||||
|
||||
|
||||
/// PaddedPODArray extended by Decimal scale.
/// The scale travels with the data so that Decimal values can later be turned into
/// fields; it defaults to a sentinel (UInt32 max) meaning "scale unknown / lost".
template <typename T, size_t INITIAL_SIZE = 4096>
class DecPaddedPODArray : public PODArray<T, INITIAL_SIZE, Allocator<false>, sizeof(T)-1>
{
public:
    using Base = PODArray<T, INITIAL_SIZE, Allocator<false>, sizeof(T)-1>;
    using Base::operator[];

    DecPaddedPODArray()
    {}

    DecPaddedPODArray(size_t n)
        : Base(n)
    {}

    DecPaddedPODArray(size_t n, const T & x)
        : Base(n, x)
    {}

    DecPaddedPODArray(typename Base::const_iterator from_begin, typename Base::const_iterator from_end)
        : Base(from_begin, from_end)
    {}

    DecPaddedPODArray(std::initializer_list<T> il)
        : DecPaddedPODArray(std::begin(il), std::end(il))
    {}

    /// Move operations exchange both the underlying storage and the scale.
    DecPaddedPODArray(DecPaddedPODArray && other)
    {
        this->swap(other);
        std::swap(scale, other.scale);
    }

    DecPaddedPODArray & operator= (DecPaddedPODArray && other)
    {
        this->swap(other);
        std::swap(scale, other.scale);
        return *this;
    }

    void setScale(UInt32 s) { scale = s; }
    UInt32 getScale() const { return scale; }

private:
    /// Sentinel: max() means the scale was never set (checked by callers before use).
    UInt32 scale = std::numeric_limits<UInt32>::max();
};
|
||||
|
||||
|
||||
/** A template for columns that use a simple array to store.
|
||||
*/
|
||||
*/
|
||||
template <typename T>
|
||||
class ColumnVector final : public COWPtrHelper<IColumn, ColumnVector<T>>
|
||||
{
|
||||
private:
|
||||
friend class COWPtrHelper<IColumn, ColumnVector<T>>;
|
||||
|
||||
using Self = ColumnVector<T>;
|
||||
friend class COWPtrHelper<IColumn, Self>;
|
||||
|
||||
struct less;
|
||||
struct greater;
|
||||
|
||||
public:
|
||||
using value_type = T;
|
||||
using Container = PaddedPODArray<value_type>;
|
||||
using Container = std::conditional_t<decTrait<T>(), DecPaddedPODArray<value_type>, PaddedPODArray<value_type>>;
|
||||
|
||||
private:
|
||||
ColumnVector() {}
|
||||
@ -216,12 +269,20 @@ public:
|
||||
|
||||
Field operator[](size_t n) const override
|
||||
{
|
||||
return typename NearestFieldType<T>::Type(data[n]);
|
||||
if constexpr (decTrait<T>())
|
||||
{
|
||||
UInt32 scale = data.getScale();
|
||||
if (scale == std::numeric_limits<UInt32>::max())
|
||||
throw Exception("Extracting Decimal field with unknown scale. Scale is lost.", ErrorCodes::LOGICAL_ERROR);
|
||||
return DecField(data[n], scale);
|
||||
}
|
||||
else
|
||||
return typename NearestFieldType<T>::Type(data[n]);
|
||||
}
|
||||
|
||||
void get(size_t n, Field & res) const override
|
||||
{
|
||||
res = typename NearestFieldType<T>::Type(data[n]);
|
||||
res = (*this)[n];
|
||||
}
|
||||
|
||||
UInt64 get64(size_t n) const override;
|
||||
@ -252,6 +313,11 @@ public:
|
||||
|
||||
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
|
||||
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
|
||||
template <typename Type>
|
||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
||||
|
||||
ColumnPtr replicate(const IColumn::Offsets & offsets) const override;
|
||||
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
@ -295,5 +361,23 @@ protected:
|
||||
Container data;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
template <typename Type>
|
||||
ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
||||
{
|
||||
size_t size = indexes.size();
|
||||
|
||||
if (limit == 0)
|
||||
limit = size;
|
||||
else
|
||||
limit = std::min(size, limit);
|
||||
|
||||
auto res = this->create(limit);
|
||||
typename Self::Container & res_data = res->getData();
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
res_data[i] = data[indexes[i]];
|
||||
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
}
|
||||
|
619
dbms/src/Columns/ColumnWithDictionary.cpp
Normal file
619
dbms/src/Columns/ColumnWithDictionary.cpp
Normal file
@ -0,0 +1,619 @@
|
||||
#include <Columns/ColumnWithDictionary.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
PaddedPODArray<T> * getIndexesData(IColumn & indexes)
|
||||
{
|
||||
auto * column = typeid_cast<ColumnVector<T> *>(&indexes);
|
||||
if (column)
|
||||
return &column->getData();
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Renumber `index` so that values are replaced by their order of first appearance,
/// and return the list of distinct original values in that order.
/// Generic (hash-based) fallback used when values may exceed index.size().
template <typename T>
MutableColumnPtr mapUniqueIndexImplRef(PaddedPODArray<T> & index)
{
    /// Keep a copy only for the self-check at the end.
    PaddedPODArray<T> copy(index.cbegin(), index.cend());

    /// Map each distinct value to a dense id in first-appearance order.
    /// Note: hash_map.size() is evaluated before the insert; if the key already
    /// exists, insert() leaves the old id in place.
    HashMap<T, T> hash_map;
    for (auto val : index)
        hash_map.insert({val, hash_map.size()});

    auto res_col = ColumnVector<T>::create();
    auto & data = res_col->getData();

    /// Invert the map: data[dense_id] = original value.
    data.resize(hash_map.size());
    for (auto val : hash_map)
        data[val.second] = val.first;

    /// Rewrite the index in place with dense ids.
    for (auto & ind : index)
        ind = hash_map[ind];

    /// Self-check: composing the new index with the dictionary restores the input.
    for (size_t i = 0; i < index.size(); ++i)
        if (data[index[i]] != copy[i])
            throw Exception("Expected " + toString(data[index[i]]) + ", but got " + toString(copy[i]), ErrorCodes::LOGICAL_ERROR);

    return std::move(res_col);
}
|
||||
|
||||
/// Renumber `index` so that values are replaced by their order of first appearance,
/// and return the list of distinct original values in that order.
/// Fast path using a direct-addressed map of size max_val + 1.
template <typename T>
MutableColumnPtr mapUniqueIndexImpl(PaddedPODArray<T> & index)
{
    if (index.empty())
        return ColumnVector<T>::create();

    auto size = index.size();

    T max_val = index[0];
    for (size_t i = 1; i < size; ++i)
        max_val = std::max(max_val, index[i]);

    /// May happen when dictionary is shared.
    /// Falls back to the hash-based version to avoid an oversized direct map.
    if (max_val > size)
        return mapUniqueIndexImplRef(index);

    auto map_size = UInt64(max_val) + 1;
    /// map[v] == dense id of value v; 0 is ambiguous (unseen vs. id of index[0]),
    /// hence zero_pos_value is tracked separately.
    PaddedPODArray<T> map(map_size, 0);
    T zero_pos_value = index[0];
    index[0] = 0;
    T cur_pos = 0;
    for (size_t i = 1; i < size; ++i)
    {
        T val = index[i];
        if (val != zero_pos_value && map[val] == 0)
        {
            /// First occurrence of a new value: assign the next dense id.
            ++cur_pos;
            map[val] = cur_pos;
        }

        index[i] = map[val];
    }

    /// Invert the map into the dictionary column: data[dense_id] = original value.
    auto res_col = ColumnVector<T>::create(UInt64(cur_pos) + 1);
    auto & data = res_col->getData();
    data[0] = zero_pos_value;
    for (size_t i = 0; i < map_size; ++i)
    {
        auto val = map[i];
        if (val)
            data[val] = static_cast<T>(i);
    }

    return std::move(res_col);
}
|
||||
|
||||
/// Returns unique values of column. Write new index to column.
|
||||
MutableColumnPtr mapUniqueIndex(IColumn & column)
|
||||
{
|
||||
if (auto * data_uint8 = getIndexesData<UInt8>(column))
|
||||
return mapUniqueIndexImpl(*data_uint8);
|
||||
else if (auto * data_uint16 = getIndexesData<UInt16>(column))
|
||||
return mapUniqueIndexImpl(*data_uint16);
|
||||
else if (auto * data_uint32 = getIndexesData<UInt32>(column))
|
||||
return mapUniqueIndexImpl(*data_uint32);
|
||||
else if (auto * data_uint64 = getIndexesData<UInt64>(column))
|
||||
return mapUniqueIndexImpl(*data_uint64);
|
||||
else
|
||||
throw Exception("Indexes column for getUniqueIndex must be ColumnUInt, got" + column.getName(),
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Construct from a dictionary (ColumnUnique) and a column of positions into it.
ColumnWithDictionary::ColumnWithDictionary(MutableColumnPtr && column_unique_, MutableColumnPtr && indexes_)
    : dictionary(std::move(column_unique_)), idx(std::move(indexes_))
{
    idx.check(getDictionary().size());
}

/// Insert a single Field: add it to the dictionary (deduplicated) and store its position.
void ColumnWithDictionary::insert(const Field & x)
{
    /// A shared dictionary must not be mutated in place; compact makes a private copy.
    compactIfSharedDictionary();
    idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x));
    idx.check(getDictionary().size());
}

/// Insert the default value; it always exists in the dictionary, so no dictionary write.
void ColumnWithDictionary::insertDefault()
{
    idx.insertPosition(getDictionary().getDefaultValueIndex());
}
|
||||
|
||||
void ColumnWithDictionary::insertFrom(const IColumn & src, size_t n)
|
||||
{
|
||||
auto * src_with_dict = typeid_cast<const ColumnWithDictionary *>(&src);
|
||||
|
||||
if (!src_with_dict)
|
||||
throw Exception("Expected ColumnWithDictionary, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
size_t position = src_with_dict->getIndexes().getUInt(n);
|
||||
|
||||
if (&src_with_dict->getDictionary() == &getDictionary())
|
||||
{
|
||||
/// Dictionary is shared with src column. Insert only index.
|
||||
idx.insertPosition(position);
|
||||
}
|
||||
else
|
||||
{
|
||||
compactIfSharedDictionary();
|
||||
const auto & nested = *src_with_dict->getDictionary().getNestedColumn();
|
||||
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position));
|
||||
}
|
||||
|
||||
idx.check(getDictionary().size());
|
||||
}
|
||||
|
||||
void ColumnWithDictionary::insertFromFullColumn(const IColumn & src, size_t n)
|
||||
{
|
||||
compactIfSharedDictionary();
|
||||
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n));
|
||||
idx.check(getDictionary().size());
|
||||
}
|
||||
|
||||
void ColumnWithDictionary::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
{
|
||||
auto * src_with_dict = typeid_cast<const ColumnWithDictionary *>(&src);
|
||||
|
||||
if (!src_with_dict)
|
||||
throw Exception("Expected ColumnWithDictionary, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
if (&src_with_dict->getDictionary() == &getDictionary())
|
||||
{
|
||||
/// Dictionary is shared with src column. Insert only indexes.
|
||||
idx.insertPositionsRange(src_with_dict->getIndexes(), start, length);
|
||||
}
|
||||
else
|
||||
{
|
||||
compactIfSharedDictionary();
|
||||
|
||||
/// TODO: Support native insertion from other unique column. It will help to avoid null map creation.
|
||||
|
||||
auto sub_idx = (*src_with_dict->getIndexes().cut(start, length)).mutate();
|
||||
auto idx_map = mapUniqueIndex(*sub_idx);
|
||||
|
||||
auto src_nested = src_with_dict->getDictionary().getNestedColumn();
|
||||
auto used_keys = src_nested->index(*idx_map, 0);
|
||||
|
||||
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size());
|
||||
idx.insertPositionsRange(*inserted_indexes->index(*sub_idx, 0), 0, length);
|
||||
}
|
||||
idx.check(getDictionary().size());
|
||||
}
|
||||
|
||||
/// Insert a range from an ordinary column: every key goes through dictionary
/// deduplication; resulting positions are appended.
void ColumnWithDictionary::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length)
{
    compactIfSharedDictionary();
    auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length);
    idx.insertPositionsRange(*inserted_indexes, 0, length);
    idx.check(getDictionary().size());
}

/// Insert from an already dictionary-encoded pair (keys + positions into those keys).
/// Keys are merged into our dictionary; positions are remapped through the result.
void ColumnWithDictionary::insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions)
{
    /// Validate incoming positions against the incoming keys before touching state.
    Index(positions.getPtr()).check(keys.size());
    compactIfSharedDictionary();
    auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(keys, 0, keys.size());
    idx.insertPositionsRange(*inserted_indexes->index(positions, 0), 0, positions.size());
    idx.check(getDictionary().size());
}
|
||||
|
||||
/// Insert a raw value given as (pointer, length): deduplicate through the dictionary.
void ColumnWithDictionary::insertData(const char * pos, size_t length)
{
    compactIfSharedDictionary();
    idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length));
    idx.check(getDictionary().size());
}

/// Same as insertData, but for values carrying a terminating zero byte.
void ColumnWithDictionary::insertDataWithTerminatingZero(const char * pos, size_t length)
{
    compactIfSharedDictionary();
    idx.insertPosition(dictionary.getColumnUnique().uniqueInsertDataWithTerminatingZero(pos, length));
    idx.check(getDictionary().size());
}
|
||||
|
||||
/// Serialize row n by delegating to the dictionary entry the row points to.
StringRef ColumnWithDictionary::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin);
}

/// Deserialize one value from the arena, deduplicate it, append its position.
/// Returns the pointer past the consumed bytes.
const char * ColumnWithDictionary::deserializeAndInsertFromArena(const char * pos)
{
    compactIfSharedDictionary();

    const char * new_pos;
    idx.insertPosition(dictionary.getColumnUnique().uniqueDeserializeAndInsertFromArena(pos, new_pos));

    idx.check(getDictionary().size());
    return new_pos;
}

/// Merge support: hand this column to the gatherer stream.
void ColumnWithDictionary::gather(ColumnGathererStream & gatherer)
{
    gatherer.gather(*this);
}
|
||||
|
||||
/// Clone with resized indexes; the dictionary itself is reused (mutated clone of the ptr).
MutableColumnPtr ColumnWithDictionary::cloneResized(size_t size) const
{
    auto unique_ptr = dictionary.getColumnUniquePtr();
    return ColumnWithDictionary::create((*std::move(unique_ptr)).mutate(), getIndexes().cloneResized(size));
}

/// Compare row n of this column with row m of rhs by comparing the dictionary keys
/// the rows point to. rhs must also be a ColumnWithDictionary (unchecked static_cast).
int ColumnWithDictionary::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
    const auto & column_with_dictionary = static_cast<const ColumnWithDictionary &>(rhs);
    size_t n_index = getIndexes().getUInt(n);
    size_t m_index = column_with_dictionary.getIndexes().getUInt(m);
    return getDictionary().compareAt(n_index, m_index, column_with_dictionary.getDictionary(), nan_direction_hint);
}
|
||||
|
||||
/// Compute a sort permutation of the rows.
/// Strategy: sort the (small) dictionary once, then expand that order to rows by
/// grouping row numbers per dictionary entry and emitting them in dictionary order.
void ColumnWithDictionary::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
    if (limit == 0)
        limit = size();

    /// Sort (a prefix of) the dictionary keys.
    size_t unique_limit = std::min(limit, getDictionary().size());
    Permutation unique_perm;
    getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);

    /// TODO: optimize with sse.

    /// Get indexes per row in column_unique.
    std::vector<std::vector<size_t>> indexes_per_row(getDictionary().size());
    size_t indexes_size = getIndexes().size();
    for (size_t row = 0; row < indexes_size; ++row)
        indexes_per_row[getIndexes().getUInt(row)].push_back(row);

    /// Replicate permutation: walk dictionary entries in sorted order and output
    /// the rows that reference each entry (rows within a group keep source order).
    size_t perm_size = std::min(indexes_size, limit);
    res.resize(perm_size);
    size_t perm_index = 0;
    for (size_t row = 0; row < indexes_size && perm_index < perm_size; ++row)
    {
        const auto & row_indexes = indexes_per_row[unique_perm[row]];
        for (auto row_index : row_indexes)
        {
            res[perm_index] = row_index;
            ++perm_index;

            if (perm_index == perm_size)
                break;
        }
    }
}
|
||||
|
||||
/// Scatter rows into num_columns parts by selector. Only the indexes are scattered;
/// each resulting part wraps a (mutated clone of the) same dictionary.
std::vector<MutableColumnPtr> ColumnWithDictionary::scatter(ColumnIndex num_columns, const Selector & selector) const
{
    auto scattered = getIndexes().scatter(num_columns, selector);

    for (auto & part : scattered)
    {
        auto dict_ptr = dictionary.getColumnUniquePtr();
        part = ColumnWithDictionary::create((*std::move(dict_ptr)).mutate(), std::move(part));
    }

    return scattered;
}
|
||||
|
||||
void ColumnWithDictionary::setSharedDictionary(const ColumnPtr & column_unique)
|
||||
{
|
||||
if (!empty())
|
||||
throw Exception("Can't set ColumnUnique for ColumnWithDictionary because is't not empty.",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
dictionary.setShared(column_unique);
|
||||
}
|
||||
|
||||
/// Return a copy of this column with a private (non-shared, minimal) dictionary.
ColumnWithDictionary::MutablePtr ColumnWithDictionary::compact()
{
    auto positions = idx.getPositions();
    /// Create column with new indexes and old dictionary.
    auto column = ColumnWithDictionary::create(getDictionary().assumeMutable(), (*std::move(positions)).mutate());
    /// Will create new dictionary.
    column->compactInplace();

    return column;
}

/// Like compact(), but only for the row range [start, start + length).
ColumnWithDictionary::MutablePtr ColumnWithDictionary::cutAndCompact(size_t start, size_t length) const
{
    auto sub_positions = (*idx.getPositions()->cut(start, length)).mutate();
    /// Create column with new indexes and old dictionary.
    auto column = ColumnWithDictionary::create(getDictionary().assumeMutable(), std::move(sub_positions));
    /// Will create new dictionary.
    column->compactInplace();

    return column;
}
|
||||
|
||||
/// Rebuild the dictionary from the positions actually in use, rewriting positions.
void ColumnWithDictionary::compactInplace()
{
    auto positions = idx.detachPositions();
    dictionary.compact(positions);
    idx.attachPositions(std::move(positions));
}

/// Mutating operations call this first: a shared dictionary must not be modified,
/// so it is replaced by a private compacted copy.
void ColumnWithDictionary::compactIfSharedDictionary()
{
    if (dictionary.isShared())
        compactInplace();
}
|
||||
|
||||
|
||||
/// Produce a (keys, indexes) pair for rows [offset, offset + limit) where keys contain
/// only the dictionary entries that range actually uses, in first-use order.
ColumnWithDictionary::DictionaryEncodedColumn
ColumnWithDictionary::getMinimalDictionaryEncodedColumn(size_t offset, size_t limit) const
{
    /// Cut the positions, renumber them densely, and gather the referenced keys.
    MutableColumnPtr sub_indexes = (*std::move(idx.getPositions()->cut(offset, limit))).mutate();
    auto indexes_map = mapUniqueIndex(*sub_indexes);
    auto sub_keys = getDictionary().getNestedColumn()->index(*indexes_map, 0);

    return {std::move(sub_keys), std::move(sub_indexes)};
}
|
||||
|
||||
|
||||
/// Default: empty UInt8 positions (the narrowest index type).
ColumnWithDictionary::Index::Index() : positions(ColumnUInt8::create()), size_of_type(sizeof(UInt8)) {}

/// Wrap existing positions; size_of_type is derived from the column's concrete type.
ColumnWithDictionary::Index::Index(MutableColumnPtr && positions) : positions(std::move(positions))
{
    updateSizeOfType();
}

ColumnWithDictionary::Index::Index(ColumnPtr positions) : positions(std::move(positions))
{
    updateSizeOfType();
}
|
||||
|
||||
/// Invoke callback with a value of the unsigned integer type whose sizeof equals
/// size_of_type (tag dispatch: the value itself is unused, only its type matters).
template <typename Callback>
void ColumnWithDictionary::Index::callForType(Callback && callback, size_t size_of_type)
{
    switch (size_of_type)
    {
        case sizeof(UInt8): { callback(UInt8()); break; }
        case sizeof(UInt16): { callback(UInt16()); break; }
        case sizeof(UInt32): { callback(UInt32()); break; }
        case sizeof(UInt64): { callback(UInt64()); break; }
        default: {
            throw Exception("Unexpected size of index type for ColumnWithDictionary: " + toString(size_of_type),
                            ErrorCodes::LOGICAL_ERROR);
        }
    }
}
|
||||
|
||||
/// Determine the byte width of the UInt ColumnVector behind `column`.
/// `hint` (previous width) is tried first to skip the linear probe.
/// Throws ILLEGAL_COLUMN if the column is not a UInt vector.
size_t ColumnWithDictionary::Index::getSizeOfIndexType(const IColumn & column, size_t hint)
{
    auto checkFor = [&](auto type) { return typeid_cast<const ColumnVector<decltype(type)> *>(&column) != nullptr; };
    auto tryGetSizeFor = [&](auto type) -> size_t { return checkFor(type) ? sizeof(decltype(type)) : 0; };

    if (hint)
    {
        size_t size = 0;
        callForType([&](auto type) { size = tryGetSizeFor(type); }, hint);

        if (size)
            return size;
    }

    /// Hint missed: probe widths from narrowest to widest.
    if (auto size = tryGetSizeFor(UInt8()))
        return size;
    if (auto size = tryGetSizeFor(UInt16()))
        return size;
    if (auto size = tryGetSizeFor(UInt32()))
        return size;
    if (auto size = tryGetSizeFor(UInt64()))
        return size;

    throw Exception("Unexpected indexes type for ColumnWithDictionary. Expected UInt, got " + column.getName(),
                    ErrorCodes::ILLEGAL_COLUMN);
}
|
||||
|
||||
/// Replace the positions column and re-derive size_of_type from its concrete type.
void ColumnWithDictionary::Index::attachPositions(ColumnPtr positions_)
{
    positions = std::move(positions_);
    updateSizeOfType();
}

/// Access the raw positions array, asserting that it is a ColumnVector<IndexType>.
template <typename IndexType>
typename ColumnVector<IndexType>::Container & ColumnWithDictionary::Index::getPositionsData()
{
    auto * positions_ptr = typeid_cast<ColumnVector<IndexType> *>(positions->assumeMutable().get());
    if (!positions_ptr)
        throw Exception("Invalid indexes type for ColumnWithDictionary."
                        " Expected UInt" + toString(8 * sizeof(IndexType)) + ", got " + positions->getName(),
                        ErrorCodes::LOGICAL_ERROR);

    return positions_ptr->getData();
}
|
||||
|
||||
/// Widen the positions column to IndexType (element-wise copy into a new column).
/// Narrowing is a logic error; same-width conversion is a no-op.
template <typename IndexType>
void ColumnWithDictionary::Index::convertPositions()
{
    auto convert = [&](auto x)
    {
        using CurIndexType = decltype(x);
        auto & data = getPositionsData<CurIndexType>();

        if (sizeof(CurIndexType) > sizeof(IndexType))
            throw Exception("Converting indexes to smaller type: from " + toString(sizeof(CurIndexType)) +
                            " to " + toString(sizeof(IndexType)), ErrorCodes::LOGICAL_ERROR);

        if (sizeof(CurIndexType) != sizeof(IndexType))
        {
            size_t size = data.size();
            auto new_positions = ColumnVector<IndexType>::create(size);
            auto & new_data = new_positions->getData();

            /// TODO: Optimize with SSE?
            for (size_t i = 0; i < size; ++i)
                new_data[i] = data[i];

            positions = std::move(new_positions);
            size_of_type = sizeof(IndexType);
        }
    };

    /// Dispatch on the current width to read the old data with the right type.
    callForType(std::move(convert), size_of_type);

    checkSizeOfType();
}
|
||||
|
||||
/// Grow the index type one step (UInt8 -> UInt16 -> UInt32 -> UInt64).
/// Throws if already at the widest supported type.
void ColumnWithDictionary::Index::expandType()
{
    auto expand = [&](auto type)
    {
        using CurIndexType = decltype(type);
        constexpr auto next_size = NumberTraits::nextSize(sizeof(CurIndexType));
        if (next_size == sizeof(CurIndexType))
            throw Exception("Can't expand indexes type for ColumnWithDictionary from type: "
                            + demangle(typeid(CurIndexType).name()), ErrorCodes::LOGICAL_ERROR);

        using NewIndexType = typename NumberTraits::Construct<false, false, next_size>::Type;
        convertPositions<NewIndexType>();
    };

    callForType(std::move(expand), size_of_type);
}
|
||||
|
||||
/// Largest position representable by the current index type.
UInt64 ColumnWithDictionary::Index::getMaxPositionForCurrentType() const
{
    UInt64 value = 0;
    callForType([&](auto type) { value = std::numeric_limits<decltype(type)>::max(); }, size_of_type);
    return value;
}

/// Append one position, widening the index type first if it would not fit.
void ColumnWithDictionary::Index::insertPosition(UInt64 position)
{
    while (position > getMaxPositionForCurrentType())
        expandType();

    positions->assumeMutableRef().insert(UInt64(position));
    checkSizeOfType();
}
|
||||
|
||||
/// Append `limit` positions starting at `offset` from a UInt ColumnVector.
/// Our index type is widened to at least the source's width; if still narrower
/// source than ours, elements are copied with per-element widening.
void ColumnWithDictionary::Index::insertPositionsRange(const IColumn & column, size_t offset, size_t limit)
{
    /// Returns false when `column` is not a ColumnVector of the probed type.
    auto insertForType = [&](auto type)
    {
        using ColumnType = decltype(type);
        const auto * column_ptr = typeid_cast<const ColumnVector<ColumnType> *>(&column);

        if (!column_ptr)
            return false;

        /// Never store positions narrower than the source delivers.
        if (size_of_type < sizeof(ColumnType))
            convertPositions<ColumnType>();

        if (size_of_type == sizeof(ColumnType))
            /// Same width: bulk append.
            positions->assumeMutableRef().insertRangeFrom(column, offset, limit);
        else
        {
            /// Our type is wider: copy element-wise with implicit widening.
            auto copy = [&](auto cur_type)
            {
                using CurIndexType = decltype(cur_type);
                auto & positions_data = getPositionsData<CurIndexType>();
                const auto & column_data = column_ptr->getData();

                size_t size = positions_data.size();
                positions_data.resize(size + limit);

                for (size_t i = 0; i < limit; ++i)
                    positions_data[size + i] = column_data[offset + i];
            };

            callForType(std::move(copy), size_of_type);
        }

        return true;
    };

    if (!insertForType(UInt8()) &&
        !insertForType(UInt16()) &&
        !insertForType(UInt32()) &&
        !insertForType(UInt64()))
        throw Exception("Invalid column for ColumnWithDictionary index. Expected UInt, got " + column.getName(),
                        ErrorCodes::ILLEGAL_COLUMN);

    checkSizeOfType();
}
|
||||
|
||||
void ColumnWithDictionary::Index::check(size_t /*max_dictionary_size*/)
|
||||
{
|
||||
/// TODO: remove
|
||||
/*
|
||||
auto check = [&](auto cur_type)
|
||||
{
|
||||
using CurIndexType = decltype(cur_type);
|
||||
auto & positions_data = getPositionsData<CurIndexType>();
|
||||
|
||||
for (size_t i = 0; i < positions_data.size(); ++i)
|
||||
{
|
||||
if (positions_data[i] >= max_dictionary_size)
|
||||
{
|
||||
throw Exception("Found index " + toString(positions_data[i]) + " at position " + toString(i)
|
||||
+ " which is grated or equal than dictionary size " + toString(max_dictionary_size),
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
callForType(std::move(check), size_of_type);
|
||||
*/
|
||||
}
|
||||
|
||||
void ColumnWithDictionary::Index::checkSizeOfType()
|
||||
{
|
||||
if (size_of_type != getSizeOfIndexType(*positions, size_of_type))
|
||||
throw Exception("Invalid size of type. Expected " + toString(8 * size_of_type) +
|
||||
", but positions are " + positions->getName(), ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
ColumnWithDictionary::Dictionary::Dictionary(MutableColumnPtr && column_unique_)
|
||||
: column_unique(std::move(column_unique_))
|
||||
{
|
||||
checkColumn(*column_unique);
|
||||
}
|
||||
ColumnWithDictionary::Dictionary::Dictionary(ColumnPtr column_unique_)
|
||||
: column_unique(std::move(column_unique_))
|
||||
{
|
||||
checkColumn(*column_unique);
|
||||
}
|
||||
|
||||
void ColumnWithDictionary::Dictionary::checkColumn(const IColumn & column)
|
||||
{
|
||||
|
||||
if (!dynamic_cast<const IColumnUnique *>(&column))
|
||||
throw Exception("ColumnUnique expected as an argument of ColumnWithDictionary.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
void ColumnWithDictionary::Dictionary::setShared(const ColumnPtr & dictionary)
|
||||
{
|
||||
checkColumn(*dictionary);
|
||||
|
||||
column_unique = dictionary;
|
||||
shared = true;
|
||||
}
|
||||
|
||||
void ColumnWithDictionary::Dictionary::compact(ColumnPtr & positions)
|
||||
{
|
||||
auto new_column_unique = column_unique->cloneEmpty();
|
||||
|
||||
auto & unique = getColumnUnique();
|
||||
auto & new_unique = static_cast<IColumnUnique &>(*new_column_unique);
|
||||
|
||||
auto indexes = mapUniqueIndex(positions->assumeMutableRef());
|
||||
auto sub_keys = unique.getNestedColumn()->index(*indexes, 0);
|
||||
auto new_indexes = new_unique.uniqueInsertRangeFrom(*sub_keys, 0, sub_keys->size());
|
||||
|
||||
positions = (*new_indexes->index(*positions, 0)).mutate();
|
||||
column_unique = std::move(new_column_unique);
|
||||
|
||||
shared = false;
|
||||
}
|
||||
|
||||
}
|
248
dbms/src/Columns/ColumnWithDictionary.h
Normal file
248
dbms/src/Columns/ColumnWithDictionary.h
Normal file
@ -0,0 +1,248 @@
|
||||
#pragma once
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/IColumnUnique.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <AggregateFunctions/AggregateFunctionCount.h>
|
||||
#include "ColumnsNumber.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
class ColumnWithDictionary final : public COWPtrHelper<IColumn, ColumnWithDictionary>
|
||||
{
|
||||
friend class COWPtrHelper<IColumn, ColumnWithDictionary>;
|
||||
|
||||
ColumnWithDictionary(MutableColumnPtr && column_unique, MutableColumnPtr && indexes);
|
||||
ColumnWithDictionary(const ColumnWithDictionary & other) = default;
|
||||
|
||||
public:
|
||||
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
|
||||
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
|
||||
*/
|
||||
using Base = COWPtrHelper<IColumn, ColumnWithDictionary>;
|
||||
static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_)
|
||||
{
|
||||
return ColumnWithDictionary::create(column_unique_->assumeMutable(), indexes_->assumeMutable());
|
||||
}
|
||||
|
||||
template <typename ... Args, typename = typename std::enable_if<IsMutableColumns<Args ...>::value>::type>
|
||||
static MutablePtr create(Args &&... args) { return Base::create(std::forward<Args>(args)...); }
|
||||
|
||||
|
||||
std::string getName() const override { return "ColumnWithDictionary"; }
|
||||
const char * getFamilyName() const override { return "ColumnWithDictionary"; }
|
||||
|
||||
ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); }
|
||||
ColumnPtr convertToFullColumnIfWithDictionary() const override { return convertToFullColumn(); }
|
||||
|
||||
MutableColumnPtr cloneResized(size_t size) const override;
|
||||
size_t size() const override { return getIndexes().size(); }
|
||||
|
||||
Field operator[](size_t n) const override { return getDictionary()[getIndexes().getUInt(n)]; }
|
||||
void get(size_t n, Field & res) const override { getDictionary().get(getIndexes().getUInt(n), res); }
|
||||
|
||||
StringRef getDataAt(size_t n) const override { return getDictionary().getDataAt(getIndexes().getUInt(n)); }
|
||||
StringRef getDataAtWithTerminatingZero(size_t n) const override
|
||||
{
|
||||
return getDictionary().getDataAtWithTerminatingZero(getIndexes().getUInt(n));
|
||||
}
|
||||
|
||||
UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); }
|
||||
UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); }
|
||||
Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); }
|
||||
bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); }
|
||||
ColumnPtr cut(size_t start, size_t length) const override
|
||||
{
|
||||
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().cut(start, length));
|
||||
}
|
||||
|
||||
void insert(const Field & x) override;
|
||||
void insertDefault() override;
|
||||
|
||||
void insertFrom(const IColumn & src, size_t n) override;
|
||||
void insertFromFullColumn(const IColumn & src, size_t n);
|
||||
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length);
|
||||
void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions);
|
||||
|
||||
void insertData(const char * pos, size_t length) override;
|
||||
void insertDataWithTerminatingZero(const char * pos, size_t length) override;
|
||||
|
||||
|
||||
void popBack(size_t n) override { idx.popBack(n); }
|
||||
|
||||
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
|
||||
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override
|
||||
{
|
||||
return getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
|
||||
}
|
||||
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
|
||||
{
|
||||
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
|
||||
}
|
||||
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override
|
||||
{
|
||||
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit));
|
||||
}
|
||||
|
||||
ColumnPtr index(const IColumn & indexes_, size_t limit) const override
|
||||
{
|
||||
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
|
||||
}
|
||||
|
||||
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
|
||||
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
|
||||
ColumnPtr replicate(const Offsets & offsets) const override
|
||||
{
|
||||
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
|
||||
}
|
||||
|
||||
std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override;
|
||||
|
||||
void gather(ColumnGathererStream & gatherer_stream) override ;
|
||||
void getExtremes(Field & min, Field & max) const override {
|
||||
return getDictionary().index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize
|
||||
}
|
||||
|
||||
void reserve(size_t n) override { idx.reserve(n); }
|
||||
|
||||
size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); }
|
||||
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) override
|
||||
{
|
||||
callback(idx.getPositionsPtr());
|
||||
|
||||
/// Column doesn't own dictionary if it's shared.
|
||||
if (!dictionary.isShared())
|
||||
callback(dictionary.getColumnUniquePtr());
|
||||
}
|
||||
|
||||
bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); }
|
||||
bool isFixedAndContiguous() const override { return getDictionary().isFixedAndContiguous(); }
|
||||
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
|
||||
bool isNumeric() const override { return getDictionary().isNumeric(); }
|
||||
bool withDictionary() const override { return true; }
|
||||
|
||||
const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); }
|
||||
/// IColumnUnique & getUnique() { return static_cast<IColumnUnique &>(*column_unique->assumeMutable()); }
|
||||
/// ColumnPtr getUniquePtr() const { return column_unique; }
|
||||
|
||||
/// IColumn & getIndexes() { return idx.getPositions()->assumeMutableRef(); }
|
||||
const IColumn & getIndexes() const { return *idx.getPositions(); }
|
||||
const ColumnPtr & getIndexesPtr() const { return idx.getPositions(); }
|
||||
|
||||
///void setIndexes(MutableColumnPtr && indexes_) { indexes = std::move(indexes_); }
|
||||
|
||||
/// Set shared ColumnUnique for empty column with dictionary.
|
||||
void setSharedDictionary(const ColumnPtr & column_unique);
|
||||
|
||||
/// Create column new dictionary with only keys that are mentioned in index.
|
||||
MutablePtr compact();
|
||||
|
||||
/// Cut + compact.
|
||||
MutablePtr cutAndCompact(size_t start, size_t length) const;
|
||||
|
||||
struct DictionaryEncodedColumn
|
||||
{
|
||||
ColumnPtr dictionary;
|
||||
ColumnPtr indexes;
|
||||
};
|
||||
|
||||
DictionaryEncodedColumn getMinimalDictionaryEncodedColumn(size_t offset, size_t limit) const;
|
||||
|
||||
class Index
|
||||
{
|
||||
public:
|
||||
Index();
|
||||
Index(const Index & other) = default;
|
||||
explicit Index(MutableColumnPtr && positions);
|
||||
explicit Index(ColumnPtr positions);
|
||||
|
||||
const ColumnPtr & getPositions() const { return positions; }
|
||||
ColumnPtr & getPositionsPtr() { return positions; }
|
||||
void insertPosition(UInt64 position);
|
||||
void insertPositionsRange(const IColumn & column, size_t offset, size_t limit);
|
||||
|
||||
void popBack(size_t n) { positions->assumeMutableRef().popBack(n); }
|
||||
void reserve(size_t n) { positions->assumeMutableRef().reserve(n); }
|
||||
|
||||
UInt64 getMaxPositionForCurrentType() const;
|
||||
|
||||
static size_t getSizeOfIndexType(const IColumn & column, size_t hint);
|
||||
|
||||
void check(size_t max_dictionary_size);
|
||||
void checkSizeOfType();
|
||||
|
||||
ColumnPtr detachPositions() { return std::move(positions); }
|
||||
void attachPositions(ColumnPtr positions_);
|
||||
|
||||
private:
|
||||
ColumnPtr positions;
|
||||
size_t size_of_type = 0;
|
||||
|
||||
void updateSizeOfType() { size_of_type = getSizeOfIndexType(*positions, size_of_type); }
|
||||
void expandType();
|
||||
|
||||
template <typename IndexType>
|
||||
typename ColumnVector<IndexType>::Container & getPositionsData();
|
||||
|
||||
template <typename IndexType>
|
||||
void convertPositions();
|
||||
|
||||
template <typename Callback>
|
||||
static void callForType(Callback && callback, size_t size_of_type);
|
||||
};
|
||||
|
||||
private:
|
||||
class Dictionary
|
||||
{
|
||||
public:
|
||||
Dictionary(const Dictionary & other) = default;
|
||||
explicit Dictionary(MutableColumnPtr && column_unique);
|
||||
explicit Dictionary(ColumnPtr column_unique);
|
||||
|
||||
const ColumnPtr & getColumnUniquePtr() const { return column_unique; }
|
||||
ColumnPtr & getColumnUniquePtr() { return column_unique; }
|
||||
|
||||
const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); }
|
||||
IColumnUnique & getColumnUnique() { return static_cast<IColumnUnique &>(column_unique->assumeMutableRef()); }
|
||||
|
||||
/// Dictionary may be shared for several mutable columns.
|
||||
/// Immutable columns may have the same column unique, which isn't necessarily shared dictionary.
|
||||
void setShared(const ColumnPtr & dictionary);
|
||||
bool isShared() const { return shared; }
|
||||
|
||||
/// Create new dictionary with only keys that are mentioned in positions.
|
||||
void compact(ColumnPtr & positions);
|
||||
|
||||
private:
|
||||
ColumnPtr column_unique;
|
||||
bool shared = false;
|
||||
|
||||
void checkColumn(const IColumn & column);
|
||||
};
|
||||
|
||||
Dictionary dictionary;
|
||||
Index idx;
|
||||
|
||||
void compactInplace();
|
||||
void compactIfSharedDictionary();
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
@ -3,6 +3,10 @@
|
||||
#endif
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -308,4 +312,22 @@ INSTANTIATE(Float64)
|
||||
|
||||
#undef INSTANTIATE
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template <typename T>
|
||||
const PaddedPODArray<T> * getIndexesData(const IColumn & indexes)
|
||||
{
|
||||
auto * column = typeid_cast<const ColumnVector<T> *>(&indexes);
|
||||
if (column)
|
||||
return &column->getData();
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template const PaddedPODArray<UInt8> * getIndexesData<UInt8>(const IColumn & indexes);
|
||||
template const PaddedPODArray<UInt16> * getIndexesData<UInt16>(const IColumn & indexes);
|
||||
template const PaddedPODArray<UInt32> * getIndexesData<UInt32>(const IColumn & indexes);
|
||||
template const PaddedPODArray<UInt64> * getIndexesData<UInt64>(const IColumn & indexes);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -8,6 +8,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// Counts how many bytes of `filt` are greater than zero.
|
||||
size_t countBytesInFilter(const IColumn::Filter & filt);
|
||||
|
||||
@ -33,4 +38,38 @@ void filterArraysImplOnlyData(
|
||||
PaddedPODArray<T> & res_elems,
|
||||
const IColumn::Filter & filt, ssize_t result_size_hint);
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template <typename T>
|
||||
const PaddedPODArray<T> * getIndexesData(const IColumn & indexes);
|
||||
}
|
||||
|
||||
/// Check limit <= indexes->size() and call column.indexImpl(const PaddedPodArray<Type> & indexes, size_t limit).
|
||||
template <typename Column>
|
||||
ColumnPtr selectIndexImpl(const Column & column, const IColumn & indexes, size_t limit)
|
||||
{
|
||||
if (limit == 0)
|
||||
limit = indexes.size();
|
||||
|
||||
if (indexes.size() < limit)
|
||||
throw Exception("Size of indexes is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
||||
|
||||
if (auto * data_uint8 = detail::getIndexesData<UInt8>(indexes))
|
||||
return column.template indexImpl<UInt8>(*data_uint8, limit);
|
||||
else if (auto * data_uint16 = detail::getIndexesData<UInt16>(indexes))
|
||||
return column.template indexImpl<UInt16>(*data_uint16, limit);
|
||||
else if (auto * data_uint32 = detail::getIndexesData<UInt32>(indexes))
|
||||
return column.template indexImpl<UInt32>(*data_uint32, limit);
|
||||
else if (auto * data_uint64 = detail::getIndexesData<UInt64>(indexes))
|
||||
return column.template indexImpl<UInt64>(*data_uint64, limit);
|
||||
else
|
||||
throw Exception("Indexes column for IColumn::select must be ColumnUInt, got" + indexes.getName(),
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
#define INSTANTIATE_INDEX_IMPL(Column) \
|
||||
template ColumnPtr Column::indexImpl<UInt8>(const PaddedPODArray<UInt8> & indexes, size_t limit) const; \
|
||||
template ColumnPtr Column::indexImpl<UInt16>(const PaddedPODArray<UInt16> & indexes, size_t limit) const; \
|
||||
template ColumnPtr Column::indexImpl<UInt32>(const PaddedPODArray<UInt32> & indexes, size_t limit) const; \
|
||||
template ColumnPtr Column::indexImpl<UInt64>(const PaddedPODArray<UInt64> & indexes, size_t limit) const;
|
||||
}
|
||||
|
@ -27,14 +27,14 @@ ConstantFilterDescription::ConstantFilterDescription(const IColumn & column)
|
||||
if (column.isColumnConst())
|
||||
{
|
||||
const ColumnConst & column_const = static_cast<const ColumnConst &>(column);
|
||||
const IColumn & column_nested = column_const.getDataColumn();
|
||||
ColumnPtr column_nested = column_const.getDataColumnPtr()->convertToFullColumnIfWithDictionary();
|
||||
|
||||
if (!typeid_cast<const ColumnUInt8 *>(&column_nested))
|
||||
if (!typeid_cast<const ColumnUInt8 *>(column_nested.get()))
|
||||
{
|
||||
const ColumnNullable * column_nested_nullable = typeid_cast<const ColumnNullable *>(&column_nested);
|
||||
const ColumnNullable * column_nested_nullable = typeid_cast<const ColumnNullable *>(column_nested.get());
|
||||
if (!column_nested_nullable || !typeid_cast<const ColumnUInt8 *>(&column_nested_nullable->getNestedColumn()))
|
||||
{
|
||||
throw Exception("Illegal type " + column_nested.getName() + " of column for constant filter. Must be UInt8 or Nullable(UInt8).",
|
||||
throw Exception("Illegal type " + column_nested->getName() + " of column for constant filter. Must be UInt8 or Nullable(UInt8).",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
|
||||
}
|
||||
}
|
||||
@ -48,8 +48,13 @@ ConstantFilterDescription::ConstantFilterDescription(const IColumn & column)
|
||||
}
|
||||
|
||||
|
||||
FilterDescription::FilterDescription(const IColumn & column)
|
||||
FilterDescription::FilterDescription(const IColumn & column_)
|
||||
{
|
||||
if (column_.withDictionary())
|
||||
data_holder = column_.convertToFullColumnIfWithDictionary();
|
||||
|
||||
const auto & column = data_holder ? *data_holder : column_;
|
||||
|
||||
if (const ColumnUInt8 * concrete_column = typeid_cast<const ColumnUInt8 *>(&column))
|
||||
{
|
||||
data = &concrete_column->getData();
|
||||
|
@ -47,6 +47,10 @@ public:
|
||||
*/
|
||||
virtual Ptr convertToFullColumnIfConst() const { return {}; }
|
||||
|
||||
/// If column isn't ColumnWithDictionary, return itself.
|
||||
/// If column is ColumnWithDictionary, transforms is to full column.
|
||||
virtual Ptr convertToFullColumnIfWithDictionary() const { return getPtr(); }
|
||||
|
||||
/// Creates empty column with the same type.
|
||||
virtual MutablePtr cloneEmpty() const { return cloneResized(0); }
|
||||
|
||||
@ -188,6 +192,10 @@ public:
|
||||
using Permutation = PaddedPODArray<size_t>;
|
||||
virtual Ptr permute(const Permutation & perm, size_t limit) const = 0;
|
||||
|
||||
/// Creates new column with values column[indexes[:limit]]. If limit is 0, all indexes are used.
|
||||
/// Indexes must be one of the ColumnUInt. For default implementation, see selectIndexImpl from ColumnsCommon.h
|
||||
virtual Ptr index(const IColumn & indexes, size_t limit) const = 0;
|
||||
|
||||
/** Compares (*this)[n] and rhs[m].
|
||||
* Returns negative number, 0, or positive number (*this)[n] is less, equal, greater than rhs[m] respectively.
|
||||
* Is used in sortings.
|
||||
@ -325,6 +333,8 @@ public:
|
||||
/// Can be inside ColumnNullable.
|
||||
virtual bool canBeInsideNullable() const { return false; }
|
||||
|
||||
virtual bool withDictionary() const { return false; }
|
||||
|
||||
|
||||
virtual ~IColumn() {}
|
||||
|
||||
|
@ -87,6 +87,14 @@ public:
|
||||
return cloneDummy(limit ? std::min(s, limit) : s);
|
||||
}
|
||||
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override
|
||||
{
|
||||
if (indexes.size() < limit)
|
||||
throw Exception("Size of indexes is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
||||
|
||||
return cloneDummy(limit ? limit : s);
|
||||
}
|
||||
|
||||
void getPermutation(bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/, Permutation & res) const override
|
||||
{
|
||||
res.resize(s);
|
||||
|
129
dbms/src/Columns/IColumnUnique.h
Normal file
129
dbms/src/Columns/IColumnUnique.h
Normal file
@ -0,0 +1,129 @@
|
||||
#pragma once
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IColumnUnique : public IColumn
|
||||
{
|
||||
public:
|
||||
using ColumnUniquePtr = IColumn::template immutable_ptr<IColumnUnique>;
|
||||
using MutableColumnUniquePtr = IColumn::template mutable_ptr<IColumnUnique>;
|
||||
|
||||
/// Column always contains Null if it's Nullable and empty string if it's String or Nullable(String).
|
||||
/// So, size may be greater than the number of inserted unique values.
|
||||
virtual const ColumnPtr & getNestedColumn() const = 0;
|
||||
/// The same as getNestedColumn, but removes null map if nested column is nullable.
|
||||
virtual const ColumnPtr & getNestedNotNullableColumn() const = 0;
|
||||
|
||||
size_t size() const override { return getNestedColumn()->size(); }
|
||||
|
||||
/// Appends new value at the end of column (column's size is increased by 1).
|
||||
/// Is used to transform raw strings to Blocks (for example, inside input format parsers)
|
||||
virtual size_t uniqueInsert(const Field & x) = 0;
|
||||
|
||||
virtual size_t uniqueInsertFrom(const IColumn & src, size_t n) = 0;
|
||||
/// Appends range of elements from other column.
|
||||
/// Could be used to concatenate columns.
|
||||
virtual MutableColumnPtr uniqueInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
|
||||
|
||||
struct IndexesWithOverflow
|
||||
{
|
||||
MutableColumnPtr indexes;
|
||||
MutableColumnPtr overflowed_keys;
|
||||
};
|
||||
/// Like uniqueInsertRangeFrom, but doesn't insert keys if inner dictionary has more than max_dictionary_size keys.
|
||||
/// Keys that won't be inserted into dictionary will be into overflowed_keys, indexes will be calculated for
|
||||
/// concatenation of nested column (which can be got from getNestedColumn() function) and overflowed_keys.
|
||||
virtual IndexesWithOverflow uniqueInsertRangeWithOverflow(const IColumn & src, size_t start,
|
||||
size_t length, size_t max_dictionary_size) = 0;
|
||||
|
||||
/// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
|
||||
/// Is used to optimize some computations (in aggregation, for example).
|
||||
/// Parameter length could be ignored if column values have fixed size.
|
||||
virtual size_t uniqueInsertData(const char * pos, size_t length) = 0;
|
||||
virtual size_t uniqueInsertDataWithTerminatingZero(const char * pos, size_t length) = 0;
|
||||
|
||||
virtual size_t getDefaultValueIndex() const = 0;
|
||||
virtual size_t getNullValueIndex() const = 0;
|
||||
virtual bool canContainNulls() const = 0;
|
||||
|
||||
virtual size_t uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos) = 0;
|
||||
|
||||
const char * getFamilyName() const override { return "ColumnUnique"; }
|
||||
|
||||
void insert(const Field &) override
|
||||
{
|
||||
throw Exception("Method insert is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
void insertRangeFrom(const IColumn &, size_t, size_t) override
|
||||
{
|
||||
throw Exception("Method insertRangeFrom is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
void insertData(const char *, size_t) override
|
||||
{
|
||||
throw Exception("Method insertData is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
void insertDefault() override
|
||||
{
|
||||
throw Exception("Method insertDefault is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
void popBack(size_t) override
|
||||
{
|
||||
throw Exception("Method popBack is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
void gather(ColumnGathererStream &) override
|
||||
{
|
||||
throw Exception("Method gather is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
const char * deserializeAndInsertFromArena(const char *) override
|
||||
{
|
||||
throw Exception("Method deserializeAndInsertFromArena is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
ColumnPtr index(const IColumn &, size_t) const override
|
||||
{
|
||||
throw Exception("Method index is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
ColumnPtr cut(size_t, size_t) const override
|
||||
{
|
||||
throw Exception("Method cut is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
ColumnPtr filter(const IColumn::Filter &, ssize_t) const override
|
||||
{
|
||||
throw Exception("Method filter is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
ColumnPtr permute(const IColumn::Permutation &, size_t) const override
|
||||
{
|
||||
throw Exception("Method permute is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
ColumnPtr replicate(const IColumn::Offsets &) const override
|
||||
{
|
||||
throw Exception("Method replicate is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
void getPermutation(bool, size_t, int, IColumn::Permutation &) const override
|
||||
{
|
||||
throw Exception("Method getPermutation is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
std::vector<MutableColumnPtr> scatter(IColumn::ColumnIndex, const IColumn::Selector &) const override
|
||||
{
|
||||
throw Exception("Method scatter is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
};
|
||||
|
||||
using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr;
|
||||
using MutableColumnUniquePtr = IColumnUnique::MutableColumnUniquePtr;
|
||||
|
||||
}
|
412
dbms/src/Columns/ReverseIndex.h
Normal file
412
dbms/src/Columns/ReverseIndex.h
Normal file
@ -0,0 +1,412 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/HashTable/HashTable.h>
|
||||
#include <Common/HashTable/HashTableAllocator.h>
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename ColumnType, bool with_saved_hash, bool has_base_index>
|
||||
struct ReverseIndexHashTableState;
|
||||
|
||||
template <typename ColumnType>
|
||||
struct ReverseIndexHashTableState<ColumnType, /* with_saved_hash */ false, /* has_base_index */ false>
|
||||
{
|
||||
constexpr static bool with_saved_hash = false;
|
||||
constexpr static bool has_base_index = false;
|
||||
|
||||
ColumnType * index_column;
|
||||
};
|
||||
|
||||
template <typename ColumnType>
|
||||
struct ReverseIndexHashTableState<ColumnType, /* with_saved_hash */ false, /* has_base_index */ true>
|
||||
{
|
||||
constexpr static bool with_saved_hash = false;
|
||||
constexpr static bool has_base_index = true;
|
||||
|
||||
ColumnType * index_column;
|
||||
size_t base_index;
|
||||
};
|
||||
|
||||
template <typename ColumnType>
|
||||
struct ReverseIndexHashTableState<ColumnType, /* with_saved_hash = */ true, /* has_base_index */ false>
|
||||
{
|
||||
constexpr static bool with_saved_hash = true;
|
||||
constexpr static bool has_base_index = false;
|
||||
|
||||
ColumnType * index_column;
|
||||
typename ColumnVector<UInt64>::Container * saved_hash_column;
|
||||
};
|
||||
|
||||
template <typename ColumnType>
|
||||
struct ReverseIndexHashTableState<ColumnType, /* with_saved_hash = */ true, /* has_base_index */ true>
|
||||
{
|
||||
constexpr static bool with_saved_hash = true;
|
||||
constexpr static bool has_base_index = true;
|
||||
|
||||
ColumnType * index_column;
|
||||
typename ColumnVector<UInt64>::Container * saved_hash_column;
|
||||
size_t base_index;
|
||||
};
|
||||
|
||||
|
||||
template <typename Hash>
|
||||
struct ReverseIndexHash : public Hash
|
||||
{
|
||||
template <typename T>
|
||||
size_t operator()(T) const
|
||||
{
|
||||
throw Exception("operator()(key) is not implemented for ReverseIndexHash.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
template <typename State, typename T>
|
||||
size_t operator()(const State & state, T key) const
|
||||
{
|
||||
auto index = key;
|
||||
if constexpr (State::has_base_index)
|
||||
index -= state.base_index;
|
||||
|
||||
return Hash::operator()(state.index_column->getElement(index));
|
||||
}
|
||||
};
|
||||
|
||||
using ReverseIndexStringHash = ReverseIndexHash<StringRefHash>;
|
||||
|
||||
template <typename IndexType>
|
||||
using ReverseIndexNumberHash = ReverseIndexHash<DefaultHash<IndexType>>;
|
||||
|
||||
|
||||
template <typename IndexType, typename Hash, typename HashTable, typename ColumnType, bool string_hash, bool has_base_index>
|
||||
struct ReverseIndexHashTableCell
|
||||
: public HashTableCell<IndexType, Hash, ReverseIndexHashTableState<ColumnType, string_hash, has_base_index>>
|
||||
{
|
||||
using Base = HashTableCell<IndexType, Hash, ReverseIndexHashTableState<ColumnType, string_hash, has_base_index>>;
|
||||
using State = typename Base::State;
|
||||
using Base::Base;
|
||||
using Base::key;
|
||||
using Base::keyEquals;
|
||||
using Base::isZero;
|
||||
|
||||
template <typename T>
|
||||
static bool isZero(const T &, const State & /*state*/)
|
||||
{
|
||||
static_assert(!std::is_same_v<typename std::decay<T>::type, typename std::decay<IndexType>::type>);
|
||||
return false;
|
||||
}
|
||||
/// Special case when we want to compare with something not in index_column.
|
||||
/// When we compare something inside column default keyEquals checks only that row numbers are equal.
|
||||
bool keyEquals(const StringRef & object, size_t hash_ [[maybe_unused]], const State & state) const
|
||||
{
|
||||
auto index = key;
|
||||
if constexpr (has_base_index)
|
||||
index -= state.base_index;
|
||||
|
||||
if constexpr (string_hash)
|
||||
return hash_ == (*state.saved_hash_column)[index] && object == state.index_column->getDataAt(index);
|
||||
else
|
||||
return object == state.index_column->getDataAt(index);
|
||||
}
|
||||
|
||||
size_t getHash(const Hash & hash) const
|
||||
{
|
||||
auto index = key;
|
||||
|
||||
/// Hack. HashTable is Hash itself.
|
||||
const auto & state = static_cast<const State &>(static_cast<const HashTable &>(hash));
|
||||
|
||||
if constexpr (has_base_index)
|
||||
index -= state.base_index;
|
||||
|
||||
if constexpr (string_hash)
|
||||
return (*state.saved_hash_column)[index];
|
||||
else
|
||||
return hash(state, key);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename Key, typename Cell, typename Hash>
|
||||
class HashTableWithPublicState : public HashTable<Key, Cell, Hash, HashTableGrower<>, HashTableAllocator>
|
||||
{
|
||||
using State = typename Cell::State;
|
||||
using Base = HashTable<Key, Cell, Hash, HashTableGrower<>, HashTableAllocator>;
|
||||
|
||||
public:
|
||||
using Base::Base;
|
||||
State & getState() { return *this; }
|
||||
};
|
||||
|
||||
template <typename IndexType, typename ColumnType, bool has_base_index>
|
||||
class ReverseIndexStringHashTable : public HashTableWithPublicState<
|
||||
IndexType,
|
||||
ReverseIndexHashTableCell<
|
||||
IndexType,
|
||||
ReverseIndexStringHash,
|
||||
ReverseIndexStringHashTable<IndexType, ColumnType, has_base_index>,
|
||||
ColumnType,
|
||||
true,
|
||||
has_base_index>,
|
||||
ReverseIndexStringHash>
|
||||
{
|
||||
using Base = HashTableWithPublicState<
|
||||
IndexType,
|
||||
ReverseIndexHashTableCell<
|
||||
IndexType,
|
||||
ReverseIndexStringHash,
|
||||
ReverseIndexStringHashTable<IndexType, ColumnType, has_base_index>,
|
||||
ColumnType,
|
||||
true,
|
||||
has_base_index>,
|
||||
ReverseIndexStringHash>;
|
||||
public:
|
||||
using Base::Base;
|
||||
friend struct ReverseIndexHashTableCell<
|
||||
IndexType,
|
||||
ReverseIndexStringHash,
|
||||
ReverseIndexStringHashTable<IndexType, ColumnType, has_base_index>,
|
||||
ColumnType,
|
||||
true,
|
||||
has_base_index>;
|
||||
};
|
||||
|
||||
template <typename IndexType, typename ColumnType, bool has_base_index>
|
||||
class ReverseIndexNumberHashTable : public HashTableWithPublicState<
|
||||
IndexType,
|
||||
ReverseIndexHashTableCell<
|
||||
IndexType,
|
||||
ReverseIndexNumberHash<typename ColumnType::value_type>,
|
||||
ReverseIndexNumberHashTable<IndexType, ColumnType, has_base_index>,
|
||||
ColumnType,
|
||||
false,
|
||||
has_base_index>,
|
||||
ReverseIndexNumberHash<typename ColumnType::value_type>>
|
||||
{
|
||||
using Base = HashTableWithPublicState<
|
||||
IndexType,
|
||||
ReverseIndexHashTableCell<
|
||||
IndexType,
|
||||
ReverseIndexNumberHash<typename ColumnType::value_type>,
|
||||
ReverseIndexNumberHashTable<IndexType, ColumnType, has_base_index>,
|
||||
ColumnType,
|
||||
false,
|
||||
has_base_index>,
|
||||
ReverseIndexNumberHash<typename ColumnType::value_type>>;
|
||||
public:
|
||||
using Base::Base;
|
||||
friend struct ReverseIndexHashTableCell<
|
||||
IndexType,
|
||||
ReverseIndexNumberHash<typename ColumnType::value_type>,
|
||||
ReverseIndexNumberHashTable<IndexType, ColumnType, has_base_index>,
|
||||
ColumnType,
|
||||
false,
|
||||
has_base_index>;
|
||||
};
|
||||
|
||||
|
||||
template <typename IndexType, typename ColumnType, bool has_base_index, bool is_numeric_column>
|
||||
struct SelectReverseIndexHashTable;
|
||||
|
||||
template <typename IndexType, typename ColumnType, bool has_base_index>
|
||||
struct SelectReverseIndexHashTable<IndexType, ColumnType, has_base_index, true>
|
||||
{
|
||||
using Type = ReverseIndexNumberHashTable<IndexType, ColumnType, has_base_index>;
|
||||
};
|
||||
|
||||
template <typename IndexType, typename ColumnType, bool has_base_index>
|
||||
struct SelectReverseIndexHashTable<IndexType, ColumnType, has_base_index, false>
|
||||
{
|
||||
using Type = ReverseIndexStringHashTable<IndexType, ColumnType, has_base_index>;
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
constexpr bool isNumericColumn(const T *) { return false; }
|
||||
|
||||
template <typename T>
|
||||
constexpr bool isNumericColumn(const ColumnVector<T> *) { return true; }
|
||||
|
||||
static_assert(isNumericColumn(static_cast<ColumnVector<UInt8> *>(nullptr)));
|
||||
static_assert(!isNumericColumn(static_cast<ColumnString *>(nullptr)));
|
||||
|
||||
|
||||
template <typename IndexType, typename ColumnType, bool has_base_index>
|
||||
using ReverseIndexHashTable = typename SelectReverseIndexHashTable<IndexType, ColumnType, has_base_index,
|
||||
isNumericColumn(static_cast<ColumnType *>(nullptr))>::Type;
|
||||
}
|
||||
|
||||
|
||||
template <typename IndexType, typename ColumnType>
|
||||
class ReverseIndex
|
||||
{
|
||||
public:
|
||||
explicit ReverseIndex(UInt64 num_prefix_rows_to_skip, UInt64 base_index)
|
||||
: num_prefix_rows_to_skip(num_prefix_rows_to_skip), base_index(base_index) {}
|
||||
|
||||
void setColumn(ColumnType * column_);
|
||||
|
||||
static constexpr bool is_numeric_column = isNumericColumn(static_cast<ColumnType *>(nullptr));
|
||||
static constexpr bool use_saved_hash = !is_numeric_column;
|
||||
|
||||
UInt64 insert(UInt64 from_position); /// Insert into index column[from_position];
|
||||
UInt64 insertFromLastRow();
|
||||
UInt64 getInsertionPoint(const StringRef & data);
|
||||
UInt64 lastInsertionPoint() const { return size() + base_index; }
|
||||
|
||||
ColumnType * getColumn() const { return column; }
|
||||
size_t size() const;
|
||||
|
||||
size_t allocatedBytes() const { return index ? index->getBufferSizeInBytes() : 0; }
|
||||
|
||||
private:
|
||||
ColumnType * column = nullptr;
|
||||
UInt64 num_prefix_rows_to_skip; /// The number prefix tows in column which won't be sored at index.
|
||||
UInt64 base_index; /// This values will be added to row number which is inserted into index.
|
||||
|
||||
using IndexMapType = ReverseIndexHashTable<IndexType, ColumnType, true>;
|
||||
|
||||
/// Lazy initialized.
|
||||
std::unique_ptr<IndexMapType> index;
|
||||
ColumnUInt64::MutablePtr saved_hash;
|
||||
|
||||
void buildIndex();
|
||||
|
||||
UInt64 getHash(const StringRef & ref) const
|
||||
{
|
||||
if constexpr (is_numeric_column)
|
||||
{
|
||||
using ValueType = typename ColumnType::value_type;
|
||||
ValueType value = *reinterpret_cast<const ValueType *>(ref.data);
|
||||
return DefaultHash<ValueType>()(value);
|
||||
}
|
||||
else
|
||||
return StringRefHash()(ref);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
template <typename IndexType, typename ColumnType>
|
||||
void ReverseIndex<IndexType, ColumnType>:: setColumn(ColumnType * column_)
|
||||
{
|
||||
if (column != column_)
|
||||
index = nullptr;
|
||||
|
||||
column = column_;
|
||||
}
|
||||
|
||||
template <typename IndexType, typename ColumnType>
|
||||
size_t ReverseIndex<IndexType, ColumnType>::size() const
|
||||
{
|
||||
if (!column)
|
||||
throw Exception("ReverseIndex has not size because index column wasn't set.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
return column->size();
|
||||
}
|
||||
|
||||
template <typename IndexType, typename ColumnType>
|
||||
void ReverseIndex<IndexType, ColumnType>::buildIndex()
|
||||
{
|
||||
if (index)
|
||||
return;
|
||||
|
||||
if (!column)
|
||||
throw Exception("ReverseIndex can't build index because index column wasn't set.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto size = column->size();
|
||||
index = std::make_unique<IndexMapType>(size);
|
||||
|
||||
if constexpr (use_saved_hash)
|
||||
saved_hash = ColumnUInt64::create(size);
|
||||
|
||||
auto & state = index->getState();
|
||||
state.index_column = column;
|
||||
state.base_index = base_index;
|
||||
if constexpr (use_saved_hash)
|
||||
state.saved_hash_column = &saved_hash->getData();
|
||||
|
||||
using IteratorType = typename IndexMapType::iterator;
|
||||
IteratorType iterator;
|
||||
bool inserted;
|
||||
|
||||
for (auto row : ext::range(num_prefix_rows_to_skip, size))
|
||||
{
|
||||
auto hash = getHash(column->getDataAt(row));
|
||||
|
||||
if constexpr (use_saved_hash)
|
||||
saved_hash->getElement(row) = hash;
|
||||
|
||||
index->emplace(row + base_index, iterator, inserted, hash);
|
||||
|
||||
if (!inserted)
|
||||
throw Exception("Duplicating keys found in ReverseIndex.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename IndexType, typename ColumnType>
|
||||
UInt64 ReverseIndex<IndexType, ColumnType>::insert(UInt64 from_position)
|
||||
{
|
||||
if (!index)
|
||||
buildIndex();
|
||||
|
||||
using IteratorType = typename IndexMapType::iterator;
|
||||
IteratorType iterator;
|
||||
bool inserted;
|
||||
|
||||
auto hash = getHash(column->getDataAt(from_position));
|
||||
|
||||
if constexpr (use_saved_hash)
|
||||
{
|
||||
auto & data = saved_hash->getData();
|
||||
if (data.size() <= from_position)
|
||||
data.resize(from_position + 1);
|
||||
data[from_position] = hash;
|
||||
}
|
||||
|
||||
index->emplace(from_position + base_index, iterator, inserted, hash);
|
||||
|
||||
return *iterator;
|
||||
}
|
||||
|
||||
template <typename IndexType, typename ColumnType>
|
||||
UInt64 ReverseIndex<IndexType, ColumnType>::insertFromLastRow()
|
||||
{
|
||||
if (!column)
|
||||
throw Exception("ReverseIndex can't insert row from column because index column wasn't set.",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
UInt64 num_rows = size();
|
||||
|
||||
if (num_rows == 0)
|
||||
throw Exception("ReverseIndex can't insert row from column because it is empty.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
UInt64 position = num_rows - 1;
|
||||
UInt64 inserted_pos = insert(position);
|
||||
if (position + base_index != inserted_pos)
|
||||
throw Exception("Can't insert into reverse index from last row (" + toString(position + base_index)
|
||||
+ ") because the same row is in position " + toString(inserted_pos), ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
return inserted_pos;
|
||||
}
|
||||
|
||||
template <typename IndexType, typename ColumnType>
|
||||
UInt64 ReverseIndex<IndexType, ColumnType>::getInsertionPoint(const StringRef & data)
|
||||
{
|
||||
if (!index)
|
||||
buildIndex();
|
||||
|
||||
using IteratorType = typename IndexMapType::iterator;
|
||||
IteratorType iterator;
|
||||
|
||||
auto hash = getHash(data);
|
||||
iterator = index->find(data, hash);
|
||||
|
||||
return iterator == index->end() ? size() + base_index : *iterator;
|
||||
}
|
||||
|
||||
}
|
@ -1,14 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <Common/ActionLock.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// An atomic variable that is used to block and interrupt certain actions
|
||||
/// If it is not zero then actions related with it should be considered as interrupted
|
||||
/// An atomic variable that is used to block and interrupt certain actions.
|
||||
/// If it is not zero then actions related with it should be considered as interrupted.
|
||||
/// Uses shared_ptr and the lock uses weak_ptr to be able to "hold" a lock when an object with blocker has already died.
|
||||
class ActionBlocker
|
||||
{
|
||||
public:
|
||||
@ -33,4 +34,5 @@ private:
|
||||
CounterPtr counter;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <atomic>
|
||||
#include <Core/Types.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -4,8 +4,11 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <chrono>
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
@ -140,6 +143,12 @@ BackgroundSchedulePool::BackgroundSchedulePool(size_t size)
|
||||
{
|
||||
LOG_INFO(&Logger::get("BackgroundSchedulePool"), "Create BackgroundSchedulePool with " << size << " threads");
|
||||
|
||||
/// Put all threads of both thread pools to one thread group
|
||||
/// The master thread exits immediately
|
||||
CurrentThread::initializeQuery();
|
||||
thread_group = CurrentThread::getGroup();
|
||||
CurrentThread::detachQuery();
|
||||
|
||||
threads.resize(size);
|
||||
for (auto & thread : threads)
|
||||
thread = std::thread([this] { threadFunction(); });
|
||||
@ -212,9 +221,11 @@ void BackgroundSchedulePool::threadFunction()
|
||||
{
|
||||
setThreadName("BackgrSchedPool");
|
||||
|
||||
MemoryTracker memory_tracker;
|
||||
memory_tracker.setMetric(CurrentMetrics::MemoryTrackingInBackgroundSchedulePool);
|
||||
current_memory_tracker = &memory_tracker;
|
||||
/// Put all threads to one thread pool
|
||||
CurrentThread::attachTo(thread_group);
|
||||
SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
|
||||
|
||||
CurrentThread::getMemoryTracker().setMetric(CurrentMetrics::MemoryTrackingInBackgroundSchedulePool);
|
||||
|
||||
while (!shutdown)
|
||||
{
|
||||
@ -224,8 +235,6 @@ void BackgroundSchedulePool::threadFunction()
|
||||
task_notification.execute();
|
||||
}
|
||||
}
|
||||
|
||||
current_memory_tracker = nullptr;
|
||||
}
|
||||
|
||||
|
||||
@ -233,6 +242,10 @@ void BackgroundSchedulePool::delayExecutionThreadFunction()
|
||||
{
|
||||
setThreadName("BckSchPoolDelay");
|
||||
|
||||
/// Put all threads to one thread pool
|
||||
CurrentThread::attachTo(thread_group);
|
||||
SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
|
||||
|
||||
while (!shutdown)
|
||||
{
|
||||
TaskInfoPtr task;
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <functional>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <Common/ZooKeeper/Types.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -138,6 +139,9 @@ private:
|
||||
std::thread delayed_thread;
|
||||
/// Tasks ordered by scheduled time.
|
||||
DelayedTasks delayed_tasks;
|
||||
|
||||
/// Thread group used for profiling purposes
|
||||
ThreadGroupStatusPtr thread_group;
|
||||
};
|
||||
|
||||
using BackgroundSchedulePoolPtr = std::shared_ptr<BackgroundSchedulePool>;
|
||||
|
80
dbms/src/Common/CurrentThread.cpp
Normal file
80
dbms/src/Common/CurrentThread.cpp
Normal file
@ -0,0 +1,80 @@
|
||||
#include "CurrentThread.h"
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Interpreters/ProcessList.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Ext/ThreadNumber.h>
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
void CurrentThread::updatePerformanceCounters()
|
||||
{
|
||||
get()->updatePerformanceCounters();
|
||||
}
|
||||
|
||||
ThreadStatusPtr CurrentThread::get()
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
if (!current_thread || current_thread.use_count() <= 0)
|
||||
throw Exception("Thread #" + std::to_string(Poco::ThreadNumber::get()) + " status was not initialized", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (Poco::ThreadNumber::get() != current_thread->thread_number)
|
||||
throw Exception("Current thread has different thread number", ErrorCodes::LOGICAL_ERROR);
|
||||
#endif
|
||||
|
||||
return current_thread;
|
||||
}
|
||||
|
||||
ProfileEvents::Counters & CurrentThread::getProfileEvents()
|
||||
{
|
||||
return current_thread->performance_counters;
|
||||
}
|
||||
|
||||
MemoryTracker & CurrentThread::getMemoryTracker()
|
||||
{
|
||||
return current_thread->memory_tracker;
|
||||
}
|
||||
|
||||
void CurrentThread::updateProgressIn(const Progress & value)
|
||||
{
|
||||
current_thread->progress_in.incrementPiecewiseAtomically(value);
|
||||
}
|
||||
|
||||
void CurrentThread::updateProgressOut(const Progress & value)
|
||||
{
|
||||
current_thread->progress_out.incrementPiecewiseAtomically(value);
|
||||
}
|
||||
|
||||
void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue)
|
||||
{
|
||||
get()->attachInternalTextLogsQueue(logs_queue);
|
||||
}
|
||||
|
||||
std::shared_ptr<InternalTextLogsQueue> CurrentThread::getInternalTextLogsQueue()
|
||||
{
|
||||
/// NOTE: this method could be called at early server startup stage
|
||||
/// NOTE: this method could be called in ThreadStatus destructor, therefore we make use_count() check just in case
|
||||
|
||||
if (!current_thread || current_thread.use_count() <= 0)
|
||||
return nullptr;
|
||||
|
||||
if (current_thread->getCurrentState() == ThreadStatus::ThreadState::Died)
|
||||
return nullptr;
|
||||
|
||||
return current_thread->getInternalTextLogsQueue();
|
||||
}
|
||||
|
||||
ThreadGroupStatusPtr CurrentThread::getGroup()
|
||||
{
|
||||
return get()->getThreadGroup();
|
||||
}
|
||||
|
||||
}
|
83
dbms/src/Common/CurrentThread.h
Normal file
83
dbms/src/Common/CurrentThread.h
Normal file
@ -0,0 +1,83 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
class Counters;
|
||||
}
|
||||
|
||||
class MemoryTracker;
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
class QueryStatus;
|
||||
class ThreadStatus;
|
||||
struct Progress;
|
||||
using ThreadStatusPtr = std::shared_ptr<ThreadStatus>;
|
||||
class InternalTextLogsQueue;
|
||||
class ThreadGroupStatus;
|
||||
using ThreadGroupStatusPtr = std::shared_ptr<ThreadGroupStatus>;
|
||||
|
||||
|
||||
class CurrentThread
|
||||
{
|
||||
public:
|
||||
|
||||
/// Handler to current thread
|
||||
static ThreadStatusPtr get();
|
||||
/// Group to which belongs current thread
|
||||
static ThreadGroupStatusPtr getGroup();
|
||||
|
||||
/// A logs queue used by TCPHandler to pass logs to a client
|
||||
static void attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue);
|
||||
static std::shared_ptr<InternalTextLogsQueue> getInternalTextLogsQueue();
|
||||
|
||||
/// Makes system calls to update ProfileEvents that contain info from rusage and taskstats
|
||||
static void updatePerformanceCounters();
|
||||
|
||||
static ProfileEvents::Counters & getProfileEvents();
|
||||
static MemoryTracker & getMemoryTracker();
|
||||
|
||||
/// Update read and write rows (bytes) statistics (used in system.query_thread_log)
|
||||
static void updateProgressIn(const Progress & value);
|
||||
static void updateProgressOut(const Progress & value);
|
||||
|
||||
/// Query management:
|
||||
|
||||
/// Call from master thread as soon as possible (e.g. when thread accepted connection)
|
||||
static void initializeQuery();
|
||||
|
||||
/// Sets query_context for current thread group
|
||||
static void attachQueryContext(Context & query_context);
|
||||
|
||||
/// You must call one of these methods when create a query child thread:
|
||||
/// Add current thread to a group associated with the thread group
|
||||
static void attachTo(const ThreadGroupStatusPtr & thread_group);
|
||||
/// Is useful for a ThreadPool tasks
|
||||
static void attachToIfDetached(const ThreadGroupStatusPtr & thread_group);
|
||||
|
||||
/// Update ProfileEvents and dumps info to system.query_thread_log
|
||||
static void finalizePerformanceCounters();
|
||||
|
||||
/// Returns a non-empty string if the thread is attached to a query
|
||||
static std::string getCurrentQueryID();
|
||||
|
||||
/// Non-master threads call this method in destructor automatically
|
||||
static void detachQuery();
|
||||
static void detachQueryIfNotDetached();
|
||||
|
||||
/// Initializes query with current thread as master thread in constructor, and detaches it in desstructor
|
||||
struct QueryScope
|
||||
{
|
||||
explicit QueryScope(Context & query_context);
|
||||
~QueryScope();
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <Poco/NumberParser.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <atomic>
|
||||
#include <optional>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -79,6 +80,10 @@ struct DNSResolver::Impl
|
||||
{
|
||||
SimpleCache<decltype(resolveIPAddressImpl), &resolveIPAddressImpl> cache_host;
|
||||
|
||||
/// Cached server host name
|
||||
std::mutex mutex;
|
||||
std::optional<String> host_name;
|
||||
|
||||
/// If disabled, will not make cache lookups, will resolve addresses manually on each call
|
||||
std::atomic<bool> disable_cache{false};
|
||||
};
|
||||
@ -108,6 +113,9 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U
|
||||
void DNSResolver::dropCache()
|
||||
{
|
||||
impl->cache_host.drop();
|
||||
|
||||
std::unique_lock lock(impl->mutex);
|
||||
impl->host_name.reset();
|
||||
}
|
||||
|
||||
void DNSResolver::setDisableCacheFlag(bool is_disabled)
|
||||
@ -115,6 +123,19 @@ void DNSResolver::setDisableCacheFlag(bool is_disabled)
|
||||
impl->disable_cache = is_disabled;
|
||||
}
|
||||
|
||||
String DNSResolver::getHostName()
|
||||
{
|
||||
if (impl->disable_cache)
|
||||
return Poco::Net::DNS::hostName();
|
||||
|
||||
std::unique_lock lock(impl->mutex);
|
||||
|
||||
if (!impl->host_name.has_value())
|
||||
impl->host_name.emplace(Poco::Net::DNS::hostName());
|
||||
|
||||
return *impl->host_name;
|
||||
}
|
||||
|
||||
DNSResolver::~DNSResolver() = default;
|
||||
|
||||
|
||||
|
@ -25,6 +25,9 @@ public:
|
||||
|
||||
Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
|
||||
|
||||
/// Get this server host name
|
||||
String getHostName();
|
||||
|
||||
/// Disables caching
|
||||
void setDisableCacheFlag(bool is_disabled = true);
|
||||
|
||||
|
@ -380,6 +380,13 @@ namespace ErrorCodes
|
||||
extern const int INVALID_JOIN_ON_EXPRESSION = 403;
|
||||
extern const int BAD_ODBC_CONNECTION_STRING = 404;
|
||||
extern const int PARTITION_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT = 405;
|
||||
extern const int TOP_AND_LIMIT_TOGETHER = 406;
|
||||
extern const int DECIMAL_OVERFLOW = 407;
|
||||
extern const int BAD_REQUEST_PARAMETER = 408;
|
||||
extern const int EXTERNAL_EXECUTABLE_NOT_FOUND = 409;
|
||||
extern const int EXTERNAL_SERVER_IS_NOT_RESPONDING = 410;
|
||||
extern const int PTHREAD_ERROR = 411;
|
||||
extern const int NETLINK_ERROR = 412;
|
||||
|
||||
extern const int KEEPER_EXCEPTION = 999;
|
||||
extern const int POCO_EXCEPTION = 1000;
|
||||
|
@ -1,225 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <DataStreams/AsynchronousBlockInputStream.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <IO/ReadBufferFromIStream.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <Storages/StorageMemory.h>
|
||||
#include <Client/Connection.h>
|
||||
#include <Poco/Net/HTMLForm.h>
|
||||
#include <Poco/Net/PartHandler.h>
|
||||
#include <Poco/Net/MessageHeader.h>
|
||||
#include <Common/HTMLForm.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
/// The base class containing the basic information about external table and
|
||||
/// basic functions for extracting this information from text fields.
|
||||
class BaseExternalTable
|
||||
{
|
||||
public:
|
||||
std::string file; /// File with data or '-' if stdin
|
||||
std::string name; /// The name of the table
|
||||
std::string format; /// Name of the data storage format
|
||||
|
||||
/// Description of the table structure: (column name, data type name)
|
||||
std::vector<std::pair<std::string, std::string>> structure;
|
||||
|
||||
std::unique_ptr<ReadBuffer> read_buffer;
|
||||
Block sample_block;
|
||||
|
||||
virtual ~BaseExternalTable() {}
|
||||
|
||||
/// Initialize read_buffer, depending on the data source. By default, does nothing.
|
||||
virtual void initReadBuffer() {}
|
||||
|
||||
/// Get the table data - a pair (a thread with the contents of the table, the name of the table)
|
||||
ExternalTableData getData(const Context & context)
|
||||
{
|
||||
initReadBuffer();
|
||||
initSampleBlock();
|
||||
ExternalTableData res = std::make_pair(std::make_shared<AsynchronousBlockInputStream>(context.getInputFormat(
|
||||
format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)), name);
|
||||
return res;
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Clear all accumulated information
|
||||
void clean()
|
||||
{
|
||||
name = "";
|
||||
file = "";
|
||||
format = "";
|
||||
structure.clear();
|
||||
sample_block = Block();
|
||||
read_buffer.reset();
|
||||
}
|
||||
|
||||
/// Function for debugging information output
|
||||
void write()
|
||||
{
|
||||
std::cerr << "file " << file << std::endl;
|
||||
std::cerr << "name " << name << std::endl;
|
||||
std::cerr << "format " << format << std::endl;
|
||||
std::cerr << "structure: \n";
|
||||
for (size_t i = 0; i < structure.size(); ++i)
|
||||
std::cerr << "\t" << structure[i].first << " " << structure[i].second << std::endl;
|
||||
}
|
||||
|
||||
static std::vector<std::string> split(const std::string & s, const std::string & d)
|
||||
{
|
||||
std::vector<std::string> res;
|
||||
boost::split(res, s, boost::algorithm::is_any_of(d), boost::algorithm::token_compress_on);
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Construct the `structure` vector from the text field `structure`
|
||||
virtual void parseStructureFromStructureField(const std::string & argument)
|
||||
{
|
||||
std::vector<std::string> vals = split(argument, " ,");
|
||||
|
||||
if (vals.size() & 1)
|
||||
throw Exception("Odd number of attributes in section structure", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
for (size_t i = 0; i < vals.size(); i += 2)
|
||||
structure.emplace_back(vals[i], vals[i + 1]);
|
||||
}
|
||||
|
||||
/// Construct the `structure` vector from the text field `types`
|
||||
virtual void parseStructureFromTypesField(const std::string & argument)
|
||||
{
|
||||
std::vector<std::string> vals = split(argument, " ,");
|
||||
|
||||
for (size_t i = 0; i < vals.size(); ++i)
|
||||
structure.emplace_back("_" + toString(i + 1), vals[i]);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Initialize sample_block according to the structure of the table stored in the `structure`
|
||||
void initSampleBlock()
|
||||
{
|
||||
const DataTypeFactory & data_type_factory = DataTypeFactory::instance();
|
||||
|
||||
for (size_t i = 0; i < structure.size(); ++i)
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.name = structure[i].first;
|
||||
column.type = data_type_factory.get(structure[i].second);
|
||||
column.column = column.type->createColumn();
|
||||
sample_block.insert(std::move(column));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Parsing of external table used in the tcp client.
|
||||
class ExternalTable : public BaseExternalTable
|
||||
{
|
||||
public:
|
||||
void initReadBuffer() override
|
||||
{
|
||||
if (file == "-")
|
||||
read_buffer = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
|
||||
else
|
||||
read_buffer = std::make_unique<ReadBufferFromFile>(file);
|
||||
}
|
||||
|
||||
/// Extract parameters from variables_map, which is built on the client command line
|
||||
ExternalTable(const boost::program_options::variables_map & external_options)
|
||||
{
|
||||
if (external_options.count("file"))
|
||||
file = external_options["file"].as<std::string>();
|
||||
else
|
||||
throw Exception("--file field have not been provided for external table", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (external_options.count("name"))
|
||||
name = external_options["name"].as<std::string>();
|
||||
else
|
||||
throw Exception("--name field have not been provided for external table", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (external_options.count("format"))
|
||||
format = external_options["format"].as<std::string>();
|
||||
else
|
||||
throw Exception("--format field have not been provided for external table", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (external_options.count("structure"))
|
||||
parseStructureFromStructureField(external_options["structure"].as<std::string>());
|
||||
else if (external_options.count("types"))
|
||||
parseStructureFromTypesField(external_options["types"].as<std::string>());
|
||||
else
|
||||
throw Exception("Neither --structure nor --types have not been provided for external table", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
};
|
||||
|
||||
/// Parsing of external table used when sending tables via http
|
||||
/// The `handlePart` function will be called for each table passed,
|
||||
/// so it's also necessary to call `clean` at the end of the `handlePart`.
|
||||
class ExternalTablesHandler : public Poco::Net::PartHandler, BaseExternalTable
|
||||
{
|
||||
public:
|
||||
std::vector<std::string> names;
|
||||
|
||||
ExternalTablesHandler(Context & context_, Poco::Net::NameValueCollection params_) : context(context_), params(params_) { }
|
||||
|
||||
void handlePart(const Poco::Net::MessageHeader & header, std::istream & stream)
|
||||
{
|
||||
/// The buffer is initialized here, not in the virtual function initReadBuffer
|
||||
read_buffer = std::make_unique<ReadBufferFromIStream>(stream);
|
||||
|
||||
/// Retrieve a collection of parameters from MessageHeader
|
||||
Poco::Net::NameValueCollection content;
|
||||
std::string label;
|
||||
Poco::Net::MessageHeader::splitParameters(header.get("Content-Disposition"), label, content);
|
||||
|
||||
/// Get parameters
|
||||
name = content.get("name", "_data");
|
||||
format = params.get(name + "_format", "TabSeparated");
|
||||
|
||||
if (params.has(name + "_structure"))
|
||||
parseStructureFromStructureField(params.get(name + "_structure"));
|
||||
else if (params.has(name + "_types"))
|
||||
parseStructureFromTypesField(params.get(name + "_types"));
|
||||
else
|
||||
throw Exception("Neither structure nor types have not been provided for external table " + name + ". Use fields " + name + "_structure or " + name + "_types to do so.", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
ExternalTableData data = getData(context);
|
||||
|
||||
/// Create table
|
||||
NamesAndTypesList columns = sample_block.getNamesAndTypesList();
|
||||
StoragePtr storage = StorageMemory::create(data.second, ColumnsDescription{columns});
|
||||
storage->startup();
|
||||
context.addExternalTable(data.second, storage);
|
||||
BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef());
|
||||
|
||||
/// Write data
|
||||
data.first->readPrefix();
|
||||
output->writePrefix();
|
||||
while(Block block = data.first->read())
|
||||
output->write(block);
|
||||
data.first->readSuffix();
|
||||
output->writeSuffix();
|
||||
|
||||
names.push_back(name);
|
||||
/// We are ready to receive the next file, for this we clear all the information received
|
||||
clean();
|
||||
}
|
||||
|
||||
private:
|
||||
Context & context;
|
||||
Poco::Net::NameValueCollection params;
|
||||
};
|
||||
|
||||
|
||||
}
|
@ -53,6 +53,7 @@ struct HashMapCell
|
||||
|
||||
bool keyEquals(const Key & key_) const { return value.first == key_; }
|
||||
bool keyEquals(const Key & key_, size_t /*hash_*/) const { return value.first == key_; }
|
||||
bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return value.first == key_; }
|
||||
|
||||
void setHash(size_t /*hash_value*/) {}
|
||||
size_t getHash(const Hash & hash) const { return hash(value.first); }
|
||||
@ -112,6 +113,7 @@ struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
|
||||
|
||||
bool keyEquals(const Key & key_) const { return this->value.first == key_; }
|
||||
bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; }
|
||||
bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); }
|
||||
|
||||
void setHash(size_t hash_value) { saved_hash = hash_value; }
|
||||
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
|
||||
|
@ -75,6 +75,7 @@ struct HashSetCellWithSavedHash : public HashTableCell<Key, Hash, TState>
|
||||
|
||||
bool keyEquals(const Key & key_) const { return this->key == key_; }
|
||||
bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->key == key_; }
|
||||
bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); }
|
||||
|
||||
void setHash(size_t hash_value) { saved_hash = hash_value; }
|
||||
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
|
||||
|
@ -108,6 +108,7 @@ struct HashTableCell
|
||||
/// Are the keys at the cells equal?
|
||||
bool keyEquals(const Key & key_) const { return key == key_; }
|
||||
bool keyEquals(const Key & key_, size_t /*hash_*/) const { return key == key_; }
|
||||
bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return key == key_; }
|
||||
|
||||
/// If the cell can remember the value of the hash function, then remember it.
|
||||
void setHash(size_t /*hash_value*/) {}
|
||||
@ -280,9 +281,10 @@ protected:
|
||||
#endif
|
||||
|
||||
/// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain.
|
||||
size_t ALWAYS_INLINE findCell(const Key & x, size_t hash_value, size_t place_value) const
|
||||
template <typename ObjectToCompareWith>
|
||||
size_t ALWAYS_INLINE findCell(const ObjectToCompareWith & x, size_t hash_value, size_t place_value) const
|
||||
{
|
||||
while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value))
|
||||
while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value, *this))
|
||||
{
|
||||
place_value = grower.next(place_value);
|
||||
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
||||
@ -734,7 +736,8 @@ public:
|
||||
}
|
||||
|
||||
|
||||
iterator ALWAYS_INLINE find(Key x)
|
||||
template <typename ObjectToCompareWith>
|
||||
iterator ALWAYS_INLINE find(ObjectToCompareWith x)
|
||||
{
|
||||
if (Cell::isZero(x, *this))
|
||||
return this->hasZero() ? iteratorToZero() : end();
|
||||
@ -745,7 +748,8 @@ public:
|
||||
}
|
||||
|
||||
|
||||
const_iterator ALWAYS_INLINE find(Key x) const
|
||||
template <typename ObjectToCompareWith>
|
||||
const_iterator ALWAYS_INLINE find(ObjectToCompareWith x) const
|
||||
{
|
||||
if (Cell::isZero(x, *this))
|
||||
return this->hasZero() ? iteratorToZero() : end();
|
||||
@ -756,7 +760,8 @@ public:
|
||||
}
|
||||
|
||||
|
||||
iterator ALWAYS_INLINE find(Key x, size_t hash_value)
|
||||
template <typename ObjectToCompareWith>
|
||||
iterator ALWAYS_INLINE find(ObjectToCompareWith x, size_t hash_value)
|
||||
{
|
||||
if (Cell::isZero(x, *this))
|
||||
return this->hasZero() ? iteratorToZero() : end();
|
||||
@ -766,7 +771,8 @@ public:
|
||||
}
|
||||
|
||||
|
||||
const_iterator ALWAYS_INLINE find(Key x, size_t hash_value) const
|
||||
template <typename ObjectToCompareWith>
|
||||
const_iterator ALWAYS_INLINE find(ObjectToCompareWith x, size_t hash_value) const
|
||||
{
|
||||
if (Cell::isZero(x, *this))
|
||||
return this->hasZero() ? iteratorToZero() : end();
|
||||
|
@ -1,11 +1,10 @@
|
||||
#include "MemoryTracker.h"
|
||||
#include <common/likely.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <iomanip>
|
||||
|
||||
#include <Common/MemoryTracker.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -19,7 +18,7 @@ namespace DB
|
||||
|
||||
MemoryTracker::~MemoryTracker()
|
||||
{
|
||||
if (peak)
|
||||
if (static_cast<int>(level) < static_cast<int>(VariableContext::Process) && peak)
|
||||
{
|
||||
try
|
||||
{
|
||||
@ -56,13 +55,16 @@ void MemoryTracker::logPeakMemoryUsage() const
|
||||
|
||||
void MemoryTracker::alloc(Int64 size)
|
||||
{
|
||||
if (blocker.isCancelled())
|
||||
return;
|
||||
|
||||
/** Using memory_order_relaxed means that if allocations are done simultaneously,
|
||||
* we allow exception about memory limit exceeded to be thrown only on next allocation.
|
||||
* So, we allow over-allocations.
|
||||
*/
|
||||
Int64 will_be = size + amount.fetch_add(size, std::memory_order_relaxed);
|
||||
|
||||
if (!next.load(std::memory_order_relaxed))
|
||||
if (!parent.load(std::memory_order_relaxed))
|
||||
CurrentMetrics::add(metric, size);
|
||||
|
||||
Int64 current_limit = limit.load(std::memory_order_relaxed);
|
||||
@ -102,45 +104,62 @@ void MemoryTracker::alloc(Int64 size)
|
||||
if (will_be > peak.load(std::memory_order_relaxed)) /// Races doesn't matter. Could rewrite with CAS, but not worth.
|
||||
peak.store(will_be, std::memory_order_relaxed);
|
||||
|
||||
if (auto loaded_next = next.load(std::memory_order_relaxed))
|
||||
if (auto loaded_next = parent.load(std::memory_order_relaxed))
|
||||
loaded_next->alloc(size);
|
||||
}
|
||||
|
||||
|
||||
void MemoryTracker::free(Int64 size)
|
||||
{
|
||||
Int64 new_amount = amount.fetch_sub(size, std::memory_order_relaxed) - size;
|
||||
if (blocker.isCancelled())
|
||||
return;
|
||||
|
||||
/** Sometimes, query could free some data, that was allocated outside of query context.
|
||||
* Example: cache eviction.
|
||||
* To avoid negative memory usage, we "saturate" amount.
|
||||
* Memory usage will be calculated with some error.
|
||||
* NOTE The code is not atomic. Not worth to fix.
|
||||
*/
|
||||
if (new_amount < 0)
|
||||
if (level == VariableContext::Thread)
|
||||
{
|
||||
amount.fetch_sub(new_amount);
|
||||
size += new_amount;
|
||||
/// Could become negative if memory allocated in this thread is freed in another one
|
||||
amount.fetch_sub(size, std::memory_order_relaxed);
|
||||
}
|
||||
else
|
||||
{
|
||||
Int64 new_amount = amount.fetch_sub(size, std::memory_order_relaxed) - size;
|
||||
|
||||
/** Sometimes, query could free some data, that was allocated outside of query context.
|
||||
* Example: cache eviction.
|
||||
* To avoid negative memory usage, we "saturate" amount.
|
||||
* Memory usage will be calculated with some error.
|
||||
* NOTE: The code is not atomic. Not worth to fix.
|
||||
*/
|
||||
if (unlikely(new_amount < 0))
|
||||
{
|
||||
amount.fetch_sub(new_amount);
|
||||
size += new_amount;
|
||||
}
|
||||
}
|
||||
|
||||
if (auto loaded_next = next.load(std::memory_order_relaxed))
|
||||
if (auto loaded_next = parent.load(std::memory_order_relaxed))
|
||||
loaded_next->free(size);
|
||||
else
|
||||
CurrentMetrics::sub(metric, size);
|
||||
}
|
||||
|
||||
|
||||
void MemoryTracker::reset()
|
||||
void MemoryTracker::resetCounters()
|
||||
{
|
||||
if (!next.load(std::memory_order_relaxed))
|
||||
CurrentMetrics::sub(metric, amount.load(std::memory_order_relaxed));
|
||||
|
||||
amount.store(0, std::memory_order_relaxed);
|
||||
peak.store(0, std::memory_order_relaxed);
|
||||
limit.store(0, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
void MemoryTracker::reset()
|
||||
{
|
||||
if (!parent.load(std::memory_order_relaxed))
|
||||
CurrentMetrics::sub(metric, amount.load(std::memory_order_relaxed));
|
||||
|
||||
resetCounters();
|
||||
}
|
||||
|
||||
|
||||
void MemoryTracker::setOrRaiseLimit(Int64 value)
|
||||
{
|
||||
/// This is just atomic set to maximum.
|
||||
@ -149,29 +168,26 @@ void MemoryTracker::setOrRaiseLimit(Int64 value)
|
||||
;
|
||||
}
|
||||
|
||||
#if __APPLE__ && __clang__
|
||||
__thread MemoryTracker * current_memory_tracker = nullptr;
|
||||
#else
|
||||
thread_local MemoryTracker * current_memory_tracker = nullptr;
|
||||
#endif
|
||||
|
||||
namespace CurrentMemoryTracker
|
||||
{
|
||||
void alloc(Int64 size)
|
||||
{
|
||||
if (current_memory_tracker)
|
||||
current_memory_tracker->alloc(size);
|
||||
DB::CurrentThread::getMemoryTracker().alloc(size);
|
||||
}
|
||||
|
||||
void realloc(Int64 old_size, Int64 new_size)
|
||||
{
|
||||
if (current_memory_tracker)
|
||||
current_memory_tracker->alloc(new_size - old_size);
|
||||
DB::CurrentThread::getMemoryTracker().alloc(new_size - old_size);
|
||||
}
|
||||
|
||||
void free(Int64 size)
|
||||
{
|
||||
if (current_memory_tracker)
|
||||
current_memory_tracker->free(size);
|
||||
DB::CurrentThread::getMemoryTracker().free(size);
|
||||
}
|
||||
}
|
||||
|
||||
DB::SimpleActionLock getCurrentMemoryTrackerActionLock()
|
||||
{
|
||||
return DB::CurrentThread::getMemoryTracker().blocker.cancel();
|
||||
}
|
||||
|
@ -3,6 +3,8 @@
|
||||
#include <atomic>
|
||||
#include <common/Types.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/SimpleActionBlocker.h>
|
||||
#include <Common/VariableContext.h>
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
@ -26,7 +28,7 @@ class MemoryTracker
|
||||
|
||||
/// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy).
|
||||
/// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker.
|
||||
std::atomic<MemoryTracker *> next {};
|
||||
std::atomic<MemoryTracker *> parent {};
|
||||
|
||||
/// You could specify custom metric to track memory usage.
|
||||
CurrentMetrics::Metric metric = CurrentMetrics::MemoryTracking;
|
||||
@ -35,11 +37,14 @@ class MemoryTracker
|
||||
const char * description = nullptr;
|
||||
|
||||
public:
|
||||
MemoryTracker() {}
|
||||
MemoryTracker(Int64 limit_) : limit(limit_) {}
|
||||
MemoryTracker(VariableContext level = VariableContext::Thread) : level(level) {}
|
||||
MemoryTracker(Int64 limit_, VariableContext level = VariableContext::Thread) : limit(limit_), level(level) {}
|
||||
MemoryTracker(MemoryTracker * parent_, VariableContext level = VariableContext::Thread) : parent(parent_), level(level) {}
|
||||
|
||||
~MemoryTracker();
|
||||
|
||||
VariableContext level;
|
||||
|
||||
/** Call the following functions before calling of corresponding operations with memory allocators.
|
||||
*/
|
||||
void alloc(Int64 size);
|
||||
@ -79,9 +84,15 @@ public:
|
||||
}
|
||||
|
||||
/// next should be changed only once: from nullptr to some value.
|
||||
void setNext(MemoryTracker * elem)
|
||||
/// NOTE: It is not true in MergeListElement
|
||||
void setParent(MemoryTracker * elem)
|
||||
{
|
||||
next.store(elem, std::memory_order_relaxed);
|
||||
parent.store(elem, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
MemoryTracker * getParent()
|
||||
{
|
||||
return parent.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/// The memory consumption could be shown in realtime via CurrentMetrics counter
|
||||
@ -95,26 +106,21 @@ public:
|
||||
description = description_;
|
||||
}
|
||||
|
||||
/// Reset the accumulated data.
|
||||
/// Reset the accumulated data
|
||||
void resetCounters();
|
||||
|
||||
/// Reset the accumulated data and the parent.
|
||||
void reset();
|
||||
|
||||
/// Prints info about peak memory consumption into log.
|
||||
void logPeakMemoryUsage() const;
|
||||
|
||||
/// To be able to temporarily stop memory tracker
|
||||
DB::SimpleActionBlocker blocker;
|
||||
};
|
||||
|
||||
|
||||
/** The MemoryTracker object is quite difficult to pass to all places where significant amounts of memory are allocated.
|
||||
* Therefore, a thread-local pointer to used MemoryTracker is set, or nullptr if MemoryTracker does not need to be used.
|
||||
* This pointer is set when memory consumption is monitored in current thread.
|
||||
* So, you just need to pass it to all the threads that handle one request.
|
||||
*/
|
||||
#if defined(__APPLE__) && defined(__clang__)
|
||||
extern __thread MemoryTracker * current_memory_tracker;
|
||||
#else
|
||||
extern thread_local MemoryTracker * current_memory_tracker;
|
||||
#endif
|
||||
|
||||
/// Convenience methods, that use current_memory_tracker if it is available.
|
||||
/// Convenience methods, that use current thread's memory_tracker if it is available.
|
||||
namespace CurrentMemoryTracker
|
||||
{
|
||||
void alloc(Int64 size);
|
||||
@ -123,20 +129,4 @@ namespace CurrentMemoryTracker
|
||||
}
|
||||
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
struct TemporarilyDisableMemoryTracker : private boost::noncopyable
|
||||
{
|
||||
MemoryTracker * memory_tracker;
|
||||
|
||||
TemporarilyDisableMemoryTracker()
|
||||
{
|
||||
memory_tracker = current_memory_tracker;
|
||||
current_memory_tracker = nullptr;
|
||||
}
|
||||
|
||||
~TemporarilyDisableMemoryTracker()
|
||||
{
|
||||
current_memory_tracker = memory_tracker;
|
||||
}
|
||||
};
|
||||
DB::SimpleActionLock getCurrentMemoryTrackerActionLock();
|
||||
|
@ -47,3 +47,11 @@ std::enable_if_t<std::is_class_v<T>, T> NaNOrZero()
|
||||
{
|
||||
return T{};
|
||||
}
|
||||
|
||||
#if 1 /// __int128
|
||||
template <typename T>
|
||||
std::enable_if_t<std::is_same_v<T, __int128> && !std::numeric_limits<T>::is_integer, __int128> NaNOrZero()
|
||||
{
|
||||
return __int128(0);
|
||||
}
|
||||
#endif
|
||||
|
131
dbms/src/Common/ODBCBridgeHelper.cpp
Normal file
131
dbms/src/Common/ODBCBridgeHelper.cpp
Normal file
@ -0,0 +1,131 @@
|
||||
#include <Common/ODBCBridgeHelper.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadWriteBufferFromHTTP.h>
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
#include <Poco/Path.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Common/ShellCommand.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int EXTERNAL_SERVER_IS_NOT_RESPONDING;
|
||||
}
|
||||
ODBCBridgeHelper::ODBCBridgeHelper(
|
||||
const Configuration & config_, const Poco::Timespan & http_timeout_, const std::string & connection_string_)
|
||||
: config(config_), http_timeout(http_timeout_), connection_string(connection_string_)
|
||||
{
|
||||
size_t bridge_port = config.getUInt("odbc_bridge.port", DEFAULT_PORT);
|
||||
std::string bridge_host = config.getString("odbc_bridge.host", DEFAULT_HOST);
|
||||
|
||||
ping_url.setHost(bridge_host);
|
||||
ping_url.setPort(bridge_port);
|
||||
ping_url.setScheme("http");
|
||||
ping_url.setPath(PING_HANDLER);
|
||||
}
|
||||
void ODBCBridgeHelper::startODBCBridge() const
|
||||
{
|
||||
Poco::Path path{config.getString("application.dir", "")};
|
||||
path.setFileName("clickhouse-odbc-bridge");
|
||||
|
||||
if (!path.isFile())
|
||||
throw Exception("clickhouse-odbc-bridge is not found", ErrorCodes::EXTERNAL_EXECUTABLE_NOT_FOUND);
|
||||
|
||||
std::stringstream command;
|
||||
command << path.toString() << ' ';
|
||||
command << "--http-port " << config.getUInt("odbc_bridge.port", DEFAULT_PORT) << ' ';
|
||||
command << "--listen-host " << config.getString("odbc_bridge.listen_host", DEFAULT_HOST) << ' ';
|
||||
command << "--http-timeout " << http_timeout.totalMicroseconds() << ' ';
|
||||
if (config.has("logger.odbc_bridge_log"))
|
||||
command << "--log-path " << config.getString("logger.odbc_bridge_log") << ' ';
|
||||
if (config.has("logger.odbc_bridge_errlog"))
|
||||
command << "--err-log-path " << config.getString("logger.odbc_bridge_errlog") << ' ';
|
||||
if (config.has("logger.odbc_bridge_level"))
|
||||
command << "--log-level " << config.getString("logger.odbc_bridge_level") << ' ';
|
||||
command << "&"; /// we don't want to wait this process
|
||||
|
||||
auto command_str = command.str();
|
||||
LOG_TRACE(log, "Starting clickhouse-odbc-bridge with command: " << command_str);
|
||||
|
||||
auto cmd = ShellCommand::execute(command_str);
|
||||
cmd->wait();
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> ODBCBridgeHelper::getURLParams(const std::string & cols, size_t max_block_size) const
|
||||
{
|
||||
std::vector<std::pair<std::string, std::string>> result;
|
||||
|
||||
result.emplace_back("connection_string", connection_string); /// already validated
|
||||
result.emplace_back("columns", cols);
|
||||
result.emplace_back("max_block_size", std::to_string(max_block_size));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool ODBCBridgeHelper::checkODBCBridgeIsRunning() const
|
||||
{
|
||||
try
|
||||
{
|
||||
ReadWriteBufferFromHTTP buf(ping_url, Poco::Net::HTTPRequest::HTTP_GET, nullptr);
|
||||
return checkString(ODBCBridgeHelper::PING_OK_ANSWER, buf);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void ODBCBridgeHelper::startODBCBridgeSync() const
|
||||
{
|
||||
if (!checkODBCBridgeIsRunning())
|
||||
{
|
||||
LOG_TRACE(log, "clickhouse-odbc-bridge is not running, will try to start it");
|
||||
startODBCBridge();
|
||||
bool started = false;
|
||||
for (size_t counter : ext::range(1, 20))
|
||||
{
|
||||
LOG_TRACE(log, "Checking clickhouse-odbc-bridge is running, try " << counter);
|
||||
if (checkODBCBridgeIsRunning())
|
||||
{
|
||||
started = true;
|
||||
break;
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
if (!started)
|
||||
throw Exception("ODBCBridgeHelper: clickhouse-odbc-bridge is not responding", ErrorCodes::EXTERNAL_SERVER_IS_NOT_RESPONDING);
|
||||
}
|
||||
}
|
||||
|
||||
Poco::URI ODBCBridgeHelper::getMainURI() const
|
||||
{
|
||||
size_t bridge_port = config.getUInt("odbc_bridge.port", DEFAULT_PORT);
|
||||
std::string bridge_host = config.getString("odbc_bridge.host", DEFAULT_HOST);
|
||||
|
||||
Poco::URI main_uri;
|
||||
main_uri.setHost(bridge_host);
|
||||
main_uri.setPort(bridge_port);
|
||||
main_uri.setScheme("http");
|
||||
main_uri.setPath(MAIN_HANDLER);
|
||||
return main_uri;
|
||||
}
|
||||
|
||||
Poco::URI ODBCBridgeHelper::getColumnsInfoURI() const
|
||||
{
|
||||
size_t bridge_port = config.getUInt("odbc_bridge.port", DEFAULT_PORT);
|
||||
std::string bridge_host = config.getString("odbc_bridge.host", DEFAULT_HOST);
|
||||
|
||||
Poco::URI columns_info_uri;
|
||||
columns_info_uri.setHost(bridge_host);
|
||||
columns_info_uri.setPort(bridge_port);
|
||||
columns_info_uri.setScheme("http");
|
||||
columns_info_uri.setPath(COL_INFO_HANDLER);
|
||||
return columns_info_uri;
|
||||
}
|
||||
}
|
52
dbms/src/Common/ODBCBridgeHelper.h
Normal file
52
dbms/src/Common/ODBCBridgeHelper.h
Normal file
@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/URI.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int EXTERNAL_EXECUTABLE_NOT_FOUND;
|
||||
}
|
||||
/** Helper for odbc-bridge, provide utility methods, not main request
|
||||
*/
|
||||
class ODBCBridgeHelper
|
||||
{
|
||||
private:
|
||||
|
||||
using Configuration = Poco::Util::AbstractConfiguration;
|
||||
|
||||
const Configuration & config;
|
||||
Poco::Timespan http_timeout;
|
||||
|
||||
std::string connection_string;
|
||||
|
||||
Poco::URI ping_url;
|
||||
|
||||
Poco::Logger * log = &Poco::Logger::get("ODBCBridgeHelper");
|
||||
|
||||
public:
|
||||
static constexpr inline size_t DEFAULT_PORT = 9018;
|
||||
|
||||
static constexpr inline auto DEFAULT_HOST = "localhost";
|
||||
static constexpr inline auto DEFAULT_FORMAT = "RowBinary";
|
||||
static constexpr inline auto PING_HANDLER = "/ping";
|
||||
static constexpr inline auto MAIN_HANDLER = "/";
|
||||
static constexpr inline auto COL_INFO_HANDLER = "/columns_info";
|
||||
static constexpr inline auto PING_OK_ANSWER = "Ok.";
|
||||
|
||||
ODBCBridgeHelper(const Configuration & config_, const Poco::Timespan & http_timeout_, const std::string & connection_string_);
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> getURLParams(const std::string & cols, size_t max_block_size) const;
|
||||
bool checkODBCBridgeIsRunning() const;
|
||||
|
||||
void startODBCBridge() const;
|
||||
void startODBCBridgeSync() const;
|
||||
|
||||
Poco::URI getMainURI() const;
|
||||
Poco::URI getColumnsInfoURI() const;
|
||||
};
|
||||
}
|
@ -1,5 +1,7 @@
|
||||
#include <Common/ProfileEvents.h>
|
||||
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
/// Available events. Add something here as you wish.
|
||||
#define APPLY_FOR_EVENTS(M) \
|
||||
@ -37,6 +39,11 @@
|
||||
M(CreatedReadBufferAIO) \
|
||||
M(CreatedWriteBufferOrdinary) \
|
||||
M(CreatedWriteBufferAIO) \
|
||||
M(DiskReadElapsedMicroseconds) \
|
||||
M(DiskWriteElapsedMicroseconds) \
|
||||
M(NetworkReceiveElapsedMicroseconds) \
|
||||
M(NetworkSendElapsedMicroseconds) \
|
||||
M(ThrottlerSleepMicroseconds) \
|
||||
\
|
||||
M(ReplicatedPartFetches) \
|
||||
M(ReplicatedPartFailedFetches) \
|
||||
@ -143,31 +150,93 @@
|
||||
M(RWLockAcquiredWriteLocks) \
|
||||
M(RWLockReadersWaitMilliseconds) \
|
||||
M(RWLockWritersWaitMilliseconds) \
|
||||
M(NetworkErrors) \
|
||||
\
|
||||
M(NetworkErrors)
|
||||
M(RealTimeMicroseconds) \
|
||||
M(UserTimeMicroseconds) \
|
||||
M(SystemTimeMicroseconds) \
|
||||
M(SoftPageFaults) \
|
||||
M(HardPageFaults) \
|
||||
M(VoluntaryContextSwitches) \
|
||||
M(InvoluntaryContextSwitches) \
|
||||
\
|
||||
M(OSIOWaitMicroseconds) \
|
||||
M(OSCPUWaitMicroseconds) \
|
||||
M(OSCPUVirtualTimeMicroseconds) \
|
||||
M(OSReadBytes) \
|
||||
M(OSWriteBytes) \
|
||||
M(OSReadChars) \
|
||||
M(OSWriteChars) \
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
#define M(NAME) extern const Event NAME = __COUNTER__;
|
||||
|
||||
#define M(NAME) extern const Event NAME = __COUNTER__;
|
||||
APPLY_FOR_EVENTS(M)
|
||||
#undef M
|
||||
constexpr Event END = __COUNTER__;
|
||||
|
||||
/// Global variable, initialized by zeros.
|
||||
Counter global_counters_array[END] {};
|
||||
/// Initialize global counters statically
|
||||
Counters global_counters(global_counters_array);
|
||||
|
||||
const Event Counters::num_counters = END;
|
||||
|
||||
|
||||
Counters::Counters(VariableContext level, Counters * parent)
|
||||
: counters_holder(new Counter[num_counters] {}),
|
||||
parent(parent),
|
||||
level(level)
|
||||
{
|
||||
counters = counters_holder.get();
|
||||
}
|
||||
|
||||
void Counters::resetCounters()
|
||||
{
|
||||
if (counters)
|
||||
{
|
||||
for (Event i = 0; i < num_counters; ++i)
|
||||
counters[i].store(0, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
void Counters::reset()
|
||||
{
|
||||
parent = nullptr;
|
||||
resetCounters();
|
||||
}
|
||||
|
||||
Counters Counters::getPartiallyAtomicSnapshot() const
|
||||
{
|
||||
Counters res(VariableContext::Snapshot, nullptr);
|
||||
for (Event i = 0; i < num_counters; ++i)
|
||||
res.counters[i].store(counters[i].load(std::memory_order_relaxed), std::memory_order_relaxed);
|
||||
return res;
|
||||
}
|
||||
|
||||
const char * getDescription(Event event)
|
||||
{
|
||||
static const char * descriptions[] =
|
||||
{
|
||||
#define M(NAME) #NAME,
|
||||
APPLY_FOR_EVENTS(M)
|
||||
#undef M
|
||||
constexpr Event END = __COUNTER__;
|
||||
};
|
||||
|
||||
std::atomic<Count> counters[END] {}; /// Global variable, initialized by zeros.
|
||||
return descriptions[event];
|
||||
}
|
||||
|
||||
const char * getDescription(Event event)
|
||||
{
|
||||
static const char * descriptions[] =
|
||||
{
|
||||
#define M(NAME) #NAME,
|
||||
APPLY_FOR_EVENTS(M)
|
||||
#undef M
|
||||
};
|
||||
|
||||
return descriptions[event];
|
||||
}
|
||||
Event end() { return END; }
|
||||
|
||||
|
||||
void increment(Event event, Count amount)
|
||||
{
|
||||
DB::CurrentThread::getProfileEvents().increment(event, amount);
|
||||
}
|
||||
|
||||
Event end() { return END; }
|
||||
}
|
||||
|
||||
#undef APPLY_FOR_EVENTS
|
||||
|
@ -1,8 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include <Common/VariableContext.h>
|
||||
#include <atomic>
|
||||
|
||||
#include <memory>
|
||||
#include <stddef.h>
|
||||
|
||||
/** Implements global counters for various events happening in the application
|
||||
* - for high level profiling.
|
||||
@ -14,19 +15,80 @@ namespace ProfileEvents
|
||||
/// Event identifier (index in array).
|
||||
using Event = size_t;
|
||||
using Count = size_t;
|
||||
using Counter = std::atomic<Count>;
|
||||
class Counters;
|
||||
|
||||
/// Counters - how many times each event happened
|
||||
extern Counters global_counters;
|
||||
|
||||
class Counters
|
||||
{
|
||||
Counter * counters = nullptr;
|
||||
std::unique_ptr<Counter[]> counters_holder;
|
||||
/// Used to propagate increments
|
||||
Counters * parent = nullptr;
|
||||
|
||||
public:
|
||||
|
||||
VariableContext level = VariableContext::Thread;
|
||||
|
||||
/// By default, any instance have to increment global counters
|
||||
Counters(VariableContext level = VariableContext::Thread, Counters * parent = &global_counters);
|
||||
|
||||
/// Global level static initializer
|
||||
Counters(Counter * allocated_counters)
|
||||
: counters(allocated_counters), parent(nullptr), level(VariableContext::Global) {}
|
||||
|
||||
Counter & operator[] (Event event)
|
||||
{
|
||||
return counters[event];
|
||||
}
|
||||
|
||||
const Counter & operator[] (Event event) const
|
||||
{
|
||||
return counters[event];
|
||||
}
|
||||
|
||||
inline void increment(Event event, Count amount = 1)
|
||||
{
|
||||
Counters * current = this;
|
||||
do
|
||||
{
|
||||
current->counters[event].fetch_add(amount, std::memory_order_relaxed);
|
||||
current = current->parent;
|
||||
} while (current != nullptr);
|
||||
}
|
||||
|
||||
/// Every single value is fetched atomically, but not all values as a whole.
|
||||
Counters getPartiallyAtomicSnapshot() const;
|
||||
|
||||
/// Reset all counters to zero and reset parent.
|
||||
void reset();
|
||||
|
||||
/// Get parent (thread unsafe)
|
||||
Counters * getParent()
|
||||
{
|
||||
return parent;
|
||||
}
|
||||
|
||||
/// Set parent (thread unsafe)
|
||||
void setParent(Counters * parent_)
|
||||
{
|
||||
parent = parent_;
|
||||
}
|
||||
|
||||
/// Set all counters to zero
|
||||
void resetCounters();
|
||||
|
||||
static const Event num_counters;
|
||||
};
|
||||
|
||||
/// Increment a counter for event. Thread-safe.
|
||||
void increment(Event event, Count amount = 1);
|
||||
|
||||
/// Get text description of event by identifier. Returns statically allocated string.
|
||||
const char * getDescription(Event event);
|
||||
|
||||
/// Counters - how many times each event happened.
|
||||
extern std::atomic<Count> counters[];
|
||||
|
||||
/// Increment a counter for event. Thread-safe.
|
||||
inline void increment(Event event, Count amount = 1)
|
||||
{
|
||||
counters[event].fetch_add(amount, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/// Get index just after last event identifier.
|
||||
Event end();
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ RWLockFIFO::LockHandler RWLockFIFO::getLock(RWLockFIFO::Type type, RWLockFIFO::C
|
||||
|
||||
handler_ptr->it_client->info += "; " + client.info;
|
||||
|
||||
return handler_ptr;
|
||||
return handler_ptr;
|
||||
}
|
||||
|
||||
if (type == Type::Write || queue.empty() || queue.back().type == Type::Write)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user