Merge branch 'master' of github.com:yandex/ClickHouse

This commit is contained in:
BayoNet 2019-03-07 08:33:50 +03:00
commit 0f3cef880c
37 changed files with 521 additions and 185 deletions

View File

@ -1,5 +1,5 @@
set(CLICKHOUSE_COPIER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp) set(CLICKHOUSE_COPIER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp)
set(CLICKHOUSE_COPIER_LINK PRIVATE clickhouse_functions clickhouse_aggregate_functions daemon) set(CLICKHOUSE_COPIER_LINK PRIVATE clickhouse_functions clickhouse_table_functions clickhouse_aggregate_functions daemon)
#set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ...) #set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ...)
clickhouse_program_add(copier) clickhouse_program_add(copier)

View File

@ -602,6 +602,8 @@ void HTTPHandler::processQuery(
}); });
} }
customizeContext(context);
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
[&response] (const String & content_type) { response.setContentType(content_type); }, [&response] (const String & content_type) { response.setContentType(content_type); },
[&response] (const String & current_query_id) { response.add("Query-Id", current_query_id); }); [&response] (const String & current_query_id) { response.add("Query-Id", current_query_id); });

View File

@ -28,6 +28,9 @@ public:
void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
/// This method is called right before the query execution.
virtual void customizeContext(DB::Context& /* context */) {}
private: private:
struct Output struct Output
{ {

View File

@ -122,7 +122,7 @@ void TCPHandler::runImpl()
while (1) while (1)
{ {
/// Restore context of request. /// Set context of request.
query_context = connection_context; query_context = connection_context;
/// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down. /// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down.
@ -158,22 +158,22 @@ void TCPHandler::runImpl()
if (!receivePacket()) if (!receivePacket())
continue; continue;
query_scope.emplace(query_context); query_scope.emplace(*query_context);
send_exception_with_stack_trace = query_context.getSettingsRef().calculate_text_stack_trace; send_exception_with_stack_trace = query_context->getSettingsRef().calculate_text_stack_trace;
/// Should we send internal logs to client? /// Should we send internal logs to client?
if (client_revision >= DBMS_MIN_REVISION_WITH_SERVER_LOGS if (client_revision >= DBMS_MIN_REVISION_WITH_SERVER_LOGS
&& query_context.getSettingsRef().send_logs_level.value != LogsLevel::none) && query_context->getSettingsRef().send_logs_level.value != LogsLevel::none)
{ {
state.logs_queue = std::make_shared<InternalTextLogsQueue>(); state.logs_queue = std::make_shared<InternalTextLogsQueue>();
state.logs_queue->max_priority = Poco::Logger::parseLevel(query_context.getSettingsRef().send_logs_level.toString()); state.logs_queue->max_priority = Poco::Logger::parseLevel(query_context->getSettingsRef().send_logs_level.toString());
CurrentThread::attachInternalTextLogsQueue(state.logs_queue); CurrentThread::attachInternalTextLogsQueue(state.logs_queue);
} }
query_context.setExternalTablesInitializer([&global_settings, this] (Context & context) query_context->setExternalTablesInitializer([&global_settings, this] (Context & context)
{ {
if (&context != &query_context) if (&context != &*query_context)
throw Exception("Unexpected context in external tables initializer", ErrorCodes::LOGICAL_ERROR); throw Exception("Unexpected context in external tables initializer", ErrorCodes::LOGICAL_ERROR);
/// Get blocks of temporary tables /// Get blocks of temporary tables
@ -185,9 +185,11 @@ void TCPHandler::runImpl()
state.maybe_compressed_in.reset(); /// For more accurate accounting by MemoryTracker. state.maybe_compressed_in.reset(); /// For more accurate accounting by MemoryTracker.
}); });
customizeContext(*query_context);
bool may_have_embedded_data = client_revision >= DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA; bool may_have_embedded_data = client_revision >= DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA;
/// Processing Query /// Processing Query
state.io = executeQuery(state.query, query_context, false, state.stage, may_have_embedded_data); state.io = executeQuery(state.query, *query_context, false, state.stage, may_have_embedded_data);
if (state.io.out) if (state.io.out)
state.need_receive_data_for_insert = true; state.need_receive_data_for_insert = true;
@ -293,6 +295,9 @@ void TCPHandler::runImpl()
LOG_INFO(log, std::fixed << std::setprecision(3) LOG_INFO(log, std::fixed << std::setprecision(3)
<< "Processed in " << watch.elapsedSeconds() << " sec."); << "Processed in " << watch.elapsedSeconds() << " sec.");
/// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query.
query_context.reset();
if (network_error) if (network_error)
break; break;
} }
@ -301,7 +306,7 @@ void TCPHandler::runImpl()
void TCPHandler::readData(const Settings & global_settings) void TCPHandler::readData(const Settings & global_settings)
{ {
const auto receive_timeout = query_context.getSettingsRef().receive_timeout.value; const auto receive_timeout = query_context->getSettingsRef().receive_timeout.value;
/// Poll interval should not be greater than receive_timeout /// Poll interval should not be greater than receive_timeout
const size_t default_poll_interval = global_settings.poll_interval.value * 1000000; const size_t default_poll_interval = global_settings.poll_interval.value * 1000000;
@ -364,8 +369,8 @@ void TCPHandler::processInsertQuery(const Settings & global_settings)
/// Send ColumnsDescription for insertion table /// Send ColumnsDescription for insertion table
if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA)
{ {
const auto & db_and_table = query_context.getInsertionTable(); const auto & db_and_table = query_context->getInsertionTable();
if (auto * columns = ColumnsDescription::loadFromContext(query_context, db_and_table.first, db_and_table.second)) if (auto * columns = ColumnsDescription::loadFromContext(*query_context, db_and_table.first, db_and_table.second))
sendTableColumns(*columns); sendTableColumns(*columns);
} }
@ -408,7 +413,7 @@ void TCPHandler::processOrdinaryQuery()
} }
else else
{ {
if (after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay) if (after_send_progress.elapsed() / 1000 >= query_context->getSettingsRef().interactive_delay)
{ {
/// Some time passed and there is a progress. /// Some time passed and there is a progress.
after_send_progress.restart(); after_send_progress.restart();
@ -417,7 +422,7 @@ void TCPHandler::processOrdinaryQuery()
sendLogs(); sendLogs();
if (async_in.poll(query_context.getSettingsRef().interactive_delay / 1000)) if (async_in.poll(query_context->getSettingsRef().interactive_delay / 1000))
{ {
/// There is the following result block. /// There is the following result block.
block = async_in.read(); block = async_in.read();
@ -645,11 +650,11 @@ void TCPHandler::receiveQuery()
state.is_empty = false; state.is_empty = false;
readStringBinary(state.query_id, *in); readStringBinary(state.query_id, *in);
query_context.setCurrentQueryId(state.query_id); query_context->setCurrentQueryId(state.query_id);
/// Client info /// Client info
{ {
ClientInfo & client_info = query_context.getClientInfo(); ClientInfo & client_info = query_context->getClientInfo();
if (client_revision >= DBMS_MIN_REVISION_WITH_CLIENT_INFO) if (client_revision >= DBMS_MIN_REVISION_WITH_CLIENT_INFO)
client_info.read(*in, client_revision); client_info.read(*in, client_revision);
@ -677,7 +682,7 @@ void TCPHandler::receiveQuery()
} }
/// Per query settings. /// Per query settings.
Settings & settings = query_context.getSettingsRef(); Settings & settings = query_context->getSettingsRef();
settings.deserialize(*in); settings.deserialize(*in);
/// Sync timeouts on client and server during current query to avoid dangling queries on server /// Sync timeouts on client and server during current query to avoid dangling queries on server
@ -715,16 +720,16 @@ bool TCPHandler::receiveData()
{ {
StoragePtr storage; StoragePtr storage;
/// If such a table does not exist, create it. /// If such a table does not exist, create it.
if (!(storage = query_context.tryGetExternalTable(external_table_name))) if (!(storage = query_context->tryGetExternalTable(external_table_name)))
{ {
NamesAndTypesList columns = block.getNamesAndTypesList(); NamesAndTypesList columns = block.getNamesAndTypesList();
storage = StorageMemory::create(external_table_name, storage = StorageMemory::create(external_table_name,
ColumnsDescription{columns, NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}, ColumnComments{}, ColumnCodecs{}}); ColumnsDescription{columns, NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}, ColumnComments{}, ColumnCodecs{}});
storage->startup(); storage->startup();
query_context.addExternalTable(external_table_name, storage); query_context->addExternalTable(external_table_name, storage);
} }
/// The data will be written directly to the table. /// The data will be written directly to the table.
state.io.out = storage->write(ASTPtr(), query_context); state.io.out = storage->write(ASTPtr(), *query_context);
} }
if (block) if (block)
state.io.out->write(block); state.io.out->write(block);
@ -763,10 +768,10 @@ void TCPHandler::initBlockOutput(const Block & block)
{ {
if (!state.maybe_compressed_out) if (!state.maybe_compressed_out)
{ {
std::string method = query_context.getSettingsRef().network_compression_method; std::string method = query_context->getSettingsRef().network_compression_method;
std::optional<int> level; std::optional<int> level;
if (method == "ZSTD") if (method == "ZSTD")
level = query_context.getSettingsRef().network_zstd_compression_level; level = query_context->getSettingsRef().network_zstd_compression_level;
if (state.compression == Protocol::Compression::Enable) if (state.compression == Protocol::Compression::Enable)
state.maybe_compressed_out = std::make_shared<CompressedWriteBuffer>( state.maybe_compressed_out = std::make_shared<CompressedWriteBuffer>(
@ -802,7 +807,7 @@ bool TCPHandler::isQueryCancelled()
if (state.is_cancelled || state.sent_all_data) if (state.is_cancelled || state.sent_all_data)
return true; return true;
if (after_check_cancelled.elapsed() / 1000 < query_context.getSettingsRef().interactive_delay) if (after_check_cancelled.elapsed() / 1000 < query_context->getSettingsRef().interactive_delay)
return false; return false;
after_check_cancelled.restart(); after_check_cancelled.restart();

View File

@ -95,6 +95,9 @@ public:
void run(); void run();
/// This method is called right before the query execution.
virtual void customizeContext(DB::Context & /*context*/) {}
private: private:
IServer & server; IServer & server;
Poco::Logger * log; Poco::Logger * log;
@ -106,7 +109,7 @@ private:
UInt64 client_revision = 0; UInt64 client_revision = 0;
Context connection_context; Context connection_context;
Context query_context; std::optional<Context> query_context;
/// Streams for reading/writing from/to client connection socket. /// Streams for reading/writing from/to client connection socket.
std::shared_ptr<ReadBuffer> in; std::shared_ptr<ReadBuffer> in;

View File

@ -10,6 +10,7 @@
#include <optional> #include <optional>
#include <ext/singleton.h> #include <ext/singleton.h>
#include <Poco/Event.h>
#include <Common/ThreadStatus.h> #include <Common/ThreadStatus.h>
@ -133,18 +134,19 @@ public:
template <typename Function, typename... Args> template <typename Function, typename... Args>
explicit ThreadFromGlobalPool(Function && func, Args &&... args) explicit ThreadFromGlobalPool(Function && func, Args &&... args)
: state(std::make_shared<Poco::Event>())
{ {
mutex = std::make_shared<std::mutex>(); /// NOTE: If this throws an exception, the destructor won't be called.
/// The function object must be copyable, so we wrap lock_guard in shared_ptr.
GlobalThreadPool::instance().scheduleOrThrow([ GlobalThreadPool::instance().scheduleOrThrow([
mutex = mutex, state = state,
lock = std::make_shared<std::lock_guard<std::mutex>>(*mutex),
func = std::forward<Function>(func), func = std::forward<Function>(func),
args = std::make_tuple(std::forward<Args>(args)...)] args = std::make_tuple(std::forward<Args>(args)...)]
{ {
DB::ThreadStatus thread_status; {
std::apply(func, args); DB::ThreadStatus thread_status;
std::apply(func, args);
}
state->set();
}); });
} }
@ -157,7 +159,7 @@ public:
{ {
if (joinable()) if (joinable())
std::terminate(); std::terminate();
mutex = std::move(rhs.mutex); state = std::move(rhs.state);
return *this; return *this;
} }
@ -171,26 +173,26 @@ public:
{ {
if (!joinable()) if (!joinable())
std::terminate(); std::terminate();
{
std::lock_guard lock(*mutex); state->wait();
} state.reset();
mutex.reset();
} }
void detach() void detach()
{ {
if (!joinable()) if (!joinable())
std::terminate(); std::terminate();
mutex.reset(); state.reset();
} }
bool joinable() const bool joinable() const
{ {
return static_cast<bool>(mutex); return state != nullptr;
} }
private: private:
std::shared_ptr<std::mutex> mutex; /// Object must be moveable. /// The state used in this object and inside the thread job.
std::shared_ptr<Poco::Event> state;
}; };
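The change above replaces the shared mutex/lock_guard pair with a shared `Poco::Event` that the scheduled job sets when it finishes, so `join()` simply waits on the event. A minimal usage sketch, assuming the `Common/ThreadPool.h` header from this tree and an implicitly created global pool:

```cpp
#include <Common/ThreadPool.h>
#include <iostream>

int main()
{
    /// The lambda is scheduled on a thread borrowed from GlobalThreadPool.
    ThreadFromGlobalPool thread([] { std::cerr << "work done\n"; });

    /// join() blocks on the shared Poco::Event until the job sets it;
    /// afterwards the object is no longer joinable.
    thread.join();
    return 0;
}
```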

View File

@ -1,4 +1,4 @@
#include <atomic> #include <mutex>
#include <iostream> #include <iostream>
#include <Common/ThreadPool.h> #include <Common/ThreadPool.h>
@ -10,8 +10,9 @@ void test()
{ {
Pool pool(10, 2, 10); Pool pool(10, 2, 10);
std::mutex mutex;
for (size_t i = 0; i < 10; ++i) for (size_t i = 0; i < 10; ++i)
pool.schedule([]{ std::cerr << '.'; }); pool.schedule([&]{ std::lock_guard lock(mutex); std::cerr << '.'; });
pool.wait(); pool.wait();
} }

View File

@ -116,7 +116,7 @@ public:
*/ */
size_t checkDepth(size_t max_depth) const { return checkDepthImpl(max_depth, max_depth); } size_t checkDepth(size_t max_depth) const { return checkDepthImpl(max_depth, max_depth); }
/// Do not allow to change the table while the blocks stream is alive. /// Do not allow to change the table while the blocks stream and its children are alive.
void addTableLock(const TableStructureReadLockPtr & lock) { table_locks.push_back(lock); } void addTableLock(const TableStructureReadLockPtr & lock) { table_locks.push_back(lock); }
/// Get information about execution speed. /// Get information about execution speed.
@ -242,6 +242,10 @@ public:
void enableExtremes() { enabled_extremes = true; } void enableExtremes() { enabled_extremes = true; }
protected: protected:
/// Order is important: `table_locks` must be destroyed after `children` so that tables from
/// which child streams read are protected by the locks during the lifetime of the child streams.
TableStructureReadLocks table_locks;
BlockInputStreams children; BlockInputStreams children;
std::shared_mutex children_mutex; std::shared_mutex children_mutex;
@ -268,8 +272,6 @@ protected:
} }
private: private:
TableStructureReadLocks table_locks;
bool enabled_extremes = false; bool enabled_extremes = false;
/// The limit on the number of rows/bytes has been exceeded, and you need to stop execution on the next `read` call, as if the thread has run out. /// The limit on the number of rows/bytes has been exceeded, and you need to stop execution on the next `read` call, as if the thread has run out.

View File

@ -498,6 +498,15 @@ bool DataTypeArray::equals(const IDataType & rhs) const
} }
size_t DataTypeArray::getNumberOfDimensions() const
{
const DataTypeArray * nested_array = typeid_cast<const DataTypeArray *>(nested.get());
if (!nested_array)
return 1;
return 1 + nested_array->getNumberOfDimensions(); /// Every modern C++ compiler optimizes tail recursion.
}
static DataTypePtr create(const ASTPtr & arguments) static DataTypePtr create(const ASTPtr & arguments)
{ {
if (!arguments || arguments->children.size() != 1) if (!arguments || arguments->children.size() != 1)

View File

@ -112,6 +112,9 @@ public:
} }
const DataTypePtr & getNestedType() const { return nested; } const DataTypePtr & getNestedType() const { return nested; }
/// 1 for plain array, 2 for array of arrays and so on.
size_t getNumberOfDimensions() const;
}; };
} }
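A small sketch of how the new `getNumberOfDimensions()` helper behaves, assuming the DataTypes headers from this tree (the example program itself is hypothetical):

```cpp
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <iostream>

int main()
{
    using namespace DB;

    /// Array(Array(UInt32)): two levels of nesting.
    auto nested = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt32>());
    auto array_of_arrays = std::make_shared<DataTypeArray>(nested);

    std::cout << array_of_arrays->getNumberOfDimensions() << '\n'; /// prints 2
    return 0;
}
```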

View File

@ -3,6 +3,7 @@
#include <Formats/ODBCDriver2BlockOutputStream.h> #include <Formats/ODBCDriver2BlockOutputStream.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB namespace DB
{ {
@ -82,8 +83,10 @@ void ODBCDriver2BlockOutputStream::writePrefix()
writeODBCString(out, "type"); writeODBCString(out, "type");
for (size_t i = 0; i < columns; ++i) for (size_t i = 0; i < columns; ++i)
{ {
const ColumnWithTypeAndName & col = header.getByPosition(i); auto type = header.getByPosition(i).type;
writeODBCString(out, col.type->getName()); if (type->lowCardinality())
type = recursiveRemoveLowCardinality(type);
writeODBCString(out, type->getName());
} }
} }

View File

@ -4,6 +4,7 @@
namespace DB namespace DB
{ {
class FunctionArrayEnumerateDenseRanked : public FunctionArrayEnumerateRankedExtended<FunctionArrayEnumerateDenseRanked> class FunctionArrayEnumerateDenseRanked : public FunctionArrayEnumerateRankedExtended<FunctionArrayEnumerateDenseRanked>
{ {
using Base = FunctionArrayEnumerateRankedExtended<FunctionArrayEnumerateDenseRanked>; using Base = FunctionArrayEnumerateRankedExtended<FunctionArrayEnumerateDenseRanked>;

View File

@ -1,61 +1,53 @@
#include <algorithm>
#include <Columns/ColumnConst.h>
#include "arrayEnumerateRanked.h" #include "arrayEnumerateRanked.h"
namespace DB namespace DB
{ {
ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments)
{ {
const size_t num_arguments = arguments.size(); const size_t num_arguments = arguments.size();
DepthType clear_depth = 1; DepthType clear_depth = 1;
DepthType max_array_depth = 0;
DepthTypes depths; DepthTypes depths;
/// The function signature is the following:
/// f(c0, arr1, c1, arr2, c2, ...)
///
/// c0 is something called "clear_depth" here.
/// cN... - how deep to look into the corresponding arrN (called "depths" here);
/// it may be omitted - then it means "look at the full depth".
size_t array_num = 0; size_t array_num = 0;
DepthType last_array_depth = 0; DepthType prev_array_depth = 0;
for (size_t i = 0; i < num_arguments; ++i) for (size_t i = 0; i < num_arguments; ++i)
{ {
const auto type = arguments[i].type; const DataTypePtr & type = arguments[i].type;
const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(type.get());
if (isArray(type)) if (type_array)
{ {
if (depths.size() < array_num && last_array_depth) if (depths.size() < array_num && prev_array_depth)
{ {
depths.emplace_back(last_array_depth); depths.emplace_back(prev_array_depth);
last_array_depth = 0; prev_array_depth = 0;
} }
DepthType depth = 0; prev_array_depth = type_array->getNumberOfDimensions();
auto sub_type = type;
do
{
auto sub_type_array = typeid_cast<const DataTypeArray *>(sub_type.get());
if (!sub_type_array)
break;
sub_type = sub_type_array->getNestedType();
++depth;
} while (isArray(sub_type));
last_array_depth = depth;
++array_num; ++array_num;
} }
else
if (!arguments[i].column)
continue;
const IColumn * non_const = nullptr;
if (auto const_array_column = typeid_cast<const ColumnConst *>(arguments[i].column.get()))
non_const = const_array_column->getDataColumnPtr().get();
const auto array = typeid_cast<const ColumnArray *>(non_const ? non_const : arguments[i].column.get());
if (!array)
{ {
const auto & depth_column = arguments[i].column; const auto & depth_column = arguments[i].column;
if (depth_column && depth_column->isColumnConst()) if (depth_column && depth_column->isColumnConst())
{ {
auto value = depth_column->getUInt(0); UInt64 value = static_cast<const ColumnConst &>(*depth_column).getValue<UInt64>();
if (!value) if (!value)
throw Exception( throw Exception("Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: depth ("
"Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: depth (" + std::to_string(value) + ") cannot be less or equal 0.",
+ std::to_string(value) + ") cant be 0.",
ErrorCodes::BAD_ARGUMENTS); ErrorCodes::BAD_ARGUMENTS);
if (i == 0) if (i == 0)
@ -65,38 +57,35 @@ ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments)
else else
{ {
if (depths.size() >= array_num) if (depths.size() >= array_num)
{ throw Exception("Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: depth ("
throw Exception( + std::to_string(value) + ") for missing array.",
"Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: depth ("
+ std::to_string(value) + ") for missing array.",
ErrorCodes::BAD_ARGUMENTS); ErrorCodes::BAD_ARGUMENTS);
} if (value > prev_array_depth)
throw Exception(
"Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: depth="
+ std::to_string(value) + " for array with depth=" + std::to_string(prev_array_depth) + ".",
ErrorCodes::BAD_ARGUMENTS);
depths.emplace_back(value); depths.emplace_back(value);
} }
} }
} }
} }
if (depths.size() < array_num) if (depths.size() < array_num)
{ depths.emplace_back(prev_array_depth);
depths.emplace_back(last_array_depth);
}
for (auto & depth : depths)
{
if (max_array_depth < depth)
max_array_depth = depth;
}
if (depths.empty()) if (depths.empty())
throw Exception( throw Exception("Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: at least one array should be passed.",
"Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: At least one array should be passed.",
ErrorCodes::BAD_ARGUMENTS); ErrorCodes::BAD_ARGUMENTS);
DepthType max_array_depth = 0;
for (auto depth : depths)
max_array_depth = std::max(depth, max_array_depth);
if (clear_depth > max_array_depth) if (clear_depth > max_array_depth)
throw Exception( throw Exception("Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: clear_depth ("
"Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: clear_depth (" + std::to_string(clear_depth) + ") cant be larger than max_array_depth (" + std::to_string(max_array_depth) + ").",
+ std::to_string(clear_depth) + ") cant be larger than max_array_depth (" + std::to_string(max_array_depth) + ").",
ErrorCodes::BAD_ARGUMENTS); ErrorCodes::BAD_ARGUMENTS);
return {clear_depth, depths, max_array_depth}; return {clear_depth, depths, max_array_depth};

View File

@ -12,6 +12,47 @@
#include <Common/HashTable/ClearableHashMap.h> #include <Common/HashTable/ClearableHashMap.h>
/** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths.
* This is a very unusual function, made as a special order for Yandex.Metrica.
*
* arrayEnumerateUniqRanked(['hello', 'world', 'hello']) = [1, 1, 2]
* - it returns a similarly structured array containing the number of occurrences of the corresponding value.
*
* arrayEnumerateUniqRanked([['hello', 'world'], ['hello'], ['hello']], 1) = [1, 1, 2]
* - look at depth 1, as specified. Elements are ['hello', 'world'], ['hello'], ['hello'].
*
* arrayEnumerateUniqRanked([['hello', 'world'], ['hello'], ['hello']]) = [[1,1],[2],[3]]
* - look at the maximum depth by default. Return a similarly structured array.
* arrayEnumerateUniqRanked([['hello', 'world'], ['hello'], ['hello']], 2) = [[1,1],[2],[3]]
* - the same, with depth 2 specified explicitly.
*
* We may pass multiple array arguments. Their elements will be processed as if zipped into tuples.
*
* arrayEnumerateUniqRanked(['hello', 'hello', 'world', 'world'], ['a', 'b', 'b', 'b']) = [1, 1, 1, 2]
*
* We may provide arrays of different depths to look at different arguments.
*
* arrayEnumerateUniqRanked([['hello', 'world'], ['hello'], ['world'], ['world']], ['a', 'b', 'b', 'b']) = [[1,1],[1],[1],[2]]
* arrayEnumerateUniqRanked([['hello', 'world'], ['hello'], ['world'], ['world']], 1, ['a', 'b', 'b', 'b'], 1) = [1, 1, 1, 2]
*
* When the depths differ, arrays of lesser depth are processed as if promoted to deeper arrays of similar structure by duplicating elements.
*
* arrayEnumerateUniqRanked(
* [['hello', 'world'], ['hello'], ['world'], ['world']],
* ['a', 'b', 'b', 'b'])
* = arrayEnumerateUniqRanked(
* [['hello', 'world'], ['hello'], ['world'], ['world']],
* [['a', 'a'], ['b'], ['b'], ['b']])
*
* Finally, we can provide an extra first argument named "clear_depth" (it is 1 by default).
* Array elements at clear_depth are enumerated as separate elements (the enumeration counter is reset for each new element).
*
* SELECT arrayEnumerateUniqRanked(1, [['hello', 'world'], ['hello'], ['world'], ['world']]) = [[1,1],[2],[2],[3]]
* SELECT arrayEnumerateUniqRanked(2, [['hello', 'world'], ['hello'], ['world'], ['world']]) = [[1,1],[1],[1],[1]]
* SELECT arrayEnumerateUniqRanked(1, [['hello', 'world', 'hello'], ['hello'], ['world'], ['world']]) = [[1,1,2],[3],[2],[3]]
* SELECT arrayEnumerateUniqRanked(2, [['hello', 'world', 'hello'], ['hello'], ['world'], ['world']]) = [[1,1,2],[1],[1],[1]]
*/
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
@ -27,12 +68,21 @@ class FunctionArrayEnumerateDenseRanked;
using DepthType = uint32_t; using DepthType = uint32_t;
using DepthTypes = std::vector<DepthType>; using DepthTypes = std::vector<DepthType>;
struct ArraysDepths struct ArraysDepths
{ {
/// Enumerate elements at the specified level separately.
DepthType clear_depth; DepthType clear_depth;
/// Effective depth is the array depth by default, or a lower value specified as a constant argument following the array.
/// f([[1, 2], [3]]) - effective depth is 2.
/// f([[1, 2], [3]], 1) - effective depth is 1.
DepthTypes depths; DepthTypes depths;
/// Maximum effective depth.
DepthType max_array_depth; DepthType max_array_depth;
}; };
/// Return depth info about passed arrays /// Return depth info about passed arrays
ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments); ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments);
@ -55,7 +105,9 @@ public:
+ ", should be at least 1.", + ", should be at least 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const auto & arrays_depths = getArraysDepths(arguments); const ArraysDepths arrays_depths = getArraysDepths(arguments);
/// Return type is the array of the depth as the maximum effective depth of arguments, containing UInt32.
DataTypePtr type = std::make_shared<DataTypeUInt32>(); DataTypePtr type = std::make_shared<DataTypeUInt32>();
for (DepthType i = 0; i < arrays_depths.max_array_depth; ++i) for (DepthType i = 0; i < arrays_depths.max_array_depth; ++i)
@ -79,15 +131,15 @@ private:
/// Hash a set of keys into a UInt128 value. /// Hash a set of keys into a UInt128 value.
static inline UInt128 ALWAYS_INLINE hash128depths(const std::vector<size_t> & indexes, const ColumnRawPtrs & key_columns) static inline UInt128 ALWAYS_INLINE hash128depths(const std::vector<size_t> & indices, const ColumnRawPtrs & key_columns)
{ {
UInt128 key; UInt128 key;
SipHash hash; SipHash hash;
for (size_t j = 0, keys_size = key_columns.size(); j < keys_size; ++j) for (size_t j = 0, keys_size = key_columns.size(); j < keys_size; ++j)
{ {
// Debug: const auto & field = (*key_columns[j])[indexes[j]]; DUMP(j, indexes[j], field); // Debug: const auto & field = (*key_columns[j])[indices[j]]; DUMP(j, indices[j], field);
key_columns[j]->updateHashWithValue(indexes[j], hash); key_columns[j]->updateHashWithValue(indices[j], hash);
} }
hash.get128(key.low, key.high); hash.get128(key.low, key.high);
@ -111,9 +163,11 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
for (size_t i = 0; i < arguments.size(); ++i) for (size_t i = 0; i < arguments.size(); ++i)
args.emplace_back(block.getByPosition(arguments[i])); args.emplace_back(block.getByPosition(arguments[i]));
const auto & arrays_depths = getArraysDepths(args); const ArraysDepths arrays_depths = getArraysDepths(args);
auto get_array_column = [&](const auto & column) -> const DB::ColumnArray * { /// If the column is an Array, return it. If it is a const Array, materialize it, keep ownership, and return it.
auto get_array_column = [&](const auto & column) -> const DB::ColumnArray *
{
const ColumnArray * array = checkAndGetColumn<ColumnArray>(column); const ColumnArray * array = checkAndGetColumn<ColumnArray>(column);
if (!array) if (!array)
{ {
@ -146,7 +200,7 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
if (*offsets_by_depth[0] != array->getOffsets()) if (*offsets_by_depth[0] != array->getOffsets())
{ {
throw Exception( throw Exception(
"Lengths and depths of all arrays passed to " + getName() + " must be equal.", "Lengths and effective depths of all arrays passed to " + getName() + " must be equal.",
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
} }
} }
@ -170,7 +224,7 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
if (*offsets_by_depth[col_depth] != array->getOffsets()) if (*offsets_by_depth[col_depth] != array->getOffsets())
{ {
throw Exception( throw Exception(
"Lengths and depths of all arrays passed to " + getName() + " must be equal.", "Lengths and effective depths of all arrays passed to " + getName() + " must be equal.",
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
} }
} }
@ -180,7 +234,7 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
{ {
throw Exception( throw Exception(
getName() + ": Passed array number " + std::to_string(array_num) + " depth (" getName() + ": Passed array number " + std::to_string(array_num) + " depth ("
+ std::to_string(arrays_depths.depths[array_num]) + ") more than actual array depth (" + std::to_string(col_depth) + std::to_string(arrays_depths.depths[array_num]) + ") is more than the actual array depth (" + std::to_string(col_depth)
+ ").", + ").",
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
} }
@ -251,6 +305,7 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeMethodImpl(
const ArraysDepths & arrays_depths, const ArraysDepths & arrays_depths,
ColumnUInt32::Container & res_values) ColumnUInt32::Container & res_values)
{ {
/// Offsets at the depth we want to look.
const size_t current_offset_depth = arrays_depths.max_array_depth; const size_t current_offset_depth = arrays_depths.max_array_depth;
const auto & offsets = *offsets_by_depth[current_offset_depth - 1]; const auto & offsets = *offsets_by_depth[current_offset_depth - 1];
@ -264,22 +319,24 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeMethodImpl(
HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
Map indices; Map indices;
std::vector<size_t> indexes_by_depth(arrays_depths.max_array_depth); std::vector<size_t> indices_by_depth(arrays_depths.max_array_depth);
std::vector<size_t> current_offset_n_by_depth(arrays_depths.max_array_depth); std::vector<size_t> current_offset_n_by_depth(arrays_depths.max_array_depth);
UInt32 rank = 0; UInt32 rank = 0;
std::vector<size_t> columns_indexes(columns.size()); std::vector<size_t> columns_indices(columns.size());
for (size_t off : offsets) for (size_t off : offsets)
{ {
bool want_clear = false; bool want_clear = false;
/// For each element at the depth we want to look.
for (size_t j = prev_off; j < off; ++j) for (size_t j = prev_off; j < off; ++j)
{ {
for (size_t col_n = 0; col_n < columns.size(); ++col_n) for (size_t col_n = 0; col_n < columns.size(); ++col_n)
columns_indexes[col_n] = indexes_by_depth[arrays_depths.depths[col_n] - 1]; columns_indices[col_n] = indices_by_depth[arrays_depths.depths[col_n] - 1];
auto hash = hash128depths(columns_indexes, columns); auto hash = hash128depths(columns_indices, columns);
if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniqRanked>) if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniqRanked>)
{ {
@ -297,13 +354,13 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeMethodImpl(
res_values[j] = idx; res_values[j] = idx;
} }
// Debug: DUMP(off, prev_off, j, columns_indexes, res_values[j], columns); // Debug: DUMP(off, prev_off, j, columns_indices, res_values[j], columns);
for (int depth = current_offset_depth - 1; depth >= 0; --depth) for (int depth = current_offset_depth - 1; depth >= 0; --depth)
{ {
++indexes_by_depth[depth]; ++indices_by_depth[depth];
if (indexes_by_depth[depth] == (*offsets_by_depth[depth])[current_offset_n_by_depth[depth]]) if (indices_by_depth[depth] == (*offsets_by_depth[depth])[current_offset_n_by_depth[depth]])
{ {
if (static_cast<int>(arrays_depths.clear_depth) == depth + 1) if (static_cast<int>(arrays_depths.clear_depth) == depth + 1)
want_clear = true; want_clear = true;
@ -315,6 +372,7 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeMethodImpl(
} }
} }
} }
if (want_clear) if (want_clear)
{ {
want_clear = false; want_clear = false;

View File

@ -1818,6 +1818,19 @@ void Context::addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd)
shared->bridge_commands.emplace_back(std::move(cmd)); shared->bridge_commands.emplace_back(std::move(cmd));
} }
IHostContextPtr & Context::getHostContext()
{
return host_context;
}
const IHostContextPtr & Context::getHostContext() const
{
return host_context;
}
std::shared_ptr<ActionLocksManager> Context::getActionLocksManager() std::shared_ptr<ActionLocksManager> Context::getActionLocksManager()
{ {
auto lock = getLock(); auto lock = getLock();

View File

@ -99,6 +99,15 @@ using TableAndCreateASTs = std::map<String, TableAndCreateAST>;
/// Callback for external tables initializer /// Callback for external tables initializer
using ExternalTablesInitializer = std::function<void(Context &)>; using ExternalTablesInitializer = std::function<void(Context &)>;
/// An empty interface for an arbitrary object that may be attached by a shared pointer
/// to the query context when using ClickHouse as a library.
struct IHostContext
{
virtual ~IHostContext() = default;
};
using IHostContextPtr = std::shared_ptr<IHostContext>;
/** A set of known objects that can be used in the query. /** A set of known objects that can be used in the query.
* Consists of a shared part (always common to all sessions and queries) * Consists of a shared part (always common to all sessions and queries)
* and copied part (which can be its own for each session or query). * and copied part (which can be its own for each session or query).
@ -139,6 +148,12 @@ private:
using DatabasePtr = std::shared_ptr<IDatabase>; using DatabasePtr = std::shared_ptr<IDatabase>;
using Databases = std::map<String, std::shared_ptr<IDatabase>>; using Databases = std::map<String, std::shared_ptr<IDatabase>>;
IHostContextPtr host_context; /// Arbitrary object that may be used to attach some host-specific information to the query context,
/// when using ClickHouse as a library in some project. For example, it may contain host
/// logger, some query identification information, profiling guards, etc. This field is
/// to be customized in HTTP and TCP servers by overriding the customizeContext(DB::Context &)
/// methods.
/// Use copy constructor or createGlobal() instead /// Use copy constructor or createGlobal() instead
Context(); Context();
@ -452,6 +467,9 @@ public:
/// Add started bridge command. It will be killed after context destruction /// Add started bridge command. It will be killed after context destruction
void addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd); void addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd);
IHostContextPtr & getHostContext();
const IHostContextPtr & getHostContext() const;
private: private:
/** Check if the current client has access to the specified database. /** Check if the current client has access to the specified database.
* If access is denied, throw an exception. * If access is denied, throw an exception.
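To illustrate how the new `host_context` field and the `customizeContext()` hooks added above are meant to work together, here is a rough sketch from the point of view of a host project embedding ClickHouse. The `MyHostContext` and `MyTCPHandler` names are hypothetical, and the include paths are assumptions:

```cpp
#include <Interpreters/Context.h>
#include "TCPHandler.h" /// from the ClickHouse server sources (path assumed)
#include <memory>
#include <string>

/// Host-side object carried through the query context for the lifetime of the query.
struct MyHostContext : public DB::IHostContext
{
    std::string request_id;
    explicit MyHostContext(std::string id) : request_id(std::move(id)) {}
};

/// customizeContext() is called right before query execution, so anything
/// attached here is visible while the query runs and is destroyed with the context.
class MyTCPHandler : public DB::TCPHandler
{
public:
    using DB::TCPHandler::TCPHandler;

    void customizeContext(DB::Context & context) override
    {
        context.getHostContext() = std::make_shared<MyHostContext>("request-42");
    }
};
```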

View File

@ -103,7 +103,7 @@ struct Settings
\ \
M(SettingUInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ") \ M(SettingUInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ") \
\ \
M(SettingUInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for input/output operations is bypassing the page cache. 0 - disabled.") \ M(SettingUInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for reading the data with O_DIRECT option during SELECT queries execution. 0 - disabled.") \
\ \
M(SettingBool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.") \ M(SettingBool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.") \
M(SettingBool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.") \ M(SettingBool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.") \

View File

@ -11,6 +11,7 @@
#include <Storages/MergeTree/MarkRange.h> #include <Storages/MergeTree/MarkRange.h>
#include <Interpreters/ExpressionActions.h> #include <Interpreters/ExpressionActions.h>
#include <Parsers/ASTIndexDeclaration.h> #include <Parsers/ASTIndexDeclaration.h>
#include <DataTypes/DataTypeLowCardinality.h>
constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; constexpr auto INDEX_FILE_PREFIX = "skp_idx_";

View File

@ -18,7 +18,7 @@ namespace ErrorCodes
} }
/// 0b11 -- can be true and false at the same time /// 0b11 -- can be true and false at the same time
const Field UNKNOWN_FIELD(3); const Field UNKNOWN_FIELD(3u);
MergeTreeSetIndexGranule::MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index) MergeTreeSetIndexGranule::MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index)
@ -47,7 +47,16 @@ void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const
for (size_t i = 0; i < index.columns.size(); ++i) for (size_t i = 0; i < index.columns.size(); ++i)
{ {
const auto & type = index.data_types[i]; const auto & type = index.data_types[i];
type->serializeBinaryBulk(*columns[i], ostr, 0, size());
IDataType::SerializeBinaryBulkSettings settings;
settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; };
settings.position_independent_encoding = false;
settings.low_cardinality_max_dictionary_size = 0;
IDataType::SerializeBinaryBulkStatePtr state;
type->serializeBinaryBulkStatePrefix(settings, state);
type->serializeBinaryBulkWithMultipleStreams(*columns[i], 0, size(), settings, state);
type->serializeBinaryBulkStateSuffix(settings, state);
} }
} }
@ -66,11 +75,21 @@ void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr)
size_type->deserializeBinary(field_rows, istr); size_type->deserializeBinary(field_rows, istr);
size_t rows_to_read = field_rows.get<size_t>(); size_t rows_to_read = field_rows.get<size_t>();
if (rows_to_read == 0)
return;
for (size_t i = 0; i < index.columns.size(); ++i) for (size_t i = 0; i < index.columns.size(); ++i)
{ {
const auto & type = index.data_types[i]; const auto & type = index.data_types[i];
auto new_column = type->createColumn(); auto new_column = type->createColumn();
type->deserializeBinaryBulk(*new_column, istr, rows_to_read, 0);
IDataType::DeserializeBinaryBulkSettings settings;
settings.getter = [&](IDataType::SubstreamPath) -> ReadBuffer * { return &istr; };
settings.position_independent_encoding = false;
IDataType::DeserializeBinaryBulkStatePtr state;
type->deserializeBinaryBulkStatePrefix(settings, state);
type->deserializeBinaryBulkWithMultipleStreams(*new_column, rows_to_read, settings, state);
block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, index.columns[i])); block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, index.columns[i]));
} }
@ -177,10 +196,24 @@ bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule)
Block result = granule->getElementsBlock(); Block result = granule->getElementsBlock();
actions->execute(result); actions->execute(result);
const auto & column = result.getByName(expression_ast->getColumnName()).column; auto column = result.getByName(expression_ast->getColumnName()).column->convertToFullColumnIfLowCardinality();
auto * col_uint8 = typeid_cast<const ColumnUInt8 *>(column.get());
const NullMap * null_map = nullptr;
if (auto * col_nullable = typeid_cast<const ColumnNullable *>(column.get()))
{
col_uint8 = typeid_cast<const ColumnUInt8 *>(&col_nullable->getNestedColumn());
null_map = &col_nullable->getNullMapData();
}
if (!col_uint8)
throw Exception("ColumnUInt8 expected as Set index condition result.", ErrorCodes::LOGICAL_ERROR);
auto & condition = col_uint8->getData();
for (size_t i = 0; i < column->size(); ++i) for (size_t i = 0; i < column->size(); ++i)
if (column->getInt(i) & 1) if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1)
return true; return true;
return false; return false;

View File

@ -0,0 +1,30 @@
1 a
-
2 b
-
--
1 a
-
2 b
-
--
1 a
-
2 b
-
----
1 a
-
2 b
-
--
1 a
-
2 b
-
--
1 a
-
2 b
-
----

View File

@ -0,0 +1,69 @@
SET allow_experimental_data_skipping_indices=1;
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b Nullable(String), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a');
insert into test.nullable_set_index values (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '--';
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b Nullable(String), INDEX b_index b TYPE set(1) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a');
insert into test.nullable_set_index values (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '--';
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b Nullable(String), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a'), (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '----';
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b LowCardinality(Nullable(String)), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a');
insert into test.nullable_set_index values (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '--';
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b LowCardinality(Nullable(String)), INDEX b_index b TYPE set(1) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a');
insert into test.nullable_set_index values (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '--';
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b LowCardinality(Nullable(String)), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a'), (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '----';
drop table if exists test.nullable_set_index;

View File

@ -0,0 +1,20 @@
SET allow_experimental_data_skipping_indices=1;
drop table if exists test.null_lc_set_index;
CREATE TABLE test.null_lc_set_index (
timestamp DateTime,
action LowCardinality(Nullable(String)),
user LowCardinality(Nullable(String)),
INDEX test_user_idx (user) TYPE set(0) GRANULARITY 8192
) ENGINE=MergeTree
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (timestamp, action, cityHash64(user))
SAMPLE BY cityHash64(user);
INSERT INTO test.null_lc_set_index VALUES (1550883010, 'subscribe', 'alice');
INSERT INTO test.null_lc_set_index VALUES (1550883020, 'follow', 'bob');
SELECT action, user FROM test.null_lc_set_index WHERE user = 'alice';
drop table if exists test.null_lc_set_index;

View File

@ -145,11 +145,11 @@ SELECT arrayEnumerateUniqRanked(); -- { serverError 42 }
SELECT arrayEnumerateUniqRanked([]); SELECT arrayEnumerateUniqRanked([]);
SELECT arrayEnumerateUniqRanked(1); -- { serverError 36 } SELECT arrayEnumerateUniqRanked(1); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked(2,[]); -- { serverError 36 } SELECT arrayEnumerateUniqRanked(2,[]); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked(2,[],2); -- { serverError 190 } SELECT arrayEnumerateUniqRanked(2,[],2); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked(2,[],[]); -- { serverError 36 } SELECT arrayEnumerateUniqRanked(2,[],[]); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked(2,[],[],3); -- { serverError 190 } SELECT arrayEnumerateUniqRanked(2,[],[],3); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked([],2); -- { serverError 190 } SELECT arrayEnumerateUniqRanked([],2); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked([],2,[]); -- { serverError 190 } SELECT arrayEnumerateUniqRanked([],2,[]); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked(0,[],0); -- { serverError 36 } SELECT arrayEnumerateUniqRanked(0,[],0); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked(0,0,0); -- { serverError 36 } SELECT arrayEnumerateUniqRanked(0,0,0); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked(1,1,1); -- { serverError 36 } SELECT arrayEnumerateUniqRanked(1,1,1); -- { serverError 36 }
@ -170,5 +170,13 @@ SELECT arrayEnumerateUniqRanked([1,2], 1, 2); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked([1,2], 1, 3, 4, 5); -- { serverError 36 } SELECT arrayEnumerateUniqRanked([1,2], 1, 3, 4, 5); -- { serverError 36 }
SELECT arrayEnumerateUniqRanked([1,2], 1, 3, [4], 5); -- { serverError 36 } SELECT arrayEnumerateUniqRanked([1,2], 1, 3, [4], 5); -- { serverError 36 }
SELECT arrayEnumerateDenseRanked([[[[[[[[[[42]]]]]]]]]]); SELECT arrayEnumerateDenseRanked([[[[[[[[[[42]]]]]]]]]]);
SELECT arrayEnumerateUniqRanked('wat', [1,2]); -- { serverError 48 } SELECT arrayEnumerateUniqRanked('wat', [1,2]); -- { serverError 170 }
SELECT arrayEnumerateUniqRanked(1, [1,2], 'boom'); -- { serverError 48 } SELECT arrayEnumerateUniqRanked(1, [1,2], 'boom'); -- { serverError 170 }
SELECT arrayEnumerateDenseRanked(['\0'], -8363126); -- { serverError 170 }
SELECT arrayEnumerateDenseRanked(-10, ['\0'], -8363126); -- { serverError 170 }
SELECT arrayEnumerateDenseRanked(1, ['\0'], -8363126); -- { serverError 170 }
SELECT arrayEnumerateDenseRanked(-101, ['\0']); -- { serverError 170 }
SELECT arrayEnumerateDenseRanked(1.1, [10,20,10,30]); -- { serverError 170 }
SELECT arrayEnumerateDenseRanked([10,20,10,30], 0.4); -- { serverError 170 }
SELECT arrayEnumerateDenseRanked([10,20,10,30], 1.8); -- { serverError 170 }
SELECT arrayEnumerateUniqRanked(1, [], 1000000000); -- { serverError 36 }

View File

@ -10,20 +10,37 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.cannot_kill_query"
$CLICKHOUSE_CLIENT -q "CREATE TABLE test.cannot_kill_query (x UInt64) ENGINE = MergeTree ORDER BY x" &> /dev/null $CLICKHOUSE_CLIENT -q "CREATE TABLE test.cannot_kill_query (x UInt64) ENGINE = MergeTree ORDER BY x" &> /dev/null
$CLICKHOUSE_CLIENT -q "INSERT INTO test.cannot_kill_query SELECT * FROM numbers(10000000)" &> /dev/null $CLICKHOUSE_CLIENT -q "INSERT INTO test.cannot_kill_query SELECT * FROM numbers(10000000)" &> /dev/null
# This SELECT query will run for a long time. It's used as bloker for ALTER query. It will be killed with SYNC kill.
query_for_pending="SELECT count() FROM test.cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1, max_block_size=1" query_for_pending="SELECT count() FROM test.cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1, max_block_size=1"
$CLICKHOUSE_CLIENT -q "$query_for_pending" &>/dev/null & $CLICKHOUSE_CLIENT -q "$query_for_pending" &>/dev/null &
sleep 1 # queries should be in strict order
# This ALTER query will wait until $query_for_pending finishes. It will also block $query_to_kill.
$CLICKHOUSE_CLIENT -q "ALTER TABLE test.cannot_kill_query MODIFY COLUMN x UInt64" &>/dev/null & $CLICKHOUSE_CLIENT -q "ALTER TABLE test.cannot_kill_query MODIFY COLUMN x UInt64" &>/dev/null &
sleep 1
# This SELECT query will also run for a long time. It is also blocked by the ALTER query. It will be killed with an ASYNC kill.
# This is the main idea we check -- blocked queries can be killed with an ASYNC kill.
query_to_kill="SELECT sum(1) FROM test.cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1" query_to_kill="SELECT sum(1) FROM test.cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1"
$CLICKHOUSE_CLIENT -q "$query_to_kill" &>/dev/null & $CLICKHOUSE_CLIENT -q "$query_to_kill" &>/dev/null &
sleep 3 # just to be sure that 'KILL ...' will be executed after 'SELECT ... WHERE NOT ignore(sleep(1))' sleep 1 # just to be sure that the KILL of $query_to_kill is executed after $query_to_kill has started.
timeout 15 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query='$query_to_kill' SYNC" &>/dev/null # Kill $query_to_kill with ASYNC kill. We will check that information about KILL is not lost.
$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query='$query_to_kill' ASYNC" &>/dev/null
sleep 1
# Kill $query_for_pending with SYNC kill. This query is not a blocker, so it should be killed quickly.
timeout 5 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query='$query_for_pending' SYNC" &>/dev/null
# But let's sleep a little time, just to be sure
sleep 3
# Both queries have to be killed, no matter whether with a SYNC or ASYNC kill
$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query='$query_for_pending'"
$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query='$query_to_kill'" $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query='$query_to_kill'"
$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query='$query_for_pending'" &>/dev/null & # kill pending query
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.cannot_kill_query" &>/dev/null $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.cannot_kill_query" &>/dev/null

debian/changelog.in
View File

@ -1,4 +1,4 @@
clickhouse (2:@VERSION_STRING@) unstable; urgency=low clickhouse (@VERSION_STRING@) unstable; urgency=low
* Modified source code * Modified source code

View File

@ -32,6 +32,7 @@
- [RClickhouse](https://github.com/IMSMWU/RClickhouse) - [RClickhouse](https://github.com/IMSMWU/RClickhouse)
- Java - Java
- [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java)
- [clickhouse-client](https://github.com/Ecwid/clickhouse-client)
- Scala - Scala
- [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
- Kotlin - Kotlin

View File

@ -31,5 +31,4 @@ For more information about queries related to partition manipulations, see the [
A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup).
[Original article](https://clickhouse.yandex/docs/en/operations/backup/) <!--hide--> [Original article](https://clickhouse.yandex/docs/en/operations/backup/) <!--hide-->

View File

@ -196,7 +196,7 @@ For more details, see [GraphiteMergeTree](../../operations/table_engines/graphit
The port for connecting to the server over HTTP(s). The port for connecting to the server over HTTP(s).
If `https_port` is specified, [openSSL](#openssl) must be configured. If `https_port` is specified, [openSSL](#server_settings-openssl) must be configured.
If `http_port` is specified, the openSSL configuration is ignored even if it is set. If `http_port` is specified, the openSSL configuration is ignored even if it is set.
@ -417,7 +417,7 @@ The value 0 means that you can delete all tables without any restrictions.
## merge_tree {#server_settings-merge_tree} ## merge_tree {#server_settings-merge_tree}
Fine tuning for tables in the [ MergeTree](../../operations/table_engines/mergetree.md). Fine tuning for tables in the [MergeTree](../../operations/table_engines/mergetree.md).
For more information, see the MergeTreeSettings.h header file. For more information, see the MergeTreeSettings.h header file.
@ -430,7 +430,7 @@ For more information, see the MergeTreeSettings.h header file.
``` ```
## openSSL ## openSSL {#server_settings-openssl}
SSL client/server configuration. SSL client/server configuration.
@ -609,6 +609,19 @@ Port for communicating with clients over the TCP protocol.
<tcp_port>9000</tcp_port> <tcp_port>9000</tcp_port>
``` ```
## tcp_port_secure {#server_settings-tcp_port_secure}
Port for secure communication with clients over the TCP protocol. Use it with the [OpenSSL](#server_settings-openssl) settings.
**Possible values**
Positive integer.
**Default value**
```xml
<tcp_port_secure>9440</tcp_port_secure>
```
## tmp_path ## tmp_path

View File

@ -175,6 +175,20 @@ Any positive integer.
**Default value**: 1048576. **Default value**: 1048576.
## min_bytes_to_use_direct_io {#settings-min_bytes_to_use_direct_io}
The minimum volume of data that must be read from storage before direct I/O access to the storage disk is used.
ClickHouse uses this setting when selecting data from tables. If the total volume of all the data to be read exceeds `min_bytes_to_use_direct_io` bytes, ClickHouse reads the data from the storage disk with the `O_DIRECT` option.
**Possible values**
Positive integer.
0 — The direct I/O is disabled.
**Default value**: 0.
## log_queries ## log_queries
Setting up query logging. Setting up query logging.

View File

@ -9,38 +9,38 @@ Kafka lets you:
- Process streams as they become available. - Process streams as they become available.
Old format: ## Creating a Table {#table_engine-kafka-creating-a-table}
``` ```
Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[, kafka_row_delimiter, kafka_schema, kafka_num_consumers]) (
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = Kafka()
SETTINGS
kafka_broker_list = 'host:port',
kafka_topic_list = 'topic1,topic2,...',
kafka_group_name = 'group_name',
kafka_format = 'data_format'[,]
[kafka_row_delimiter = 'delimiter_symbol',]
[kafka_schema = '',]
[kafka_num_consumers = N,]
[kafka_skip_broken_messages = <0|1>]
``` ```
New format:
```
Kafka SETTINGS
kafka_broker_list = 'localhost:9092',
kafka_topic_list = 'topic1,topic2',
kafka_group_name = 'group1',
kafka_format = 'JSONEachRow',
kafka_row_delimiter = '\n',
kafka_schema = '',
kafka_num_consumers = 2
```
Required parameters:

- `kafka_broker_list` – A comma-separated list of brokers (for example, `localhost:9092`).
- `kafka_topic_list` – A list of Kafka topics.
- `kafka_group_name` – A group of Kafka consumers. Consumer offsets are tracked for each group separately. If you don't want messages to be duplicated in the cluster, use the same group name everywhere.
- `kafka_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../interfaces/formats.md) section.

Optional parameters:

- `kafka_row_delimiter` – Delimiter character, which ends the message.
- `kafka_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `kafka_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition.
- `kafka_skip_broken_messages` – Kafka message parser mode. If `kafka_skip_broken_messages = 1`, the engine skips Kafka messages (a message equals a row of data) that cannot be parsed.
Examples:
@ -72,6 +72,23 @@ Examples:
kafka_num_consumers = 4;
```
<details markdown="1"><summary>Deprecated Method for Creating a Table</summary>
!!! attention
    Do not use this method in new projects. If possible, switch old projects to the method described above.
```
Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
[, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages])
```
</details>
## Description
The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name.

Groups are flexible and synced on the cluster. For instance, if you have 10 topics and 5 copies of a table in a cluster, then each copy gets 2 topics. If the number of copies changes, the topics are redistributed across the copies automatically. Read more about this at [http://kafka.apache.org/intro](http://kafka.apache.org/intro).
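For example, a minimal sketch (the table, topic, and group names are illustrative): two tables that read the same topic under different group names each receive the full message stream.

```sql
-- First consumer: offsets are tracked under the group 'group1'.
CREATE TABLE queue1
(
    timestamp UInt64,
    message String
) ENGINE = Kafka()
SETTINGS
    kafka_broker_list = 'localhost:9092',
    kafka_topic_list = 'topic1',
    kafka_group_name = 'group1',
    kafka_format = 'JSONEachRow';

-- Second consumer: same topic, different group, so it receives every message again.
CREATE TABLE queue2
(
    timestamp UInt64,
    message String
) ENGINE = Kafka()
SETTINGS
    kafka_broker_list = 'localhost:9092',
    kafka_topic_list = 'topic1',
    kafka_group_name = 'group2',
    kafka_format = 'JSONEachRow';
```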
@ -70,6 +70,8 @@ For a description of request parameters, see [request description](../../query_l
- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`:
    - `index_granularity` — The granularity of an index. The number of data rows between the "marks" of an index. By default, 8192. You can see the list of all available parameters in [MergeTreeSettings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Storages/MergeTree/MergeTreeSettings.h).
    - `min_merge_bytes_to_use_direct_io` — The minimum data volume for a merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes. A usage sketch follows this list.
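A hedged sketch of passing these parameters in the `SETTINGS` clause (the table layout and the 10 GiB threshold are illustrative, not from the original text):

```sql
CREATE TABLE example_table
(
    EventDate Date,
    CounterID UInt32,
    UserID UInt32
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate)
SETTINGS index_granularity = 8192,
         min_merge_bytes_to_use_direct_io = 10737418240;
```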
**Example of sections setting**
@ -111,8 +111,8 @@ Check:
Check:

- The [tcp_port_secure](server_settings/settings.md#server_settings-tcp_port_secure) setting.
- Settings for [SSL certificates](server_settings/settings.md#server_settings-openssl).

Use the proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse-client`.
@ -88,7 +88,7 @@ Example of settings:
</source>
```

In order for ClickHouse to access an HTTPS resource, you must [configure openSSL](../../operations/server_settings/settings.md#server_settings-openssl) in the server configuration.

Setting fields:
@ -72,6 +72,6 @@ The `remote` table function can be useful in the following cases:
If the user is not specified, `default` is used.

If the password is not specified, an empty password is used.

`remoteSecure` – same as `remote`, but over a secured connection. Default port: [tcp_port_secure](../../operations/server_settings/settings.md#server_settings-tcp_port_secure) from the config, or 9440.
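A minimal usage sketch (the host, database, table, and credentials are illustrative):

```sql
-- Connects over TLS; uses tcp_port_secure (9440) unless another port is given in the address.
SELECT count()
FROM remoteSecure('example01.clickhouse.test', 'default', 'hits', 'user', 'password')
```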
[Original article](https://clickhouse.yandex/docs/en/query_language/table_functions/remote/) <!--hide-->
@ -14,7 +14,6 @@ ClickHouse can accept (`INSERT`) and return (`SELECT`
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [Values](#values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
@ -354,10 +353,22 @@ SELECT * FROM t_null
└───┴──────┘
```
In `Pretty*` formats, rows are output without escaping. Below is an example for the [PrettyCompact](#prettycompact) format:
``` sql
SELECT 'String with \'quotes\' and \t character' AS Escaping_test
```
```
┌─Escaping_test────────────────────────┐
│ String with 'quotes' and character │
└──────────────────────────────────────┘
```
To prevent too much data from being dumped to the terminal, only the first 10,000 rows are printed. If the number of rows is greater than or equal to 10,000, the message "Showed first 10 000." is printed.

This format is only appropriate for outputting a query result, not for parsing (retrieving data to insert into a table).

The `Pretty` format supports outputting total values (when using WITH TOTALS) and extreme values (when 'extremes' is set to 1). In these cases, total values and extreme values are output after the main data, in separate tables. Example (shown for the [PrettyCompact](#prettycompact) format):
``` sql
SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT PrettyCompact
@ -388,7 +399,7 @@ Extremes:
## PrettyCompact {#prettycompact}

Differs from [Pretty](#pretty) in that the grid is not drawn between rows, making the result more compact.

This format is used by default in the command-line client in interactive mode.

## PrettyCompactMonoBlock {#prettycompactmonoblock}
@ -433,6 +444,7 @@ FixedString is represented simply as a sequence
Array is represented as a length in varint format (unsigned [LEB128](https://en.wikipedia.org/wiki/LEB128)), followed by the array elements in sequence.

To support [NULL](../query_language/syntax.md#null-literal), each [Nullable](../data_types/nullable.md) value is preceded by an additional byte that indicates whether the value is `NULL`.
## Values

Prints every row in brackets. Rows are separated by commas, with no comma after the last row. The values inside brackets are also comma-separated. Numbers are output in decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing details are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces are not inserted; during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../query_language/syntax.md) is represented as `NULL`.
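For example (a sketch; the column values are illustrative), the output is approximately:

``` sql
SELECT 1 AS x, 'hello' AS s, [1, 2] AS arr FORMAT Values
```

```
(1,'hello',[1,2])
```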
@ -459,34 +471,20 @@ x: 1
y: ᴺᵁᴸᴸ
```
In the `Vertical` format, strings are output without escaping. For example:

``` sql
SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical
```

```
Row 1:
──────
test: string with 'quotes' and      with some special
characters
```

This format is only appropriate for outputting a query result, not for parsing (retrieving data to insert into a table).
## XML {#xml}