mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-14 18:32:29 +00:00
Merge branch 'master' into rocksdb_metacache
This commit is contained in:
commit
f7c2a32b96
@ -142,6 +142,7 @@ Checks: '-*,
|
||||
clang-analyzer-cplusplus.PlacementNewChecker,
|
||||
clang-analyzer-cplusplus.SelfAssignment,
|
||||
clang-analyzer-deadcode.DeadStores,
|
||||
clang-analyzer-cplusplus.Move,
|
||||
clang-analyzer-optin.cplusplus.VirtualCall,
|
||||
clang-analyzer-security.insecureAPI.UncheckedReturn,
|
||||
clang-analyzer-security.insecureAPI.bcmp,
|
||||
|
@ -610,6 +610,7 @@ class IColumn;
|
||||
M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \
|
||||
M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \
|
||||
M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
|
||||
M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
|
||||
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
|
||||
\
|
||||
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
|
||||
|
@ -130,4 +130,10 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS,
|
||||
{"JSON", FormatSettings::EscapingRule::JSON},
|
||||
{"XML", FormatSettings::EscapingRule::XML},
|
||||
{"Raw", FormatSettings::EscapingRule::Raw}})
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
|
||||
{{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN},
|
||||
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
|
||||
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
|
||||
|
||||
}
|
||||
|
@ -171,4 +171,6 @@ DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation)
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparingMode)
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
|
||||
}
|
||||
|
@ -129,6 +129,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
|
||||
format_settings.seekable_read = settings.input_format_allow_seeks;
|
||||
format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns;
|
||||
format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation;
|
||||
format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference;
|
||||
|
||||
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
|
||||
|
@ -231,9 +231,17 @@ struct FormatSettings
|
||||
EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES;
|
||||
} capn_proto;
|
||||
|
||||
/// Selects how UUID values are encoded when writing the MsgPack format
/// (chosen through the output_format_msgpack_uuid_representation setting).
enum class MsgPackUUIDRepresentation
|
||||
{
|
||||
STR, // Output UUID as a MsgPack string holding its 36-character text form.
|
||||
BIN, // Output UUID as a 16-byte MsgPack binary value.
|
||||
EXT, // Output UUID as a MsgPack extension value with ExtType = 2 (MsgPackExtensionTypes::UUID).
|
||||
};
|
||||
|
||||
/// Settings specific to the MsgPack format.
struct
|
||||
{
|
||||
/// Filled from the input_format_msgpack_number_of_columns setting: the number of columns in inserted MsgPack data.
UInt64 number_of_columns = 0;
|
||||
/// Filled from the output_format_msgpack_uuid_representation setting; EXT unless overridden.
MsgPackUUIDRepresentation output_uuid_representation = MsgPackUUIDRepresentation::EXT;
|
||||
} msgpack;
|
||||
};
|
||||
|
||||
|
11
src/Formats/MsgPackExtensionTypes.h
Normal file
11
src/Formats/MsgPackExtensionTypes.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Type tags for the MsgPack extension values understood by the MsgPack input and output formats.
/// The enumerators are converted to int8_t where they are compared with or written as the ext type byte.
enum class MsgPackExtensionTypes
|
||||
{
|
||||
/// Extension carrying a UUID.
UUID = 0x02,
|
||||
};
|
||||
|
||||
}
|
@ -5,6 +5,7 @@
|
||||
#include <cstdlib>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
@ -12,6 +13,7 @@
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
@ -20,6 +22,8 @@
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
|
||||
#include <Formats/MsgPackExtensionTypes.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -153,16 +157,29 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value)
|
||||
}
|
||||
}
|
||||
|
||||
static void insertString(IColumn & column, DataTypePtr type, const char * value, size_t size)
|
||||
static void insertString(IColumn & column, DataTypePtr type, const char * value, size_t size, bool bin)
|
||||
{
|
||||
auto insert_func = [&](IColumn & column_, DataTypePtr type_)
|
||||
{
|
||||
insertString(column_, type_, value, size);
|
||||
insertString(column_, type_, value, size, bin);
|
||||
};
|
||||
|
||||
if (checkAndInsertNullable(column, type, insert_func) || checkAndInsertLowCardinality(column, type, insert_func))
|
||||
return;
|
||||
|
||||
if (isUUID(type))
|
||||
{
|
||||
ReadBufferFromMemory buf(value, size);
|
||||
UUID uuid;
|
||||
if (bin)
|
||||
readBinary(uuid, buf);
|
||||
else
|
||||
readUUIDText(uuid, buf);
|
||||
|
||||
assert_cast<ColumnUUID &>(column).insertValue(uuid);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!isStringOrFixedString(type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack string into column with type {}.", type->getName());
|
||||
|
||||
@ -218,6 +235,15 @@ static void insertNull(IColumn & column, DataTypePtr type)
|
||||
assert_cast<ColumnNullable &>(column).insertDefault();
|
||||
}
|
||||
|
||||
/// Inserts a UUID received as a MsgPack extension payload into `column`.
///
/// `value`/`size` describe the payload without the leading ext type byte; it is read as
/// two consecutive big-endian items of the UUID's underlying type.
/// Nullable and LowCardinality wrappers are unwrapped the same way insertString does it,
/// and any other non-UUID destination type is rejected with ILLEGAL_COLUMN instead of
/// being cast blindly to ColumnUUID.
static void insertUUID(IColumn & column, DataTypePtr type, const char * value, size_t size)
{
    auto insert_func = [&](IColumn & column_, DataTypePtr type_)
    {
        insertUUID(column_, type_, value, size);
    };

    if (checkAndInsertNullable(column, type, insert_func) || checkAndInsertLowCardinality(column, type, insert_func))
        return;

    /// assert_cast below is only valid for a genuine UUID column, so check the type first.
    if (!isUUID(type))
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack UUID into column with type {}.", type->getName());

    ReadBufferFromMemory buf(value, size);
    UUID uuid;
    readBinaryBigEndian(uuid.toUnderType().items[0], buf);
    readBinaryBigEndian(uuid.toUnderType().items[1], buf);
    assert_cast<ColumnUUID &>(column).insertValue(uuid);
}
|
||||
|
||||
bool MsgPackVisitor::visit_positive_integer(UInt64 value) // NOLINT
|
||||
{
|
||||
insertInteger(info_stack.top().column, info_stack.top().type, value);
|
||||
@ -232,13 +258,13 @@ bool MsgPackVisitor::visit_negative_integer(Int64 value) // NOLINT
|
||||
|
||||
bool MsgPackVisitor::visit_str(const char * value, size_t size) // NOLINT
|
||||
{
|
||||
insertString(info_stack.top().column, info_stack.top().type, value, size);
|
||||
insertString(info_stack.top().column, info_stack.top().type, value, size, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MsgPackVisitor::visit_bin(const char * value, size_t size) // NOLINT
|
||||
{
|
||||
insertString(info_stack.top().column, info_stack.top().type, value, size);
|
||||
insertString(info_stack.top().column, info_stack.top().type, value, size, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -324,6 +350,18 @@ bool MsgPackVisitor::visit_nil()
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MsgPackVisitor::visit_ext(const char * value, uint32_t size)
|
||||
{
|
||||
int8_t type = *value;
|
||||
if (*value == int8_t(MsgPackExtensionTypes::UUID))
|
||||
{
|
||||
insertUUID(info_stack.top().column, info_stack.top().type, value + 1, size - 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type);
|
||||
}
|
||||
|
||||
void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT
|
||||
{
|
||||
throw Exception("Error occurred while parsing msgpack data.", ErrorCodes::INCORRECT_DATA);
|
||||
@ -455,8 +493,13 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
|
||||
}
|
||||
case msgpack::type::object_type::NIL:
|
||||
return nullptr;
|
||||
default:
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack type is not supported");
|
||||
case msgpack::type::object_type::EXT:
{
    /// Schema inference recognises only the UUID extension type; anything else is an error.
    msgpack::object_ext object_ext = object.via.ext;
    if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUID))
        return std::make_shared<DataTypeUUID>();
    /// "{:x}" is the fmt spelling of hexadecimal output; "{%x}" is not a valid replacement field.
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type());
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,7 @@ public:
|
||||
bool end_map_key();
|
||||
bool start_map_value();
|
||||
bool end_map_value();
|
||||
bool visit_ext(const char * value, uint32_t size);
|
||||
|
||||
/// This function will be called if an error occurs during parsing
|
||||
[[noreturn]] void parse_error(size_t parsed_offset, size_t error_offset);
|
||||
@ -55,7 +56,7 @@ private:
|
||||
std::stack<Info> info_stack;
|
||||
};
|
||||
|
||||
class MsgPackRowInputFormat final : public IRowInputFormat
|
||||
class MsgPackRowInputFormat : public IRowInputFormat
|
||||
{
|
||||
public:
|
||||
MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);
|
||||
|
@ -5,6 +5,9 @@
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
@ -19,6 +22,8 @@
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
|
||||
#include <Formats/MsgPackExtensionTypes.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -27,8 +32,8 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_)
|
||||
: IRowOutputFormat(header_, out_, params_), packer(out_) {}
|
||||
MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_)
|
||||
: IRowOutputFormat(header_, out_, params_), packer(out_), format_settings(format_settings_) {}
|
||||
|
||||
void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num)
|
||||
{
|
||||
@ -164,6 +169,42 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr
|
||||
serializeField(*dict_column, dict_type, index);
|
||||
return;
|
||||
}
|
||||
case TypeIndex::UUID:
|
||||
{
|
||||
const auto & uuid_column = assert_cast<const ColumnUUID &>(column);
|
||||
switch (format_settings.msgpack.output_uuid_representation)
|
||||
{
|
||||
case FormatSettings::MsgPackUUIDRepresentation::BIN:
|
||||
{
|
||||
WriteBufferFromOwnString buf;
|
||||
writeBinary(uuid_column.getElement(row_num), buf);
|
||||
StringRef uuid_bin = buf.stringRef();
|
||||
packer.pack_bin(uuid_bin.size);
|
||||
packer.pack_bin_body(uuid_bin.data, uuid_bin.size);
|
||||
return;
|
||||
}
|
||||
case FormatSettings::MsgPackUUIDRepresentation::STR:
{
    /// Text form of the UUID, emitted as a MsgPack str: the str header first, then its bytes.
    WriteBufferFromOwnString buf;
    writeText(uuid_column.getElement(row_num), buf);
    StringRef uuid_text = buf.stringRef();
    packer.pack_str(uuid_text.size);
    /// Pair the str header with the str body call rather than the bin one.
    packer.pack_str_body(uuid_text.data, uuid_text.size);
    return;
}
|
||||
case FormatSettings::MsgPackUUIDRepresentation::EXT:
|
||||
{
|
||||
WriteBufferFromOwnString buf;
|
||||
UUID value = uuid_column.getElement(row_num);
|
||||
writeBinaryBigEndian(value.toUnderType().items[0], buf);
|
||||
writeBinaryBigEndian(value.toUnderType().items[1], buf);
|
||||
StringRef uuid_ext = buf.stringRef();
|
||||
packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUID));
|
||||
packer.pack_ext_body(uuid_ext.data, uuid_ext.size);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -186,9 +227,9 @@ void registerOutputFormatMsgPack(FormatFactory & factory)
|
||||
WriteBuffer & buf,
|
||||
const Block & sample,
|
||||
const RowOutputFormatParams & params,
|
||||
const FormatSettings &)
|
||||
const FormatSettings & settings)
|
||||
{
|
||||
return std::make_shared<MsgPackRowOutputFormat>(buf, sample, params);
|
||||
return std::make_shared<MsgPackRowOutputFormat>(buf, sample, params, settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting("MsgPack");
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ namespace DB
|
||||
class MsgPackRowOutputFormat final : public IRowOutputFormat
|
||||
{
|
||||
public:
|
||||
MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_);
|
||||
MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_);
|
||||
|
||||
String getName() const override { return "MsgPackRowOutputFormat"; }
|
||||
|
||||
@ -28,6 +28,7 @@ private:
|
||||
void serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num);
|
||||
|
||||
msgpack::packer<DB::WriteBuffer> packer;
|
||||
const FormatSettings format_settings;
|
||||
};
|
||||
|
||||
}
|
||||
|
4
tests/queries/0_stateless/02187_msg_pack_uuid.reference
Normal file
4
tests/queries/0_stateless/02187_msg_pack_uuid.reference
Normal file
@ -0,0 +1,4 @@
|
||||
5e7084e0-019f-461f-9e70-84e0019f561f
|
||||
5e7084e0-019f-461f-9e70-84e0019f561f
|
||||
5e7084e0-019f-461f-9e70-84e0019f561f
|
||||
5e7084e0-019f-461f-9e70-84e0019f561f UUID
|
17
tests/queries/0_stateless/02187_msg_pack_uuid.sh
Executable file
17
tests/queries/0_stateless/02187_msg_pack_uuid.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-parallel, no-fasttest
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Write one UUID with the 'str' representation, then read the file back as a UUID column.
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('uuid_str.msgpack', 'MsgPack', 'uuid UUID')"
|
||||
|
||||
# Same round trip with the 'bin' representation.
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID')"
|
||||
|
||||
# Same round trip with the 'ext' representation.
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext'"
|
||||
$CLICKHOUSE_CLIENT -q "select * from file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID')"
|
||||
# Read the ext-encoded file without an explicit structure; the reference output expects the column type to be reported as UUID.
$CLICKHOUSE_CLIENT -q "select c1, toTypeName(c1) from file('uuid_ext.msgpack') settings input_format_msgpack_number_of_columns=1"
|
||||
|
Loading…
Reference in New Issue
Block a user