2020-03-26 16:33:00 +00:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <Processors/Formats/Impl/MsgPackRowInputFormat.h>
|
|
|
|
#include <Common/assert_cast.h>
|
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
|
|
|
|
#include <DataTypes/DataTypeArray.h>
|
|
|
|
#include <DataTypes/DataTypeDateTime.h>
|
|
|
|
#include <DataTypes/DataTypeDateTime64.h>
|
|
|
|
#include <DataTypes/DataTypeNullable.h>
|
|
|
|
|
|
|
|
#include <Columns/ColumnArray.h>
|
|
|
|
#include <Columns/ColumnFixedString.h>
|
|
|
|
#include <Columns/ColumnNullable.h>
|
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Columns/ColumnsNumber.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced by this translation unit; they are only declared here (extern).
namespace ErrorCodes
{
    /// Used when a MsgPack value cannot be stored into the destination column type.
    extern const int ILLEGAL_COLUMN;
    /// Used when the MsgPack stream is malformed, truncated or has too few values for a row.
    extern const int INCORRECT_DATA;
}
|
|
|
|
|
|
|
|
/// Constructs the MsgPack row reader.
///
/// The header, input buffer and params are forwarded to IRowInputFormat. `buf` is then built
/// on top of `in` (not the `in_` parameter — presumably the stream member exposed by the base
/// class; verify against the header), `parser` is bound to `visitor`, and the column data
/// types are taken from the header so readRow() can pair each column with its type.
MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_)
    : IRowInputFormat(header_, in_, std::move(params_))
    , buf(in)
    , parser(visitor)
    , data_types(header_.getDataTypes())
{
}
|
2020-04-14 23:08:55 +00:00
|
|
|
|
2020-06-11 00:51:27 +00:00
|
|
|
void MsgPackRowInputFormat::resetParser()
|
|
|
|
{
|
|
|
|
IRowInputFormat::resetParser();
|
|
|
|
buf.reset();
|
|
|
|
visitor.reset();
|
|
|
|
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
/// Points the visitor at a new destination column.
///
/// Any entries left on the stack are discarded, and the given (column, type) pair
/// becomes the single entry that the next parsed MsgPack object is written into.
void MsgPackVisitor::set_info(IColumn & column, DataTypePtr type) // NOLINT
{
    /// Drop everything that is currently on the stack in one step.
    info_stack = {};
    info_stack.push(Info{column, type});
}
|
|
|
|
|
2020-06-11 00:51:27 +00:00
|
|
|
void MsgPackVisitor::reset()
|
|
|
|
{
|
|
|
|
info_stack = {};
|
|
|
|
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
/// Appends an integer to the column on top of the stack, choosing the concrete column
/// class from the declared data type.
///
/// Date shares the UInt16 branch and DateTime shares the UInt32 branch. The value is
/// passed as UInt64 and converted implicitly by each column's insertValue(); no range
/// check is performed here.
///
/// Throws Exception(ILLEGAL_COLUMN) when the declared type is not one of the handled ones.
void MsgPackVisitor::insert_integer(UInt64 value) // NOLINT
{
    Info & current = info_stack.top();
    IColumn & target = current.column;

    switch (current.type->getTypeId())
    {
        case TypeIndex::UInt8:
            assert_cast<ColumnUInt8 &>(target).insertValue(value);
            return;

        case TypeIndex::Date: [[fallthrough]];
        case TypeIndex::UInt16:
            assert_cast<ColumnUInt16 &>(target).insertValue(value);
            return;

        case TypeIndex::DateTime: [[fallthrough]];
        case TypeIndex::UInt32:
            assert_cast<ColumnUInt32 &>(target).insertValue(value);
            return;

        case TypeIndex::UInt64:
            assert_cast<ColumnUInt64 &>(target).insertValue(value);
            return;

        case TypeIndex::Int8:
            assert_cast<ColumnInt8 &>(target).insertValue(value);
            return;

        case TypeIndex::Int16:
            assert_cast<ColumnInt16 &>(target).insertValue(value);
            return;

        case TypeIndex::Int32:
            assert_cast<ColumnInt32 &>(target).insertValue(value);
            return;

        case TypeIndex::Int64:
            assert_cast<ColumnInt64 &>(target).insertValue(value);
            return;

        case TypeIndex::DateTime64:
            assert_cast<DataTypeDateTime64::ColumnType &>(target).insertValue(value);
            return;

        default:
            throw Exception("Type " + current.type->getName() + " is not supported for MsgPack input format", ErrorCodes::ILLEGAL_COLUMN);
    }
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
/// Visitor callback for a non-negative MsgPack integer: stores it through insert_integer().
/// Always returns true.
bool MsgPackVisitor::visit_positive_integer(UInt64 value) // NOLINT
{
    insert_integer(value);

    return true;
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
/// Visitor callback for a negative MsgPack integer.
///
/// insert_integer() takes UInt64, so the signed value is converted to unsigned here;
/// the destination column's insertValue() then converts it to the column's own width.
/// Always returns true.
bool MsgPackVisitor::visit_negative_integer(Int64 value) // NOLINT
{
    const auto as_unsigned = static_cast<UInt64>(value);
    insert_integer(as_unsigned);
    return true;
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
bool MsgPackVisitor::visit_str(const char* value, size_t size) // NOLINT
|
2020-04-17 09:35:38 +00:00
|
|
|
{
|
|
|
|
info_stack.top().column.insertData(value, size);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
/// Visitor callback for a MsgPack float32: appends the value to the column on top of the stack.
///
/// The wire type comes from the input, not from the table schema, so the declared column
/// type is validated before the column is downcast. Without the check, a float32 sent for
/// e.g. an integer or string column would reach assert_cast with the wrong column class
/// (NOTE(review): assert_cast is understood to verify the type only in debug builds — confirm).
///
/// Always returns true.
/// Throws Exception(ILLEGAL_COLUMN) when the destination column is not Float32.
bool MsgPackVisitor::visit_float32(Float32 value) // NOLINT
{
    Info & info = info_stack.top();
    if (info.type->getTypeId() != TypeIndex::Float32)
        throw Exception("Cannot insert MessagePack float32 into column with type " + info.type->getName() + ".", ErrorCodes::ILLEGAL_COLUMN);

    assert_cast<ColumnFloat32 &>(info.column).insertValue(value);
    return true;
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
/// Visitor callback for a MsgPack float64: appends the value to the column on top of the stack.
///
/// The wire type comes from the input, not from the table schema, so the declared column
/// type is validated before the column is downcast. Without the check, a float64 sent for
/// e.g. an integer or string column would reach assert_cast with the wrong column class
/// (NOTE(review): assert_cast is understood to verify the type only in debug builds — confirm).
///
/// Always returns true.
/// Throws Exception(ILLEGAL_COLUMN) when the destination column is not Float64.
bool MsgPackVisitor::visit_float64(Float64 value) // NOLINT
{
    Info & info = info_stack.top();
    if (info.type->getTypeId() != TypeIndex::Float64)
        throw Exception("Cannot insert MessagePack float64 into column with type " + info.type->getName() + ".", ErrorCodes::ILLEGAL_COLUMN);

    assert_cast<ColumnFloat64 &>(info.column).insertValue(value);
    return true;
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
bool MsgPackVisitor::start_array(size_t size) // NOLINT
|
2020-04-17 09:35:38 +00:00
|
|
|
{
|
|
|
|
auto nested_type = assert_cast<const DataTypeArray &>(*info_stack.top().type).getNestedType();
|
|
|
|
ColumnArray & column_array = assert_cast<ColumnArray &>(info_stack.top().column);
|
|
|
|
ColumnArray::Offsets & offsets = column_array.getOffsets();
|
|
|
|
IColumn & nested_column = column_array.getData();
|
|
|
|
offsets.push_back(offsets.back() + size);
|
|
|
|
info_stack.push(Info{nested_column, nested_type});
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
bool MsgPackVisitor::end_array() // NOLINT
|
2020-04-17 09:35:38 +00:00
|
|
|
{
|
|
|
|
info_stack.pop();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-04-18 11:26:57 +00:00
|
|
|
void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT
|
2020-04-17 09:35:38 +00:00
|
|
|
{
|
|
|
|
throw Exception("Error occurred while parsing msgpack data.", ErrorCodes::INCORRECT_DATA);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool MsgPackRowInputFormat::readObject()
|
|
|
|
{
|
|
|
|
if (buf.eof())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
PeekableReadBufferCheckpoint checkpoint{buf};
|
|
|
|
size_t offset = 0;
|
|
|
|
while (!parser.execute(buf.position(), buf.available(), offset))
|
|
|
|
{
|
|
|
|
buf.position() = buf.buffer().end();
|
|
|
|
if (buf.eof())
|
|
|
|
throw Exception("Unexpected end of file while parsing msgpack object.", ErrorCodes::INCORRECT_DATA);
|
|
|
|
buf.position() = buf.buffer().end();
|
|
|
|
buf.makeContinuousMemoryFromCheckpointToPos();
|
|
|
|
buf.rollbackToCheckpoint();
|
|
|
|
}
|
|
|
|
buf.position() += offset;
|
|
|
|
return true;
|
2020-03-26 16:33:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Reads one row: one MsgPack object per column, in column order.
///
/// Returns true when every column received a value, false when the input was already
/// exhausted before the first column was read.
/// Throws Exception(INCORRECT_DATA) if the input ends after some, but not all, columns.
bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
    const size_t num_columns = columns.size();

    for (size_t i = 0; i != num_columns; ++i)
    {
        /// Direct the visitor at the column the next object belongs to.
        visitor.set_info(*columns[i], data_types[i]);

        if (readObject())
            continue;

        /// No more data: that is only a clean end if no column of this row was filled.
        if (i != 0)
            throw Exception("Not enough values to complete the row.", ErrorCodes::INCORRECT_DATA);

        return false;
    }

    return true;
}
|
|
|
|
|
2020-03-26 21:11:33 +00:00
|
|
|
/// Registers the "MsgPack" input format in the factory.
/// The registered creator builds a MsgPackRowInputFormat from the sample block, the read
/// buffer and the row params; the format settings argument is not used.
void registerInputFormatProcessorMsgPack(FormatFactory & factory)
{
    auto create_msgpack_input = [](
        ReadBuffer & buf,
        const Block & sample,
        const RowInputFormatParams & params,
        const FormatSettings &)
    {
        return std::make_shared<MsgPackRowInputFormat>(sample, buf, params);
    };

    factory.registerInputFormatProcessor("MsgPack", create_msgpack_input);
}
|
|
|
|
|
2020-03-26 21:11:33 +00:00
|
|
|
}
|