mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Merge branch 'master' into in-memory-compression
This commit is contained in:
commit
a9d9a6d56f
@ -37,15 +37,13 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
|
||||
#
|
||||
# - 4.0+ ccache always includes this environment variable into the hash
|
||||
# of the manifest, which do not allow to use previous cache,
|
||||
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
|
||||
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
|
||||
#
|
||||
# So for:
|
||||
# - 4.2+ time_macros sloppiness is used,
|
||||
# - 4.2+ does not require any sloppiness
|
||||
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
|
||||
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
|
||||
message(STATUS "Use time_macros sloppiness for ccache")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
|
||||
message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
|
||||
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
|
||||
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
|
||||
|
@ -342,9 +342,10 @@ function run_tests
|
||||
|
||||
# JSON functions
|
||||
01666_blns
|
||||
01674_htm_xml_coarse_parse
|
||||
)
|
||||
|
||||
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
(time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
@ -13,6 +13,7 @@ RUN apt-get update -y \
|
||||
ncdu \
|
||||
netcat-openbsd \
|
||||
openssl \
|
||||
protobuf-compiler \
|
||||
python3 \
|
||||
python3-lxml \
|
||||
python3-requests \
|
||||
|
@ -25,6 +25,7 @@ The following actions are supported:
|
||||
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
|
||||
- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
|
||||
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
|
||||
- [RENAME COLUMN](#alter_rename-column) — Renames an existing column.
|
||||
|
||||
These actions are described in detail below.
|
||||
|
||||
@ -183,6 +184,22 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
||||
|
||||
- [REMOVE TTL](ttl.md).
|
||||
|
||||
## RENAME COLUMN {#alter_rename-column}
|
||||
|
||||
Renames an existing column.
|
||||
|
||||
Syntax:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name;
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
|
||||
```
|
||||
|
||||
## Limitations {#alter-query-limitations}
|
||||
|
||||
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.
|
||||
|
@ -474,4 +474,19 @@ ColumnPtr ColumnFixedString::compress() const
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
void ColumnFixedString::alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size)
|
||||
{
|
||||
size_t length = data.size() - old_size;
|
||||
if (length < n)
|
||||
{
|
||||
data.resize_fill(old_size + n);
|
||||
}
|
||||
else if (length > n)
|
||||
{
|
||||
data.resize_assume_reserved(old_size);
|
||||
throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -184,7 +184,8 @@ public:
|
||||
const Chars & getChars() const { return chars; }
|
||||
|
||||
size_t getN() const { return n; }
|
||||
|
||||
static void alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size);
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -404,7 +404,7 @@
|
||||
M(432, UNKNOWN_CODEC) \
|
||||
M(433, ILLEGAL_CODEC_PARAMETER) \
|
||||
M(434, CANNOT_PARSE_PROTOBUF_SCHEMA) \
|
||||
M(435, NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD) \
|
||||
M(435, NO_COLUMN_SERIALIZED_TO_REQUIRED_PROTOBUF_FIELD) \
|
||||
M(436, PROTOBUF_BAD_CAST) \
|
||||
M(437, PROTOBUF_FIELD_NOT_REPEATED) \
|
||||
M(438, DATA_TYPE_CANNOT_BE_PROMOTED) \
|
||||
@ -412,7 +412,7 @@
|
||||
M(440, INVALID_LIMIT_EXPRESSION) \
|
||||
M(441, CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING) \
|
||||
M(442, BAD_DATABASE_FOR_TEMPORARY_TABLE) \
|
||||
M(443, NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA) \
|
||||
M(443, NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS) \
|
||||
M(444, UNKNOWN_PROTOBUF_FORMAT) \
|
||||
M(445, CANNOT_MPROTECT) \
|
||||
M(446, FUNCTION_NOT_ALLOWED) \
|
||||
@ -535,6 +535,8 @@
|
||||
M(566, CANNOT_RMDIR) \
|
||||
M(567, DUPLICATED_PART_UUIDS) \
|
||||
M(568, RAFT_ERROR) \
|
||||
M(569, MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD) \
|
||||
M(570, DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD) \
|
||||
\
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
M(1000, POCO_EXCEPTION) \
|
||||
|
@ -10,8 +10,6 @@
|
||||
#include <Common/AlignedBuffer.h>
|
||||
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
@ -261,45 +259,6 @@ void DataTypeAggregateFunction::deserializeTextCSV(IColumn & column, ReadBuffer
|
||||
}
|
||||
|
||||
|
||||
void DataTypeAggregateFunction::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
value_index = static_cast<bool>(
|
||||
protobuf.writeAggregateFunction(function, assert_cast<const ColumnAggregateFunction &>(column).getData()[row_num]));
|
||||
}
|
||||
|
||||
void DataTypeAggregateFunction::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
ColumnAggregateFunction & column_concrete = assert_cast<ColumnAggregateFunction &>(column);
|
||||
Arena & arena = column_concrete.createOrGetArena();
|
||||
size_t size_of_state = function->sizeOfData();
|
||||
AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData());
|
||||
function->create(place);
|
||||
try
|
||||
{
|
||||
if (!protobuf.readAggregateFunction(function, place, arena))
|
||||
{
|
||||
function->destroy(place);
|
||||
return;
|
||||
}
|
||||
auto & container = column_concrete.getData();
|
||||
if (allow_add_row)
|
||||
{
|
||||
container.emplace_back(place);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
container.back() = place;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
function->destroy(place);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeAggregateFunction::createColumn() const
|
||||
{
|
||||
return ColumnAggregateFunction::create(function);
|
||||
|
@ -59,8 +59,6 @@ public:
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
@ -522,55 +521,6 @@ void DataTypeArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, cons
|
||||
}
|
||||
|
||||
|
||||
void DataTypeArray::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
|
||||
const ColumnArray::Offsets & offsets = column_array.getOffsets();
|
||||
size_t offset = offsets[row_num - 1] + value_index;
|
||||
size_t next_offset = offsets[row_num];
|
||||
const IColumn & nested_column = column_array.getData();
|
||||
size_t i;
|
||||
for (i = offset; i < next_offset; ++i)
|
||||
{
|
||||
size_t element_stored = 0;
|
||||
nested->serializeProtobuf(nested_column, i, protobuf, element_stored);
|
||||
if (!element_stored)
|
||||
break;
|
||||
}
|
||||
value_index += i - offset;
|
||||
}
|
||||
|
||||
|
||||
void DataTypeArray::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
|
||||
IColumn & nested_column = column_array.getData();
|
||||
ColumnArray::Offsets & offsets = column_array.getOffsets();
|
||||
size_t old_size = offsets.size();
|
||||
try
|
||||
{
|
||||
bool nested_row_added;
|
||||
do
|
||||
nested->deserializeProtobuf(nested_column, protobuf, true, nested_row_added);
|
||||
while (nested_row_added && protobuf.canReadMoreValues());
|
||||
if (allow_add_row)
|
||||
{
|
||||
offsets.emplace_back(nested_column.size());
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
offsets.back() = nested_column.size();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
offsets.resize_assume_reserved(old_size);
|
||||
nested_column.popBack(nested_column.size() - offsets.back());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
MutableColumnPtr DataTypeArray::createColumn() const
|
||||
{
|
||||
return ColumnArray::create(nested->createColumn(), ColumnArray::ColumnOffsets::create());
|
||||
|
@ -85,15 +85,6 @@ public:
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column,
|
||||
size_t row_num,
|
||||
ProtobufWriter & protobuf,
|
||||
size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column,
|
||||
ProtobufReader & protobuf,
|
||||
bool allow_add_row,
|
||||
bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
Field getDefault() const override;
|
||||
|
@ -4,8 +4,6 @@
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
@ -81,30 +79,6 @@ void DataTypeDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
|
||||
assert_cast<ColumnUInt16 &>(column).getData().push_back(value.getDayNum());
|
||||
}
|
||||
|
||||
void DataTypeDate::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
value_index = static_cast<bool>(protobuf.writeDate(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num])));
|
||||
}
|
||||
|
||||
void DataTypeDate::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
DayNum d;
|
||||
if (!protobuf.readDate(d))
|
||||
return;
|
||||
|
||||
auto & container = assert_cast<ColumnUInt16 &>(column).getData();
|
||||
if (allow_add_row)
|
||||
{
|
||||
container.emplace_back(d);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
container.back() = d;
|
||||
}
|
||||
|
||||
bool DataTypeDate::equals(const IDataType & rhs) const
|
||||
{
|
||||
return typeid(rhs) == typeid(*this);
|
||||
|
@ -24,8 +24,6 @@ public:
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
bool canBeUsedAsVersion() const override { return true; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
|
@ -5,8 +5,6 @@
|
||||
#include <common/DateLUT.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
@ -164,32 +162,6 @@ void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
void DataTypeDateTime::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
|
||||
// On some platforms `time_t` is `long` but not `unsigned int` (UInt32 that we store in column), hence static_cast.
|
||||
value_index = static_cast<bool>(protobuf.writeDateTime(static_cast<time_t>(assert_cast<const ColumnType &>(column).getData()[row_num])));
|
||||
}
|
||||
|
||||
void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
time_t t;
|
||||
if (!protobuf.readDateTime(t))
|
||||
return;
|
||||
|
||||
auto & container = assert_cast<ColumnType &>(column).getData();
|
||||
if (allow_add_row)
|
||||
{
|
||||
container.emplace_back(t);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
container.back() = t;
|
||||
}
|
||||
|
||||
bool DataTypeDateTime::equals(const IDataType & rhs) const
|
||||
{
|
||||
/// DateTime with different timezones are equal, because:
|
||||
|
@ -68,8 +68,6 @@ public:
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
bool canBeUsedAsVersion() const override { return true; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
|
@ -6,8 +6,6 @@
|
||||
#include <common/DateLUT.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
@ -182,30 +180,6 @@ void DataTypeDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
void DataTypeDateTime64::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
value_index = static_cast<bool>(protobuf.writeDateTime64(assert_cast<const ColumnType &>(column).getData()[row_num], scale));
|
||||
}
|
||||
|
||||
void DataTypeDateTime64::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
DateTime64 t = 0;
|
||||
if (!protobuf.readDateTime64(t, scale))
|
||||
return;
|
||||
|
||||
auto & container = assert_cast<ColumnType &>(column).getData();
|
||||
if (allow_add_row)
|
||||
{
|
||||
container.emplace_back(t);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
container.back() = t;
|
||||
}
|
||||
|
||||
bool DataTypeDateTime64::equals(const IDataType & rhs) const
|
||||
{
|
||||
if (const auto * ptype = typeid_cast<const DataTypeDateTime64 *>(&rhs))
|
||||
|
@ -42,8 +42,6 @@ public:
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
bool equals(const IDataType & rhs) const override;
|
||||
|
||||
|
@ -4,8 +4,6 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Core/DecimalFunctions.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
@ -1,7 +1,5 @@
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Parsers/IAST.h>
|
||||
@ -254,34 +252,6 @@ void DataTypeEnum<Type>::deserializeBinaryBulk(
|
||||
x.resize(initial_size + size / sizeof(FieldType));
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
void DataTypeEnum<Type>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
protobuf.prepareEnumMapping(values);
|
||||
value_index = static_cast<bool>(protobuf.writeEnum(assert_cast<const ColumnType &>(column).getData()[row_num]));
|
||||
}
|
||||
|
||||
template<typename Type>
|
||||
void DataTypeEnum<Type>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
protobuf.prepareEnumMapping(values);
|
||||
row_added = false;
|
||||
Type value;
|
||||
if (!protobuf.readEnum(value))
|
||||
return;
|
||||
|
||||
auto & container = assert_cast<ColumnType &>(column).getData();
|
||||
if (allow_add_row)
|
||||
{
|
||||
container.emplace_back(value);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
container.back() = value;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
Field DataTypeEnum<Type>::getDefault() const
|
||||
{
|
||||
|
@ -132,9 +132,6 @@ public:
|
||||
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const override;
|
||||
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, const size_t limit, const double avg_value_size_hint) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override { return ColumnType::create(); }
|
||||
|
||||
Field getDefault() const override;
|
||||
|
@ -2,8 +2,6 @@
|
||||
#include <Columns/ColumnConst.h>
|
||||
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
|
||||
@ -25,7 +23,6 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_READ_ALL_DATA;
|
||||
extern const int TOO_LARGE_STRING_SIZE;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int UNEXPECTED_AST_STRUCTURE;
|
||||
}
|
||||
@ -127,16 +124,7 @@ static inline void alignStringLength(const DataTypeFixedString & type,
|
||||
ColumnFixedString::Chars & data,
|
||||
size_t string_start)
|
||||
{
|
||||
size_t length = data.size() - string_start;
|
||||
if (length < type.getN())
|
||||
{
|
||||
data.resize_fill(string_start + type.getN());
|
||||
}
|
||||
else if (length > type.getN())
|
||||
{
|
||||
data.resize_assume_reserved(string_start);
|
||||
throw Exception("Too large value for " + type.getName(), ErrorCodes::TOO_LARGE_STRING_SIZE);
|
||||
}
|
||||
ColumnFixedString::alignStringLength(data, type.getN(), string_start);
|
||||
}
|
||||
|
||||
template <typename Reader>
|
||||
@ -215,53 +203,6 @@ void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr
|
||||
}
|
||||
|
||||
|
||||
void DataTypeFixedString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
|
||||
value_index = static_cast<bool>(protobuf.writeString(StringRef(pos, n)));
|
||||
}
|
||||
|
||||
|
||||
void DataTypeFixedString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
auto & column_string = assert_cast<ColumnFixedString &>(column);
|
||||
ColumnFixedString::Chars & data = column_string.getChars();
|
||||
size_t old_size = data.size();
|
||||
try
|
||||
{
|
||||
if (allow_add_row)
|
||||
{
|
||||
if (protobuf.readStringInto(data))
|
||||
{
|
||||
alignStringLength(*this, data, old_size);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
data.resize_assume_reserved(old_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
ColumnFixedString::Chars temp_data;
|
||||
if (protobuf.readStringInto(temp_data))
|
||||
{
|
||||
alignStringLength(*this, temp_data, 0);
|
||||
column_string.popBack(1);
|
||||
old_size = data.size();
|
||||
data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
data.resize_assume_reserved(old_size);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
MutableColumnPtr DataTypeFixedString::createColumn() const
|
||||
{
|
||||
return ColumnFixedString::create(n);
|
||||
|
@ -66,9 +66,6 @@ public:
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
Field getDefault() const override;
|
||||
|
@ -808,31 +808,6 @@ void DataTypeLowCardinality::serializeTextXML(const IColumn & column, size_t row
|
||||
serializeImpl(column, row_num, &IDataType::serializeAsTextXML, ostr, settings);
|
||||
}
|
||||
|
||||
void DataTypeLowCardinality::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
serializeImpl(column, row_num, &IDataType::serializeProtobuf, protobuf, value_index);
|
||||
}
|
||||
|
||||
void DataTypeLowCardinality::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
if (allow_add_row)
|
||||
{
|
||||
deserializeImpl(column, &IDataType::deserializeProtobuf, protobuf, true, row_added);
|
||||
return;
|
||||
}
|
||||
|
||||
row_added = false;
|
||||
auto & low_cardinality_column= getColumnLowCardinality(column);
|
||||
auto nested_column = low_cardinality_column.getDictionary().getNestedColumn();
|
||||
auto temp_column = nested_column->cloneEmpty();
|
||||
size_t unique_row_number = low_cardinality_column.getIndexes().getUInt(low_cardinality_column.size() - 1);
|
||||
temp_column->insertFrom(*nested_column, unique_row_number);
|
||||
bool dummy;
|
||||
dictionary_type.get()->deserializeProtobuf(*temp_column, protobuf, false, dummy);
|
||||
low_cardinality_column.popBack(1);
|
||||
low_cardinality_column.insertFromFullColumn(*temp_column, 0);
|
||||
}
|
||||
|
||||
template <typename... Params, typename... Args>
|
||||
void DataTypeLowCardinality::serializeImpl(
|
||||
const IColumn & column, size_t row_num, DataTypeLowCardinality::SerializeFunctionPtr<Params...> func, Args &&... args) const
|
||||
|
@ -67,8 +67,6 @@ public:
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
|
@ -336,16 +336,6 @@ void DataTypeMap::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
nested->deserializeBinaryBulkWithMultipleStreams(column_map.getNestedColumnPtr(), limit, settings, state, cache);
|
||||
}
|
||||
|
||||
void DataTypeMap::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
nested->serializeProtobuf(extractNestedColumn(column), row_num, protobuf, value_index);
|
||||
}
|
||||
|
||||
void DataTypeMap::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
nested->deserializeProtobuf(extractNestedColumn(column), protobuf, allow_add_row, row_added);
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeMap::createColumn() const
|
||||
{
|
||||
return ColumnMap::create(nested->createColumn());
|
||||
|
@ -76,9 +76,6 @@ public:
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
Field getDefault() const override;
|
||||
@ -92,6 +89,8 @@ public:
|
||||
const DataTypePtr & getValueType() const { return value_type; }
|
||||
DataTypes getKeyValueTypes() const { return {key_type, value_type}; }
|
||||
|
||||
const DataTypePtr & getNestedType() const { return nested; }
|
||||
|
||||
private:
|
||||
template <typename Writer>
|
||||
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const;
|
||||
|
@ -486,33 +486,6 @@ void DataTypeNullable::serializeTextXML(const IColumn & column, size_t row_num,
|
||||
nested_data_type->serializeAsTextXML(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
void DataTypeNullable::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
|
||||
if (!col.isNullAt(row_num))
|
||||
nested_data_type->serializeProtobuf(col.getNestedColumn(), row_num, protobuf, value_index);
|
||||
}
|
||||
|
||||
void DataTypeNullable::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
IColumn & nested_column = col.getNestedColumn();
|
||||
size_t old_size = nested_column.size();
|
||||
try
|
||||
{
|
||||
nested_data_type->deserializeProtobuf(nested_column, protobuf, allow_add_row, row_added);
|
||||
if (row_added)
|
||||
col.getNullMapData().push_back(0);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
nested_column.popBack(nested_column.size() - old_size);
|
||||
col.getNullMapData().resize_assume_reserved(old_size);
|
||||
row_added = false;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeNullable::createColumn() const
|
||||
{
|
||||
return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create());
|
||||
|
@ -73,9 +73,6 @@ public:
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
Field getDefault() const override;
|
||||
|
@ -8,8 +8,6 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -205,34 +203,6 @@ void DataTypeNumberBase<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer &
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DataTypeNumberBase<T>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
value_index = static_cast<bool>(protobuf.writeNumber(assert_cast<const ColumnVector<T> &>(column).getData()[row_num]));
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DataTypeNumberBase<T>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
T value;
|
||||
if (!protobuf.readNumber(value))
|
||||
return;
|
||||
|
||||
auto & container = typeid_cast<ColumnVector<T> &>(column).getData();
|
||||
if (allow_add_row)
|
||||
{
|
||||
container.emplace_back(value);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
container.back() = value;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
MutableColumnPtr DataTypeNumberBase<T>::createColumn() const
|
||||
{
|
||||
|
@ -45,9 +45,6 @@ public:
|
||||
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
|
||||
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
bool isParametric() const override { return false; }
|
||||
|
@ -9,8 +9,6 @@
|
||||
#include <Core/Field.h>
|
||||
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
|
||||
@ -311,55 +309,6 @@ void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, con
|
||||
}
|
||||
|
||||
|
||||
void DataTypeString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
value_index = static_cast<bool>(protobuf.writeString(assert_cast<const ColumnString &>(column).getDataAt(row_num)));
|
||||
}
|
||||
|
||||
|
||||
void DataTypeString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
auto & column_string = assert_cast<ColumnString &>(column);
|
||||
ColumnString::Chars & data = column_string.getChars();
|
||||
ColumnString::Offsets & offsets = column_string.getOffsets();
|
||||
size_t old_size = offsets.size();
|
||||
try
|
||||
{
|
||||
if (allow_add_row)
|
||||
{
|
||||
if (protobuf.readStringInto(data))
|
||||
{
|
||||
data.emplace_back(0);
|
||||
offsets.emplace_back(data.size());
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
data.resize_assume_reserved(offsets.back());
|
||||
}
|
||||
else
|
||||
{
|
||||
ColumnString::Chars temp_data;
|
||||
if (protobuf.readStringInto(temp_data))
|
||||
{
|
||||
temp_data.emplace_back(0);
|
||||
column_string.popBack(1);
|
||||
old_size = offsets.size();
|
||||
data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end());
|
||||
offsets.emplace_back(data.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
offsets.resize_assume_reserved(old_size);
|
||||
data.resize_assume_reserved(offsets.back());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
Field DataTypeString::getDefault() const
|
||||
{
|
||||
return String();
|
||||
|
@ -47,9 +47,6 @@ public:
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
Field getDefault() const override;
|
||||
|
@ -504,33 +504,6 @@ void DataTypeTuple::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void DataTypeTuple::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
for (; value_index < elems.size(); ++value_index)
|
||||
{
|
||||
size_t stored = 0;
|
||||
elems[value_index]->serializeProtobuf(extractElementColumn(column, value_index), row_num, protobuf, stored);
|
||||
if (!stored)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void DataTypeTuple::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
bool all_elements_get_row = true;
|
||||
addElementSafe(elems, column, [&]
|
||||
{
|
||||
for (const auto & i : ext::range(0, ext::size(elems)))
|
||||
{
|
||||
bool element_row_added;
|
||||
elems[i]->deserializeProtobuf(extractElementColumn(column, i), protobuf, allow_add_row, element_row_added);
|
||||
all_elements_get_row &= element_row_added;
|
||||
}
|
||||
});
|
||||
row_added = all_elements_get_row;
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeTuple::createColumn() const
|
||||
{
|
||||
size_t size = elems.size();
|
||||
|
@ -81,9 +81,6 @@ public:
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
Field getDefault() const override;
|
||||
|
@ -1,8 +1,6 @@
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/assert_cast.h>
|
||||
@ -79,30 +77,6 @@ void DataTypeUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
|
||||
assert_cast<ColumnUInt128 &>(column).getData().push_back(value);
|
||||
}
|
||||
|
||||
void DataTypeUUID::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
value_index = static_cast<bool>(protobuf.writeUUID(UUID(assert_cast<const ColumnUInt128 &>(column).getData()[row_num])));
|
||||
}
|
||||
|
||||
void DataTypeUUID::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
UUID uuid;
|
||||
if (!protobuf.readUUID(uuid))
|
||||
return;
|
||||
|
||||
auto & container = assert_cast<ColumnUInt128 &>(column).getData();
|
||||
if (allow_add_row)
|
||||
{
|
||||
container.emplace_back(uuid);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
container.back() = uuid;
|
||||
}
|
||||
|
||||
bool DataTypeUUID::equals(const IDataType & rhs) const
|
||||
{
|
||||
return typeid(rhs) == typeid(*this);
|
||||
|
@ -26,8 +26,6 @@ public:
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
bool canBeUsedInBitOperations() const override { return true; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
|
@ -4,8 +4,6 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Core/DecimalFunctions.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Formats/ProtobufWriter.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/readDecimalText.h>
|
||||
@ -111,33 +109,6 @@ T DataTypeDecimal<T>::parseFromString(const String & str) const
|
||||
return x;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void DataTypeDecimal<T>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
{
|
||||
if (value_index)
|
||||
return;
|
||||
value_index = static_cast<bool>(protobuf.writeDecimal(assert_cast<const ColumnType &>(column).getData()[row_num], this->scale));
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DataTypeDecimal<T>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
||||
{
|
||||
row_added = false;
|
||||
T decimal;
|
||||
if (!protobuf.readDecimal(decimal, this->precision, this->scale))
|
||||
return;
|
||||
|
||||
auto & container = assert_cast<ColumnType &>(column).getData();
|
||||
if (allow_add_row)
|
||||
{
|
||||
container.emplace_back(decimal);
|
||||
row_added = true;
|
||||
}
|
||||
else
|
||||
container.back() = decimal;
|
||||
}
|
||||
|
||||
|
||||
static DataTypePtr create(const ASTPtr & arguments)
|
||||
{
|
||||
|
@ -46,9 +46,6 @@ public:
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
||||
bool equals(const IDataType & rhs) const override;
|
||||
|
||||
T parseFromString(const String & str) const;
|
||||
|
@ -26,9 +26,6 @@ class Field;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
using DataTypes = std::vector<DataTypePtr>;
|
||||
|
||||
class ProtobufReader;
|
||||
class ProtobufWriter;
|
||||
|
||||
struct NameAndTypePair;
|
||||
|
||||
|
||||
@ -235,10 +232,6 @@ public:
|
||||
/// If method will throw an exception, then column will be in same state as before call to method.
|
||||
virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0;
|
||||
|
||||
/** Serialize to a protobuf. */
|
||||
virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0;
|
||||
virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0;
|
||||
|
||||
/** Text serialization with escaping but without quoting.
|
||||
*/
|
||||
void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const;
|
||||
|
@ -34,8 +34,6 @@ public:
|
||||
void deserializeBinaryBulk(IColumn &, ReadBuffer &, size_t, double) const override { throwNoSerialization(); }
|
||||
void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||
void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||
void serializeProtobuf(const IColumn &, size_t, ProtobufWriter &, size_t &) const override { throwNoSerialization(); }
|
||||
void deserializeProtobuf(IColumn &, ProtobufReader &, bool, bool &) const override { throwNoSerialization(); }
|
||||
|
||||
MutableColumnPtr createColumn() const override
|
||||
{
|
||||
|
@ -401,10 +401,16 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
|
||||
{
|
||||
auto builder = FunctionFactory::instance().tryGet(func->name, context);
|
||||
auto function = builder->build({});
|
||||
auto result = function->execute({}, {}, 0);
|
||||
function->prepare({});
|
||||
|
||||
/// We assume that function will not take arguments and will return constant value like tcpPort or hostName
|
||||
/// Such functions will return column with size equal to input_rows_count.
|
||||
size_t input_rows_count = 1;
|
||||
auto result = function->execute({}, function->getResultType(), input_rows_count);
|
||||
|
||||
Field value;
|
||||
result->get(0, value);
|
||||
|
||||
AutoPtr<Text> text_value(doc->createTextNode(getFieldAsString(value)));
|
||||
current_xml_element->appendChild(text_value);
|
||||
}
|
||||
|
@ -120,7 +120,6 @@ struct FormatSettings
|
||||
|
||||
struct
|
||||
{
|
||||
bool write_row_delimiters = true;
|
||||
/**
|
||||
* Some buffers (kafka / rabbit) split the rows internally using callback,
|
||||
* and always send one row per message, so we can push there formats
|
||||
@ -128,7 +127,7 @@ struct FormatSettings
|
||||
* we have to enforce exporting at most one row in the format output,
|
||||
* because Protobuf without delimiters is not generally useful.
|
||||
*/
|
||||
bool allow_many_rows_no_delimiters = false;
|
||||
bool allow_multiple_rows_without_delimiter = false;
|
||||
} protobuf;
|
||||
|
||||
struct
|
||||
|
@ -1,55 +0,0 @@
|
||||
#include "ProtobufColumnMatcher.h"
|
||||
#if USE_PROTOBUF
|
||||
#include <Common/Exception.h>
|
||||
#include <google/protobuf/descriptor.pb.h>
|
||||
#include <Poco/String.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
String columnNameToSearchableForm(const String & str)
|
||||
{
|
||||
return Poco::replace(Poco::toUpper(str), ".", "_");
|
||||
}
|
||||
}
|
||||
|
||||
namespace ProtobufColumnMatcher
|
||||
{
|
||||
namespace details
|
||||
{
|
||||
ColumnNameMatcher::ColumnNameMatcher(const std::vector<String> & column_names) : column_usage(column_names.size())
|
||||
{
|
||||
column_usage.resize(column_names.size(), false);
|
||||
for (size_t i = 0; i != column_names.size(); ++i)
|
||||
column_name_to_index_map.emplace(columnNameToSearchableForm(column_names[i]), i);
|
||||
}
|
||||
|
||||
size_t ColumnNameMatcher::findColumn(const String & field_name)
|
||||
{
|
||||
auto it = column_name_to_index_map.find(columnNameToSearchableForm(field_name));
|
||||
if (it == column_name_to_index_map.end())
|
||||
return -1;
|
||||
size_t column_index = it->second;
|
||||
if (column_usage[column_index])
|
||||
return -1;
|
||||
column_usage[column_index] = true;
|
||||
return column_index;
|
||||
}
|
||||
|
||||
void throwNoCommonColumns()
|
||||
{
|
||||
throw Exception("No common columns with provided protobuf schema", ErrorCodes::NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
@ -1,196 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_formats.h"
|
||||
#endif
|
||||
|
||||
#if USE_PROTOBUF
|
||||
# include <memory>
|
||||
# include <unordered_map>
|
||||
# include <vector>
|
||||
# include <common/types.h>
|
||||
# include <boost/blank.hpp>
|
||||
# include <google/protobuf/descriptor.h>
|
||||
# include <google/protobuf/descriptor.pb.h>
|
||||
|
||||
namespace google
|
||||
{
|
||||
namespace protobuf
|
||||
{
|
||||
class Descriptor;
|
||||
class FieldDescriptor;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ProtobufColumnMatcher
|
||||
{
|
||||
struct DefaultTraits
|
||||
{
|
||||
using MessageData = boost::blank;
|
||||
using FieldData = boost::blank;
|
||||
};
|
||||
|
||||
template <typename Traits = DefaultTraits>
|
||||
struct Message;
|
||||
|
||||
/// Represents a field in a protobuf message.
|
||||
template <typename Traits = DefaultTraits>
|
||||
struct Field
|
||||
{
|
||||
const google::protobuf::FieldDescriptor * field_descriptor = nullptr;
|
||||
|
||||
/// Same as field_descriptor->number().
|
||||
UInt32 field_number = 0;
|
||||
|
||||
/// Index of a column; either 'column_index' or 'nested_message' is set.
|
||||
size_t column_index = -1;
|
||||
std::unique_ptr<Message<Traits>> nested_message;
|
||||
|
||||
typename Traits::FieldData data;
|
||||
};
|
||||
|
||||
/// Represents a protobuf message.
|
||||
template <typename Traits>
|
||||
struct Message
|
||||
{
|
||||
std::vector<Field<Traits>> fields;
|
||||
|
||||
/// Points to the parent message if this is a nested message.
|
||||
Message * parent = nullptr;
|
||||
size_t index_in_parent = -1;
|
||||
|
||||
typename Traits::MessageData data;
|
||||
};
|
||||
|
||||
/// Utility function finding matching columns for each protobuf field.
|
||||
template <typename Traits = DefaultTraits>
|
||||
static std::unique_ptr<Message<Traits>> matchColumns(
|
||||
const std::vector<String> & column_names,
|
||||
const google::protobuf::Descriptor * message_type);
|
||||
|
||||
template <typename Traits = DefaultTraits>
|
||||
static std::unique_ptr<Message<Traits>> matchColumns(
|
||||
const std::vector<String> & column_names,
|
||||
const google::protobuf::Descriptor * message_type,
|
||||
std::vector<const google::protobuf::FieldDescriptor *> & field_descriptors_without_match);
|
||||
|
||||
namespace details
|
||||
{
|
||||
[[noreturn]] void throwNoCommonColumns();
|
||||
|
||||
class ColumnNameMatcher
|
||||
{
|
||||
public:
|
||||
ColumnNameMatcher(const std::vector<String> & column_names);
|
||||
size_t findColumn(const String & field_name);
|
||||
|
||||
private:
|
||||
std::unordered_map<String, size_t> column_name_to_index_map;
|
||||
std::vector<bool> column_usage;
|
||||
};
|
||||
|
||||
template <typename Traits>
|
||||
std::unique_ptr<Message<Traits>> matchColumnsRecursive(
|
||||
ColumnNameMatcher & name_matcher,
|
||||
const google::protobuf::Descriptor * message_type,
|
||||
const String & field_name_prefix,
|
||||
std::vector<const google::protobuf::FieldDescriptor *> * field_descriptors_without_match)
|
||||
{
|
||||
auto message = std::make_unique<Message<Traits>>();
|
||||
for (int i = 0; i != message_type->field_count(); ++i)
|
||||
{
|
||||
const google::protobuf::FieldDescriptor * field_descriptor = message_type->field(i);
|
||||
if ((field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_MESSAGE)
|
||||
|| (field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP))
|
||||
{
|
||||
auto nested_message = matchColumnsRecursive<Traits>(
|
||||
name_matcher,
|
||||
field_descriptor->message_type(),
|
||||
field_name_prefix + field_descriptor->name() + ".",
|
||||
field_descriptors_without_match);
|
||||
if (nested_message)
|
||||
{
|
||||
message->fields.emplace_back();
|
||||
auto & current_field = message->fields.back();
|
||||
current_field.field_number = field_descriptor->number();
|
||||
current_field.field_descriptor = field_descriptor;
|
||||
current_field.nested_message = std::move(nested_message);
|
||||
current_field.nested_message->parent = message.get();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t column_index = name_matcher.findColumn(field_name_prefix + field_descriptor->name());
|
||||
if (column_index == static_cast<size_t>(-1))
|
||||
{
|
||||
if (field_descriptors_without_match)
|
||||
field_descriptors_without_match->emplace_back(field_descriptor);
|
||||
}
|
||||
else
|
||||
{
|
||||
message->fields.emplace_back();
|
||||
auto & current_field = message->fields.back();
|
||||
current_field.field_number = field_descriptor->number();
|
||||
current_field.field_descriptor = field_descriptor;
|
||||
current_field.column_index = column_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (message->fields.empty())
|
||||
return nullptr;
|
||||
|
||||
// Columns should be sorted by field_number, it's necessary for writing protobufs and useful reading protobufs.
|
||||
std::sort(message->fields.begin(), message->fields.end(), [](const Field<Traits> & left, const Field<Traits> & right)
|
||||
{
|
||||
return left.field_number < right.field_number;
|
||||
});
|
||||
|
||||
for (size_t i = 0; i != message->fields.size(); ++i)
|
||||
{
|
||||
auto & field = message->fields[i];
|
||||
if (field.nested_message)
|
||||
field.nested_message->index_in_parent = i;
|
||||
}
|
||||
|
||||
return message;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
static std::unique_ptr<Message<Data>> matchColumnsImpl(
|
||||
const std::vector<String> & column_names,
|
||||
const google::protobuf::Descriptor * message_type,
|
||||
std::vector<const google::protobuf::FieldDescriptor *> * field_descriptors_without_match)
|
||||
{
|
||||
details::ColumnNameMatcher name_matcher(column_names);
|
||||
auto message = details::matchColumnsRecursive<Data>(name_matcher, message_type, "", field_descriptors_without_match);
|
||||
if (!message)
|
||||
details::throwNoCommonColumns();
|
||||
return message;
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
static std::unique_ptr<Message<Data>> matchColumns(
|
||||
const std::vector<String> & column_names,
|
||||
const google::protobuf::Descriptor * message_type)
|
||||
{
|
||||
return matchColumnsImpl<Data>(column_names, message_type, nullptr);
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
static std::unique_ptr<Message<Data>> matchColumns(
|
||||
const std::vector<String> & column_names,
|
||||
const google::protobuf::Descriptor * message_type,
|
||||
std::vector<const google::protobuf::FieldDescriptor *> & field_descriptors_without_match)
|
||||
{
|
||||
return matchColumnsImpl<Data>(column_names, message_type, &field_descriptors_without_match);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@ -1,258 +1,72 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/DayNum.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/UInt128.h>
|
||||
#include <Core/UUID.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_formats.h"
|
||||
# include "config_formats.h"
|
||||
#endif
|
||||
|
||||
#if USE_PROTOBUF
|
||||
# include <memory>
|
||||
# include <IO/ReadBuffer.h>
|
||||
# include <boost/noncopyable.hpp>
|
||||
# include "ProtobufColumnMatcher.h"
|
||||
# include <Common/PODArray.h>
|
||||
# include <IO/ReadBuffer.h>
|
||||
|
||||
namespace google
|
||||
{
|
||||
namespace protobuf
|
||||
{
|
||||
class Descriptor;
|
||||
}
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class Arena;
|
||||
class IAggregateFunction;
|
||||
class ReadBuffer;
|
||||
using AggregateDataPtr = char *;
|
||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
||||
|
||||
|
||||
/** Deserializes a protobuf, tries to cast data types if necessarily.
|
||||
*/
|
||||
class ProtobufReader : private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
ProtobufReader(ReadBuffer & in_, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_);
|
||||
~ProtobufReader();
|
||||
|
||||
/// Should be called when we start reading a new message.
|
||||
bool startMessage();
|
||||
|
||||
/// Ends reading a message.
|
||||
void endMessage(bool ignore_errors = false);
|
||||
|
||||
/// Reads the column index.
|
||||
/// The function returns false if there are no more columns to read (call endMessage() in this case).
|
||||
bool readColumnIndex(size_t & column_index);
|
||||
|
||||
/// Reads a value which should be put to column at index received with readColumnIndex().
|
||||
/// The function returns false if there are no more values to read now (call readColumnIndex() in this case).
|
||||
bool readNumber(Int8 & value) { return current_converter->readInt8(value); }
|
||||
bool readNumber(UInt8 & value) { return current_converter->readUInt8(value); }
|
||||
bool readNumber(Int16 & value) { return current_converter->readInt16(value); }
|
||||
bool readNumber(UInt16 & value) { return current_converter->readUInt16(value); }
|
||||
bool readNumber(Int32 & value) { return current_converter->readInt32(value); }
|
||||
bool readNumber(UInt32 & value) { return current_converter->readUInt32(value); }
|
||||
bool readNumber(Int64 & value) { return current_converter->readInt64(value); }
|
||||
bool readNumber(UInt64 & value) { return current_converter->readUInt64(value); }
|
||||
bool readNumber(Int128 & value) { return current_converter->readInt128(value); }
|
||||
bool readNumber(UInt128 & value) { return current_converter->readUInt128(value); }
|
||||
bool readNumber(Int256 & value) { return current_converter->readInt256(value); }
|
||||
bool readNumber(UInt256 & value) { return current_converter->readUInt256(value); }
|
||||
bool readNumber(Float32 & value) { return current_converter->readFloat32(value); }
|
||||
bool readNumber(Float64 & value) { return current_converter->readFloat64(value); }
|
||||
|
||||
bool readStringInto(PaddedPODArray<UInt8> & str) { return current_converter->readStringInto(str); }
|
||||
|
||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & name_value_pairs) { current_converter->prepareEnumMapping8(name_value_pairs); }
|
||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & name_value_pairs) { current_converter->prepareEnumMapping16(name_value_pairs); }
|
||||
bool readEnum(Int8 & value) { return current_converter->readEnum8(value); }
|
||||
bool readEnum(Int16 & value) { return current_converter->readEnum16(value); }
|
||||
|
||||
bool readUUID(UUID & uuid) { return current_converter->readUUID(uuid); }
|
||||
bool readDate(DayNum & date) { return current_converter->readDate(date); }
|
||||
bool readDateTime(time_t & tm) { return current_converter->readDateTime(tm); }
|
||||
bool readDateTime64(DateTime64 & tm, UInt32 scale) { return current_converter->readDateTime64(tm, scale); }
|
||||
|
||||
bool readDecimal(Decimal32 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal32(decimal, precision, scale); }
|
||||
bool readDecimal(Decimal64 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal64(decimal, precision, scale); }
|
||||
bool readDecimal(Decimal128 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal128(decimal, precision, scale); }
|
||||
bool readDecimal(Decimal256 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal256(decimal, precision, scale); }
|
||||
|
||||
bool readAggregateFunction(const AggregateFunctionPtr & function, AggregateDataPtr place, Arena & arena) { return current_converter->readAggregateFunction(function, place, arena); }
|
||||
|
||||
/// Call it after calling one of the read*() function to determine if there are more values available for reading.
|
||||
bool ALWAYS_INLINE canReadMoreValues() const { return simple_reader.canReadMoreValues(); }
|
||||
|
||||
private:
|
||||
class SimpleReader
|
||||
{
|
||||
public:
|
||||
SimpleReader(ReadBuffer & in_, const bool use_length_delimiters_);
|
||||
bool startMessage();
|
||||
void endMessage(bool ignore_errors);
|
||||
void startNestedMessage();
|
||||
void endNestedMessage();
|
||||
bool readFieldNumber(UInt32 & field_number);
|
||||
bool readInt(Int64 & value);
|
||||
bool readSInt(Int64 & value);
|
||||
bool readUInt(UInt64 & value);
|
||||
template<typename T> bool readFixed(T & value);
|
||||
bool readStringInto(PaddedPODArray<UInt8> & str);
|
||||
|
||||
bool ALWAYS_INLINE canReadMoreValues() const { return cursor < field_end; }
|
||||
|
||||
private:
|
||||
void readBinary(void * data, size_t size);
|
||||
void ignore(UInt64 num_bytes);
|
||||
void moveCursorBackward(UInt64 num_bytes);
|
||||
|
||||
UInt64 ALWAYS_INLINE readVarint()
|
||||
{
|
||||
char c;
|
||||
in.readStrict(c);
|
||||
UInt64 first_byte = static_cast<UInt8>(c);
|
||||
++cursor;
|
||||
if (likely(!(c & 0x80)))
|
||||
return first_byte;
|
||||
return continueReadingVarint(first_byte);
|
||||
}
|
||||
|
||||
UInt64 continueReadingVarint(UInt64 first_byte);
|
||||
void ignoreVarint();
|
||||
void ignoreGroup();
|
||||
[[noreturn]] void throwUnknownFormat() const;
|
||||
|
||||
ReadBuffer & in;
|
||||
Int64 cursor;
|
||||
size_t current_message_level;
|
||||
Int64 current_message_end;
|
||||
std::vector<Int64> parent_message_ends;
|
||||
Int64 field_end;
|
||||
Int64 last_string_pos;
|
||||
const bool use_length_delimiters;
|
||||
};
|
||||
|
||||
class IConverter
|
||||
{
|
||||
public:
|
||||
virtual ~IConverter() = default;
|
||||
virtual bool readStringInto(PaddedPODArray<UInt8> &) = 0;
|
||||
virtual bool readInt8(Int8&) = 0;
|
||||
virtual bool readUInt8(UInt8 &) = 0;
|
||||
virtual bool readInt16(Int16 &) = 0;
|
||||
virtual bool readUInt16(UInt16 &) = 0;
|
||||
virtual bool readInt32(Int32 &) = 0;
|
||||
virtual bool readUInt32(UInt32 &) = 0;
|
||||
virtual bool readInt64(Int64 &) = 0;
|
||||
virtual bool readUInt64(UInt64 &) = 0;
|
||||
virtual bool readInt128(Int128 &) = 0;
|
||||
virtual bool readUInt128(UInt128 &) = 0;
|
||||
|
||||
virtual bool readInt256(Int256 &) = 0;
|
||||
virtual bool readUInt256(UInt256 &) = 0;
|
||||
|
||||
virtual bool readFloat32(Float32 &) = 0;
|
||||
virtual bool readFloat64(Float64 &) = 0;
|
||||
virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) = 0;
|
||||
virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) = 0;
|
||||
virtual bool readEnum8(Int8 &) = 0;
|
||||
virtual bool readEnum16(Int16 &) = 0;
|
||||
virtual bool readUUID(UUID &) = 0;
|
||||
virtual bool readDate(DayNum &) = 0;
|
||||
virtual bool readDateTime(time_t &) = 0;
|
||||
virtual bool readDateTime64(DateTime64 &, UInt32) = 0;
|
||||
virtual bool readDecimal32(Decimal32 &, UInt32, UInt32) = 0;
|
||||
virtual bool readDecimal64(Decimal64 &, UInt32, UInt32) = 0;
|
||||
virtual bool readDecimal128(Decimal128 &, UInt32, UInt32) = 0;
|
||||
virtual bool readDecimal256(Decimal256 &, UInt32, UInt32) = 0;
|
||||
virtual bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) = 0;
|
||||
};
|
||||
|
||||
class ConverterBaseImpl;
|
||||
class ConverterFromString;
|
||||
template<int field_type_id, typename FromType> class ConverterFromNumber;
|
||||
class ConverterFromBool;
|
||||
class ConverterFromEnum;
|
||||
|
||||
struct ColumnMatcherTraits
|
||||
{
|
||||
struct FieldData
|
||||
{
|
||||
std::unique_ptr<IConverter> converter;
|
||||
};
|
||||
struct MessageData
|
||||
{
|
||||
std::unordered_map<UInt32, const ProtobufColumnMatcher::Field<ColumnMatcherTraits>*> field_number_to_field_map;
|
||||
};
|
||||
};
|
||||
using Message = ProtobufColumnMatcher::Message<ColumnMatcherTraits>;
|
||||
using Field = ProtobufColumnMatcher::Field<ColumnMatcherTraits>;
|
||||
|
||||
void setTraitsDataAfterMatchingColumns(Message * message);
|
||||
|
||||
template <int field_type_id>
|
||||
std::unique_ptr<IConverter> createConverter(const google::protobuf::FieldDescriptor * field);
|
||||
|
||||
SimpleReader simple_reader;
|
||||
std::unique_ptr<Message> root_message;
|
||||
Message* current_message = nullptr;
|
||||
size_t current_field_index = 0;
|
||||
IConverter* current_converter = nullptr;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class Arena;
|
||||
class IAggregateFunction;
|
||||
class ReadBuffer;
|
||||
using AggregateDataPtr = char *;
|
||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
||||
|
||||
/// Utility class for reading in the Protobuf format.
|
||||
/// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
|
||||
class ProtobufReader
|
||||
{
|
||||
public:
|
||||
bool startMessage() { return false; }
|
||||
void endMessage() {}
|
||||
bool readColumnIndex(size_t &) { return false; }
|
||||
bool readNumber(Int8 &) { return false; }
|
||||
bool readNumber(UInt8 &) { return false; }
|
||||
bool readNumber(Int16 &) { return false; }
|
||||
bool readNumber(UInt16 &) { return false; }
|
||||
bool readNumber(Int32 &) { return false; }
|
||||
bool readNumber(UInt32 &) { return false; }
|
||||
bool readNumber(Int64 &) { return false; }
|
||||
bool readNumber(UInt64 &) { return false; }
|
||||
bool readNumber(Int128 &) { return false; }
|
||||
bool readNumber(UInt128 &) { return false; }
|
||||
bool readNumber(Int256 &) { return false; }
|
||||
bool readNumber(UInt256 &) { return false; }
|
||||
bool readNumber(Float32 &) { return false; }
|
||||
bool readNumber(Float64 &) { return false; }
|
||||
bool readStringInto(PaddedPODArray<UInt8> &) { return false; }
|
||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> &) {}
|
||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> &) {}
|
||||
bool readEnum(Int8 &) { return false; }
|
||||
bool readEnum(Int16 &) { return false; }
|
||||
bool readUUID(UUID &) { return false; }
|
||||
bool readDate(DayNum &) { return false; }
|
||||
bool readDateTime(time_t &) { return false; }
|
||||
bool readDateTime64(DateTime64 & /*tm*/, UInt32 /*scale*/) { return false; }
|
||||
bool readDecimal(Decimal32 &, UInt32, UInt32) { return false; }
|
||||
bool readDecimal(Decimal64 &, UInt32, UInt32) { return false; }
|
||||
bool readDecimal(Decimal128 &, UInt32, UInt32) { return false; }
|
||||
bool readDecimal(Decimal256 &, UInt32, UInt32) { return false; }
|
||||
bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) { return false; }
|
||||
bool canReadMoreValues() const { return false; }
|
||||
ProtobufReader(ReadBuffer & in_);
|
||||
|
||||
void startMessage(bool with_length_delimiter_);
|
||||
void endMessage(bool ignore_errors);
|
||||
void startNestedMessage();
|
||||
void endNestedMessage();
|
||||
|
||||
bool readFieldNumber(int & field_number);
|
||||
Int64 readInt();
|
||||
Int64 readSInt();
|
||||
UInt64 readUInt();
|
||||
template<typename T> T readFixed();
|
||||
|
||||
void readString(String & str);
|
||||
void readStringAndAppend(PaddedPODArray<UInt8> & str);
|
||||
|
||||
bool eof() const { return in.eof(); }
|
||||
|
||||
private:
|
||||
void readBinary(void * data, size_t size);
|
||||
void ignore(UInt64 num_bytes);
|
||||
void ignoreAll();
|
||||
void moveCursorBackward(UInt64 num_bytes);
|
||||
|
||||
UInt64 ALWAYS_INLINE readVarint()
|
||||
{
|
||||
char c;
|
||||
in.readStrict(c);
|
||||
UInt64 first_byte = static_cast<UInt8>(c);
|
||||
++cursor;
|
||||
if (likely(!(c & 0x80)))
|
||||
return first_byte;
|
||||
return continueReadingVarint(first_byte);
|
||||
}
|
||||
|
||||
UInt64 continueReadingVarint(UInt64 first_byte);
|
||||
void ignoreVarint();
|
||||
void ignoreGroup();
|
||||
[[noreturn]] void throwUnknownFormat() const;
|
||||
|
||||
ReadBuffer & in;
|
||||
Int64 cursor = 0;
|
||||
bool root_message_has_length_delimiter = false;
|
||||
size_t current_message_level = 0;
|
||||
Int64 current_message_end = 0;
|
||||
std::vector<Int64> parent_message_ends;
|
||||
int field_number = 0;
|
||||
int next_field_number = 0;
|
||||
Int64 field_end = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
2921
src/Formats/ProtobufSerializer.cpp
Normal file
2921
src/Formats/ProtobufSerializer.cpp
Normal file
File diff suppressed because it is too large
Load Diff
52
src/Formats/ProtobufSerializer.h
Normal file
52
src/Formats/ProtobufSerializer.h
Normal file
@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_formats.h"
|
||||
#endif
|
||||
|
||||
#if USE_PROTOBUF
|
||||
# include <Columns/IColumn.h>
|
||||
|
||||
|
||||
namespace google::protobuf { class Descriptor; }
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class ProtobufReader;
|
||||
class ProtobufWriter;
|
||||
class IDataType;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
using DataTypes = std::vector<DataTypePtr>;
|
||||
|
||||
|
||||
/// Utility class, does all the work for serialization in the Protobuf format.
|
||||
class ProtobufSerializer
|
||||
{
|
||||
public:
|
||||
virtual ~ProtobufSerializer() = default;
|
||||
|
||||
virtual void setColumns(const ColumnPtr * columns, size_t num_columns) = 0;
|
||||
virtual void writeRow(size_t row_num) = 0;
|
||||
|
||||
virtual void setColumns(const MutableColumnPtr * columns, size_t num_columns) = 0;
|
||||
virtual void readRow(size_t row_num) = 0;
|
||||
virtual void insertDefaults(size_t row_num) = 0;
|
||||
|
||||
static std::unique_ptr<ProtobufSerializer> create(
|
||||
const Strings & column_names,
|
||||
const DataTypes & data_types,
|
||||
std::vector<size_t> & missing_column_indices,
|
||||
const google::protobuf::Descriptor & message_descriptor,
|
||||
bool with_length_delimiter,
|
||||
ProtobufReader & reader);
|
||||
|
||||
static std::unique_ptr<ProtobufSerializer> create(
|
||||
const Strings & column_names,
|
||||
const DataTypes & data_types,
|
||||
const google::protobuf::Descriptor & message_descriptor,
|
||||
bool with_length_delimiter,
|
||||
ProtobufWriter & writer);
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -1,29 +1,11 @@
|
||||
#include "ProtobufWriter.h"
|
||||
|
||||
#if USE_PROTOBUF
|
||||
# include <cassert>
|
||||
# include <optional>
|
||||
# include <math.h>
|
||||
# include <AggregateFunctions/IAggregateFunction.h>
|
||||
# include <DataTypes/DataTypesDecimal.h>
|
||||
# include <IO/ReadHelpers.h>
|
||||
# include <IO/WriteHelpers.h>
|
||||
# include <boost/numeric/conversion/cast.hpp>
|
||||
# include <google/protobuf/descriptor.h>
|
||||
# include <google/protobuf/descriptor.pb.h>
|
||||
# include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD;
|
||||
extern const int PROTOBUF_BAD_CAST;
|
||||
extern const int PROTOBUF_FIELD_NOT_REPEATED;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
constexpr size_t MAX_VARINT_SIZE = 10;
|
||||
@ -81,66 +63,24 @@ namespace
|
||||
}
|
||||
|
||||
void writeFieldNumber(UInt32 field_number, WireType wire_type, PODArray<UInt8> & buf) { writeVarint((field_number << 3) | wire_type, buf); }
|
||||
|
||||
// Should we pack repeated values while storing them.
|
||||
// It depends on type of the field in the protobuf schema and the syntax of that schema.
|
||||
bool shouldPackRepeated(const google::protobuf::FieldDescriptor * field)
|
||||
{
|
||||
if (!field->is_repeated())
|
||||
return false;
|
||||
switch (field->type())
|
||||
{
|
||||
case google::protobuf::FieldDescriptor::TYPE_INT32:
|
||||
case google::protobuf::FieldDescriptor::TYPE_UINT32:
|
||||
case google::protobuf::FieldDescriptor::TYPE_SINT32:
|
||||
case google::protobuf::FieldDescriptor::TYPE_INT64:
|
||||
case google::protobuf::FieldDescriptor::TYPE_UINT64:
|
||||
case google::protobuf::FieldDescriptor::TYPE_SINT64:
|
||||
case google::protobuf::FieldDescriptor::TYPE_FIXED32:
|
||||
case google::protobuf::FieldDescriptor::TYPE_SFIXED32:
|
||||
case google::protobuf::FieldDescriptor::TYPE_FIXED64:
|
||||
case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
|
||||
case google::protobuf::FieldDescriptor::TYPE_FLOAT:
|
||||
case google::protobuf::FieldDescriptor::TYPE_DOUBLE:
|
||||
case google::protobuf::FieldDescriptor::TYPE_BOOL:
|
||||
case google::protobuf::FieldDescriptor::TYPE_ENUM:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
if (field->options().has_packed())
|
||||
return field->options().packed();
|
||||
return field->file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3;
|
||||
}
|
||||
|
||||
// Should we omit null values (zero for numbers / empty string for strings) while storing them.
|
||||
bool shouldSkipNullValue(const google::protobuf::FieldDescriptor * field)
|
||||
{
|
||||
return field->is_optional() && (field->file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// SimpleWriter is an utility class to serialize protobufs.
|
||||
// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
|
||||
ProtobufWriter::SimpleWriter::SimpleWriter(WriteBuffer & out_, const bool use_length_delimiters_)
|
||||
ProtobufWriter::ProtobufWriter(WriteBuffer & out_)
|
||||
: out(out_)
|
||||
, current_piece_start(0)
|
||||
, num_bytes_skipped(0)
|
||||
, use_length_delimiters(use_length_delimiters_)
|
||||
{
|
||||
}
|
||||
|
||||
ProtobufWriter::SimpleWriter::~SimpleWriter() = default;
|
||||
ProtobufWriter::~ProtobufWriter() = default;
|
||||
|
||||
void ProtobufWriter::SimpleWriter::startMessage()
|
||||
void ProtobufWriter::startMessage()
|
||||
{
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::endMessage()
|
||||
void ProtobufWriter::endMessage(bool with_length_delimiter)
|
||||
{
|
||||
pieces.emplace_back(current_piece_start, buffer.size());
|
||||
if (use_length_delimiters)
|
||||
if (with_length_delimiter)
|
||||
{
|
||||
size_t size_of_message = buffer.size() - num_bytes_skipped;
|
||||
writeVarint(size_of_message, out);
|
||||
@ -154,7 +94,7 @@ void ProtobufWriter::SimpleWriter::endMessage()
|
||||
current_piece_start = 0;
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::startNestedMessage()
|
||||
void ProtobufWriter::startNestedMessage()
|
||||
{
|
||||
nested_infos.emplace_back(pieces.size(), num_bytes_skipped);
|
||||
pieces.emplace_back(current_piece_start, buffer.size());
|
||||
@ -167,7 +107,7 @@ void ProtobufWriter::SimpleWriter::startNestedMessage()
|
||||
num_bytes_skipped = NESTED_MESSAGE_PADDING;
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::endNestedMessage(UInt32 field_number, bool is_group, bool skip_if_empty)
|
||||
void ProtobufWriter::endNestedMessage(int field_number, bool is_group, bool skip_if_empty)
|
||||
{
|
||||
const auto & nested_info = nested_infos.back();
|
||||
size_t num_pieces_at_start = nested_info.num_pieces_at_start;
|
||||
@ -203,8 +143,13 @@ void ProtobufWriter::SimpleWriter::endNestedMessage(UInt32 field_number, bool is
|
||||
num_bytes_skipped += num_bytes_skipped_at_start - num_bytes_inserted;
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
|
||||
void ProtobufWriter::writeUInt(int field_number, UInt64 value)
|
||||
{
|
||||
if (in_repeated_pack)
|
||||
{
|
||||
writeVarint(value, buffer);
|
||||
return;
|
||||
}
|
||||
size_t old_size = buffer.size();
|
||||
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE);
|
||||
UInt8 * ptr = buffer.data() + old_size;
|
||||
@ -213,20 +158,27 @@ void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
|
||||
buffer.resize_assume_reserved(ptr - buffer.data());
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::writeInt(UInt32 field_number, Int64 value)
|
||||
void ProtobufWriter::writeInt(int field_number, Int64 value)
|
||||
{
|
||||
writeUInt(field_number, static_cast<UInt64>(value));
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::writeSInt(UInt32 field_number, Int64 value)
|
||||
void ProtobufWriter::writeSInt(int field_number, Int64 value)
|
||||
{
|
||||
writeUInt(field_number, encodeZigZag(value));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
|
||||
void ProtobufWriter::writeFixed(int field_number, T value)
|
||||
{
|
||||
static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
|
||||
if (in_repeated_pack)
|
||||
{
|
||||
size_t old_size = buffer.size();
|
||||
buffer.resize(old_size + sizeof(T));
|
||||
memcpy(buffer.data() + old_size, &value, sizeof(T));
|
||||
return;
|
||||
}
|
||||
constexpr WireType wire_type = (sizeof(T) == 4) ? BITS32 : BITS64;
|
||||
size_t old_size = buffer.size();
|
||||
buffer.reserve(old_size + MAX_VARINT_SIZE + sizeof(T));
|
||||
@ -237,19 +189,27 @@ void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
|
||||
buffer.resize_assume_reserved(ptr - buffer.data());
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::writeString(UInt32 field_number, const StringRef & str)
|
||||
template void ProtobufWriter::writeFixed<Int32>(int field_number, Int32 value);
|
||||
template void ProtobufWriter::writeFixed<UInt32>(int field_number, UInt32 value);
|
||||
template void ProtobufWriter::writeFixed<Int64>(int field_number, Int64 value);
|
||||
template void ProtobufWriter::writeFixed<UInt64>(int field_number, UInt64 value);
|
||||
template void ProtobufWriter::writeFixed<Float32>(int field_number, Float32 value);
|
||||
template void ProtobufWriter::writeFixed<Float64>(int field_number, Float64 value);
|
||||
|
||||
void ProtobufWriter::writeString(int field_number, const std::string_view & str)
|
||||
{
|
||||
size_t length = str.length();
|
||||
size_t old_size = buffer.size();
|
||||
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + str.size);
|
||||
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + length);
|
||||
UInt8 * ptr = buffer.data() + old_size;
|
||||
ptr = writeFieldNumber(field_number, LENGTH_DELIMITED, ptr);
|
||||
ptr = writeVarint(str.size, ptr);
|
||||
memcpy(ptr, str.data, str.size);
|
||||
ptr += str.size;
|
||||
ptr = writeVarint(length, ptr);
|
||||
memcpy(ptr, str.data(), length);
|
||||
ptr += length;
|
||||
buffer.resize_assume_reserved(ptr - buffer.data());
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::startRepeatedPack()
|
||||
void ProtobufWriter::startRepeatedPack()
|
||||
{
|
||||
pieces.emplace_back(current_piece_start, buffer.size());
|
||||
|
||||
@ -259,17 +219,19 @@ void ProtobufWriter::SimpleWriter::startRepeatedPack()
|
||||
current_piece_start = buffer.size() + REPEATED_PACK_PADDING;
|
||||
buffer.resize(current_piece_start);
|
||||
num_bytes_skipped += REPEATED_PACK_PADDING;
|
||||
in_repeated_pack = true;
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::endRepeatedPack(UInt32 field_number)
|
||||
void ProtobufWriter::endRepeatedPack(int field_number, bool skip_if_empty)
|
||||
{
|
||||
size_t size = buffer.size() - current_piece_start;
|
||||
if (!size)
|
||||
if (!size && skip_if_empty)
|
||||
{
|
||||
current_piece_start = pieces.back().start;
|
||||
buffer.resize(pieces.back().end);
|
||||
pieces.pop_back();
|
||||
num_bytes_skipped -= REPEATED_PACK_PADDING;
|
||||
in_repeated_pack = false;
|
||||
return;
|
||||
}
|
||||
UInt8 * ptr = &buffer[pieces.back().end];
|
||||
@ -278,726 +240,7 @@ void ProtobufWriter::SimpleWriter::endRepeatedPack(UInt32 field_number)
|
||||
size_t num_bytes_inserted = endptr - ptr;
|
||||
pieces.back().end += num_bytes_inserted;
|
||||
num_bytes_skipped -= num_bytes_inserted;
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::addUIntToRepeatedPack(UInt64 value)
|
||||
{
|
||||
writeVarint(value, buffer);
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::addIntToRepeatedPack(Int64 value)
|
||||
{
|
||||
writeVarint(static_cast<UInt64>(value), buffer);
|
||||
}
|
||||
|
||||
void ProtobufWriter::SimpleWriter::addSIntToRepeatedPack(Int64 value)
|
||||
{
|
||||
writeVarint(encodeZigZag(value), buffer);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ProtobufWriter::SimpleWriter::addFixedToRepeatedPack(T value)
|
||||
{
|
||||
static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
|
||||
size_t old_size = buffer.size();
|
||||
buffer.resize(old_size + sizeof(T));
|
||||
memcpy(buffer.data() + old_size, &value, sizeof(T));
|
||||
}
|
||||
|
||||
|
||||
// Implementation for a converter from any DB data type to any protobuf field type.
|
||||
class ProtobufWriter::ConverterBaseImpl : public IConverter
|
||||
{
|
||||
public:
|
||||
ConverterBaseImpl(SimpleWriter & simple_writer_, const google::protobuf::FieldDescriptor * field_)
|
||||
: simple_writer(simple_writer_), field(field_)
|
||||
{
|
||||
field_number = field->number();
|
||||
}
|
||||
|
||||
virtual void writeString(const StringRef &) override { cannotConvertType("String"); }
|
||||
virtual void writeInt8(Int8) override { cannotConvertType("Int8"); }
|
||||
virtual void writeUInt8(UInt8) override { cannotConvertType("UInt8"); }
|
||||
virtual void writeInt16(Int16) override { cannotConvertType("Int16"); }
|
||||
virtual void writeUInt16(UInt16) override { cannotConvertType("UInt16"); }
|
||||
virtual void writeInt32(Int32) override { cannotConvertType("Int32"); }
|
||||
virtual void writeUInt32(UInt32) override { cannotConvertType("UInt32"); }
|
||||
virtual void writeInt64(Int64) override { cannotConvertType("Int64"); }
|
||||
virtual void writeUInt64(UInt64) override { cannotConvertType("UInt64"); }
|
||||
virtual void writeInt128(Int128) override { cannotConvertType("Int128"); }
|
||||
virtual void writeUInt128(const UInt128 &) override { cannotConvertType("UInt128"); }
|
||||
virtual void writeInt256(const Int256 &) override { cannotConvertType("Int256"); }
|
||||
virtual void writeUInt256(const UInt256 &) override { cannotConvertType("UInt256"); }
|
||||
virtual void writeFloat32(Float32) override { cannotConvertType("Float32"); }
|
||||
virtual void writeFloat64(Float64) override { cannotConvertType("Float64"); }
|
||||
virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) override {}
|
||||
virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) override {}
|
||||
virtual void writeEnum8(Int8) override { cannotConvertType("Enum"); }
|
||||
virtual void writeEnum16(Int16) override { cannotConvertType("Enum"); }
|
||||
virtual void writeUUID(const UUID &) override { cannotConvertType("UUID"); }
|
||||
virtual void writeDate(DayNum) override { cannotConvertType("Date"); }
|
||||
virtual void writeDateTime(time_t) override { cannotConvertType("DateTime"); }
|
||||
virtual void writeDateTime64(DateTime64, UInt32) override { cannotConvertType("DateTime64"); }
|
||||
virtual void writeDecimal32(Decimal32, UInt32) override { cannotConvertType("Decimal32"); }
|
||||
virtual void writeDecimal64(Decimal64, UInt32) override { cannotConvertType("Decimal64"); }
|
||||
virtual void writeDecimal128(const Decimal128 &, UInt32) override { cannotConvertType("Decimal128"); }
|
||||
virtual void writeDecimal256(const Decimal256 &, UInt32) override { cannotConvertType("Decimal256"); }
|
||||
|
||||
virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) override { cannotConvertType("AggregateFunction"); }
|
||||
|
||||
protected:
|
||||
[[noreturn]] void cannotConvertType(const String & type_name)
|
||||
{
|
||||
throw Exception(
|
||||
"Could not convert data type '" + type_name + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
|
||||
ErrorCodes::PROTOBUF_BAD_CAST);
|
||||
}
|
||||
|
||||
[[noreturn]] void cannotConvertValue(const String & value)
|
||||
{
|
||||
throw Exception(
|
||||
"Could not convert value '" + value + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
|
||||
ErrorCodes::PROTOBUF_BAD_CAST);
|
||||
}
|
||||
|
||||
template <typename To, typename From>
|
||||
To numericCast(From value)
|
||||
{
|
||||
if constexpr (std::is_same_v<To, From>)
|
||||
return value;
|
||||
To result;
|
||||
try
|
||||
{
|
||||
result = boost::numeric_cast<To>(value);
|
||||
}
|
||||
catch (boost::numeric::bad_numeric_cast &)
|
||||
{
|
||||
cannotConvertValue(toString(value));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename To>
|
||||
To parseFromString(const StringRef & str)
|
||||
{
|
||||
To result;
|
||||
try
|
||||
{
|
||||
result = ::DB::parse<To>(str.data, str.size);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
cannotConvertValue(str.toString());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
SimpleWriter & simple_writer;
|
||||
const google::protobuf::FieldDescriptor * field;
|
||||
UInt32 field_number;
|
||||
};
|
||||
|
||||
|
||||
template <bool skip_null_value>
|
||||
class ProtobufWriter::ConverterToString : public ConverterBaseImpl
|
||||
{
|
||||
public:
|
||||
using ConverterBaseImpl::ConverterBaseImpl;
|
||||
|
||||
void writeString(const StringRef & str) override { writeField(str); }
|
||||
|
||||
void writeInt8(Int8 value) override { convertToStringAndWriteField(value); }
|
||||
void writeUInt8(UInt8 value) override { convertToStringAndWriteField(value); }
|
||||
void writeInt16(Int16 value) override { convertToStringAndWriteField(value); }
|
||||
void writeUInt16(UInt16 value) override { convertToStringAndWriteField(value); }
|
||||
void writeInt32(Int32 value) override { convertToStringAndWriteField(value); }
|
||||
void writeUInt32(UInt32 value) override { convertToStringAndWriteField(value); }
|
||||
void writeInt64(Int64 value) override { convertToStringAndWriteField(value); }
|
||||
void writeUInt64(UInt64 value) override { convertToStringAndWriteField(value); }
|
||||
void writeFloat32(Float32 value) override { convertToStringAndWriteField(value); }
|
||||
void writeFloat64(Float64 value) override { convertToStringAndWriteField(value); }
|
||||
|
||||
void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
|
||||
{
|
||||
prepareEnumValueToNameMap(name_value_pairs);
|
||||
}
|
||||
void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
|
||||
{
|
||||
prepareEnumValueToNameMap(name_value_pairs);
|
||||
}
|
||||
|
||||
void writeEnum8(Int8 value) override { writeEnum16(value); }
|
||||
|
||||
void writeEnum16(Int16 value) override
|
||||
{
|
||||
auto it = enum_value_to_name_map->find(value);
|
||||
if (it == enum_value_to_name_map->end())
|
||||
cannotConvertValue(toString(value));
|
||||
writeField(it->second);
|
||||
}
|
||||
|
||||
void writeUUID(const UUID & uuid) override { convertToStringAndWriteField(uuid); }
|
||||
void writeDate(DayNum date) override { convertToStringAndWriteField(date); }
|
||||
|
||||
void writeDateTime(time_t tm) override
|
||||
{
|
||||
writeDateTimeText(tm, text_buffer);
|
||||
writeField(text_buffer.stringRef());
|
||||
text_buffer.restart();
|
||||
}
|
||||
|
||||
void writeDateTime64(DateTime64 date_time, UInt32 scale) override
|
||||
{
|
||||
writeDateTimeText(date_time, scale, text_buffer);
|
||||
writeField(text_buffer.stringRef());
|
||||
text_buffer.restart();
|
||||
}
|
||||
|
||||
void writeDecimal32(Decimal32 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
||||
void writeDecimal64(Decimal64 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
||||
void writeDecimal128(const Decimal128 & decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
||||
|
||||
void writeAggregateFunction(const AggregateFunctionPtr & function, ConstAggregateDataPtr place) override
|
||||
{
|
||||
function->serialize(place, text_buffer);
|
||||
writeField(text_buffer.stringRef());
|
||||
text_buffer.restart();
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
void convertToStringAndWriteField(T value)
|
||||
{
|
||||
writeText(value, text_buffer);
|
||||
writeField(text_buffer.stringRef());
|
||||
text_buffer.restart();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void writeDecimal(const Decimal<T> & decimal, UInt32 scale)
|
||||
{
|
||||
writeText(decimal, scale, text_buffer);
|
||||
writeField(text_buffer.stringRef());
|
||||
text_buffer.restart();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void prepareEnumValueToNameMap(const std::vector<std::pair<String, T>> & name_value_pairs)
|
||||
{
|
||||
if (enum_value_to_name_map.has_value())
|
||||
return;
|
||||
enum_value_to_name_map.emplace();
|
||||
for (const auto & name_value_pair : name_value_pairs)
|
||||
enum_value_to_name_map->emplace(name_value_pair.second, name_value_pair.first);
|
||||
}
|
||||
|
||||
void writeField(const StringRef & str)
|
||||
{
|
||||
if constexpr (skip_null_value)
|
||||
{
|
||||
if (!str.size)
|
||||
return;
|
||||
}
|
||||
simple_writer.writeString(field_number, str);
|
||||
}
|
||||
|
||||
WriteBufferFromOwnString text_buffer;
|
||||
std::optional<std::unordered_map<Int16, String>> enum_value_to_name_map;
|
||||
};
|
||||
|
||||
# define PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(field_type_id) \
|
||||
template <> \
|
||||
std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<field_type_id>( \
|
||||
const google::protobuf::FieldDescriptor * field) \
|
||||
{ \
|
||||
if (shouldSkipNullValue(field)) \
|
||||
return std::make_unique<ConverterToString<true>>(simple_writer, field); \
|
||||
else \
|
||||
return std::make_unique<ConverterToString<false>>(simple_writer, field); \
|
||||
}
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_STRING)
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_BYTES)
|
||||
# undef PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS
|
||||
|
||||
|
||||
template <int field_type_id, typename ToType, bool skip_null_value, bool pack_repeated>
|
||||
class ProtobufWriter::ConverterToNumber : public ConverterBaseImpl
|
||||
{
|
||||
public:
|
||||
using ConverterBaseImpl::ConverterBaseImpl;
|
||||
|
||||
void writeString(const StringRef & str) override { writeField(parseFromString<ToType>(str)); }
|
||||
|
||||
void writeInt8(Int8 value) override { castNumericAndWriteField(value); }
|
||||
void writeUInt8(UInt8 value) override { castNumericAndWriteField(value); }
|
||||
void writeInt16(Int16 value) override { castNumericAndWriteField(value); }
|
||||
void writeUInt16(UInt16 value) override { castNumericAndWriteField(value); }
|
||||
void writeInt32(Int32 value) override { castNumericAndWriteField(value); }
|
||||
void writeUInt32(UInt32 value) override { castNumericAndWriteField(value); }
|
||||
void writeInt64(Int64 value) override { castNumericAndWriteField(value); }
|
||||
void writeUInt64(UInt64 value) override { castNumericAndWriteField(value); }
|
||||
void writeFloat32(Float32 value) override { castNumericAndWriteField(value); }
|
||||
void writeFloat64(Float64 value) override { castNumericAndWriteField(value); }
|
||||
|
||||
void writeEnum8(Int8 value) override { writeEnum16(value); }
|
||||
|
||||
void writeEnum16(Int16 value) override
|
||||
{
|
||||
if constexpr (!is_integer_v<ToType>)
|
||||
cannotConvertType("Enum"); // It's not correct to convert enum to floating point.
|
||||
castNumericAndWriteField(value);
|
||||
}
|
||||
|
||||
void writeDate(DayNum date) override { castNumericAndWriteField(static_cast<UInt16>(date)); }
|
||||
void writeDateTime(time_t tm) override { castNumericAndWriteField(tm); }
|
||||
void writeDateTime64(DateTime64 date_time, UInt32 scale) override { writeDecimal(date_time, scale); }
|
||||
void writeDecimal32(Decimal32 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
||||
void writeDecimal64(Decimal64 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
||||
void writeDecimal128(const Decimal128 & decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
||||
|
||||
private:
|
||||
template <typename FromType>
|
||||
void castNumericAndWriteField(FromType value)
|
||||
{
|
||||
writeField(numericCast<ToType>(value));
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
void writeDecimal(const Decimal<S> & decimal, UInt32 scale)
|
||||
{
|
||||
castNumericAndWriteField(DecimalUtils::convertTo<ToType>(decimal, scale));
|
||||
}
|
||||
|
||||
void writeField(ToType value)
|
||||
{
|
||||
if constexpr (skip_null_value)
|
||||
{
|
||||
if (value == 0)
|
||||
return;
|
||||
}
|
||||
if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT32) && std::is_same_v<ToType, Int32>)
|
||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT64) && std::is_same_v<ToType, Int64>))
|
||||
{
|
||||
if constexpr (pack_repeated)
|
||||
simple_writer.addIntToRepeatedPack(value);
|
||||
else
|
||||
simple_writer.writeInt(field_number, value);
|
||||
}
|
||||
else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT32) && std::is_same_v<ToType, Int32>)
|
||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT64) && std::is_same_v<ToType, Int64>))
|
||||
{
|
||||
if constexpr (pack_repeated)
|
||||
simple_writer.addSIntToRepeatedPack(value);
|
||||
else
|
||||
simple_writer.writeSInt(field_number, value);
|
||||
}
|
||||
else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT32) && std::is_same_v<ToType, UInt32>)
|
||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT64) && std::is_same_v<ToType, UInt64>))
|
||||
{
|
||||
if constexpr (pack_repeated)
|
||||
simple_writer.addUIntToRepeatedPack(value);
|
||||
else
|
||||
simple_writer.writeUInt(field_number, value);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED32) && std::is_same_v<ToType, UInt32>)
|
||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED32) && std::is_same_v<ToType, Int32>)
|
||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED64) && std::is_same_v<ToType, UInt64>)
|
||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED64) && std::is_same_v<ToType, Int64>)
|
||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FLOAT) && std::is_same_v<ToType, float>)
|
||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_DOUBLE) && std::is_same_v<ToType, double>));
|
||||
if constexpr (pack_repeated)
|
||||
simple_writer.addFixedToRepeatedPack(value);
|
||||
else
|
||||
simple_writer.writeFixed(field_number, value);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
# define PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(field_type_id, field_type) \
|
||||
template <> \
|
||||
std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<field_type_id>( \
|
||||
const google::protobuf::FieldDescriptor * field) \
|
||||
{ \
|
||||
if (shouldSkipNullValue(field)) \
|
||||
return std::make_unique<ConverterToNumber<field_type_id, field_type, true, false>>(simple_writer, field); \
|
||||
else if (shouldPackRepeated(field)) \
|
||||
return std::make_unique<ConverterToNumber<field_type_id, field_type, false, true>>(simple_writer, field); \
|
||||
else \
|
||||
return std::make_unique<ConverterToNumber<field_type_id, field_type, false, false>>(simple_writer, field); \
|
||||
}
|
||||
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT32, Int32);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT32, Int32);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT32, UInt32);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT64, Int64);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT64, Int64);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT64, UInt64);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED32, UInt32);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED32, Int32);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED64, UInt64);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED64, Int64);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FLOAT, float);
|
||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_DOUBLE, double);
|
||||
# undef PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS
|
||||
|
||||
|
||||
template <bool skip_null_value, bool pack_repeated>
|
||||
class ProtobufWriter::ConverterToBool : public ConverterBaseImpl
|
||||
{
|
||||
public:
|
||||
using ConverterBaseImpl::ConverterBaseImpl;
|
||||
|
||||
void writeString(const StringRef & str) override
|
||||
{
|
||||
if (str == "true")
|
||||
writeField(true);
|
||||
else if (str == "false")
|
||||
writeField(false);
|
||||
else
|
||||
cannotConvertValue(str.toString());
|
||||
}
|
||||
|
||||
void writeInt8(Int8 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeUInt8(UInt8 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeInt16(Int16 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeUInt16(UInt16 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeInt32(Int32 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeUInt32(UInt32 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeInt64(Int64 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeUInt64(UInt64 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeFloat32(Float32 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeFloat64(Float64 value) override { convertToBoolAndWriteField(value); }
|
||||
void writeDecimal32(Decimal32 decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
|
||||
void writeDecimal64(Decimal64 decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
|
||||
void writeDecimal128(const Decimal128 & decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
void convertToBoolAndWriteField(T value)
|
||||
{
|
||||
writeField(static_cast<bool>(value));
|
||||
}
|
||||
|
||||
void writeField(bool b)
|
||||
{
|
||||
if constexpr (skip_null_value)
|
||||
{
|
||||
if (!b)
|
||||
return;
|
||||
}
|
||||
if constexpr (pack_repeated)
|
||||
simple_writer.addUIntToRepeatedPack(b);
|
||||
else
|
||||
simple_writer.writeUInt(field_number, b);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<google::protobuf::FieldDescriptor::TYPE_BOOL>(
|
||||
const google::protobuf::FieldDescriptor * field)
|
||||
{
|
||||
if (shouldSkipNullValue(field))
|
||||
return std::make_unique<ConverterToBool<true, false>>(simple_writer, field);
|
||||
else if (shouldPackRepeated(field))
|
||||
return std::make_unique<ConverterToBool<false, true>>(simple_writer, field);
|
||||
else
|
||||
return std::make_unique<ConverterToBool<false, false>>(simple_writer, field);
|
||||
}
|
||||
|
||||
|
||||
template <bool skip_null_value, bool pack_repeated>
|
||||
class ProtobufWriter::ConverterToEnum : public ConverterBaseImpl
|
||||
{
|
||||
public:
|
||||
using ConverterBaseImpl::ConverterBaseImpl;
|
||||
|
||||
void writeString(const StringRef & str) override
|
||||
{
|
||||
prepareEnumNameToPbNumberMap();
|
||||
auto it = enum_name_to_pbnumber_map->find(str);
|
||||
if (it == enum_name_to_pbnumber_map->end())
|
||||
cannotConvertValue(str.toString());
|
||||
writeField(it->second);
|
||||
}
|
||||
|
||||
void writeInt8(Int8 value) override { convertToEnumAndWriteField(value); }
|
||||
void writeUInt8(UInt8 value) override { convertToEnumAndWriteField(value); }
|
||||
void writeInt16(Int16 value) override { convertToEnumAndWriteField(value); }
|
||||
void writeUInt16(UInt16 value) override { convertToEnumAndWriteField(value); }
|
||||
void writeInt32(Int32 value) override { convertToEnumAndWriteField(value); }
|
||||
void writeUInt32(UInt32 value) override { convertToEnumAndWriteField(value); }
|
||||
void writeInt64(Int64 value) override { convertToEnumAndWriteField(value); }
|
||||
void writeUInt64(UInt64 value) override { convertToEnumAndWriteField(value); }
|
||||
|
||||
void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
|
||||
{
|
||||
prepareEnumValueToPbNumberMap(name_value_pairs);
|
||||
}
|
||||
void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
|
||||
{
|
||||
prepareEnumValueToPbNumberMap(name_value_pairs);
|
||||
}
|
||||
|
||||
void writeEnum8(Int8 value) override { writeEnum16(value); }
|
||||
|
||||
void writeEnum16(Int16 value) override
|
||||
{
|
||||
int pbnumber;
|
||||
if (enum_value_always_equals_pbnumber)
|
||||
pbnumber = value;
|
||||
else
|
||||
{
|
||||
auto it = enum_value_to_pbnumber_map->find(value);
|
||||
if (it == enum_value_to_pbnumber_map->end())
|
||||
cannotConvertValue(toString(value));
|
||||
pbnumber = it->second;
|
||||
}
|
||||
writeField(pbnumber);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
void convertToEnumAndWriteField(T value)
|
||||
{
|
||||
const auto * enum_descriptor = field->enum_type()->FindValueByNumber(numericCast<int>(value));
|
||||
if (!enum_descriptor)
|
||||
cannotConvertValue(toString(value));
|
||||
writeField(enum_descriptor->number());
|
||||
}
|
||||
|
||||
void prepareEnumNameToPbNumberMap()
|
||||
{
|
||||
if (enum_name_to_pbnumber_map.has_value())
|
||||
return;
|
||||
enum_name_to_pbnumber_map.emplace();
|
||||
const auto * enum_type = field->enum_type();
|
||||
for (int i = 0; i != enum_type->value_count(); ++i)
|
||||
{
|
||||
const auto * enum_value = enum_type->value(i);
|
||||
enum_name_to_pbnumber_map->emplace(enum_value->name(), enum_value->number());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void prepareEnumValueToPbNumberMap(const std::vector<std::pair<String, T>> & name_value_pairs)
|
||||
{
|
||||
if (enum_value_to_pbnumber_map.has_value())
|
||||
return;
|
||||
enum_value_to_pbnumber_map.emplace();
|
||||
enum_value_always_equals_pbnumber = true;
|
||||
for (const auto & name_value_pair : name_value_pairs)
|
||||
{
|
||||
Int16 value = name_value_pair.second; // NOLINT
|
||||
const auto * enum_descriptor = field->enum_type()->FindValueByName(name_value_pair.first);
|
||||
if (enum_descriptor)
|
||||
{
|
||||
enum_value_to_pbnumber_map->emplace(value, enum_descriptor->number());
|
||||
if (value != enum_descriptor->number())
|
||||
enum_value_always_equals_pbnumber = false;
|
||||
}
|
||||
else
|
||||
enum_value_always_equals_pbnumber = false;
|
||||
}
|
||||
}
|
||||
|
||||
void writeField(int enum_pbnumber)
|
||||
{
|
||||
if constexpr (skip_null_value)
|
||||
{
|
||||
if (!enum_pbnumber)
|
||||
return;
|
||||
}
|
||||
if constexpr (pack_repeated)
|
||||
simple_writer.addUIntToRepeatedPack(enum_pbnumber);
|
||||
else
|
||||
simple_writer.writeUInt(field_number, enum_pbnumber);
|
||||
}
|
||||
|
||||
std::optional<std::unordered_map<StringRef, int>> enum_name_to_pbnumber_map;
|
||||
std::optional<std::unordered_map<Int16, int>> enum_value_to_pbnumber_map;
|
||||
bool enum_value_always_equals_pbnumber;
|
||||
};
|
||||
|
||||
template <>
|
||||
std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<google::protobuf::FieldDescriptor::TYPE_ENUM>(
|
||||
const google::protobuf::FieldDescriptor * field)
|
||||
{
|
||||
if (shouldSkipNullValue(field))
|
||||
return std::make_unique<ConverterToEnum<true, false>>(simple_writer, field);
|
||||
else if (shouldPackRepeated(field))
|
||||
return std::make_unique<ConverterToEnum<false, true>>(simple_writer, field);
|
||||
else
|
||||
return std::make_unique<ConverterToEnum<false, false>>(simple_writer, field);
|
||||
}
|
||||
|
||||
|
||||
ProtobufWriter::ProtobufWriter(
|
||||
WriteBuffer & out, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_)
|
||||
: simple_writer(out, use_length_delimiters_)
|
||||
{
|
||||
std::vector<const google::protobuf::FieldDescriptor *> field_descriptors_without_match;
|
||||
root_message = ProtobufColumnMatcher::matchColumns<ColumnMatcherTraits>(column_names, message_type, field_descriptors_without_match);
|
||||
for (const auto * field_descriptor_without_match : field_descriptors_without_match)
|
||||
{
|
||||
if (field_descriptor_without_match->is_required())
|
||||
throw Exception(
|
||||
"Output doesn't have a column named '" + field_descriptor_without_match->name()
|
||||
+ "' which is required to write the output in the protobuf format.",
|
||||
ErrorCodes::NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD);
|
||||
}
|
||||
setTraitsDataAfterMatchingColumns(root_message.get());
|
||||
}
|
||||
|
||||
ProtobufWriter::~ProtobufWriter() = default;
|
||||
|
||||
void ProtobufWriter::setTraitsDataAfterMatchingColumns(Message * message)
|
||||
{
|
||||
Field * parent_field = message->parent ? &message->parent->fields[message->index_in_parent] : nullptr;
|
||||
message->data.parent_field_number = parent_field ? parent_field->field_number : 0;
|
||||
message->data.is_required = parent_field && parent_field->data.is_required;
|
||||
|
||||
if (parent_field && parent_field->data.is_repeatable)
|
||||
message->data.repeatable_container_message = message;
|
||||
else if (message->parent)
|
||||
message->data.repeatable_container_message = message->parent->data.repeatable_container_message;
|
||||
else
|
||||
message->data.repeatable_container_message = nullptr;
|
||||
|
||||
message->data.is_group = parent_field && (parent_field->field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP);
|
||||
|
||||
for (auto & field : message->fields)
|
||||
{
|
||||
field.data.is_repeatable = field.field_descriptor->is_repeated();
|
||||
field.data.is_required = field.field_descriptor->is_required();
|
||||
field.data.repeatable_container_message = message->data.repeatable_container_message;
|
||||
field.data.should_pack_repeated = shouldPackRepeated(field.field_descriptor);
|
||||
|
||||
if (field.nested_message)
|
||||
{
|
||||
setTraitsDataAfterMatchingColumns(field.nested_message.get());
|
||||
continue;
|
||||
}
|
||||
switch (field.field_descriptor->type())
|
||||
{
|
||||
# define PROTOBUF_WRITER_CONVERTER_CREATING_CASE(field_type_id) \
|
||||
case field_type_id: \
|
||||
field.data.converter = createConverter<field_type_id>(field.field_descriptor); \
|
||||
break
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_STRING);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BYTES);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT32);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT32);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT32);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED32);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED32);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT64);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT64);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT64);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED64);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED64);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FLOAT);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_DOUBLE);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BOOL);
|
||||
PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_ENUM);
|
||||
# undef PROTOBUF_WRITER_CONVERTER_CREATING_CASE
|
||||
default:
|
||||
throw Exception(
|
||||
String("Protobuf type '") + field.field_descriptor->type_name() + "' isn't supported", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ProtobufWriter::startMessage()
|
||||
{
|
||||
current_message = root_message.get();
|
||||
current_field_index = 0;
|
||||
simple_writer.startMessage();
|
||||
}
|
||||
|
||||
void ProtobufWriter::endMessage()
|
||||
{
|
||||
if (!current_message)
|
||||
return;
|
||||
endWritingField();
|
||||
while (current_message->parent)
|
||||
{
|
||||
simple_writer.endNestedMessage(
|
||||
current_message->data.parent_field_number, current_message->data.is_group, !current_message->data.is_required);
|
||||
current_message = current_message->parent;
|
||||
}
|
||||
simple_writer.endMessage();
|
||||
current_message = nullptr;
|
||||
}
|
||||
|
||||
bool ProtobufWriter::writeField(size_t & column_index)
|
||||
{
|
||||
endWritingField();
|
||||
while (true)
|
||||
{
|
||||
if (current_field_index < current_message->fields.size())
|
||||
{
|
||||
Field & field = current_message->fields[current_field_index];
|
||||
if (!field.nested_message)
|
||||
{
|
||||
current_field = ¤t_message->fields[current_field_index];
|
||||
current_converter = current_field->data.converter.get();
|
||||
column_index = current_field->column_index;
|
||||
if (current_field->data.should_pack_repeated)
|
||||
simple_writer.startRepeatedPack();
|
||||
return true;
|
||||
}
|
||||
simple_writer.startNestedMessage();
|
||||
current_message = field.nested_message.get();
|
||||
current_message->data.need_repeat = false;
|
||||
current_field_index = 0;
|
||||
continue;
|
||||
}
|
||||
if (current_message->parent)
|
||||
{
|
||||
simple_writer.endNestedMessage(
|
||||
current_message->data.parent_field_number, current_message->data.is_group, !current_message->data.is_required);
|
||||
if (current_message->data.need_repeat)
|
||||
{
|
||||
simple_writer.startNestedMessage();
|
||||
current_message->data.need_repeat = false;
|
||||
current_field_index = 0;
|
||||
continue;
|
||||
}
|
||||
current_field_index = current_message->index_in_parent + 1;
|
||||
current_message = current_message->parent;
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void ProtobufWriter::endWritingField()
|
||||
{
|
||||
if (!current_field)
|
||||
return;
|
||||
if (current_field->data.should_pack_repeated)
|
||||
simple_writer.endRepeatedPack(current_field->field_number);
|
||||
else if ((num_values == 0) && current_field->data.is_required)
|
||||
throw Exception(
|
||||
"No data for the required field '" + current_field->field_descriptor->name() + "'",
|
||||
ErrorCodes::NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD);
|
||||
|
||||
current_field = nullptr;
|
||||
current_converter = nullptr;
|
||||
num_values = 0;
|
||||
++current_field_index;
|
||||
}
|
||||
|
||||
void ProtobufWriter::setNestedMessageNeedsRepeat()
|
||||
{
|
||||
if (current_field->data.repeatable_container_message)
|
||||
current_field->data.repeatable_container_message->data.need_repeat = true;
|
||||
else
|
||||
throw Exception(
|
||||
"Cannot write more than single value to the non-repeated field '" + current_field->field_descriptor->name() + "'",
|
||||
ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
|
||||
in_repeated_pack = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,290 +1,68 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/UUID.h>
|
||||
#include <common/DayNum.h>
|
||||
#include <memory>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_formats.h"
|
||||
#endif
|
||||
|
||||
#if USE_PROTOBUF
|
||||
# include <IO/WriteBufferFromString.h>
|
||||
# include <boost/noncopyable.hpp>
|
||||
# include <Common/PODArray.h>
|
||||
# include "ProtobufColumnMatcher.h"
|
||||
|
||||
|
||||
namespace google
|
||||
{
|
||||
namespace protobuf
|
||||
{
|
||||
class Descriptor;
|
||||
class FieldDescriptor;
|
||||
}
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class IAggregateFunction;
|
||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
||||
using ConstAggregateDataPtr = const char *;
|
||||
|
||||
|
||||
/** Serializes a protobuf, tries to cast types if necessarily.
|
||||
*/
|
||||
class ProtobufWriter : private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
ProtobufWriter(WriteBuffer & out, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_);
|
||||
~ProtobufWriter();
|
||||
|
||||
/// Should be called at the beginning of writing a message.
|
||||
void startMessage();
|
||||
|
||||
/// Should be called at the end of writing a message.
|
||||
void endMessage();
|
||||
|
||||
/// Prepares for writing values of a field.
|
||||
/// Returns true and sets 'column_index' to the corresponding column's index.
|
||||
/// Returns false if there are no more fields to write in the message type (call endMessage() in this case).
|
||||
bool writeField(size_t & column_index);
|
||||
|
||||
/// Writes a value. This function should be called one or multiple times after writeField().
|
||||
/// Returns false if there are no more place for the values in the protobuf's field.
|
||||
/// This can happen if the protobuf's field is not declared as repeated in the protobuf schema.
|
||||
bool writeNumber(Int8 value) { return writeValueIfPossible(&IConverter::writeInt8, value); }
|
||||
bool writeNumber(UInt8 value) { return writeValueIfPossible(&IConverter::writeUInt8, value); }
|
||||
bool writeNumber(Int16 value) { return writeValueIfPossible(&IConverter::writeInt16, value); }
|
||||
bool writeNumber(UInt16 value) { return writeValueIfPossible(&IConverter::writeUInt16, value); }
|
||||
bool writeNumber(Int32 value) { return writeValueIfPossible(&IConverter::writeInt32, value); }
|
||||
bool writeNumber(UInt32 value) { return writeValueIfPossible(&IConverter::writeUInt32, value); }
|
||||
bool writeNumber(Int64 value) { return writeValueIfPossible(&IConverter::writeInt64, value); }
|
||||
bool writeNumber(UInt64 value) { return writeValueIfPossible(&IConverter::writeUInt64, value); }
|
||||
bool writeNumber(Int128 value) { return writeValueIfPossible(&IConverter::writeInt128, value); }
|
||||
bool writeNumber(UInt128 value) { return writeValueIfPossible(&IConverter::writeUInt128, value); }
|
||||
|
||||
bool writeNumber(Int256 value) { return writeValueIfPossible(&IConverter::writeInt256, value); }
|
||||
bool writeNumber(UInt256 value) { return writeValueIfPossible(&IConverter::writeUInt256, value); }
|
||||
|
||||
bool writeNumber(Float32 value) { return writeValueIfPossible(&IConverter::writeFloat32, value); }
|
||||
bool writeNumber(Float64 value) { return writeValueIfPossible(&IConverter::writeFloat64, value); }
|
||||
bool writeString(const StringRef & str) { return writeValueIfPossible(&IConverter::writeString, str); }
|
||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & enum_values) { current_converter->prepareEnumMapping8(enum_values); }
|
||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & enum_values) { current_converter->prepareEnumMapping16(enum_values); }
|
||||
bool writeEnum(Int8 value) { return writeValueIfPossible(&IConverter::writeEnum8, value); }
|
||||
bool writeEnum(Int16 value) { return writeValueIfPossible(&IConverter::writeEnum16, value); }
|
||||
bool writeUUID(const UUID & uuid) { return writeValueIfPossible(&IConverter::writeUUID, uuid); }
|
||||
bool writeDate(DayNum date) { return writeValueIfPossible(&IConverter::writeDate, date); }
|
||||
bool writeDateTime(time_t tm) { return writeValueIfPossible(&IConverter::writeDateTime, tm); }
|
||||
bool writeDateTime64(DateTime64 tm, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDateTime64, tm, scale); }
|
||||
bool writeDecimal(Decimal32 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal32, decimal, scale); }
|
||||
bool writeDecimal(Decimal64 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal64, decimal, scale); }
|
||||
bool writeDecimal(const Decimal128 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal128, decimal, scale); }
|
||||
bool writeDecimal(const Decimal256 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal256, decimal, scale); }
|
||||
bool writeAggregateFunction(const AggregateFunctionPtr & function, ConstAggregateDataPtr place) { return writeValueIfPossible(&IConverter::writeAggregateFunction, function, place); }
|
||||
|
||||
private:
|
||||
class SimpleWriter
|
||||
{
|
||||
public:
|
||||
SimpleWriter(WriteBuffer & out_, const bool use_length_delimiters_);
|
||||
~SimpleWriter();
|
||||
|
||||
void startMessage();
|
||||
void endMessage();
|
||||
|
||||
void startNestedMessage();
|
||||
void endNestedMessage(UInt32 field_number, bool is_group, bool skip_if_empty);
|
||||
|
||||
void writeInt(UInt32 field_number, Int64 value);
|
||||
void writeUInt(UInt32 field_number, UInt64 value);
|
||||
void writeSInt(UInt32 field_number, Int64 value);
|
||||
template <typename T>
|
||||
void writeFixed(UInt32 field_number, T value);
|
||||
void writeString(UInt32 field_number, const StringRef & str);
|
||||
|
||||
void startRepeatedPack();
|
||||
void addIntToRepeatedPack(Int64 value);
|
||||
void addUIntToRepeatedPack(UInt64 value);
|
||||
void addSIntToRepeatedPack(Int64 value);
|
||||
template <typename T>
|
||||
void addFixedToRepeatedPack(T value);
|
||||
void endRepeatedPack(UInt32 field_number);
|
||||
|
||||
private:
|
||||
struct Piece
|
||||
{
|
||||
size_t start;
|
||||
size_t end;
|
||||
Piece(size_t start_, size_t end_) : start(start_), end(end_) {}
|
||||
Piece() = default;
|
||||
};
|
||||
|
||||
struct NestedInfo
|
||||
{
|
||||
size_t num_pieces_at_start;
|
||||
size_t num_bytes_skipped_at_start;
|
||||
NestedInfo(size_t num_pieces_at_start_, size_t num_bytes_skipped_at_start_)
|
||||
: num_pieces_at_start(num_pieces_at_start_), num_bytes_skipped_at_start(num_bytes_skipped_at_start_)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
WriteBuffer & out;
|
||||
PODArray<UInt8> buffer;
|
||||
std::vector<Piece> pieces;
|
||||
size_t current_piece_start;
|
||||
size_t num_bytes_skipped;
|
||||
std::vector<NestedInfo> nested_infos;
|
||||
const bool use_length_delimiters;
|
||||
};
|
||||
|
||||
class IConverter
|
||||
{
|
||||
public:
|
||||
virtual ~IConverter() = default;
|
||||
virtual void writeString(const StringRef &) = 0;
|
||||
virtual void writeInt8(Int8) = 0;
|
||||
virtual void writeUInt8(UInt8) = 0;
|
||||
virtual void writeInt16(Int16) = 0;
|
||||
virtual void writeUInt16(UInt16) = 0;
|
||||
virtual void writeInt32(Int32) = 0;
|
||||
virtual void writeUInt32(UInt32) = 0;
|
||||
virtual void writeInt64(Int64) = 0;
|
||||
virtual void writeUInt64(UInt64) = 0;
|
||||
virtual void writeInt128(Int128) = 0;
|
||||
virtual void writeUInt128(const UInt128 &) = 0;
|
||||
|
||||
virtual void writeInt256(const Int256 &) = 0;
|
||||
virtual void writeUInt256(const UInt256 &) = 0;
|
||||
|
||||
virtual void writeFloat32(Float32) = 0;
|
||||
virtual void writeFloat64(Float64) = 0;
|
||||
virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) = 0;
|
||||
virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) = 0;
|
||||
virtual void writeEnum8(Int8) = 0;
|
||||
virtual void writeEnum16(Int16) = 0;
|
||||
virtual void writeUUID(const UUID &) = 0;
|
||||
virtual void writeDate(DayNum) = 0;
|
||||
virtual void writeDateTime(time_t) = 0;
|
||||
virtual void writeDateTime64(DateTime64, UInt32 scale) = 0;
|
||||
virtual void writeDecimal32(Decimal32, UInt32) = 0;
|
||||
virtual void writeDecimal64(Decimal64, UInt32) = 0;
|
||||
virtual void writeDecimal128(const Decimal128 &, UInt32) = 0;
|
||||
virtual void writeDecimal256(const Decimal256 &, UInt32) = 0;
|
||||
virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) = 0;
|
||||
};
|
||||
|
||||
class ConverterBaseImpl;
|
||||
template <bool skip_null_value>
|
||||
class ConverterToString;
|
||||
template <int field_type_id, typename ToType, bool skip_null_value, bool pack_repeated>
|
||||
class ConverterToNumber;
|
||||
template <bool skip_null_value, bool pack_repeated>
|
||||
class ConverterToBool;
|
||||
template <bool skip_null_value, bool pack_repeated>
|
||||
class ConverterToEnum;
|
||||
|
||||
struct ColumnMatcherTraits
|
||||
{
|
||||
struct FieldData
|
||||
{
|
||||
std::unique_ptr<IConverter> converter;
|
||||
bool is_required;
|
||||
bool is_repeatable;
|
||||
bool should_pack_repeated;
|
||||
ProtobufColumnMatcher::Message<ColumnMatcherTraits> * repeatable_container_message;
|
||||
};
|
||||
struct MessageData
|
||||
{
|
||||
UInt32 parent_field_number;
|
||||
bool is_group;
|
||||
bool is_required;
|
||||
ProtobufColumnMatcher::Message<ColumnMatcherTraits> * repeatable_container_message;
|
||||
bool need_repeat;
|
||||
};
|
||||
};
|
||||
using Message = ProtobufColumnMatcher::Message<ColumnMatcherTraits>;
|
||||
using Field = ProtobufColumnMatcher::Field<ColumnMatcherTraits>;
|
||||
|
||||
void setTraitsDataAfterMatchingColumns(Message * message);
|
||||
|
||||
template <int field_type_id>
|
||||
std::unique_ptr<IConverter> createConverter(const google::protobuf::FieldDescriptor * field);
|
||||
|
||||
template <typename... Params>
|
||||
using WriteValueFunctionPtr = void (IConverter::*)(Params...);
|
||||
|
||||
template <typename... Params, typename... Args>
|
||||
bool writeValueIfPossible(WriteValueFunctionPtr<Params...> func, Args &&... args)
|
||||
{
|
||||
if (num_values && !current_field->data.is_repeatable)
|
||||
{
|
||||
setNestedMessageNeedsRepeat();
|
||||
return false;
|
||||
}
|
||||
(current_converter->*func)(std::forward<Args>(args)...);
|
||||
++num_values;
|
||||
return true;
|
||||
}
|
||||
|
||||
void setNestedMessageNeedsRepeat();
|
||||
void endWritingField();
|
||||
|
||||
SimpleWriter simple_writer;
|
||||
std::unique_ptr<Message> root_message;
|
||||
|
||||
Message * current_message;
|
||||
size_t current_field_index = 0;
|
||||
const Field * current_field = nullptr;
|
||||
IConverter * current_converter = nullptr;
|
||||
size_t num_values = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
# include <common/StringRef.h>
|
||||
# include <Core/Types.h>
|
||||
# include <Common/PODArray.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class IAggregateFunction;
|
||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
||||
using ConstAggregateDataPtr = const char *;
|
||||
class WriteBuffer;
|
||||
|
||||
/// Utility class for writing in the Protobuf format.
|
||||
/// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
|
||||
class ProtobufWriter
|
||||
{
|
||||
public:
|
||||
bool writeNumber(Int8 /* value */) { return false; }
|
||||
bool writeNumber(UInt8 /* value */) { return false; }
|
||||
bool writeNumber(Int16 /* value */) { return false; }
|
||||
bool writeNumber(UInt16 /* value */) { return false; }
|
||||
bool writeNumber(Int32 /* value */) { return false; }
|
||||
bool writeNumber(UInt32 /* value */) { return false; }
|
||||
bool writeNumber(Int64 /* value */) { return false; }
|
||||
bool writeNumber(UInt64 /* value */) { return false; }
|
||||
bool writeNumber(Int128 /* value */) { return false; }
|
||||
bool writeNumber(UInt128 /* value */) { return false; }
|
||||
bool writeNumber(Int256 /* value */) { return false; }
|
||||
bool writeNumber(UInt256 /* value */) { return false; }
|
||||
bool writeNumber(Float32 /* value */) { return false; }
|
||||
bool writeNumber(Float64 /* value */) { return false; }
|
||||
bool writeString(const StringRef & /* value */) { return false; }
|
||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & /* name_value_pairs */) {}
|
||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & /* name_value_pairs */) {}
|
||||
bool writeEnum(Int8 /* value */) { return false; }
|
||||
bool writeEnum(Int16 /* value */) { return false; }
|
||||
bool writeUUID(const UUID & /* value */) { return false; }
|
||||
bool writeDate(DayNum /* date */) { return false; }
|
||||
bool writeDateTime(time_t /* tm */) { return false; }
|
||||
bool writeDateTime64(DateTime64 /*tm*/, UInt32 /*scale*/) { return false; }
|
||||
bool writeDecimal(Decimal32 /* decimal */, UInt32 /* scale */) { return false; }
|
||||
bool writeDecimal(Decimal64 /* decimal */, UInt32 /* scale */) { return false; }
|
||||
bool writeDecimal(const Decimal128 & /* decimal */, UInt32 /* scale */) { return false; }
|
||||
bool writeDecimal(const Decimal256 & /* decimal */, UInt32 /* scale */) { return false; }
|
||||
bool writeAggregateFunction(const AggregateFunctionPtr & /* function */, ConstAggregateDataPtr /* place */) { return false; }
|
||||
ProtobufWriter(WriteBuffer & out_);
|
||||
~ProtobufWriter();
|
||||
|
||||
void startMessage();
|
||||
void endMessage(bool with_length_delimiter);
|
||||
|
||||
void startNestedMessage();
|
||||
void endNestedMessage(int field_number, bool is_group, bool skip_if_empty);
|
||||
|
||||
void writeInt(int field_number, Int64 value);
|
||||
void writeUInt(int field_number, UInt64 value);
|
||||
void writeSInt(int field_number, Int64 value);
|
||||
template <typename T>
|
||||
void writeFixed(int field_number, T value);
|
||||
void writeString(int field_number, const std::string_view & str);
|
||||
|
||||
void startRepeatedPack();
|
||||
void endRepeatedPack(int field_number, bool skip_if_empty);
|
||||
|
||||
private:
|
||||
struct Piece
|
||||
{
|
||||
size_t start;
|
||||
size_t end;
|
||||
Piece(size_t start_, size_t end_) : start(start_), end(end_) {}
|
||||
Piece() = default;
|
||||
};
|
||||
|
||||
struct NestedInfo
|
||||
{
|
||||
size_t num_pieces_at_start;
|
||||
size_t num_bytes_skipped_at_start;
|
||||
NestedInfo(size_t num_pieces_at_start_, size_t num_bytes_skipped_at_start_)
|
||||
: num_pieces_at_start(num_pieces_at_start_), num_bytes_skipped_at_start(num_bytes_skipped_at_start_)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
WriteBuffer & out;
|
||||
PODArray<UInt8> buffer;
|
||||
std::vector<Piece> pieces;
|
||||
size_t current_piece_start = 0;
|
||||
size_t num_bytes_skipped = 0;
|
||||
std::vector<NestedInfo> nested_infos;
|
||||
bool in_repeated_pack = false;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -20,9 +20,9 @@ SRCS(
|
||||
NativeFormat.cpp
|
||||
NullFormat.cpp
|
||||
ParsedTemplateFormatString.cpp
|
||||
ProtobufColumnMatcher.cpp
|
||||
ProtobufReader.cpp
|
||||
ProtobufSchemas.cpp
|
||||
ProtobufSerializer.cpp
|
||||
ProtobufWriter.cpp
|
||||
registerFormats.cpp
|
||||
verbosePrintString.cpp
|
||||
|
582
src/Functions/htmlOrXmlCoarseParse.cpp
Normal file
582
src/Functions/htmlOrXmlCoarseParse.cpp
Normal file
@ -0,0 +1,582 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunctionImpl.h>
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#if USE_HYPERSCAN
|
||||
# include <hs.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
struct HxCoarseParseImpl
|
||||
{
|
||||
private:
|
||||
struct SpanInfo
|
||||
{
|
||||
SpanInfo(): id(0), match_space(std::pair<unsigned long long, unsigned long long>(0, 0)) {} // NOLINT
|
||||
SpanInfo(unsigned int matchId, std::pair<unsigned long long, unsigned long long> matchSpan): id(matchId), match_space(matchSpan){} // NOLINT
|
||||
SpanInfo(const SpanInfo& obj)
|
||||
{
|
||||
id = obj.id;
|
||||
match_space = obj.match_space;
|
||||
}
|
||||
SpanInfo& operator=(const SpanInfo& obj) = default;
|
||||
|
||||
unsigned int id;
|
||||
std::pair<unsigned long long, unsigned long long> match_space; // NOLINT
|
||||
};
|
||||
using SpanElement = std::vector<SpanInfo>;
|
||||
struct Span
|
||||
{
|
||||
Span(): set_script(false), set_style(false), set_semi(false), is_finding_cdata(false) {}
|
||||
|
||||
SpanElement copy_stack; // copy area
|
||||
SpanElement tag_stack; // regexp area
|
||||
SpanInfo script_ptr; // script pointer
|
||||
bool set_script; // whether set script
|
||||
SpanInfo style_ptr; // style pointer
|
||||
bool set_style; // whether set style
|
||||
SpanInfo semi_ptr; // tag ptr
|
||||
bool set_semi; // whether set semi
|
||||
|
||||
bool is_finding_cdata;
|
||||
};
|
||||
|
||||
static inline void copyZone(
|
||||
ColumnString::Offset& current_dst_string_offset,
|
||||
ColumnString::Offset& current_copy_loc,
|
||||
ColumnString::Chars& dst_chars,
|
||||
const ColumnString::Chars& src_chars,
|
||||
size_t bytes_to_copy,
|
||||
unsigned is_space
|
||||
)
|
||||
{
|
||||
bool is_last_space = false;
|
||||
if (current_dst_string_offset == 0 || dst_chars[current_dst_string_offset - 1] == 0 || dst_chars[current_dst_string_offset - 1] == ' ')
|
||||
{
|
||||
is_last_space = true;
|
||||
}
|
||||
if (bytes_to_copy == 0)
|
||||
{
|
||||
if (is_space && !is_last_space)
|
||||
{
|
||||
dst_chars[current_dst_string_offset++] = ' ';
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_last_space && src_chars[current_copy_loc] == ' ')
|
||||
{
|
||||
--bytes_to_copy;
|
||||
++current_copy_loc;
|
||||
}
|
||||
if (bytes_to_copy > 0)
|
||||
{
|
||||
memcpySmallAllowReadWriteOverflow15(
|
||||
&dst_chars[current_dst_string_offset], &src_chars[current_copy_loc], bytes_to_copy);
|
||||
current_dst_string_offset += bytes_to_copy;
|
||||
}
|
||||
|
||||
// separator is space and last character is not space.
|
||||
if (is_space && !(current_dst_string_offset == 0 || dst_chars[current_dst_string_offset - 1] == 0 || dst_chars[current_dst_string_offset - 1] == ' '))
|
||||
{
|
||||
dst_chars[current_dst_string_offset++] = ' ';
|
||||
}
|
||||
}
|
||||
// return;
|
||||
}
|
||||
static inline void popArea(SpanElement& stack, unsigned long long from, unsigned long long to) //NOLINT
|
||||
{
|
||||
while (!stack.empty())
|
||||
{
|
||||
if (to > stack.back().match_space.second && from < stack.back().match_space.second)
|
||||
{
|
||||
stack.pop_back();
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
// return;
|
||||
}
|
||||
|
||||
static void dealCommonTag(Span* matches)
|
||||
{
|
||||
while (!matches->copy_stack.empty() && matches->copy_stack.back().id != 10)
|
||||
{
|
||||
matches->copy_stack.pop_back();
|
||||
}
|
||||
if (!matches->copy_stack.empty())
|
||||
{
|
||||
matches->copy_stack.pop_back();
|
||||
}
|
||||
unsigned long long from; // NOLINT
|
||||
unsigned long long to; // NOLINT
|
||||
unsigned id;
|
||||
for (auto begin = matches->tag_stack.begin(); begin != matches->tag_stack.end(); ++begin)
|
||||
{
|
||||
from = begin->match_space.first;
|
||||
to = begin->match_space.second;
|
||||
id = begin->id;
|
||||
switch (id)
|
||||
{
|
||||
case 12:
|
||||
case 13:
|
||||
{
|
||||
popArea(matches->copy_stack, from, to);
|
||||
if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
|
||||
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||
break;
|
||||
}
|
||||
case 0:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
case 8:
|
||||
case 9:
|
||||
case 10:
|
||||
{
|
||||
if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
|
||||
{
|
||||
matches->set_semi = true;
|
||||
matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
if (matches->set_semi)
|
||||
{
|
||||
switch (matches->semi_ptr.id)
|
||||
{
|
||||
case 0:
|
||||
case 2:
|
||||
case 3:
|
||||
case 6:
|
||||
case 7:
|
||||
case 10:
|
||||
{
|
||||
if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
|
||||
{
|
||||
if (!matches->set_script)
|
||||
{
|
||||
matches->set_script = true;
|
||||
matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
|
||||
}
|
||||
}
|
||||
else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
|
||||
{
|
||||
if (!matches->set_style)
|
||||
{
|
||||
matches->set_style = true;
|
||||
matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
|
||||
}
|
||||
}
|
||||
popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
|
||||
matches->copy_stack.push_back(SpanInfo(0, std::make_pair(matches->semi_ptr.match_space.first, to)));
|
||||
matches->set_semi = false;
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
case 5:
|
||||
case 8:
|
||||
case 9:
|
||||
{
|
||||
SpanInfo complete_zone;
|
||||
|
||||
complete_zone.match_space.second = to;
|
||||
if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
|
||||
{
|
||||
complete_zone.id = matches->script_ptr.id;
|
||||
complete_zone.match_space.first = matches->script_ptr.match_space.first;
|
||||
matches->set_script = false;
|
||||
}
|
||||
else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
|
||||
{
|
||||
complete_zone.id = matches->style_ptr.id;
|
||||
complete_zone.match_space.first = matches->style_ptr.match_space.first;
|
||||
matches->set_style = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
complete_zone.id = matches->semi_ptr.id;
|
||||
complete_zone.match_space.first = matches->semi_ptr.match_space.first;
|
||||
}
|
||||
popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
|
||||
matches->copy_stack.push_back(complete_zone);
|
||||
matches->set_semi = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// return;
|
||||
}
|
||||
static int spanCollect(unsigned int id,
|
||||
unsigned long long from, // NOLINT
|
||||
unsigned long long to, // NOLINT
|
||||
unsigned int , void * ctx)
|
||||
{
|
||||
Span* matches = static_cast<Span*>(ctx);
|
||||
from = id == 12 ? from : to - patterns_length[id];
|
||||
|
||||
if (matches->is_finding_cdata)
|
||||
{
|
||||
if (id == 11)
|
||||
{
|
||||
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||
matches->is_finding_cdata = false;
|
||||
matches->tag_stack.clear();
|
||||
if (matches->semi_ptr.id == 10)
|
||||
{
|
||||
matches->set_semi = false;
|
||||
}
|
||||
}
|
||||
else if (id == 12 || id == 13)
|
||||
{
|
||||
popArea(matches->copy_stack, from, to);
|
||||
if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
|
||||
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||
|
||||
popArea(matches->tag_stack, from, to);
|
||||
if (matches->tag_stack.empty() || from >= matches->tag_stack.back().match_space.second)
|
||||
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||
}
|
||||
else
|
||||
{
|
||||
popArea(matches->tag_stack, from, to);
|
||||
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (id)
|
||||
{
|
||||
case 12:
|
||||
case 13:
|
||||
{
|
||||
popArea(matches->copy_stack, from, to);
|
||||
if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
|
||||
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||
break;
|
||||
}
|
||||
case 0:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
case 8:
|
||||
case 9:
|
||||
{
|
||||
if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
|
||||
{
|
||||
matches->set_semi = true;
|
||||
matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 10:
|
||||
{
|
||||
if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
|
||||
{
|
||||
matches->set_semi = true;
|
||||
matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
|
||||
}
|
||||
matches->is_finding_cdata = true;
|
||||
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
if (matches->set_semi)
|
||||
{
|
||||
switch (matches->semi_ptr.id)
|
||||
{
|
||||
case 0:
|
||||
case 2:
|
||||
case 3:
|
||||
case 6:
|
||||
case 7:
|
||||
case 10:
|
||||
{
|
||||
if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
|
||||
{
|
||||
if (!matches->set_script)
|
||||
{
|
||||
matches->set_script = true;
|
||||
matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
|
||||
}
|
||||
}
|
||||
else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
|
||||
{
|
||||
if (!matches->set_style)
|
||||
{
|
||||
matches->set_style = true;
|
||||
matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
|
||||
}
|
||||
}
|
||||
popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
|
||||
matches->copy_stack.push_back(SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to)));
|
||||
matches->set_semi = false;
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
case 5:
|
||||
case 8:
|
||||
case 9:
|
||||
{
|
||||
SpanInfo complete_zone;
|
||||
complete_zone.match_space.second = to;
|
||||
if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
|
||||
{
|
||||
complete_zone.id = matches->script_ptr.id;
|
||||
complete_zone.match_space.first = matches->script_ptr.match_space.first;
|
||||
matches->set_script = false;
|
||||
}
|
||||
else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
|
||||
{
|
||||
complete_zone.id = matches->style_ptr.id;
|
||||
complete_zone.match_space.first = matches->style_ptr.match_space.first;
|
||||
matches->set_style = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
complete_zone.id = matches->semi_ptr.id;
|
||||
complete_zone.match_space.first = matches->semi_ptr.match_space.first;
|
||||
}
|
||||
popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
|
||||
matches->copy_stack.push_back(complete_zone);
|
||||
matches->set_semi = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#if USE_HYPERSCAN
|
||||
static hs_database_t* buildDatabase(const std::vector<const char* > &expressions,
|
||||
const std::vector<unsigned> &flags,
|
||||
const std::vector<unsigned> &id,
|
||||
unsigned int mode)
|
||||
{
|
||||
hs_database_t *db;
|
||||
hs_compile_error_t *compile_err;
|
||||
hs_error_t err;
|
||||
err = hs_compile_multi(expressions.data(), flags.data(), id.data(),
|
||||
expressions.size(), mode, nullptr, &db, &compile_err);
|
||||
|
||||
if (err != HS_SUCCESS)
|
||||
{
|
||||
hs_free_compile_error(compile_err);
|
||||
throw Exception("Hyper scan database cannot be compiled.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
}
|
||||
return db;
|
||||
}
|
||||
#endif
|
||||
static std::vector<const char*> patterns;
|
||||
static std::vector<std::size_t> patterns_length;
|
||||
static std::vector<unsigned> patterns_flag;
|
||||
static std::vector<unsigned> ids;
|
||||
|
||||
public:
|
||||
static void executeInternal(
|
||||
const ColumnString::Chars & src_chars,
|
||||
const ColumnString::Offsets & src_offsets,
|
||||
ColumnString::Chars & dst_chars,
|
||||
ColumnString::Offsets & dst_offsets)
|
||||
{
|
||||
#if USE_HYPERSCAN
|
||||
hs_database_t * db = buildDatabase(patterns, patterns_flag, ids, HS_MODE_BLOCK);
|
||||
hs_scratch_t* scratch = nullptr;
|
||||
if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS)
|
||||
{
|
||||
hs_free_database(db);
|
||||
throw Exception("Unable to allocate scratch space.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
}
|
||||
dst_chars.resize(src_chars.size());
|
||||
dst_offsets.resize(src_offsets.size());
|
||||
|
||||
ColumnString::Offset current_src_string_offset = 0;
|
||||
ColumnString::Offset current_dst_string_offset = 0;
|
||||
ColumnString::Offset current_copy_loc;
|
||||
ColumnString::Offset current_copy_end;
|
||||
unsigned is_space;
|
||||
size_t bytes_to_copy;
|
||||
Span match_zoneall;
|
||||
|
||||
for (size_t off = 0; off < src_offsets.size(); ++off)
|
||||
{
|
||||
hs_scan(db, reinterpret_cast<const char *>(&src_chars[current_src_string_offset]), src_offsets[off] - current_src_string_offset, 0, scratch, spanCollect, &match_zoneall);
|
||||
if (match_zoneall.is_finding_cdata)
|
||||
{
|
||||
dealCommonTag(&match_zoneall);
|
||||
}
|
||||
SpanElement& match_zone = match_zoneall.copy_stack;
|
||||
current_copy_loc = current_src_string_offset;
|
||||
if (match_zone.empty())
|
||||
{
|
||||
current_copy_end = src_offsets[off];
|
||||
is_space = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
current_copy_end = current_src_string_offset + match_zone.begin()->match_space.first;
|
||||
is_space = (match_zone.begin()->id == 12 || match_zone.begin()->id == 13)?1:0;
|
||||
}
|
||||
|
||||
bytes_to_copy = current_copy_end - current_copy_loc;
|
||||
copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
|
||||
for (auto begin = match_zone.begin(); begin != match_zone.end(); ++begin)
|
||||
{
|
||||
current_copy_loc = current_src_string_offset + begin->match_space.second;
|
||||
if (begin + 1 >= match_zone.end())
|
||||
{
|
||||
current_copy_end = src_offsets[off];
|
||||
is_space = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
current_copy_end = current_src_string_offset + (begin+1)->match_space.first;
|
||||
is_space = ((begin+1)->id == 12 || (begin+1)->id == 13)?1:0;
|
||||
}
|
||||
bytes_to_copy = current_copy_end - current_copy_loc;
|
||||
copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
|
||||
}
|
||||
if (current_dst_string_offset > 1 && dst_chars[current_dst_string_offset - 2] == ' ')
|
||||
{
|
||||
dst_chars[current_dst_string_offset - 2] = 0;
|
||||
--current_dst_string_offset;
|
||||
}
|
||||
dst_offsets[off] = current_dst_string_offset;
|
||||
current_src_string_offset = src_offsets[off];
|
||||
match_zoneall.copy_stack.clear();
|
||||
match_zoneall.tag_stack.clear();
|
||||
}
|
||||
dst_chars.resize(dst_chars.size());
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(db);
|
||||
#else
|
||||
(void)src_chars;
|
||||
(void)src_offsets;
|
||||
(void)dst_chars;
|
||||
(void)dst_offsets;
|
||||
throw Exception(
|
||||
"htmlOrXmlCoarseParse is not implemented when hyperscan is off (is it x86 processor?)",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<const char*> HxCoarseParseImpl::patterns =
|
||||
{
|
||||
"<[^\\s<>]", // 0 "<", except "< ", "<<", "<>"
|
||||
">", // 1 ">"
|
||||
"<script\\s", // 2 <script xxxxx>
|
||||
"<script", // 3 <script>
|
||||
"</script\\s", // 4 </script xxxx>
|
||||
"</script", // 5 </script>
|
||||
"<style\\s", // 6 <style xxxxxx>
|
||||
"<style", // 7 <style>
|
||||
"</style\\s", // 8 </style xxxxx>
|
||||
"</style", // 9 </style>
|
||||
"<!\\[CDATA\\[", // 10 <![CDATA[xxxxxx]]>
|
||||
"\\]\\]>", // 11 ]]>
|
||||
"\\s{2,}", // 12 " ", continuous blanks
|
||||
"[^\\S ]" // 13 "\n", "\t" and other white space, it does not include single ' '.
|
||||
};
|
||||
std::vector<std::size_t> HxCoarseParseImpl::patterns_length =
|
||||
{
|
||||
2, 1, 8, 7, 9, 8, 7, 6, 8, 7, 9, 3, 0, 1
|
||||
};
|
||||
#if USE_HYPERSCAN
|
||||
std::vector<unsigned> HxCoarseParseImpl::patterns_flag =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, HS_FLAG_SOM_LEFTMOST, 0
|
||||
};
|
||||
#endif
|
||||
std::vector<unsigned> HxCoarseParseImpl::ids =
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
|
||||
};
|
||||
|
||||
class FunctionHtmlOrXmlCoarseParse : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "htmlOrXmlCoarseParse";
|
||||
|
||||
static FunctionPtr create(const Context &) {return std::make_shared<FunctionHtmlOrXmlCoarseParse>(); }
|
||||
|
||||
String getName() const override {return name;}
|
||||
|
||||
size_t getNumberOfArguments() const override {return 1;}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
return arguments[0];
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override {return true;}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & , size_t) const override
|
||||
{
|
||||
const auto & strcolumn = arguments[0].column;
|
||||
if (const ColumnString* html_sentence = checkAndGetColumn<ColumnString>(strcolumn.get()))
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
HxCoarseParseImpl::executeInternal(html_sentence->getChars(), html_sentence->getOffsets(), col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception("First argument for function " + getName() + " must be string.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionHtmlOrXmlCoarseParse>();
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
@ -6,7 +6,9 @@ namespace DB
|
||||
{
|
||||
|
||||
class FunctionFactory;
|
||||
|
||||
#if USE_HYPERSCAN
|
||||
void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory &);
|
||||
#endif
|
||||
void registerFunctionRepeat(FunctionFactory &);
|
||||
void registerFunctionEmpty(FunctionFactory &);
|
||||
void registerFunctionNotEmpty(FunctionFactory &);
|
||||
@ -45,6 +47,9 @@ void registerFunctionTryBase64Decode(FunctionFactory &);
|
||||
|
||||
void registerFunctionsString(FunctionFactory & factory)
|
||||
{
|
||||
#if USE_HYPERSCAN
|
||||
registerFunctionHtmlOrXmlCoarseParse(factory);
|
||||
#endif
|
||||
registerFunctionRepeat(factory);
|
||||
registerFunctionEmpty(factory);
|
||||
registerFunctionNotEmpty(factory);
|
||||
|
@ -291,6 +291,7 @@ SRCS(
|
||||
hasToken.cpp
|
||||
hasTokenCaseInsensitive.cpp
|
||||
hostName.cpp
|
||||
htmlOrXmlCoarseParse.cpp
|
||||
hypot.cpp
|
||||
identity.cpp
|
||||
if.cpp
|
||||
|
@ -1,57 +1,48 @@
|
||||
#include "ProtobufRowInputFormat.h"
|
||||
|
||||
#if USE_PROTOBUF
|
||||
#include <Core/Block.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Formats/FormatSchemaInfo.h>
|
||||
#include <Formats/ProtobufSchemas.h>
|
||||
#include <Interpreters/Context.h>
|
||||
# include <Core/Block.h>
|
||||
# include <Formats/FormatFactory.h>
|
||||
# include <Formats/FormatSchemaInfo.h>
|
||||
# include <Formats/ProtobufReader.h>
|
||||
# include <Formats/ProtobufSchemas.h>
|
||||
# include <Formats/ProtobufSerializer.h>
|
||||
# include <Interpreters/Context.h>
|
||||
# include <ext/range.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_, const bool use_length_delimiters_)
|
||||
ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_)
|
||||
: IRowInputFormat(header_, in_, params_)
|
||||
, data_types(header_.getDataTypes())
|
||||
, reader(in, ProtobufSchemas::instance().getMessageTypeForFormatSchema(info_), header_.getNames(), use_length_delimiters_)
|
||||
, reader(std::make_unique<ProtobufReader>(in_))
|
||||
, serializer(ProtobufSerializer::create(
|
||||
header_.getNames(),
|
||||
header_.getDataTypes(),
|
||||
missing_column_indices,
|
||||
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
|
||||
with_length_delimiter_,
|
||||
*reader))
|
||||
{
|
||||
}
|
||||
|
||||
ProtobufRowInputFormat::~ProtobufRowInputFormat() = default;
|
||||
|
||||
bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & extra)
|
||||
bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & row_read_extension)
|
||||
{
|
||||
if (!reader.startMessage())
|
||||
return false; // EOF reached, no more messages.
|
||||
if (reader->eof())
|
||||
return false;
|
||||
|
||||
// Set of columns for which the values were read. The rest will be filled with default values.
|
||||
auto & read_columns = extra.read_columns;
|
||||
read_columns.assign(columns.size(), false);
|
||||
size_t row_num = columns.empty() ? 0 : columns[0]->size();
|
||||
if (!row_num)
|
||||
serializer->setColumns(columns.data(), columns.size());
|
||||
|
||||
// Read values from this message and put them to the columns while it's possible.
|
||||
size_t column_index;
|
||||
while (reader.readColumnIndex(column_index))
|
||||
{
|
||||
bool allow_add_row = !static_cast<bool>(read_columns[column_index]);
|
||||
do
|
||||
{
|
||||
bool row_added;
|
||||
data_types[column_index]->deserializeProtobuf(*columns[column_index], reader, allow_add_row, row_added);
|
||||
if (row_added)
|
||||
{
|
||||
read_columns[column_index] = true;
|
||||
allow_add_row = false;
|
||||
}
|
||||
} while (reader.canReadMoreValues());
|
||||
}
|
||||
serializer->readRow(row_num);
|
||||
|
||||
// Fill non-visited columns with the default values.
|
||||
for (column_index = 0; column_index < read_columns.size(); ++column_index)
|
||||
if (!read_columns[column_index])
|
||||
data_types[column_index]->insertDefaultInto(*columns[column_index]);
|
||||
|
||||
reader.endMessage();
|
||||
row_read_extension.read_columns.clear();
|
||||
row_read_extension.read_columns.resize(columns.size(), true);
|
||||
for (size_t column_idx : missing_column_indices)
|
||||
row_read_extension.read_columns[column_idx] = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -62,14 +53,14 @@ bool ProtobufRowInputFormat::allowSyncAfterError() const
|
||||
|
||||
void ProtobufRowInputFormat::syncAfterError()
|
||||
{
|
||||
reader.endMessage(true);
|
||||
reader->endMessage(true);
|
||||
}
|
||||
|
||||
void registerInputFormatProcessorProtobuf(FormatFactory & factory)
|
||||
{
|
||||
for (bool use_length_delimiters : {false, true})
|
||||
for (bool with_length_delimiter : {false, true})
|
||||
{
|
||||
factory.registerInputFormatProcessor(use_length_delimiters ? "Protobuf" : "ProtobufSingle", [use_length_delimiters](
|
||||
factory.registerInputFormatProcessor(with_length_delimiter ? "Protobuf" : "ProtobufSingle", [with_length_delimiter](
|
||||
ReadBuffer & buf,
|
||||
const Block & sample,
|
||||
IRowInputFormat::Params params,
|
||||
@ -78,7 +69,7 @@ void registerInputFormatProcessorProtobuf(FormatFactory & factory)
|
||||
return std::make_shared<ProtobufRowInputFormat>(buf, sample, std::move(params),
|
||||
FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true,
|
||||
settings.schema.is_server, settings.schema.format_schema_path),
|
||||
use_length_delimiters);
|
||||
with_length_delimiter);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -5,14 +5,14 @@
|
||||
#endif
|
||||
|
||||
#if USE_PROTOBUF
|
||||
# include <DataTypes/IDataType.h>
|
||||
# include <Formats/ProtobufReader.h>
|
||||
# include <Processors/Formats/IRowInputFormat.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class Block;
|
||||
class FormatSchemaInfo;
|
||||
class ProtobufReader;
|
||||
class ProtobufSerializer;
|
||||
|
||||
|
||||
/** Stream designed to deserialize data from the google protobuf format.
|
||||
@ -29,18 +29,19 @@ class FormatSchemaInfo;
|
||||
class ProtobufRowInputFormat : public IRowInputFormat
|
||||
{
|
||||
public:
|
||||
ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_, const bool use_length_delimiters_);
|
||||
ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_);
|
||||
~ProtobufRowInputFormat() override;
|
||||
|
||||
String getName() const override { return "ProtobufRowInputFormat"; }
|
||||
|
||||
bool readRow(MutableColumns & columns, RowReadExtension & extra) override;
|
||||
bool readRow(MutableColumns & columns, RowReadExtension &) override;
|
||||
bool allowSyncAfterError() const override;
|
||||
void syncAfterError() override;
|
||||
|
||||
private:
|
||||
DataTypes data_types;
|
||||
ProtobufReader reader;
|
||||
std::unique_ptr<ProtobufReader> reader;
|
||||
std::vector<size_t> missing_column_indices;
|
||||
std::unique_ptr<ProtobufSerializer> serializer;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,13 +1,13 @@
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include "ProtobufRowOutputFormat.h"
|
||||
|
||||
#if USE_PROTOBUF
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <Formats/FormatSchemaInfo.h>
|
||||
#include <Formats/ProtobufSchemas.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <google/protobuf/descriptor.h>
|
||||
# include <Formats/FormatFactory.h>
|
||||
# include <Core/Block.h>
|
||||
# include <Formats/FormatSchemaInfo.h>
|
||||
# include <Formats/ProtobufSchemas.h>
|
||||
# include <Formats/ProtobufSerializer.h>
|
||||
# include <Formats/ProtobufWriter.h>
|
||||
# include <google/protobuf/descriptor.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -20,58 +20,55 @@ namespace ErrorCodes
|
||||
|
||||
ProtobufRowOutputFormat::ProtobufRowOutputFormat(
|
||||
WriteBuffer & out_,
|
||||
const Block & header,
|
||||
const Block & header_,
|
||||
const RowOutputFormatParams & params_,
|
||||
const FormatSchemaInfo & format_schema,
|
||||
const FormatSettings & settings)
|
||||
: IRowOutputFormat(header, out_, params_)
|
||||
, data_types(header.getDataTypes())
|
||||
, writer(out,
|
||||
ProtobufSchemas::instance().getMessageTypeForFormatSchema(format_schema),
|
||||
header.getNames(), settings.protobuf.write_row_delimiters)
|
||||
, allow_only_one_row(
|
||||
!settings.protobuf.write_row_delimiters
|
||||
&& !settings.protobuf.allow_many_rows_no_delimiters)
|
||||
const FormatSchemaInfo & schema_info_,
|
||||
const FormatSettings & settings_,
|
||||
bool with_length_delimiter_)
|
||||
: IRowOutputFormat(header_, out_, params_)
|
||||
, writer(std::make_unique<ProtobufWriter>(out))
|
||||
, serializer(ProtobufSerializer::create(
|
||||
header_.getNames(),
|
||||
header_.getDataTypes(),
|
||||
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
|
||||
with_length_delimiter_,
|
||||
*writer))
|
||||
, allow_multiple_rows(with_length_delimiter_ || settings_.protobuf.allow_multiple_rows_without_delimiter)
|
||||
{
|
||||
value_indices.resize(header.columns());
|
||||
}
|
||||
|
||||
void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num)
|
||||
{
|
||||
if (allow_only_one_row && !first_row)
|
||||
{
|
||||
throw Exception("The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.", ErrorCodes::NO_ROW_DELIMITER);
|
||||
}
|
||||
if (!allow_multiple_rows && !first_row)
|
||||
throw Exception(
|
||||
"The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.",
|
||||
ErrorCodes::NO_ROW_DELIMITER);
|
||||
|
||||
writer.startMessage();
|
||||
std::fill(value_indices.begin(), value_indices.end(), 0);
|
||||
size_t column_index;
|
||||
while (writer.writeField(column_index))
|
||||
data_types[column_index]->serializeProtobuf(
|
||||
*columns[column_index], row_num, writer, value_indices[column_index]);
|
||||
writer.endMessage();
|
||||
if (!row_num)
|
||||
serializer->setColumns(columns.data(), columns.size());
|
||||
|
||||
serializer->writeRow(row_num);
|
||||
}
|
||||
|
||||
|
||||
void registerOutputFormatProcessorProtobuf(FormatFactory & factory)
|
||||
{
|
||||
for (bool write_row_delimiters : {false, true})
|
||||
for (bool with_length_delimiter : {false, true})
|
||||
{
|
||||
factory.registerOutputFormatProcessor(
|
||||
write_row_delimiters ? "Protobuf" : "ProtobufSingle",
|
||||
[write_row_delimiters](WriteBuffer & buf,
|
||||
with_length_delimiter ? "Protobuf" : "ProtobufSingle",
|
||||
[with_length_delimiter](WriteBuffer & buf,
|
||||
const Block & header,
|
||||
const RowOutputFormatParams & params,
|
||||
const FormatSettings & _settings)
|
||||
const FormatSettings & settings)
|
||||
{
|
||||
FormatSettings settings = _settings;
|
||||
settings.protobuf.write_row_delimiters = write_row_delimiters;
|
||||
return std::make_shared<ProtobufRowOutputFormat>(
|
||||
buf, header, params,
|
||||
FormatSchemaInfo(settings.schema.format_schema, "Protobuf",
|
||||
true, settings.schema.is_server,
|
||||
settings.schema.format_schema_path),
|
||||
settings);
|
||||
settings,
|
||||
with_length_delimiter);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -8,21 +8,16 @@
|
||||
# include <Core/Block.h>
|
||||
# include <Formats/FormatSchemaInfo.h>
|
||||
# include <Formats/FormatSettings.h>
|
||||
# include <Formats/ProtobufWriter.h>
|
||||
# include <Processors/Formats/IRowOutputFormat.h>
|
||||
|
||||
|
||||
namespace google
|
||||
{
|
||||
namespace protobuf
|
||||
{
|
||||
class Message;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class ProtobufWriter;
|
||||
class ProtobufSerializer;
|
||||
class FormatSchemaInfo;
|
||||
struct FormatSettings;
|
||||
|
||||
/** Stream designed to serialize data in the google protobuf format.
|
||||
* Each row is written as a separated message.
|
||||
*
|
||||
@ -38,10 +33,11 @@ class ProtobufRowOutputFormat : public IRowOutputFormat
|
||||
public:
|
||||
ProtobufRowOutputFormat(
|
||||
WriteBuffer & out_,
|
||||
const Block & header,
|
||||
const Block & header_,
|
||||
const RowOutputFormatParams & params_,
|
||||
const FormatSchemaInfo & format_schema,
|
||||
const FormatSettings & settings);
|
||||
const FormatSchemaInfo & schema_info_,
|
||||
const FormatSettings & settings_,
|
||||
bool with_length_delimiter_);
|
||||
|
||||
String getName() const override { return "ProtobufRowOutputFormat"; }
|
||||
|
||||
@ -50,10 +46,9 @@ public:
|
||||
std::string getContentType() const override { return "application/octet-stream"; }
|
||||
|
||||
private:
|
||||
DataTypes data_types;
|
||||
ProtobufWriter writer;
|
||||
std::vector<size_t> value_indices;
|
||||
const bool allow_only_one_row;
|
||||
std::unique_ptr<ProtobufWriter> writer;
|
||||
std::unique_ptr<ProtobufSerializer> serializer;
|
||||
const bool allow_multiple_rows;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ void KafkaBlockOutputStream::writePrefix()
|
||||
buffer = storage.createWriteBuffer(getHeader());
|
||||
|
||||
auto format_settings = getFormatSettings(*context);
|
||||
format_settings.protobuf.allow_many_rows_no_delimiters = true;
|
||||
format_settings.protobuf.allow_multiple_rows_without_delimiter = true;
|
||||
|
||||
child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
|
||||
getHeader(), *context,
|
||||
|
@ -34,7 +34,7 @@ void RabbitMQBlockOutputStream::writePrefix()
|
||||
buffer->activateWriting();
|
||||
|
||||
auto format_settings = getFormatSettings(context);
|
||||
format_settings.protobuf.allow_many_rows_no_delimiters = true;
|
||||
format_settings.protobuf.allow_multiple_rows_without_delimiter = true;
|
||||
|
||||
child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
|
||||
getHeader(), context,
|
||||
|
@ -342,3 +342,25 @@ def test_bridge_dies_with_parent(started_cluster):
|
||||
|
||||
assert clickhouse_pid is None
|
||||
assert bridge_pid is None
|
||||
|
||||
|
||||
def test_odbc_postgres_date_data_type(started_cluster):
|
||||
conn = get_postgres_conn();
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS clickhouse.test_date (column1 integer, column2 date)")
|
||||
|
||||
cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, '2020-12-01')")
|
||||
cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, '2020-12-02')")
|
||||
cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, '2020-12-03')")
|
||||
conn.commit()
|
||||
|
||||
node1.query(
|
||||
'''
|
||||
CREATE TABLE test_date (column1 UInt64, column2 Date)
|
||||
ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')''')
|
||||
|
||||
expected = '1\t2020-12-01\n2\t2020-12-02\n3\t2020-12-03\n'
|
||||
result = node1.query('SELECT * FROM test_date');
|
||||
assert(result == expected)
|
||||
|
||||
|
||||
|
@ -0,0 +1,14 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message ABC
|
||||
{
|
||||
message nested
|
||||
{
|
||||
message nested
|
||||
{
|
||||
repeated int32 c = 1;
|
||||
}
|
||||
repeated nested b = 1;
|
||||
}
|
||||
repeated nested a = 1;
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
[[],[[]],[[1]],[[2,3],[4]]]
|
||||
[[[5,6,7]],[[8,9,10]]]
|
||||
|
||||
Binary representation:
|
||||
00000000 1a 0a 00 0a 02 0a 00 0a 05 0a 03 0a 01 01 0a 0b |................|
|
||||
00000010 0a 04 0a 02 02 03 0a 03 0a 01 04 12 0a 07 0a 05 |................|
|
||||
00000020 0a 03 05 06 07 0a 07 0a 05 0a 03 08 09 0a |..............|
|
||||
0000002e
|
||||
|
||||
MESSAGE #1 AT 0x00000001
|
||||
a {
|
||||
}
|
||||
a {
|
||||
b {
|
||||
}
|
||||
}
|
||||
a {
|
||||
b {
|
||||
c: 1
|
||||
}
|
||||
}
|
||||
a {
|
||||
b {
|
||||
c: 2
|
||||
c: 3
|
||||
}
|
||||
b {
|
||||
c: 4
|
||||
}
|
||||
}
|
||||
MESSAGE #2 AT 0x0000001C
|
||||
a {
|
||||
b {
|
||||
c: 5
|
||||
c: 6
|
||||
c: 7
|
||||
}
|
||||
}
|
||||
a {
|
||||
b {
|
||||
c: 8
|
||||
c: 9
|
||||
c: 10
|
||||
}
|
||||
}
|
||||
|
||||
Binary representation is as expected
|
||||
|
||||
[[],[[]],[[1]],[[2,3],[4]]]
|
||||
[[[5,6,7]],[[8,9,10]]]
|
||||
[[],[[]],[[1]],[[2,3],[4]]]
|
||||
[[[5,6,7]],[[8,9,10]]]
|
35
tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
Executable file
35
tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
Executable file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
# Run the client.
|
||||
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
|
||||
DROP TABLE IF EXISTS array_3dim_protobuf_00825;
|
||||
|
||||
CREATE TABLE array_3dim_protobuf_00825
|
||||
(
|
||||
`a_b_c` Array(Array(Array(Int32)))
|
||||
) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO array_3dim_protobuf_00825 VALUES ([[], [[]], [[1]], [[2,3],[4]]]), ([[[5, 6, 7]], [[8, 9, 10]]]);
|
||||
|
||||
SELECT * FROM array_3dim_protobuf_00825;
|
||||
EOF
|
||||
|
||||
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_3dim.XXXXXX.binary")
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the output in the protobuf format
|
||||
echo
|
||||
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the input in the protobuf format (now the table contains the same data twice).
|
||||
echo
|
||||
$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825"
|
||||
|
||||
rm "$BINARY_FILE_PATH"
|
@ -0,0 +1,9 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message AA {
|
||||
message nested_array {
|
||||
repeated double c = 2;
|
||||
}
|
||||
string a = 1;
|
||||
repeated nested_array b = 2;
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
|
||||
|
||||
Binary representation:
|
||||
00000000 6b 0a 03 6f 6e 65 12 1a 12 18 00 00 00 00 00 00 |k..one..........|
|
||||
00000010 f0 3f 00 00 00 00 00 00 00 40 00 00 00 00 00 00 |.?.......@......|
|
||||
00000020 08 40 12 12 12 10 00 00 00 00 00 00 e0 3f 00 00 |.@...........?..|
|
||||
00000030 00 00 00 00 d0 3f 12 00 12 12 12 10 00 00 00 00 |.....?..........|
|
||||
00000040 00 00 10 40 00 00 00 00 00 00 14 40 12 12 12 10 |...@.......@....|
|
||||
00000050 00 00 00 00 00 00 c0 3f 00 00 00 00 00 00 b0 3f |.......?.......?|
|
||||
00000060 12 0a 12 08 00 00 00 00 00 00 18 40 |...........@|
|
||||
0000006c
|
||||
|
||||
MESSAGE #1 AT 0x00000001
|
||||
a: "one"
|
||||
b {
|
||||
c: 1
|
||||
c: 2
|
||||
c: 3
|
||||
}
|
||||
b {
|
||||
c: 0.5
|
||||
c: 0.25
|
||||
}
|
||||
b {
|
||||
}
|
||||
b {
|
||||
c: 4
|
||||
c: 5
|
||||
}
|
||||
b {
|
||||
c: 0.125
|
||||
c: 0.0625
|
||||
}
|
||||
b {
|
||||
c: 6
|
||||
}
|
||||
|
||||
Binary representation is as expected
|
||||
|
||||
one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
|
||||
one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
|
38
tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
Executable file
38
tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# https://github.com/ClickHouse/ClickHouse/issues/9069
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
# Run the client.
|
||||
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
|
||||
CREATE TABLE array_of_arrays_protobuf_00825
|
||||
(
|
||||
`a` String,
|
||||
`b` Nested (
|
||||
`c` Array(Float64)
|
||||
)
|
||||
) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO array_of_arrays_protobuf_00825 VALUES ('one', [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]);
|
||||
|
||||
SELECT * FROM array_of_arrays_protobuf_00825;
|
||||
EOF
|
||||
|
||||
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_of_arrays.XXXXXX.binary")
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the output in the protobuf format
|
||||
echo
|
||||
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the input in the protobuf format (now the table contains the same data twice).
|
||||
echo
|
||||
$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825"
|
||||
|
||||
rm "$BINARY_FILE_PATH"
|
@ -0,0 +1,13 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message Message
|
||||
{
|
||||
enum Enum
|
||||
{
|
||||
FIRST = 0;
|
||||
SECOND = 1;
|
||||
TEN = 10;
|
||||
HUNDRED = 100;
|
||||
};
|
||||
Enum x = 1;
|
||||
};
|
@ -0,0 +1,31 @@
|
||||
Second
|
||||
Third
|
||||
First
|
||||
First
|
||||
Second
|
||||
|
||||
Binary representation:
|
||||
00000000 02 08 01 02 08 64 00 00 02 08 01 |.....d.....|
|
||||
0000000b
|
||||
|
||||
MESSAGE #1 AT 0x00000001
|
||||
x: SECOND
|
||||
MESSAGE #2 AT 0x00000004
|
||||
x: HUNDRED
|
||||
MESSAGE #3 AT 0x00000007
|
||||
MESSAGE #4 AT 0x00000008
|
||||
MESSAGE #5 AT 0x00000009
|
||||
x: SECOND
|
||||
|
||||
Binary representation is as expected
|
||||
|
||||
Second
|
||||
Third
|
||||
First
|
||||
First
|
||||
Second
|
||||
Second
|
||||
Third
|
||||
First
|
||||
First
|
||||
Second
|
37
tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
Executable file
37
tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
Executable file
@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# https://github.com/ClickHouse/ClickHouse/issues/7438
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
# Run the client.
|
||||
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
|
||||
DROP TABLE IF EXISTS enum_mapping_protobuf_00825;
|
||||
|
||||
CREATE TABLE enum_mapping_protobuf_00825
|
||||
(
|
||||
x Enum16('First'=-100, 'Second'=0, 'Third'=100)
|
||||
) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO enum_mapping_protobuf_00825 VALUES ('Second'), ('Third'), ('First'), ('First'), ('Second');
|
||||
|
||||
SELECT * FROM enum_mapping_protobuf_00825;
|
||||
EOF
|
||||
|
||||
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_enum_mapping.XXXXXX.binary")
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the output in the protobuf format
|
||||
echo
|
||||
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the input in the protobuf format (now the table contains the same data twice).
|
||||
echo
|
||||
$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825"
|
||||
|
||||
rm "$BINARY_FILE_PATH"
|
@ -0,0 +1,5 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message Message {
|
||||
map<string, uint32> a = 1;
|
||||
};
|
@ -0,0 +1,19 @@
|
||||
{'x':5,'y':7}
|
||||
{'z':11}
|
||||
{'temp':0}
|
||||
{'':0}
|
||||
|
||||
Binary representation:
|
||||
00000000 0e 0a 05 0a 01 78 10 05 0a 05 0a 01 79 10 07 07 |.....x......y...|
|
||||
00000010 0a 05 0a 01 7a 10 0b 0a 0a 08 0a 04 74 65 6d 70 |....z.......temp|
|
||||
00000020 10 00 06 0a 04 0a 00 10 00 |.........|
|
||||
00000029
|
||||
|
||||
{'x':5,'y':7}
|
||||
{'z':11}
|
||||
{'temp':0}
|
||||
{'':0}
|
||||
{'x':5,'y':7}
|
||||
{'z':11}
|
||||
{'temp':0}
|
||||
{'':0}
|
40
tests/queries/0_stateless/00825_protobuf_format_map.sh
Executable file
40
tests/queries/0_stateless/00825_protobuf_format_map.sh
Executable file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# https://github.com/ClickHouse/ClickHouse/issues/6497
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
# Run the client.
|
||||
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
|
||||
SET allow_experimental_map_type = 1;
|
||||
|
||||
DROP TABLE IF EXISTS map_00825;
|
||||
|
||||
CREATE TABLE map_00825
|
||||
(
|
||||
a Map(String, UInt32)
|
||||
) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO map_00825 VALUES ({'x':5, 'y':7}), ({'z':11}), ({'temp':0}), ({'':0});
|
||||
|
||||
SELECT * FROM map_00825;
|
||||
EOF
|
||||
|
||||
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_map.XXXXXX.binary")
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the output in the protobuf format
|
||||
echo
|
||||
echo "Binary representation:"
|
||||
hexdump -C $BINARY_FILE_PATH
|
||||
|
||||
# Check the input in the protobuf format (now the table contains the same data twice).
|
||||
echo
|
||||
$CLICKHOUSE_CLIENT --query "INSERT INTO map_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825"
|
||||
|
||||
rm "$BINARY_FILE_PATH"
|
@ -0,0 +1,10 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message Repeated {
|
||||
string foo = 1;
|
||||
int64 bar = 2;
|
||||
}
|
||||
|
||||
message Message {
|
||||
repeated Repeated messages = 1;
|
||||
};
|
@ -0,0 +1,25 @@
|
||||
['1'] [0]
|
||||
['1',''] [0,1]
|
||||
|
||||
Binary representation:
|
||||
00000000 05 0a 03 0a 01 31 09 0a 03 0a 01 31 0a 02 10 01 |.....1.....1....|
|
||||
00000010
|
||||
|
||||
MESSAGE #1 AT 0x00000001
|
||||
messages {
|
||||
foo: "1"
|
||||
}
|
||||
MESSAGE #2 AT 0x00000007
|
||||
messages {
|
||||
foo: "1"
|
||||
}
|
||||
messages {
|
||||
bar: 1
|
||||
}
|
||||
|
||||
Binary representation is as expected
|
||||
|
||||
['1'] [0]
|
||||
['1',''] [0,1]
|
||||
['1'] [0]
|
||||
['1',''] [0,1]
|
41
tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
Executable file
41
tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
Executable file
@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# https://github.com/ClickHouse/ClickHouse/issues/6497
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
# Run the client.
|
||||
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
|
||||
DROP TABLE IF EXISTS nested_optional_protobuf_00825;
|
||||
|
||||
CREATE TABLE nested_optional_protobuf_00825
|
||||
(
|
||||
messages Nested
|
||||
(
|
||||
foo String,
|
||||
bar Int64
|
||||
)
|
||||
) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO nested_optional_protobuf_00825 VALUES (['1'], [0]), (['1', ''], [0, 1]);
|
||||
|
||||
SELECT * FROM nested_optional_protobuf_00825;
|
||||
EOF
|
||||
|
||||
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_optional.XXXXXX.binary")
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the output in the protobuf format
|
||||
echo
|
||||
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the input in the protobuf format (now the table contains the same data twice).
|
||||
echo
|
||||
$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825"
|
||||
|
||||
rm "$BINARY_FILE_PATH"
|
@ -0,0 +1,6 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message Message {
|
||||
sint32 x = 1;
|
||||
sint32 z = 2;
|
||||
};
|
@ -0,0 +1,37 @@
|
||||
0 0 0
|
||||
2 4 8
|
||||
3 9 27
|
||||
5 25 125
|
||||
101 102 103
|
||||
|
||||
Binary representation:
|
||||
00000000 00 04 08 04 10 10 04 08 06 10 36 05 08 0a 10 fa |..........6.....|
|
||||
00000010 01 06 08 ca 01 10 ce 01 |........|
|
||||
00000018
|
||||
|
||||
MESSAGE #1 AT 0x00000001
|
||||
MESSAGE #2 AT 0x00000002
|
||||
x: 2
|
||||
z: 8
|
||||
MESSAGE #3 AT 0x00000007
|
||||
x: 3
|
||||
z: 27
|
||||
MESSAGE #4 AT 0x0000000C
|
||||
x: 5
|
||||
z: 125
|
||||
MESSAGE #5 AT 0x00000012
|
||||
x: 101
|
||||
z: 103
|
||||
|
||||
Binary representation is as expected
|
||||
|
||||
0 0 0
|
||||
0 0 0
|
||||
2 4 8
|
||||
2 4 8
|
||||
3 9 27
|
||||
3 9 27
|
||||
5 25 125
|
||||
5 25 125
|
||||
101 102 103
|
||||
101 10201 103
|
38
tests/queries/0_stateless/00825_protobuf_format_table_default.sh
Executable file
38
tests/queries/0_stateless/00825_protobuf_format_table_default.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
# Run the client.
|
||||
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
|
||||
DROP TABLE IF EXISTS table_default_protobuf_00825;
|
||||
|
||||
CREATE TABLE table_default_protobuf_00825
|
||||
(
|
||||
x Int64,
|
||||
y Int64 DEFAULT x * x,
|
||||
z Int64 DEFAULT x * x * x
|
||||
) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO table_default_protobuf_00825 (x) VALUES (0), (2), (3), (5);
|
||||
INSERT INTO table_default_protobuf_00825 VALUES (101, 102, 103);
|
||||
|
||||
SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z;
|
||||
EOF
|
||||
|
||||
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_table_default.XXXXXX.binary")
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the output in the protobuf format
|
||||
echo
|
||||
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH"
|
||||
|
||||
# Check the input in the protobuf format (now the table contains the same data twice).
|
||||
echo
|
||||
$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z"
|
||||
|
||||
rm "$BINARY_FILE_PATH"
|
@ -0,0 +1,9 @@
|
||||
|
||||
|
||||
Here is CDTATA.
|
||||
This is a white space test.
|
||||
This is a complex test. <script type="text/javascript">Hello, world</script> world <style> hello
|
||||
hello, world
|
||||
|
||||
hello, world
|
||||
white space collapse
|
15
tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
Normal file
15
tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
Normal file
@ -0,0 +1,15 @@
|
||||
SELECT htmlOrXmlCoarseParse('<script>Here is script.</script>');
|
||||
SELECT htmlOrXmlCoarseParse('<style>Here is style.</style>');
|
||||
SELECT htmlOrXmlCoarseParse('<![CDATA[Here is CDTATA.]]>');
|
||||
SELECT htmlOrXmlCoarseParse('This is a white space test.');
|
||||
SELECT htmlOrXmlCoarseParse('This is a complex test. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><![CDATA[<script type="text/javascript">Hello, world</script> ]]><hello />world<![CDATA[ <style> ]]> hello</style>\n<script><![CDATA[</script>]]>hello</script>\n</html>');
|
||||
DROP TABLE IF EXISTS defaults;
|
||||
CREATE TABLE defaults
|
||||
(
|
||||
stringColumn String
|
||||
) ENGINE = Memory();
|
||||
|
||||
INSERT INTO defaults values ('<common tag>hello, world<tag>'), ('<script desc=content> some content </script>'), ('<![CDATA[hello, world]]>'), ('white space collapse');
|
||||
|
||||
SELECT htmlOrXmlCoarseParse(stringColumn) FROM defaults;
|
||||
DROP table defaults;
|
@ -0,0 +1 @@
|
||||
1 First
|
@ -0,0 +1,28 @@
|
||||
DROP DATABASE IF EXISTS 01720_dictionary_db;
|
||||
CREATE DATABASE 01720_dictionary_db;
|
||||
|
||||
CREATE TABLE 01720_dictionary_db.dictionary_source_table
|
||||
(
|
||||
key UInt8,
|
||||
value String
|
||||
)
|
||||
ENGINE = TinyLog;
|
||||
|
||||
INSERT INTO 01720_dictionary_db.dictionary_source_table VALUES (1, 'First');
|
||||
|
||||
CREATE DICTIONARY 01720_dictionary_db.dictionary
|
||||
(
|
||||
key UInt64,
|
||||
value String
|
||||
)
|
||||
PRIMARY KEY key
|
||||
SOURCE(CLICKHOUSE(DB '01720_dictionary_db' TABLE 'dictionary_source_table' HOST hostName() PORT tcpPort()))
|
||||
LIFETIME(0)
|
||||
LAYOUT(FLAT());
|
||||
|
||||
SELECT * FROM 01720_dictionary_db.dictionary;
|
||||
|
||||
DROP DICTIONARY 01720_dictionary_db.dictionary;
|
||||
DROP TABLE 01720_dictionary_db.dictionary_source_table;
|
||||
|
||||
DROP DATABASE 01720_dictionary_db;
|
@ -197,6 +197,7 @@
|
||||
01181_db_atomic_drop_on_cluster
|
||||
01658_test_base64Encode_mysql_compatibility
|
||||
01659_test_base64Decode_mysql_compatibility
|
||||
01674_htm_xml_coarse_parse
|
||||
01675_data_type_coroutine
|
||||
01676_clickhouse_client_autocomplete
|
||||
01671_aggregate_function_group_bitmap_data
|
||||
|
180
tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py
Executable file
180
tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py
Executable file
@ -0,0 +1,180 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# The protobuf compiler protoc doesn't support encoding or decoding length-delimited protobuf message.
|
||||
# To do that this script has been written.
|
||||
|
||||
import argparse
|
||||
import os.path
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
def read_varint(input):
|
||||
res = 0
|
||||
shift = 0
|
||||
while True:
|
||||
c = input.read(1)
|
||||
if len(c) == 0:
|
||||
return None
|
||||
b = c[0]
|
||||
if b < 0x80:
|
||||
res += b << shift
|
||||
break
|
||||
b -= 0x80
|
||||
res += b << shift
|
||||
shift = shift << 7
|
||||
return res
|
||||
|
||||
def write_varint(output, value):
|
||||
while True:
|
||||
if value < 0x80:
|
||||
b = value
|
||||
output.write(b.to_bytes(1, byteorder='little'))
|
||||
break
|
||||
b = (value & 0x7F) + 0x80
|
||||
output.write(b.to_bytes(1, byteorder='little'))
|
||||
value = value >> 7
|
||||
|
||||
def write_hexdump(output, data):
|
||||
with subprocess.Popen(["hexdump", "-C"], stdin=subprocess.PIPE, stdout=output, shell=False) as proc:
|
||||
proc.communicate(data)
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError("hexdump returned code " + str(proc.returncode))
|
||||
output.flush()
|
||||
|
||||
class FormatSchemaSplitted:
|
||||
def __init__(self, format_schema):
|
||||
self.format_schema = format_schema
|
||||
splitted = self.format_schema.split(':')
|
||||
if len(splitted) < 2:
|
||||
raise RuntimeError('The format schema must have the format "schemafile:MessageType"')
|
||||
path = splitted[0]
|
||||
self.schemadir = os.path.dirname(path)
|
||||
self.schemaname = os.path.basename(path)
|
||||
if not self.schemaname.endswith(".proto"):
|
||||
self.schemaname = self.schemaname + ".proto"
|
||||
self.message_type = splitted[1]
|
||||
|
||||
def decode(input, output, format_schema):
|
||||
if not type(format_schema) is FormatSchemaSplitted:
|
||||
format_schema = FormatSchemaSplitted(format_schema)
|
||||
msgindex = 1
|
||||
while True:
|
||||
sz = read_varint(input)
|
||||
if sz is None:
|
||||
break
|
||||
output.write("MESSAGE #{msgindex} AT 0x{msgoffset:08X}\n".format(msgindex=msgindex, msgoffset=input.tell()).encode())
|
||||
output.flush()
|
||||
msg = input.read(sz)
|
||||
if len(msg) < sz:
|
||||
raise EOFError('Unexpected end of file')
|
||||
with subprocess.Popen(["protoc",
|
||||
"--decode", format_schema.message_type, format_schema.schemaname],
|
||||
cwd=format_schema.schemadir,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=output,
|
||||
shell=False) as proc:
|
||||
proc.communicate(msg)
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError("protoc returned code " + str(proc.returncode))
|
||||
output.flush()
|
||||
msgindex = msgindex + 1
|
||||
|
||||
def encode(input, output, format_schema):
|
||||
if not type(format_schema) is FormatSchemaSplitted:
|
||||
format_schema = FormatSchemaSplitted(format_schema)
|
||||
line_offset = input.tell()
|
||||
line = input.readline()
|
||||
while True:
|
||||
if len(line) == 0:
|
||||
break
|
||||
if not line.startswith(b"MESSAGE #"):
|
||||
raise RuntimeError("The line at 0x{line_offset:08X} must start with the text 'MESSAGE #'".format(line_offset=line_offset))
|
||||
msg = b""
|
||||
while True:
|
||||
line_offset = input.tell()
|
||||
line = input.readline()
|
||||
if line.startswith(b"MESSAGE #") or len(line) == 0:
|
||||
break
|
||||
msg += line
|
||||
with subprocess.Popen(["protoc",
|
||||
"--encode", format_schema.message_type, format_schema.schemaname],
|
||||
cwd=format_schema.schemadir,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
shell=False) as proc:
|
||||
msgbin = proc.communicate(msg)[0]
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError("protoc returned code " + str(proc.returncode))
|
||||
write_varint(output, len(msgbin))
|
||||
output.write(msgbin)
|
||||
output.flush()
|
||||
|
||||
def decode_and_check(input, output, format_schema):
|
||||
input_data = input.read()
|
||||
output.write(b"Binary representation:\n")
|
||||
output.flush()
|
||||
write_hexdump(output, input_data)
|
||||
output.write(b"\n")
|
||||
output.flush()
|
||||
|
||||
with tempfile.TemporaryFile() as tmp_input, tempfile.TemporaryFile() as tmp_decoded, tempfile.TemporaryFile() as tmp_encoded:
|
||||
tmp_input.write(input_data)
|
||||
tmp_input.flush()
|
||||
tmp_input.seek(0)
|
||||
decode(tmp_input, tmp_decoded, format_schema)
|
||||
tmp_decoded.seek(0)
|
||||
decoded_text = tmp_decoded.read()
|
||||
output.write(decoded_text)
|
||||
output.flush()
|
||||
tmp_decoded.seek(0)
|
||||
encode(tmp_decoded, tmp_encoded, format_schema)
|
||||
tmp_encoded.seek(0)
|
||||
encoded_data = tmp_encoded.read()
|
||||
|
||||
if encoded_data == input_data:
|
||||
output.write(b"\nBinary representation is as expected\n")
|
||||
output.flush()
|
||||
else:
|
||||
output.write(b"\nBinary representation differs from the expected one (listed below):\n")
|
||||
output.flush()
|
||||
write_hexdump(output, encoded_data)
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Encodes or decodes length-delimited protobuf messages.')
|
||||
parser.add_argument('--input', help='The input file, the standard input will be used if not specified.')
|
||||
parser.add_argument('--output', help='The output file, the standard output will be used if not specified')
|
||||
parser.add_argument('--format_schema', required=True, help='Format schema in the format "schemafile:MessageType"')
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument('--encode', action='store_true', help='Specify to encode length-delimited messages.'
|
||||
'The utility will read text-format messages of the given type from the input and write it in binary to the output.')
|
||||
group.add_argument('--decode', action='store_true', help='Specify to decode length-delimited messages.'
|
||||
'The utility will read messages in binary from the input and write text-format messages to the output.')
|
||||
group.add_argument('--decode_and_check', action='store_true', help='The same as --decode, and the utility will then encode '
|
||||
' the decoded data back to the binary form to check that the result of that encoding is the same as the input was.')
|
||||
args = parser.parse_args()
|
||||
|
||||
custom_input_file = None
|
||||
custom_output_file = None
|
||||
try:
|
||||
if args.input:
|
||||
custom_input_file = open(args.input, "rb")
|
||||
if args.output:
|
||||
custom_output_file = open(args.output, "wb")
|
||||
input = custom_input_file if custom_input_file else sys.stdin.buffer
|
||||
output = custom_output_file if custom_output_file else sys.stdout.buffer
|
||||
|
||||
if args.encode:
|
||||
encode(input, output, args.format_schema)
|
||||
elif args.decode:
|
||||
decode(input, output, args.format_schema)
|
||||
elif args.decode_and_check:
|
||||
decode_and_check(input, output, args.format_schema)
|
||||
|
||||
finally:
|
||||
if custom_input_file:
|
||||
custom_input_file.close()
|
||||
if custom_output_file:
|
||||
custom_output_file.close()
|
@ -122,6 +122,12 @@
|
||||
"00763_create_query_as_table_engine_bug",
|
||||
"00765_sql_compatibility_aliases",
|
||||
"00825_protobuf_format_input",
|
||||
"00825_protobuf_format_nested_optional",
|
||||
"00825_protobuf_format_array_3dim",
|
||||
"00825_protobuf_format_map",
|
||||
"00825_protobuf_format_array_of_arrays",
|
||||
"00825_protobuf_format_table_default",
|
||||
"00825_protobuf_format_enum_mapping",
|
||||
"00826_cross_to_inner_join",
|
||||
"00834_not_between",
|
||||
"00909_kill_not_initialized_query",
|
||||
|
Loading…
Reference in New Issue
Block a user