mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge branch 'master' into in-memory-compression
This commit is contained in:
commit
a9d9a6d56f
@ -37,15 +37,13 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
|
|||||||
#
|
#
|
||||||
# - 4.0+ ccache always includes this environment variable into the hash
|
# - 4.0+ ccache always includes this environment variable into the hash
|
||||||
# of the manifest, which do not allow to use previous cache,
|
# of the manifest, which do not allow to use previous cache,
|
||||||
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
|
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
|
||||||
#
|
#
|
||||||
# So for:
|
# So for:
|
||||||
# - 4.2+ time_macros sloppiness is used,
|
# - 4.2+ does not require any sloppiness
|
||||||
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
|
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
|
||||||
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
|
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
|
||||||
message(STATUS "Use time_macros sloppiness for ccache")
|
message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
|
||||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
|
|
||||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
|
|
||||||
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
|
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
|
||||||
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
|
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
|
||||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
|
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
|
||||||
|
@ -342,9 +342,10 @@ function run_tests
|
|||||||
|
|
||||||
# JSON functions
|
# JSON functions
|
||||||
01666_blns
|
01666_blns
|
||||||
|
01674_htm_xml_coarse_parse
|
||||||
)
|
)
|
||||||
|
|
||||||
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
(time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||||
|
|
||||||
# substr is to remove semicolon after test name
|
# substr is to remove semicolon after test name
|
||||||
readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||||
|
@ -13,6 +13,7 @@ RUN apt-get update -y \
|
|||||||
ncdu \
|
ncdu \
|
||||||
netcat-openbsd \
|
netcat-openbsd \
|
||||||
openssl \
|
openssl \
|
||||||
|
protobuf-compiler \
|
||||||
python3 \
|
python3 \
|
||||||
python3-lxml \
|
python3-lxml \
|
||||||
python3-requests \
|
python3-requests \
|
||||||
|
@ -25,6 +25,7 @@ The following actions are supported:
|
|||||||
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
|
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
|
||||||
- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
|
- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
|
||||||
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
|
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
|
||||||
|
- [RENAME COLUMN](#alter_rename-column) — Renames an existing column.
|
||||||
|
|
||||||
These actions are described in detail below.
|
These actions are described in detail below.
|
||||||
|
|
||||||
@ -183,6 +184,22 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
|||||||
|
|
||||||
- [REMOVE TTL](ttl.md).
|
- [REMOVE TTL](ttl.md).
|
||||||
|
|
||||||
|
## RENAME COLUMN {#alter_rename-column}
|
||||||
|
|
||||||
|
Renames an existing column.
|
||||||
|
|
||||||
|
Syntax:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name;
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
|
||||||
|
```
|
||||||
|
|
||||||
## Limitations {#alter-query-limitations}
|
## Limitations {#alter-query-limitations}
|
||||||
|
|
||||||
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.
|
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.
|
||||||
|
@ -474,4 +474,19 @@ ColumnPtr ColumnFixedString::compress() const
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ColumnFixedString::alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size)
|
||||||
|
{
|
||||||
|
size_t length = data.size() - old_size;
|
||||||
|
if (length < n)
|
||||||
|
{
|
||||||
|
data.resize_fill(old_size + n);
|
||||||
|
}
|
||||||
|
else if (length > n)
|
||||||
|
{
|
||||||
|
data.resize_assume_reserved(old_size);
|
||||||
|
throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -184,7 +184,8 @@ public:
|
|||||||
const Chars & getChars() const { return chars; }
|
const Chars & getChars() const { return chars; }
|
||||||
|
|
||||||
size_t getN() const { return n; }
|
size_t getN() const { return n; }
|
||||||
|
|
||||||
|
static void alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -404,7 +404,7 @@
|
|||||||
M(432, UNKNOWN_CODEC) \
|
M(432, UNKNOWN_CODEC) \
|
||||||
M(433, ILLEGAL_CODEC_PARAMETER) \
|
M(433, ILLEGAL_CODEC_PARAMETER) \
|
||||||
M(434, CANNOT_PARSE_PROTOBUF_SCHEMA) \
|
M(434, CANNOT_PARSE_PROTOBUF_SCHEMA) \
|
||||||
M(435, NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD) \
|
M(435, NO_COLUMN_SERIALIZED_TO_REQUIRED_PROTOBUF_FIELD) \
|
||||||
M(436, PROTOBUF_BAD_CAST) \
|
M(436, PROTOBUF_BAD_CAST) \
|
||||||
M(437, PROTOBUF_FIELD_NOT_REPEATED) \
|
M(437, PROTOBUF_FIELD_NOT_REPEATED) \
|
||||||
M(438, DATA_TYPE_CANNOT_BE_PROMOTED) \
|
M(438, DATA_TYPE_CANNOT_BE_PROMOTED) \
|
||||||
@ -412,7 +412,7 @@
|
|||||||
M(440, INVALID_LIMIT_EXPRESSION) \
|
M(440, INVALID_LIMIT_EXPRESSION) \
|
||||||
M(441, CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING) \
|
M(441, CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING) \
|
||||||
M(442, BAD_DATABASE_FOR_TEMPORARY_TABLE) \
|
M(442, BAD_DATABASE_FOR_TEMPORARY_TABLE) \
|
||||||
M(443, NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA) \
|
M(443, NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS) \
|
||||||
M(444, UNKNOWN_PROTOBUF_FORMAT) \
|
M(444, UNKNOWN_PROTOBUF_FORMAT) \
|
||||||
M(445, CANNOT_MPROTECT) \
|
M(445, CANNOT_MPROTECT) \
|
||||||
M(446, FUNCTION_NOT_ALLOWED) \
|
M(446, FUNCTION_NOT_ALLOWED) \
|
||||||
@ -535,6 +535,8 @@
|
|||||||
M(566, CANNOT_RMDIR) \
|
M(566, CANNOT_RMDIR) \
|
||||||
M(567, DUPLICATED_PART_UUIDS) \
|
M(567, DUPLICATED_PART_UUIDS) \
|
||||||
M(568, RAFT_ERROR) \
|
M(568, RAFT_ERROR) \
|
||||||
|
M(569, MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD) \
|
||||||
|
M(570, DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD) \
|
||||||
\
|
\
|
||||||
M(999, KEEPER_EXCEPTION) \
|
M(999, KEEPER_EXCEPTION) \
|
||||||
M(1000, POCO_EXCEPTION) \
|
M(1000, POCO_EXCEPTION) \
|
||||||
|
@ -10,8 +10,6 @@
|
|||||||
#include <Common/AlignedBuffer.h>
|
#include <Common/AlignedBuffer.h>
|
||||||
|
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
@ -261,45 +259,6 @@ void DataTypeAggregateFunction::deserializeTextCSV(IColumn & column, ReadBuffer
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void DataTypeAggregateFunction::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
value_index = static_cast<bool>(
|
|
||||||
protobuf.writeAggregateFunction(function, assert_cast<const ColumnAggregateFunction &>(column).getData()[row_num]));
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeAggregateFunction::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
ColumnAggregateFunction & column_concrete = assert_cast<ColumnAggregateFunction &>(column);
|
|
||||||
Arena & arena = column_concrete.createOrGetArena();
|
|
||||||
size_t size_of_state = function->sizeOfData();
|
|
||||||
AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData());
|
|
||||||
function->create(place);
|
|
||||||
try
|
|
||||||
{
|
|
||||||
if (!protobuf.readAggregateFunction(function, place, arena))
|
|
||||||
{
|
|
||||||
function->destroy(place);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
auto & container = column_concrete.getData();
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
container.emplace_back(place);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
container.back() = place;
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
function->destroy(place);
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MutableColumnPtr DataTypeAggregateFunction::createColumn() const
|
MutableColumnPtr DataTypeAggregateFunction::createColumn() const
|
||||||
{
|
{
|
||||||
return ColumnAggregateFunction::create(function);
|
return ColumnAggregateFunction::create(function);
|
||||||
|
@ -59,8 +59,6 @@ public:
|
|||||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
|
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <DataTypes/DataTypeArray.h>
|
#include <DataTypes/DataTypeArray.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
@ -522,55 +521,6 @@ void DataTypeArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void DataTypeArray::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
|
|
||||||
const ColumnArray::Offsets & offsets = column_array.getOffsets();
|
|
||||||
size_t offset = offsets[row_num - 1] + value_index;
|
|
||||||
size_t next_offset = offsets[row_num];
|
|
||||||
const IColumn & nested_column = column_array.getData();
|
|
||||||
size_t i;
|
|
||||||
for (i = offset; i < next_offset; ++i)
|
|
||||||
{
|
|
||||||
size_t element_stored = 0;
|
|
||||||
nested->serializeProtobuf(nested_column, i, protobuf, element_stored);
|
|
||||||
if (!element_stored)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
value_index += i - offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void DataTypeArray::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
|
|
||||||
IColumn & nested_column = column_array.getData();
|
|
||||||
ColumnArray::Offsets & offsets = column_array.getOffsets();
|
|
||||||
size_t old_size = offsets.size();
|
|
||||||
try
|
|
||||||
{
|
|
||||||
bool nested_row_added;
|
|
||||||
do
|
|
||||||
nested->deserializeProtobuf(nested_column, protobuf, true, nested_row_added);
|
|
||||||
while (nested_row_added && protobuf.canReadMoreValues());
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
offsets.emplace_back(nested_column.size());
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
offsets.back() = nested_column.size();
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
offsets.resize_assume_reserved(old_size);
|
|
||||||
nested_column.popBack(nested_column.size() - offsets.back());
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
MutableColumnPtr DataTypeArray::createColumn() const
|
MutableColumnPtr DataTypeArray::createColumn() const
|
||||||
{
|
{
|
||||||
return ColumnArray::create(nested->createColumn(), ColumnArray::ColumnOffsets::create());
|
return ColumnArray::create(nested->createColumn(), ColumnArray::ColumnOffsets::create());
|
||||||
|
@ -85,15 +85,6 @@ public:
|
|||||||
DeserializeBinaryBulkStatePtr & state,
|
DeserializeBinaryBulkStatePtr & state,
|
||||||
SubstreamsCache * cache) const override;
|
SubstreamsCache * cache) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column,
|
|
||||||
size_t row_num,
|
|
||||||
ProtobufWriter & protobuf,
|
|
||||||
size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column,
|
|
||||||
ProtobufReader & protobuf,
|
|
||||||
bool allow_add_row,
|
|
||||||
bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
Field getDefault() const override;
|
Field getDefault() const override;
|
||||||
|
@ -4,8 +4,6 @@
|
|||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
#include <DataTypes/DataTypeDate.h>
|
#include <DataTypes/DataTypeDate.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
|
|
||||||
#include <Common/assert_cast.h>
|
#include <Common/assert_cast.h>
|
||||||
|
|
||||||
@ -81,30 +79,6 @@ void DataTypeDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
|
|||||||
assert_cast<ColumnUInt16 &>(column).getData().push_back(value.getDayNum());
|
assert_cast<ColumnUInt16 &>(column).getData().push_back(value.getDayNum());
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTypeDate::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
value_index = static_cast<bool>(protobuf.writeDate(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num])));
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeDate::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
DayNum d;
|
|
||||||
if (!protobuf.readDate(d))
|
|
||||||
return;
|
|
||||||
|
|
||||||
auto & container = assert_cast<ColumnUInt16 &>(column).getData();
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
container.emplace_back(d);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
container.back() = d;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DataTypeDate::equals(const IDataType & rhs) const
|
bool DataTypeDate::equals(const IDataType & rhs) const
|
||||||
{
|
{
|
||||||
return typeid(rhs) == typeid(*this);
|
return typeid(rhs) == typeid(*this);
|
||||||
|
@ -24,8 +24,6 @@ public:
|
|||||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
bool canBeUsedAsVersion() const override { return true; }
|
bool canBeUsedAsVersion() const override { return true; }
|
||||||
bool canBeInsideNullable() const override { return true; }
|
bool canBeInsideNullable() const override { return true; }
|
||||||
|
@ -5,8 +5,6 @@
|
|||||||
#include <common/DateLUT.h>
|
#include <common/DateLUT.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <IO/Operators.h>
|
#include <IO/Operators.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
@ -164,32 +162,6 @@ void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c
|
|||||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTypeDateTime::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
|
|
||||||
// On some platforms `time_t` is `long` but not `unsigned int` (UInt32 that we store in column), hence static_cast.
|
|
||||||
value_index = static_cast<bool>(protobuf.writeDateTime(static_cast<time_t>(assert_cast<const ColumnType &>(column).getData()[row_num])));
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
time_t t;
|
|
||||||
if (!protobuf.readDateTime(t))
|
|
||||||
return;
|
|
||||||
|
|
||||||
auto & container = assert_cast<ColumnType &>(column).getData();
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
container.emplace_back(t);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
container.back() = t;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DataTypeDateTime::equals(const IDataType & rhs) const
|
bool DataTypeDateTime::equals(const IDataType & rhs) const
|
||||||
{
|
{
|
||||||
/// DateTime with different timezones are equal, because:
|
/// DateTime with different timezones are equal, because:
|
||||||
|
@ -68,8 +68,6 @@ public:
|
|||||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
bool canBeUsedAsVersion() const override { return true; }
|
bool canBeUsedAsVersion() const override { return true; }
|
||||||
bool canBeInsideNullable() const override { return true; }
|
bool canBeInsideNullable() const override { return true; }
|
||||||
|
@ -6,8 +6,6 @@
|
|||||||
#include <common/DateLUT.h>
|
#include <common/DateLUT.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <IO/Operators.h>
|
#include <IO/Operators.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
@ -182,30 +180,6 @@ void DataTypeDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
|||||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTypeDateTime64::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
value_index = static_cast<bool>(protobuf.writeDateTime64(assert_cast<const ColumnType &>(column).getData()[row_num], scale));
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeDateTime64::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
DateTime64 t = 0;
|
|
||||||
if (!protobuf.readDateTime64(t, scale))
|
|
||||||
return;
|
|
||||||
|
|
||||||
auto & container = assert_cast<ColumnType &>(column).getData();
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
container.emplace_back(t);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
container.back() = t;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DataTypeDateTime64::equals(const IDataType & rhs) const
|
bool DataTypeDateTime64::equals(const IDataType & rhs) const
|
||||||
{
|
{
|
||||||
if (const auto * ptype = typeid_cast<const DataTypeDateTime64 *>(&rhs))
|
if (const auto * ptype = typeid_cast<const DataTypeDateTime64 *>(&rhs))
|
||||||
|
@ -42,8 +42,6 @@ public:
|
|||||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
bool equals(const IDataType & rhs) const override;
|
bool equals(const IDataType & rhs) const override;
|
||||||
|
|
||||||
|
@ -4,8 +4,6 @@
|
|||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <Core/DecimalFunctions.h>
|
#include <Core/DecimalFunctions.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <Interpreters/Context.h>
|
#include <Interpreters/Context.h>
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <DataTypes/DataTypeEnum.h>
|
#include <DataTypes/DataTypeEnum.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <Parsers/IAST.h>
|
#include <Parsers/IAST.h>
|
||||||
@ -254,34 +252,6 @@ void DataTypeEnum<Type>::deserializeBinaryBulk(
|
|||||||
x.resize(initial_size + size / sizeof(FieldType));
|
x.resize(initial_size + size / sizeof(FieldType));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Type>
|
|
||||||
void DataTypeEnum<Type>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
protobuf.prepareEnumMapping(values);
|
|
||||||
value_index = static_cast<bool>(protobuf.writeEnum(assert_cast<const ColumnType &>(column).getData()[row_num]));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename Type>
|
|
||||||
void DataTypeEnum<Type>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
protobuf.prepareEnumMapping(values);
|
|
||||||
row_added = false;
|
|
||||||
Type value;
|
|
||||||
if (!protobuf.readEnum(value))
|
|
||||||
return;
|
|
||||||
|
|
||||||
auto & container = assert_cast<ColumnType &>(column).getData();
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
container.emplace_back(value);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
container.back() = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
Field DataTypeEnum<Type>::getDefault() const
|
Field DataTypeEnum<Type>::getDefault() const
|
||||||
{
|
{
|
||||||
|
@ -132,9 +132,6 @@ public:
|
|||||||
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const override;
|
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const override;
|
||||||
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, const size_t limit, const double avg_value_size_hint) const override;
|
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, const size_t limit, const double avg_value_size_hint) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override { return ColumnType::create(); }
|
MutableColumnPtr createColumn() const override { return ColumnType::create(); }
|
||||||
|
|
||||||
Field getDefault() const override;
|
Field getDefault() const override;
|
||||||
|
@ -2,8 +2,6 @@
|
|||||||
#include <Columns/ColumnConst.h>
|
#include <Columns/ColumnConst.h>
|
||||||
|
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <DataTypes/DataTypeFixedString.h>
|
#include <DataTypes/DataTypeFixedString.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
|
|
||||||
@ -25,7 +23,6 @@ namespace DB
|
|||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int CANNOT_READ_ALL_DATA;
|
extern const int CANNOT_READ_ALL_DATA;
|
||||||
extern const int TOO_LARGE_STRING_SIZE;
|
|
||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
extern const int UNEXPECTED_AST_STRUCTURE;
|
extern const int UNEXPECTED_AST_STRUCTURE;
|
||||||
}
|
}
|
||||||
@ -127,16 +124,7 @@ static inline void alignStringLength(const DataTypeFixedString & type,
|
|||||||
ColumnFixedString::Chars & data,
|
ColumnFixedString::Chars & data,
|
||||||
size_t string_start)
|
size_t string_start)
|
||||||
{
|
{
|
||||||
size_t length = data.size() - string_start;
|
ColumnFixedString::alignStringLength(data, type.getN(), string_start);
|
||||||
if (length < type.getN())
|
|
||||||
{
|
|
||||||
data.resize_fill(string_start + type.getN());
|
|
||||||
}
|
|
||||||
else if (length > type.getN())
|
|
||||||
{
|
|
||||||
data.resize_assume_reserved(string_start);
|
|
||||||
throw Exception("Too large value for " + type.getName(), ErrorCodes::TOO_LARGE_STRING_SIZE);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Reader>
|
template <typename Reader>
|
||||||
@ -215,53 +203,6 @@ void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void DataTypeFixedString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
|
|
||||||
value_index = static_cast<bool>(protobuf.writeString(StringRef(pos, n)));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void DataTypeFixedString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
auto & column_string = assert_cast<ColumnFixedString &>(column);
|
|
||||||
ColumnFixedString::Chars & data = column_string.getChars();
|
|
||||||
size_t old_size = data.size();
|
|
||||||
try
|
|
||||||
{
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
if (protobuf.readStringInto(data))
|
|
||||||
{
|
|
||||||
alignStringLength(*this, data, old_size);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
data.resize_assume_reserved(old_size);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ColumnFixedString::Chars temp_data;
|
|
||||||
if (protobuf.readStringInto(temp_data))
|
|
||||||
{
|
|
||||||
alignStringLength(*this, temp_data, 0);
|
|
||||||
column_string.popBack(1);
|
|
||||||
old_size = data.size();
|
|
||||||
data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
data.resize_assume_reserved(old_size);
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
MutableColumnPtr DataTypeFixedString::createColumn() const
|
MutableColumnPtr DataTypeFixedString::createColumn() const
|
||||||
{
|
{
|
||||||
return ColumnFixedString::create(n);
|
return ColumnFixedString::create(n);
|
||||||
|
@ -66,9 +66,6 @@ public:
|
|||||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
Field getDefault() const override;
|
Field getDefault() const override;
|
||||||
|
@ -808,31 +808,6 @@ void DataTypeLowCardinality::serializeTextXML(const IColumn & column, size_t row
|
|||||||
serializeImpl(column, row_num, &IDataType::serializeAsTextXML, ostr, settings);
|
serializeImpl(column, row_num, &IDataType::serializeAsTextXML, ostr, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTypeLowCardinality::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
serializeImpl(column, row_num, &IDataType::serializeProtobuf, protobuf, value_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeLowCardinality::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
deserializeImpl(column, &IDataType::deserializeProtobuf, protobuf, true, row_added);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
row_added = false;
|
|
||||||
auto & low_cardinality_column= getColumnLowCardinality(column);
|
|
||||||
auto nested_column = low_cardinality_column.getDictionary().getNestedColumn();
|
|
||||||
auto temp_column = nested_column->cloneEmpty();
|
|
||||||
size_t unique_row_number = low_cardinality_column.getIndexes().getUInt(low_cardinality_column.size() - 1);
|
|
||||||
temp_column->insertFrom(*nested_column, unique_row_number);
|
|
||||||
bool dummy;
|
|
||||||
dictionary_type.get()->deserializeProtobuf(*temp_column, protobuf, false, dummy);
|
|
||||||
low_cardinality_column.popBack(1);
|
|
||||||
low_cardinality_column.insertFromFullColumn(*temp_column, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename... Params, typename... Args>
|
template <typename... Params, typename... Args>
|
||||||
void DataTypeLowCardinality::serializeImpl(
|
void DataTypeLowCardinality::serializeImpl(
|
||||||
const IColumn & column, size_t row_num, DataTypeLowCardinality::SerializeFunctionPtr<Params...> func, Args &&... args) const
|
const IColumn & column, size_t row_num, DataTypeLowCardinality::SerializeFunctionPtr<Params...> func, Args &&... args) const
|
||||||
|
@ -67,8 +67,6 @@ public:
|
|||||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
|
@ -336,16 +336,6 @@ void DataTypeMap::deserializeBinaryBulkWithMultipleStreamsImpl(
|
|||||||
nested->deserializeBinaryBulkWithMultipleStreams(column_map.getNestedColumnPtr(), limit, settings, state, cache);
|
nested->deserializeBinaryBulkWithMultipleStreams(column_map.getNestedColumnPtr(), limit, settings, state, cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTypeMap::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
nested->serializeProtobuf(extractNestedColumn(column), row_num, protobuf, value_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeMap::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
nested->deserializeProtobuf(extractNestedColumn(column), protobuf, allow_add_row, row_added);
|
|
||||||
}
|
|
||||||
|
|
||||||
MutableColumnPtr DataTypeMap::createColumn() const
|
MutableColumnPtr DataTypeMap::createColumn() const
|
||||||
{
|
{
|
||||||
return ColumnMap::create(nested->createColumn());
|
return ColumnMap::create(nested->createColumn());
|
||||||
|
@ -76,9 +76,6 @@ public:
|
|||||||
DeserializeBinaryBulkStatePtr & state,
|
DeserializeBinaryBulkStatePtr & state,
|
||||||
SubstreamsCache * cache) const override;
|
SubstreamsCache * cache) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
Field getDefault() const override;
|
Field getDefault() const override;
|
||||||
@ -92,6 +89,8 @@ public:
|
|||||||
const DataTypePtr & getValueType() const { return value_type; }
|
const DataTypePtr & getValueType() const { return value_type; }
|
||||||
DataTypes getKeyValueTypes() const { return {key_type, value_type}; }
|
DataTypes getKeyValueTypes() const { return {key_type, value_type}; }
|
||||||
|
|
||||||
|
const DataTypePtr & getNestedType() const { return nested; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename Writer>
|
template <typename Writer>
|
||||||
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const;
|
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const;
|
||||||
|
@ -486,33 +486,6 @@ void DataTypeNullable::serializeTextXML(const IColumn & column, size_t row_num,
|
|||||||
nested_data_type->serializeAsTextXML(col.getNestedColumn(), row_num, ostr, settings);
|
nested_data_type->serializeAsTextXML(col.getNestedColumn(), row_num, ostr, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTypeNullable::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
|
|
||||||
if (!col.isNullAt(row_num))
|
|
||||||
nested_data_type->serializeProtobuf(col.getNestedColumn(), row_num, protobuf, value_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeNullable::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
|
||||||
IColumn & nested_column = col.getNestedColumn();
|
|
||||||
size_t old_size = nested_column.size();
|
|
||||||
try
|
|
||||||
{
|
|
||||||
nested_data_type->deserializeProtobuf(nested_column, protobuf, allow_add_row, row_added);
|
|
||||||
if (row_added)
|
|
||||||
col.getNullMapData().push_back(0);
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
nested_column.popBack(nested_column.size() - old_size);
|
|
||||||
col.getNullMapData().resize_assume_reserved(old_size);
|
|
||||||
row_added = false;
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MutableColumnPtr DataTypeNullable::createColumn() const
|
MutableColumnPtr DataTypeNullable::createColumn() const
|
||||||
{
|
{
|
||||||
return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create());
|
return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create());
|
||||||
|
@ -73,9 +73,6 @@ public:
|
|||||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
Field getDefault() const override;
|
Field getDefault() const override;
|
||||||
|
@ -8,8 +8,6 @@
|
|||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <Common/assert_cast.h>
|
#include <Common/assert_cast.h>
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -205,34 +203,6 @@ void DataTypeNumberBase<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer &
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void DataTypeNumberBase<T>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
value_index = static_cast<bool>(protobuf.writeNumber(assert_cast<const ColumnVector<T> &>(column).getData()[row_num]));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void DataTypeNumberBase<T>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
T value;
|
|
||||||
if (!protobuf.readNumber(value))
|
|
||||||
return;
|
|
||||||
|
|
||||||
auto & container = typeid_cast<ColumnVector<T> &>(column).getData();
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
container.emplace_back(value);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
container.back() = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
MutableColumnPtr DataTypeNumberBase<T>::createColumn() const
|
MutableColumnPtr DataTypeNumberBase<T>::createColumn() const
|
||||||
{
|
{
|
||||||
|
@ -45,9 +45,6 @@ public:
|
|||||||
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
|
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
|
||||||
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
|
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
bool isParametric() const override { return false; }
|
bool isParametric() const override { return false; }
|
||||||
|
@ -9,8 +9,6 @@
|
|||||||
#include <Core/Field.h>
|
#include <Core/Field.h>
|
||||||
|
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
|
|
||||||
@ -311,55 +309,6 @@ void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, con
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void DataTypeString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
value_index = static_cast<bool>(protobuf.writeString(assert_cast<const ColumnString &>(column).getDataAt(row_num)));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void DataTypeString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
auto & column_string = assert_cast<ColumnString &>(column);
|
|
||||||
ColumnString::Chars & data = column_string.getChars();
|
|
||||||
ColumnString::Offsets & offsets = column_string.getOffsets();
|
|
||||||
size_t old_size = offsets.size();
|
|
||||||
try
|
|
||||||
{
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
if (protobuf.readStringInto(data))
|
|
||||||
{
|
|
||||||
data.emplace_back(0);
|
|
||||||
offsets.emplace_back(data.size());
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
data.resize_assume_reserved(offsets.back());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ColumnString::Chars temp_data;
|
|
||||||
if (protobuf.readStringInto(temp_data))
|
|
||||||
{
|
|
||||||
temp_data.emplace_back(0);
|
|
||||||
column_string.popBack(1);
|
|
||||||
old_size = offsets.size();
|
|
||||||
data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end());
|
|
||||||
offsets.emplace_back(data.size());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
offsets.resize_assume_reserved(old_size);
|
|
||||||
data.resize_assume_reserved(offsets.back());
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Field DataTypeString::getDefault() const
|
Field DataTypeString::getDefault() const
|
||||||
{
|
{
|
||||||
return String();
|
return String();
|
||||||
|
@ -47,9 +47,6 @@ public:
|
|||||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
Field getDefault() const override;
|
Field getDefault() const override;
|
||||||
|
@ -504,33 +504,6 @@ void DataTypeTuple::deserializeBinaryBulkWithMultipleStreamsImpl(
|
|||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTypeTuple::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
for (; value_index < elems.size(); ++value_index)
|
|
||||||
{
|
|
||||||
size_t stored = 0;
|
|
||||||
elems[value_index]->serializeProtobuf(extractElementColumn(column, value_index), row_num, protobuf, stored);
|
|
||||||
if (!stored)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeTuple::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
bool all_elements_get_row = true;
|
|
||||||
addElementSafe(elems, column, [&]
|
|
||||||
{
|
|
||||||
for (const auto & i : ext::range(0, ext::size(elems)))
|
|
||||||
{
|
|
||||||
bool element_row_added;
|
|
||||||
elems[i]->deserializeProtobuf(extractElementColumn(column, i), protobuf, allow_add_row, element_row_added);
|
|
||||||
all_elements_get_row &= element_row_added;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
row_added = all_elements_get_row;
|
|
||||||
}
|
|
||||||
|
|
||||||
MutableColumnPtr DataTypeTuple::createColumn() const
|
MutableColumnPtr DataTypeTuple::createColumn() const
|
||||||
{
|
{
|
||||||
size_t size = elems.size();
|
size_t size = elems.size();
|
||||||
|
@ -81,9 +81,6 @@ public:
|
|||||||
DeserializeBinaryBulkStatePtr & state,
|
DeserializeBinaryBulkStatePtr & state,
|
||||||
SubstreamsCache * cache) const override;
|
SubstreamsCache * cache) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override;
|
MutableColumnPtr createColumn() const override;
|
||||||
|
|
||||||
Field getDefault() const override;
|
Field getDefault() const override;
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
#include <DataTypes/DataTypeUUID.h>
|
#include <DataTypes/DataTypeUUID.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <Common/assert_cast.h>
|
#include <Common/assert_cast.h>
|
||||||
@ -79,30 +77,6 @@ void DataTypeUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
|
|||||||
assert_cast<ColumnUInt128 &>(column).getData().push_back(value);
|
assert_cast<ColumnUInt128 &>(column).getData().push_back(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTypeUUID::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
value_index = static_cast<bool>(protobuf.writeUUID(UUID(assert_cast<const ColumnUInt128 &>(column).getData()[row_num])));
|
|
||||||
}
|
|
||||||
|
|
||||||
void DataTypeUUID::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
UUID uuid;
|
|
||||||
if (!protobuf.readUUID(uuid))
|
|
||||||
return;
|
|
||||||
|
|
||||||
auto & container = assert_cast<ColumnUInt128 &>(column).getData();
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
container.emplace_back(uuid);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
container.back() = uuid;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DataTypeUUID::equals(const IDataType & rhs) const
|
bool DataTypeUUID::equals(const IDataType & rhs) const
|
||||||
{
|
{
|
||||||
return typeid(rhs) == typeid(*this);
|
return typeid(rhs) == typeid(*this);
|
||||||
|
@ -26,8 +26,6 @@ public:
|
|||||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
bool canBeUsedInBitOperations() const override { return true; }
|
bool canBeUsedInBitOperations() const override { return true; }
|
||||||
bool canBeInsideNullable() const override { return true; }
|
bool canBeInsideNullable() const override { return true; }
|
||||||
|
@ -4,8 +4,6 @@
|
|||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <Core/DecimalFunctions.h>
|
#include <Core/DecimalFunctions.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <Formats/ProtobufReader.h>
|
|
||||||
#include <Formats/ProtobufWriter.h>
|
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <IO/readDecimalText.h>
|
#include <IO/readDecimalText.h>
|
||||||
@ -111,33 +109,6 @@ T DataTypeDecimal<T>::parseFromString(const String & str) const
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void DataTypeDecimal<T>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
|
||||||
{
|
|
||||||
if (value_index)
|
|
||||||
return;
|
|
||||||
value_index = static_cast<bool>(protobuf.writeDecimal(assert_cast<const ColumnType &>(column).getData()[row_num], this->scale));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void DataTypeDecimal<T>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
|
|
||||||
{
|
|
||||||
row_added = false;
|
|
||||||
T decimal;
|
|
||||||
if (!protobuf.readDecimal(decimal, this->precision, this->scale))
|
|
||||||
return;
|
|
||||||
|
|
||||||
auto & container = assert_cast<ColumnType &>(column).getData();
|
|
||||||
if (allow_add_row)
|
|
||||||
{
|
|
||||||
container.emplace_back(decimal);
|
|
||||||
row_added = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
container.back() = decimal;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static DataTypePtr create(const ASTPtr & arguments)
|
static DataTypePtr create(const ASTPtr & arguments)
|
||||||
{
|
{
|
||||||
|
@ -46,9 +46,6 @@ public:
|
|||||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||||
|
|
||||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
|
||||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
|
||||||
|
|
||||||
bool equals(const IDataType & rhs) const override;
|
bool equals(const IDataType & rhs) const override;
|
||||||
|
|
||||||
T parseFromString(const String & str) const;
|
T parseFromString(const String & str) const;
|
||||||
|
@ -26,9 +26,6 @@ class Field;
|
|||||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||||
using DataTypes = std::vector<DataTypePtr>;
|
using DataTypes = std::vector<DataTypePtr>;
|
||||||
|
|
||||||
class ProtobufReader;
|
|
||||||
class ProtobufWriter;
|
|
||||||
|
|
||||||
struct NameAndTypePair;
|
struct NameAndTypePair;
|
||||||
|
|
||||||
|
|
||||||
@ -235,10 +232,6 @@ public:
|
|||||||
/// If method will throw an exception, then column will be in same state as before call to method.
|
/// If method will throw an exception, then column will be in same state as before call to method.
|
||||||
virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0;
|
virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0;
|
||||||
|
|
||||||
/** Serialize to a protobuf. */
|
|
||||||
virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0;
|
|
||||||
virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0;
|
|
||||||
|
|
||||||
/** Text serialization with escaping but without quoting.
|
/** Text serialization with escaping but without quoting.
|
||||||
*/
|
*/
|
||||||
void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const;
|
void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const;
|
||||||
|
@ -34,8 +34,6 @@ public:
|
|||||||
void deserializeBinaryBulk(IColumn &, ReadBuffer &, size_t, double) const override { throwNoSerialization(); }
|
void deserializeBinaryBulk(IColumn &, ReadBuffer &, size_t, double) const override { throwNoSerialization(); }
|
||||||
void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||||
void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||||
void serializeProtobuf(const IColumn &, size_t, ProtobufWriter &, size_t &) const override { throwNoSerialization(); }
|
|
||||||
void deserializeProtobuf(IColumn &, ProtobufReader &, bool, bool &) const override { throwNoSerialization(); }
|
|
||||||
|
|
||||||
MutableColumnPtr createColumn() const override
|
MutableColumnPtr createColumn() const override
|
||||||
{
|
{
|
||||||
|
@ -401,10 +401,16 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
|
|||||||
{
|
{
|
||||||
auto builder = FunctionFactory::instance().tryGet(func->name, context);
|
auto builder = FunctionFactory::instance().tryGet(func->name, context);
|
||||||
auto function = builder->build({});
|
auto function = builder->build({});
|
||||||
auto result = function->execute({}, {}, 0);
|
function->prepare({});
|
||||||
|
|
||||||
|
/// We assume that function will not take arguments and will return constant value like tcpPort or hostName
|
||||||
|
/// Such functions will return column with size equal to input_rows_count.
|
||||||
|
size_t input_rows_count = 1;
|
||||||
|
auto result = function->execute({}, function->getResultType(), input_rows_count);
|
||||||
|
|
||||||
Field value;
|
Field value;
|
||||||
result->get(0, value);
|
result->get(0, value);
|
||||||
|
|
||||||
AutoPtr<Text> text_value(doc->createTextNode(getFieldAsString(value)));
|
AutoPtr<Text> text_value(doc->createTextNode(getFieldAsString(value)));
|
||||||
current_xml_element->appendChild(text_value);
|
current_xml_element->appendChild(text_value);
|
||||||
}
|
}
|
||||||
|
@ -120,7 +120,6 @@ struct FormatSettings
|
|||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
bool write_row_delimiters = true;
|
|
||||||
/**
|
/**
|
||||||
* Some buffers (kafka / rabbit) split the rows internally using callback,
|
* Some buffers (kafka / rabbit) split the rows internally using callback,
|
||||||
* and always send one row per message, so we can push there formats
|
* and always send one row per message, so we can push there formats
|
||||||
@ -128,7 +127,7 @@ struct FormatSettings
|
|||||||
* we have to enforce exporting at most one row in the format output,
|
* we have to enforce exporting at most one row in the format output,
|
||||||
* because Protobuf without delimiters is not generally useful.
|
* because Protobuf without delimiters is not generally useful.
|
||||||
*/
|
*/
|
||||||
bool allow_many_rows_no_delimiters = false;
|
bool allow_multiple_rows_without_delimiter = false;
|
||||||
} protobuf;
|
} protobuf;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
|
@ -1,55 +0,0 @@
|
|||||||
#include "ProtobufColumnMatcher.h"
|
|
||||||
#if USE_PROTOBUF
|
|
||||||
#include <Common/Exception.h>
|
|
||||||
#include <google/protobuf/descriptor.pb.h>
|
|
||||||
#include <Poco/String.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
namespace ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
String columnNameToSearchableForm(const String & str)
|
|
||||||
{
|
|
||||||
return Poco::replace(Poco::toUpper(str), ".", "_");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace ProtobufColumnMatcher
|
|
||||||
{
|
|
||||||
namespace details
|
|
||||||
{
|
|
||||||
ColumnNameMatcher::ColumnNameMatcher(const std::vector<String> & column_names) : column_usage(column_names.size())
|
|
||||||
{
|
|
||||||
column_usage.resize(column_names.size(), false);
|
|
||||||
for (size_t i = 0; i != column_names.size(); ++i)
|
|
||||||
column_name_to_index_map.emplace(columnNameToSearchableForm(column_names[i]), i);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ColumnNameMatcher::findColumn(const String & field_name)
|
|
||||||
{
|
|
||||||
auto it = column_name_to_index_map.find(columnNameToSearchableForm(field_name));
|
|
||||||
if (it == column_name_to_index_map.end())
|
|
||||||
return -1;
|
|
||||||
size_t column_index = it->second;
|
|
||||||
if (column_usage[column_index])
|
|
||||||
return -1;
|
|
||||||
column_usage[column_index] = true;
|
|
||||||
return column_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
void throwNoCommonColumns()
|
|
||||||
{
|
|
||||||
throw Exception("No common columns with provided protobuf schema", ErrorCodes::NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
#endif
|
|
@ -1,196 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#if !defined(ARCADIA_BUILD)
|
|
||||||
# include "config_formats.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if USE_PROTOBUF
|
|
||||||
# include <memory>
|
|
||||||
# include <unordered_map>
|
|
||||||
# include <vector>
|
|
||||||
# include <common/types.h>
|
|
||||||
# include <boost/blank.hpp>
|
|
||||||
# include <google/protobuf/descriptor.h>
|
|
||||||
# include <google/protobuf/descriptor.pb.h>
|
|
||||||
|
|
||||||
namespace google
|
|
||||||
{
|
|
||||||
namespace protobuf
|
|
||||||
{
|
|
||||||
class Descriptor;
|
|
||||||
class FieldDescriptor;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
namespace ProtobufColumnMatcher
|
|
||||||
{
|
|
||||||
struct DefaultTraits
|
|
||||||
{
|
|
||||||
using MessageData = boost::blank;
|
|
||||||
using FieldData = boost::blank;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Traits = DefaultTraits>
|
|
||||||
struct Message;
|
|
||||||
|
|
||||||
/// Represents a field in a protobuf message.
|
|
||||||
template <typename Traits = DefaultTraits>
|
|
||||||
struct Field
|
|
||||||
{
|
|
||||||
const google::protobuf::FieldDescriptor * field_descriptor = nullptr;
|
|
||||||
|
|
||||||
/// Same as field_descriptor->number().
|
|
||||||
UInt32 field_number = 0;
|
|
||||||
|
|
||||||
/// Index of a column; either 'column_index' or 'nested_message' is set.
|
|
||||||
size_t column_index = -1;
|
|
||||||
std::unique_ptr<Message<Traits>> nested_message;
|
|
||||||
|
|
||||||
typename Traits::FieldData data;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Represents a protobuf message.
|
|
||||||
template <typename Traits>
|
|
||||||
struct Message
|
|
||||||
{
|
|
||||||
std::vector<Field<Traits>> fields;
|
|
||||||
|
|
||||||
/// Points to the parent message if this is a nested message.
|
|
||||||
Message * parent = nullptr;
|
|
||||||
size_t index_in_parent = -1;
|
|
||||||
|
|
||||||
typename Traits::MessageData data;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Utility function finding matching columns for each protobuf field.
|
|
||||||
template <typename Traits = DefaultTraits>
|
|
||||||
static std::unique_ptr<Message<Traits>> matchColumns(
|
|
||||||
const std::vector<String> & column_names,
|
|
||||||
const google::protobuf::Descriptor * message_type);
|
|
||||||
|
|
||||||
template <typename Traits = DefaultTraits>
|
|
||||||
static std::unique_ptr<Message<Traits>> matchColumns(
|
|
||||||
const std::vector<String> & column_names,
|
|
||||||
const google::protobuf::Descriptor * message_type,
|
|
||||||
std::vector<const google::protobuf::FieldDescriptor *> & field_descriptors_without_match);
|
|
||||||
|
|
||||||
namespace details
|
|
||||||
{
|
|
||||||
[[noreturn]] void throwNoCommonColumns();
|
|
||||||
|
|
||||||
class ColumnNameMatcher
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ColumnNameMatcher(const std::vector<String> & column_names);
|
|
||||||
size_t findColumn(const String & field_name);
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::unordered_map<String, size_t> column_name_to_index_map;
|
|
||||||
std::vector<bool> column_usage;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Traits>
|
|
||||||
std::unique_ptr<Message<Traits>> matchColumnsRecursive(
|
|
||||||
ColumnNameMatcher & name_matcher,
|
|
||||||
const google::protobuf::Descriptor * message_type,
|
|
||||||
const String & field_name_prefix,
|
|
||||||
std::vector<const google::protobuf::FieldDescriptor *> * field_descriptors_without_match)
|
|
||||||
{
|
|
||||||
auto message = std::make_unique<Message<Traits>>();
|
|
||||||
for (int i = 0; i != message_type->field_count(); ++i)
|
|
||||||
{
|
|
||||||
const google::protobuf::FieldDescriptor * field_descriptor = message_type->field(i);
|
|
||||||
if ((field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_MESSAGE)
|
|
||||||
|| (field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP))
|
|
||||||
{
|
|
||||||
auto nested_message = matchColumnsRecursive<Traits>(
|
|
||||||
name_matcher,
|
|
||||||
field_descriptor->message_type(),
|
|
||||||
field_name_prefix + field_descriptor->name() + ".",
|
|
||||||
field_descriptors_without_match);
|
|
||||||
if (nested_message)
|
|
||||||
{
|
|
||||||
message->fields.emplace_back();
|
|
||||||
auto & current_field = message->fields.back();
|
|
||||||
current_field.field_number = field_descriptor->number();
|
|
||||||
current_field.field_descriptor = field_descriptor;
|
|
||||||
current_field.nested_message = std::move(nested_message);
|
|
||||||
current_field.nested_message->parent = message.get();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
size_t column_index = name_matcher.findColumn(field_name_prefix + field_descriptor->name());
|
|
||||||
if (column_index == static_cast<size_t>(-1))
|
|
||||||
{
|
|
||||||
if (field_descriptors_without_match)
|
|
||||||
field_descriptors_without_match->emplace_back(field_descriptor);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
message->fields.emplace_back();
|
|
||||||
auto & current_field = message->fields.back();
|
|
||||||
current_field.field_number = field_descriptor->number();
|
|
||||||
current_field.field_descriptor = field_descriptor;
|
|
||||||
current_field.column_index = column_index;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (message->fields.empty())
|
|
||||||
return nullptr;
|
|
||||||
|
|
||||||
// Columns should be sorted by field_number, it's necessary for writing protobufs and useful reading protobufs.
|
|
||||||
std::sort(message->fields.begin(), message->fields.end(), [](const Field<Traits> & left, const Field<Traits> & right)
|
|
||||||
{
|
|
||||||
return left.field_number < right.field_number;
|
|
||||||
});
|
|
||||||
|
|
||||||
for (size_t i = 0; i != message->fields.size(); ++i)
|
|
||||||
{
|
|
||||||
auto & field = message->fields[i];
|
|
||||||
if (field.nested_message)
|
|
||||||
field.nested_message->index_in_parent = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
return message;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Data>
|
|
||||||
static std::unique_ptr<Message<Data>> matchColumnsImpl(
|
|
||||||
const std::vector<String> & column_names,
|
|
||||||
const google::protobuf::Descriptor * message_type,
|
|
||||||
std::vector<const google::protobuf::FieldDescriptor *> * field_descriptors_without_match)
|
|
||||||
{
|
|
||||||
details::ColumnNameMatcher name_matcher(column_names);
|
|
||||||
auto message = details::matchColumnsRecursive<Data>(name_matcher, message_type, "", field_descriptors_without_match);
|
|
||||||
if (!message)
|
|
||||||
details::throwNoCommonColumns();
|
|
||||||
return message;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Data>
|
|
||||||
static std::unique_ptr<Message<Data>> matchColumns(
|
|
||||||
const std::vector<String> & column_names,
|
|
||||||
const google::protobuf::Descriptor * message_type)
|
|
||||||
{
|
|
||||||
return matchColumnsImpl<Data>(column_names, message_type, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Data>
|
|
||||||
static std::unique_ptr<Message<Data>> matchColumns(
|
|
||||||
const std::vector<String> & column_names,
|
|
||||||
const google::protobuf::Descriptor * message_type,
|
|
||||||
std::vector<const google::protobuf::FieldDescriptor *> & field_descriptors_without_match)
|
|
||||||
{
|
|
||||||
return matchColumnsImpl<Data>(column_names, message_type, &field_descriptors_without_match);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
File diff suppressed because it is too large
Load Diff
@ -1,258 +1,72 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <common/DayNum.h>
|
|
||||||
#include <Common/PODArray.h>
|
|
||||||
#include <Common/UInt128.h>
|
|
||||||
#include <Core/UUID.h>
|
|
||||||
|
|
||||||
#if !defined(ARCADIA_BUILD)
|
#if !defined(ARCADIA_BUILD)
|
||||||
# include "config_formats.h"
|
# include "config_formats.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if USE_PROTOBUF
|
#if USE_PROTOBUF
|
||||||
# include <memory>
|
# include <Common/PODArray.h>
|
||||||
# include <IO/ReadBuffer.h>
|
# include <IO/ReadBuffer.h>
|
||||||
# include <boost/noncopyable.hpp>
|
|
||||||
# include "ProtobufColumnMatcher.h"
|
|
||||||
|
|
||||||
namespace google
|
|
||||||
{
|
|
||||||
namespace protobuf
|
|
||||||
{
|
|
||||||
class Descriptor;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
class Arena;
|
|
||||||
class IAggregateFunction;
|
|
||||||
class ReadBuffer;
|
class ReadBuffer;
|
||||||
using AggregateDataPtr = char *;
|
|
||||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
|
||||||
|
|
||||||
|
|
||||||
/** Deserializes a protobuf, tries to cast data types if necessary.
|
|
||||||
*/
|
|
||||||
class ProtobufReader : private boost::noncopyable
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ProtobufReader(ReadBuffer & in_, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_);
|
|
||||||
~ProtobufReader();
|
|
||||||
|
|
||||||
/// Should be called when we start reading a new message.
|
|
||||||
bool startMessage();
|
|
||||||
|
|
||||||
/// Ends reading a message.
|
|
||||||
void endMessage(bool ignore_errors = false);
|
|
||||||
|
|
||||||
/// Reads the column index.
|
|
||||||
/// The function returns false if there are no more columns to read (call endMessage() in this case).
|
|
||||||
bool readColumnIndex(size_t & column_index);
|
|
||||||
|
|
||||||
/// Reads a value which should be put to column at index received with readColumnIndex().
|
|
||||||
/// The function returns false if there are no more values to read now (call readColumnIndex() in this case).
|
|
||||||
bool readNumber(Int8 & value) { return current_converter->readInt8(value); }
|
|
||||||
bool readNumber(UInt8 & value) { return current_converter->readUInt8(value); }
|
|
||||||
bool readNumber(Int16 & value) { return current_converter->readInt16(value); }
|
|
||||||
bool readNumber(UInt16 & value) { return current_converter->readUInt16(value); }
|
|
||||||
bool readNumber(Int32 & value) { return current_converter->readInt32(value); }
|
|
||||||
bool readNumber(UInt32 & value) { return current_converter->readUInt32(value); }
|
|
||||||
bool readNumber(Int64 & value) { return current_converter->readInt64(value); }
|
|
||||||
bool readNumber(UInt64 & value) { return current_converter->readUInt64(value); }
|
|
||||||
bool readNumber(Int128 & value) { return current_converter->readInt128(value); }
|
|
||||||
bool readNumber(UInt128 & value) { return current_converter->readUInt128(value); }
|
|
||||||
bool readNumber(Int256 & value) { return current_converter->readInt256(value); }
|
|
||||||
bool readNumber(UInt256 & value) { return current_converter->readUInt256(value); }
|
|
||||||
bool readNumber(Float32 & value) { return current_converter->readFloat32(value); }
|
|
||||||
bool readNumber(Float64 & value) { return current_converter->readFloat64(value); }
|
|
||||||
|
|
||||||
bool readStringInto(PaddedPODArray<UInt8> & str) { return current_converter->readStringInto(str); }
|
|
||||||
|
|
||||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & name_value_pairs) { current_converter->prepareEnumMapping8(name_value_pairs); }
|
|
||||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & name_value_pairs) { current_converter->prepareEnumMapping16(name_value_pairs); }
|
|
||||||
bool readEnum(Int8 & value) { return current_converter->readEnum8(value); }
|
|
||||||
bool readEnum(Int16 & value) { return current_converter->readEnum16(value); }
|
|
||||||
|
|
||||||
bool readUUID(UUID & uuid) { return current_converter->readUUID(uuid); }
|
|
||||||
bool readDate(DayNum & date) { return current_converter->readDate(date); }
|
|
||||||
bool readDateTime(time_t & tm) { return current_converter->readDateTime(tm); }
|
|
||||||
bool readDateTime64(DateTime64 & tm, UInt32 scale) { return current_converter->readDateTime64(tm, scale); }
|
|
||||||
|
|
||||||
bool readDecimal(Decimal32 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal32(decimal, precision, scale); }
|
|
||||||
bool readDecimal(Decimal64 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal64(decimal, precision, scale); }
|
|
||||||
bool readDecimal(Decimal128 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal128(decimal, precision, scale); }
|
|
||||||
bool readDecimal(Decimal256 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal256(decimal, precision, scale); }
|
|
||||||
|
|
||||||
bool readAggregateFunction(const AggregateFunctionPtr & function, AggregateDataPtr place, Arena & arena) { return current_converter->readAggregateFunction(function, place, arena); }
|
|
||||||
|
|
||||||
/// Call it after calling one of the read*() function to determine if there are more values available for reading.
|
|
||||||
bool ALWAYS_INLINE canReadMoreValues() const { return simple_reader.canReadMoreValues(); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
class SimpleReader
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimpleReader(ReadBuffer & in_, const bool use_length_delimiters_);
|
|
||||||
bool startMessage();
|
|
||||||
void endMessage(bool ignore_errors);
|
|
||||||
void startNestedMessage();
|
|
||||||
void endNestedMessage();
|
|
||||||
bool readFieldNumber(UInt32 & field_number);
|
|
||||||
bool readInt(Int64 & value);
|
|
||||||
bool readSInt(Int64 & value);
|
|
||||||
bool readUInt(UInt64 & value);
|
|
||||||
template<typename T> bool readFixed(T & value);
|
|
||||||
bool readStringInto(PaddedPODArray<UInt8> & str);
|
|
||||||
|
|
||||||
bool ALWAYS_INLINE canReadMoreValues() const { return cursor < field_end; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
void readBinary(void * data, size_t size);
|
|
||||||
void ignore(UInt64 num_bytes);
|
|
||||||
void moveCursorBackward(UInt64 num_bytes);
|
|
||||||
|
|
||||||
UInt64 ALWAYS_INLINE readVarint()
|
|
||||||
{
|
|
||||||
char c;
|
|
||||||
in.readStrict(c);
|
|
||||||
UInt64 first_byte = static_cast<UInt8>(c);
|
|
||||||
++cursor;
|
|
||||||
if (likely(!(c & 0x80)))
|
|
||||||
return first_byte;
|
|
||||||
return continueReadingVarint(first_byte);
|
|
||||||
}
|
|
||||||
|
|
||||||
UInt64 continueReadingVarint(UInt64 first_byte);
|
|
||||||
void ignoreVarint();
|
|
||||||
void ignoreGroup();
|
|
||||||
[[noreturn]] void throwUnknownFormat() const;
|
|
||||||
|
|
||||||
ReadBuffer & in;
|
|
||||||
Int64 cursor;
|
|
||||||
size_t current_message_level;
|
|
||||||
Int64 current_message_end;
|
|
||||||
std::vector<Int64> parent_message_ends;
|
|
||||||
Int64 field_end;
|
|
||||||
Int64 last_string_pos;
|
|
||||||
const bool use_length_delimiters;
|
|
||||||
};
|
|
||||||
|
|
||||||
class IConverter
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual ~IConverter() = default;
|
|
||||||
virtual bool readStringInto(PaddedPODArray<UInt8> &) = 0;
|
|
||||||
virtual bool readInt8(Int8&) = 0;
|
|
||||||
virtual bool readUInt8(UInt8 &) = 0;
|
|
||||||
virtual bool readInt16(Int16 &) = 0;
|
|
||||||
virtual bool readUInt16(UInt16 &) = 0;
|
|
||||||
virtual bool readInt32(Int32 &) = 0;
|
|
||||||
virtual bool readUInt32(UInt32 &) = 0;
|
|
||||||
virtual bool readInt64(Int64 &) = 0;
|
|
||||||
virtual bool readUInt64(UInt64 &) = 0;
|
|
||||||
virtual bool readInt128(Int128 &) = 0;
|
|
||||||
virtual bool readUInt128(UInt128 &) = 0;
|
|
||||||
|
|
||||||
virtual bool readInt256(Int256 &) = 0;
|
|
||||||
virtual bool readUInt256(UInt256 &) = 0;
|
|
||||||
|
|
||||||
virtual bool readFloat32(Float32 &) = 0;
|
|
||||||
virtual bool readFloat64(Float64 &) = 0;
|
|
||||||
virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) = 0;
|
|
||||||
virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) = 0;
|
|
||||||
virtual bool readEnum8(Int8 &) = 0;
|
|
||||||
virtual bool readEnum16(Int16 &) = 0;
|
|
||||||
virtual bool readUUID(UUID &) = 0;
|
|
||||||
virtual bool readDate(DayNum &) = 0;
|
|
||||||
virtual bool readDateTime(time_t &) = 0;
|
|
||||||
virtual bool readDateTime64(DateTime64 &, UInt32) = 0;
|
|
||||||
virtual bool readDecimal32(Decimal32 &, UInt32, UInt32) = 0;
|
|
||||||
virtual bool readDecimal64(Decimal64 &, UInt32, UInt32) = 0;
|
|
||||||
virtual bool readDecimal128(Decimal128 &, UInt32, UInt32) = 0;
|
|
||||||
virtual bool readDecimal256(Decimal256 &, UInt32, UInt32) = 0;
|
|
||||||
virtual bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
class ConverterBaseImpl;
|
|
||||||
class ConverterFromString;
|
|
||||||
template<int field_type_id, typename FromType> class ConverterFromNumber;
|
|
||||||
class ConverterFromBool;
|
|
||||||
class ConverterFromEnum;
|
|
||||||
|
|
||||||
struct ColumnMatcherTraits
|
|
||||||
{
|
|
||||||
struct FieldData
|
|
||||||
{
|
|
||||||
std::unique_ptr<IConverter> converter;
|
|
||||||
};
|
|
||||||
struct MessageData
|
|
||||||
{
|
|
||||||
std::unordered_map<UInt32, const ProtobufColumnMatcher::Field<ColumnMatcherTraits>*> field_number_to_field_map;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
using Message = ProtobufColumnMatcher::Message<ColumnMatcherTraits>;
|
|
||||||
using Field = ProtobufColumnMatcher::Field<ColumnMatcherTraits>;
|
|
||||||
|
|
||||||
void setTraitsDataAfterMatchingColumns(Message * message);
|
|
||||||
|
|
||||||
template <int field_type_id>
|
|
||||||
std::unique_ptr<IConverter> createConverter(const google::protobuf::FieldDescriptor * field);
|
|
||||||
|
|
||||||
SimpleReader simple_reader;
|
|
||||||
std::unique_ptr<Message> root_message;
|
|
||||||
Message* current_message = nullptr;
|
|
||||||
size_t current_field_index = 0;
|
|
||||||
IConverter* current_converter = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
class Arena;
|
|
||||||
class IAggregateFunction;
|
|
||||||
class ReadBuffer;
|
|
||||||
using AggregateDataPtr = char *;
|
|
||||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
|
||||||
|
|
||||||
|
/// Utility class for reading in the Protobuf format.
|
||||||
|
/// Knows nothing about protobuf schemas, just provides useful functions to deserialize data.
|
||||||
class ProtobufReader
|
class ProtobufReader
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
bool startMessage() { return false; }
|
ProtobufReader(ReadBuffer & in_);
|
||||||
void endMessage() {}
|
|
||||||
bool readColumnIndex(size_t &) { return false; }
|
void startMessage(bool with_length_delimiter_);
|
||||||
bool readNumber(Int8 &) { return false; }
|
void endMessage(bool ignore_errors);
|
||||||
bool readNumber(UInt8 &) { return false; }
|
void startNestedMessage();
|
||||||
bool readNumber(Int16 &) { return false; }
|
void endNestedMessage();
|
||||||
bool readNumber(UInt16 &) { return false; }
|
|
||||||
bool readNumber(Int32 &) { return false; }
|
bool readFieldNumber(int & field_number);
|
||||||
bool readNumber(UInt32 &) { return false; }
|
Int64 readInt();
|
||||||
bool readNumber(Int64 &) { return false; }
|
Int64 readSInt();
|
||||||
bool readNumber(UInt64 &) { return false; }
|
UInt64 readUInt();
|
||||||
bool readNumber(Int128 &) { return false; }
|
template<typename T> T readFixed();
|
||||||
bool readNumber(UInt128 &) { return false; }
|
|
||||||
bool readNumber(Int256 &) { return false; }
|
void readString(String & str);
|
||||||
bool readNumber(UInt256 &) { return false; }
|
void readStringAndAppend(PaddedPODArray<UInt8> & str);
|
||||||
bool readNumber(Float32 &) { return false; }
|
|
||||||
bool readNumber(Float64 &) { return false; }
|
bool eof() const { return in.eof(); }
|
||||||
bool readStringInto(PaddedPODArray<UInt8> &) { return false; }
|
|
||||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> &) {}
|
private:
|
||||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> &) {}
|
void readBinary(void * data, size_t size);
|
||||||
bool readEnum(Int8 &) { return false; }
|
void ignore(UInt64 num_bytes);
|
||||||
bool readEnum(Int16 &) { return false; }
|
void ignoreAll();
|
||||||
bool readUUID(UUID &) { return false; }
|
void moveCursorBackward(UInt64 num_bytes);
|
||||||
bool readDate(DayNum &) { return false; }
|
|
||||||
bool readDateTime(time_t &) { return false; }
|
UInt64 ALWAYS_INLINE readVarint()
|
||||||
bool readDateTime64(DateTime64 & /*tm*/, UInt32 /*scale*/) { return false; }
|
{
|
||||||
bool readDecimal(Decimal32 &, UInt32, UInt32) { return false; }
|
char c;
|
||||||
bool readDecimal(Decimal64 &, UInt32, UInt32) { return false; }
|
in.readStrict(c);
|
||||||
bool readDecimal(Decimal128 &, UInt32, UInt32) { return false; }
|
UInt64 first_byte = static_cast<UInt8>(c);
|
||||||
bool readDecimal(Decimal256 &, UInt32, UInt32) { return false; }
|
++cursor;
|
||||||
bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) { return false; }
|
if (likely(!(c & 0x80)))
|
||||||
bool canReadMoreValues() const { return false; }
|
return first_byte;
|
||||||
|
return continueReadingVarint(first_byte);
|
||||||
|
}
|
||||||
|
|
||||||
|
UInt64 continueReadingVarint(UInt64 first_byte);
|
||||||
|
void ignoreVarint();
|
||||||
|
void ignoreGroup();
|
||||||
|
[[noreturn]] void throwUnknownFormat() const;
|
||||||
|
|
||||||
|
ReadBuffer & in;
|
||||||
|
Int64 cursor = 0;
|
||||||
|
bool root_message_has_length_delimiter = false;
|
||||||
|
size_t current_message_level = 0;
|
||||||
|
Int64 current_message_end = 0;
|
||||||
|
std::vector<Int64> parent_message_ends;
|
||||||
|
int field_number = 0;
|
||||||
|
int next_field_number = 0;
|
||||||
|
Int64 field_end = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
2921
src/Formats/ProtobufSerializer.cpp
Normal file
2921
src/Formats/ProtobufSerializer.cpp
Normal file
File diff suppressed because it is too large
Load Diff
52
src/Formats/ProtobufSerializer.h
Normal file
52
src/Formats/ProtobufSerializer.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if !defined(ARCADIA_BUILD)
|
||||||
|
# include "config_formats.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if USE_PROTOBUF
|
||||||
|
# include <Columns/IColumn.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace google::protobuf { class Descriptor; }
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
class ProtobufReader;
|
||||||
|
class ProtobufWriter;
|
||||||
|
class IDataType;
|
||||||
|
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||||
|
using DataTypes = std::vector<DataTypePtr>;
|
||||||
|
|
||||||
|
|
||||||
|
/// Utility class, does all the work for serialization in the Protobuf format.
|
||||||
|
class ProtobufSerializer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~ProtobufSerializer() = default;
|
||||||
|
|
||||||
|
virtual void setColumns(const ColumnPtr * columns, size_t num_columns) = 0;
|
||||||
|
virtual void writeRow(size_t row_num) = 0;
|
||||||
|
|
||||||
|
virtual void setColumns(const MutableColumnPtr * columns, size_t num_columns) = 0;
|
||||||
|
virtual void readRow(size_t row_num) = 0;
|
||||||
|
virtual void insertDefaults(size_t row_num) = 0;
|
||||||
|
|
||||||
|
static std::unique_ptr<ProtobufSerializer> create(
|
||||||
|
const Strings & column_names,
|
||||||
|
const DataTypes & data_types,
|
||||||
|
std::vector<size_t> & missing_column_indices,
|
||||||
|
const google::protobuf::Descriptor & message_descriptor,
|
||||||
|
bool with_length_delimiter,
|
||||||
|
ProtobufReader & reader);
|
||||||
|
|
||||||
|
static std::unique_ptr<ProtobufSerializer> create(
|
||||||
|
const Strings & column_names,
|
||||||
|
const DataTypes & data_types,
|
||||||
|
const google::protobuf::Descriptor & message_descriptor,
|
||||||
|
bool with_length_delimiter,
|
||||||
|
ProtobufWriter & writer);
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@ -1,29 +1,11 @@
|
|||||||
#include "ProtobufWriter.h"
|
#include "ProtobufWriter.h"
|
||||||
|
|
||||||
#if USE_PROTOBUF
|
#if USE_PROTOBUF
|
||||||
# include <cassert>
|
# include <IO/WriteHelpers.h>
|
||||||
# include <optional>
|
|
||||||
# include <math.h>
|
|
||||||
# include <AggregateFunctions/IAggregateFunction.h>
|
|
||||||
# include <DataTypes/DataTypesDecimal.h>
|
|
||||||
# include <IO/ReadHelpers.h>
|
|
||||||
# include <IO/WriteHelpers.h>
|
|
||||||
# include <boost/numeric/conversion/cast.hpp>
|
|
||||||
# include <google/protobuf/descriptor.h>
|
|
||||||
# include <google/protobuf/descriptor.pb.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
namespace ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int NOT_IMPLEMENTED;
|
|
||||||
extern const int NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD;
|
|
||||||
extern const int PROTOBUF_BAD_CAST;
|
|
||||||
extern const int PROTOBUF_FIELD_NOT_REPEATED;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
constexpr size_t MAX_VARINT_SIZE = 10;
|
constexpr size_t MAX_VARINT_SIZE = 10;
|
||||||
@ -81,66 +63,24 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
void writeFieldNumber(UInt32 field_number, WireType wire_type, PODArray<UInt8> & buf) { writeVarint((field_number << 3) | wire_type, buf); }
|
void writeFieldNumber(UInt32 field_number, WireType wire_type, PODArray<UInt8> & buf) { writeVarint((field_number << 3) | wire_type, buf); }
|
||||||
|
|
||||||
// Should we pack repeated values while storing them.
|
|
||||||
// It depends on type of the field in the protobuf schema and the syntax of that schema.
|
|
||||||
bool shouldPackRepeated(const google::protobuf::FieldDescriptor * field)
|
|
||||||
{
|
|
||||||
if (!field->is_repeated())
|
|
||||||
return false;
|
|
||||||
switch (field->type())
|
|
||||||
{
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_INT32:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_UINT32:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_SINT32:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_INT64:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_UINT64:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_SINT64:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_FIXED32:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_SFIXED32:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_FIXED64:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_FLOAT:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_DOUBLE:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_BOOL:
|
|
||||||
case google::protobuf::FieldDescriptor::TYPE_ENUM:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (field->options().has_packed())
|
|
||||||
return field->options().packed();
|
|
||||||
return field->file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Should we omit null values (zero for numbers / empty string for strings) while storing them.
|
|
||||||
bool shouldSkipNullValue(const google::protobuf::FieldDescriptor * field)
|
|
||||||
{
|
|
||||||
return field->is_optional() && (field->file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// SimpleWriter is an utility class to serialize protobufs.
|
ProtobufWriter::ProtobufWriter(WriteBuffer & out_)
|
||||||
// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
|
|
||||||
ProtobufWriter::SimpleWriter::SimpleWriter(WriteBuffer & out_, const bool use_length_delimiters_)
|
|
||||||
: out(out_)
|
: out(out_)
|
||||||
, current_piece_start(0)
|
|
||||||
, num_bytes_skipped(0)
|
|
||||||
, use_length_delimiters(use_length_delimiters_)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
ProtobufWriter::SimpleWriter::~SimpleWriter() = default;
|
ProtobufWriter::~ProtobufWriter() = default;
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::startMessage()
|
void ProtobufWriter::startMessage()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::endMessage()
|
void ProtobufWriter::endMessage(bool with_length_delimiter)
|
||||||
{
|
{
|
||||||
pieces.emplace_back(current_piece_start, buffer.size());
|
pieces.emplace_back(current_piece_start, buffer.size());
|
||||||
if (use_length_delimiters)
|
if (with_length_delimiter)
|
||||||
{
|
{
|
||||||
size_t size_of_message = buffer.size() - num_bytes_skipped;
|
size_t size_of_message = buffer.size() - num_bytes_skipped;
|
||||||
writeVarint(size_of_message, out);
|
writeVarint(size_of_message, out);
|
||||||
@ -154,7 +94,7 @@ void ProtobufWriter::SimpleWriter::endMessage()
|
|||||||
current_piece_start = 0;
|
current_piece_start = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::startNestedMessage()
|
void ProtobufWriter::startNestedMessage()
|
||||||
{
|
{
|
||||||
nested_infos.emplace_back(pieces.size(), num_bytes_skipped);
|
nested_infos.emplace_back(pieces.size(), num_bytes_skipped);
|
||||||
pieces.emplace_back(current_piece_start, buffer.size());
|
pieces.emplace_back(current_piece_start, buffer.size());
|
||||||
@ -167,7 +107,7 @@ void ProtobufWriter::SimpleWriter::startNestedMessage()
|
|||||||
num_bytes_skipped = NESTED_MESSAGE_PADDING;
|
num_bytes_skipped = NESTED_MESSAGE_PADDING;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::endNestedMessage(UInt32 field_number, bool is_group, bool skip_if_empty)
|
void ProtobufWriter::endNestedMessage(int field_number, bool is_group, bool skip_if_empty)
|
||||||
{
|
{
|
||||||
const auto & nested_info = nested_infos.back();
|
const auto & nested_info = nested_infos.back();
|
||||||
size_t num_pieces_at_start = nested_info.num_pieces_at_start;
|
size_t num_pieces_at_start = nested_info.num_pieces_at_start;
|
||||||
@ -203,8 +143,13 @@ void ProtobufWriter::SimpleWriter::endNestedMessage(UInt32 field_number, bool is
|
|||||||
num_bytes_skipped += num_bytes_skipped_at_start - num_bytes_inserted;
|
num_bytes_skipped += num_bytes_skipped_at_start - num_bytes_inserted;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
|
void ProtobufWriter::writeUInt(int field_number, UInt64 value)
|
||||||
{
|
{
|
||||||
|
if (in_repeated_pack)
|
||||||
|
{
|
||||||
|
writeVarint(value, buffer);
|
||||||
|
return;
|
||||||
|
}
|
||||||
size_t old_size = buffer.size();
|
size_t old_size = buffer.size();
|
||||||
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE);
|
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE);
|
||||||
UInt8 * ptr = buffer.data() + old_size;
|
UInt8 * ptr = buffer.data() + old_size;
|
||||||
@ -213,20 +158,27 @@ void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
|
|||||||
buffer.resize_assume_reserved(ptr - buffer.data());
|
buffer.resize_assume_reserved(ptr - buffer.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::writeInt(UInt32 field_number, Int64 value)
|
void ProtobufWriter::writeInt(int field_number, Int64 value)
|
||||||
{
|
{
|
||||||
writeUInt(field_number, static_cast<UInt64>(value));
|
writeUInt(field_number, static_cast<UInt64>(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::writeSInt(UInt32 field_number, Int64 value)
|
void ProtobufWriter::writeSInt(int field_number, Int64 value)
|
||||||
{
|
{
|
||||||
writeUInt(field_number, encodeZigZag(value));
|
writeUInt(field_number, encodeZigZag(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
|
void ProtobufWriter::writeFixed(int field_number, T value)
|
||||||
{
|
{
|
||||||
static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
|
static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
|
||||||
|
if (in_repeated_pack)
|
||||||
|
{
|
||||||
|
size_t old_size = buffer.size();
|
||||||
|
buffer.resize(old_size + sizeof(T));
|
||||||
|
memcpy(buffer.data() + old_size, &value, sizeof(T));
|
||||||
|
return;
|
||||||
|
}
|
||||||
constexpr WireType wire_type = (sizeof(T) == 4) ? BITS32 : BITS64;
|
constexpr WireType wire_type = (sizeof(T) == 4) ? BITS32 : BITS64;
|
||||||
size_t old_size = buffer.size();
|
size_t old_size = buffer.size();
|
||||||
buffer.reserve(old_size + MAX_VARINT_SIZE + sizeof(T));
|
buffer.reserve(old_size + MAX_VARINT_SIZE + sizeof(T));
|
||||||
@ -237,19 +189,27 @@ void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
|
|||||||
buffer.resize_assume_reserved(ptr - buffer.data());
|
buffer.resize_assume_reserved(ptr - buffer.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::writeString(UInt32 field_number, const StringRef & str)
|
template void ProtobufWriter::writeFixed<Int32>(int field_number, Int32 value);
|
||||||
|
template void ProtobufWriter::writeFixed<UInt32>(int field_number, UInt32 value);
|
||||||
|
template void ProtobufWriter::writeFixed<Int64>(int field_number, Int64 value);
|
||||||
|
template void ProtobufWriter::writeFixed<UInt64>(int field_number, UInt64 value);
|
||||||
|
template void ProtobufWriter::writeFixed<Float32>(int field_number, Float32 value);
|
||||||
|
template void ProtobufWriter::writeFixed<Float64>(int field_number, Float64 value);
|
||||||
|
|
||||||
|
void ProtobufWriter::writeString(int field_number, const std::string_view & str)
|
||||||
{
|
{
|
||||||
|
size_t length = str.length();
|
||||||
size_t old_size = buffer.size();
|
size_t old_size = buffer.size();
|
||||||
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + str.size);
|
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + length);
|
||||||
UInt8 * ptr = buffer.data() + old_size;
|
UInt8 * ptr = buffer.data() + old_size;
|
||||||
ptr = writeFieldNumber(field_number, LENGTH_DELIMITED, ptr);
|
ptr = writeFieldNumber(field_number, LENGTH_DELIMITED, ptr);
|
||||||
ptr = writeVarint(str.size, ptr);
|
ptr = writeVarint(length, ptr);
|
||||||
memcpy(ptr, str.data, str.size);
|
memcpy(ptr, str.data(), length);
|
||||||
ptr += str.size;
|
ptr += length;
|
||||||
buffer.resize_assume_reserved(ptr - buffer.data());
|
buffer.resize_assume_reserved(ptr - buffer.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::startRepeatedPack()
|
void ProtobufWriter::startRepeatedPack()
|
||||||
{
|
{
|
||||||
pieces.emplace_back(current_piece_start, buffer.size());
|
pieces.emplace_back(current_piece_start, buffer.size());
|
||||||
|
|
||||||
@ -259,17 +219,19 @@ void ProtobufWriter::SimpleWriter::startRepeatedPack()
|
|||||||
current_piece_start = buffer.size() + REPEATED_PACK_PADDING;
|
current_piece_start = buffer.size() + REPEATED_PACK_PADDING;
|
||||||
buffer.resize(current_piece_start);
|
buffer.resize(current_piece_start);
|
||||||
num_bytes_skipped += REPEATED_PACK_PADDING;
|
num_bytes_skipped += REPEATED_PACK_PADDING;
|
||||||
|
in_repeated_pack = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::endRepeatedPack(UInt32 field_number)
|
void ProtobufWriter::endRepeatedPack(int field_number, bool skip_if_empty)
|
||||||
{
|
{
|
||||||
size_t size = buffer.size() - current_piece_start;
|
size_t size = buffer.size() - current_piece_start;
|
||||||
if (!size)
|
if (!size && skip_if_empty)
|
||||||
{
|
{
|
||||||
current_piece_start = pieces.back().start;
|
current_piece_start = pieces.back().start;
|
||||||
buffer.resize(pieces.back().end);
|
buffer.resize(pieces.back().end);
|
||||||
pieces.pop_back();
|
pieces.pop_back();
|
||||||
num_bytes_skipped -= REPEATED_PACK_PADDING;
|
num_bytes_skipped -= REPEATED_PACK_PADDING;
|
||||||
|
in_repeated_pack = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
UInt8 * ptr = &buffer[pieces.back().end];
|
UInt8 * ptr = &buffer[pieces.back().end];
|
||||||
@ -278,726 +240,7 @@ void ProtobufWriter::SimpleWriter::endRepeatedPack(UInt32 field_number)
|
|||||||
size_t num_bytes_inserted = endptr - ptr;
|
size_t num_bytes_inserted = endptr - ptr;
|
||||||
pieces.back().end += num_bytes_inserted;
|
pieces.back().end += num_bytes_inserted;
|
||||||
num_bytes_skipped -= num_bytes_inserted;
|
num_bytes_skipped -= num_bytes_inserted;
|
||||||
}
|
in_repeated_pack = false;
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::addUIntToRepeatedPack(UInt64 value)
|
|
||||||
{
|
|
||||||
writeVarint(value, buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::addIntToRepeatedPack(Int64 value)
|
|
||||||
{
|
|
||||||
writeVarint(static_cast<UInt64>(value), buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ProtobufWriter::SimpleWriter::addSIntToRepeatedPack(Int64 value)
|
|
||||||
{
|
|
||||||
writeVarint(encodeZigZag(value), buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void ProtobufWriter::SimpleWriter::addFixedToRepeatedPack(T value)
|
|
||||||
{
|
|
||||||
static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
|
|
||||||
size_t old_size = buffer.size();
|
|
||||||
buffer.resize(old_size + sizeof(T));
|
|
||||||
memcpy(buffer.data() + old_size, &value, sizeof(T));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Implementation for a converter from any DB data type to any protobuf field type.
|
|
||||||
class ProtobufWriter::ConverterBaseImpl : public IConverter
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ConverterBaseImpl(SimpleWriter & simple_writer_, const google::protobuf::FieldDescriptor * field_)
|
|
||||||
: simple_writer(simple_writer_), field(field_)
|
|
||||||
{
|
|
||||||
field_number = field->number();
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void writeString(const StringRef &) override { cannotConvertType("String"); }
|
|
||||||
virtual void writeInt8(Int8) override { cannotConvertType("Int8"); }
|
|
||||||
virtual void writeUInt8(UInt8) override { cannotConvertType("UInt8"); }
|
|
||||||
virtual void writeInt16(Int16) override { cannotConvertType("Int16"); }
|
|
||||||
virtual void writeUInt16(UInt16) override { cannotConvertType("UInt16"); }
|
|
||||||
virtual void writeInt32(Int32) override { cannotConvertType("Int32"); }
|
|
||||||
virtual void writeUInt32(UInt32) override { cannotConvertType("UInt32"); }
|
|
||||||
virtual void writeInt64(Int64) override { cannotConvertType("Int64"); }
|
|
||||||
virtual void writeUInt64(UInt64) override { cannotConvertType("UInt64"); }
|
|
||||||
virtual void writeInt128(Int128) override { cannotConvertType("Int128"); }
|
|
||||||
virtual void writeUInt128(const UInt128 &) override { cannotConvertType("UInt128"); }
|
|
||||||
virtual void writeInt256(const Int256 &) override { cannotConvertType("Int256"); }
|
|
||||||
virtual void writeUInt256(const UInt256 &) override { cannotConvertType("UInt256"); }
|
|
||||||
virtual void writeFloat32(Float32) override { cannotConvertType("Float32"); }
|
|
||||||
virtual void writeFloat64(Float64) override { cannotConvertType("Float64"); }
|
|
||||||
virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) override {}
|
|
||||||
virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) override {}
|
|
||||||
virtual void writeEnum8(Int8) override { cannotConvertType("Enum"); }
|
|
||||||
virtual void writeEnum16(Int16) override { cannotConvertType("Enum"); }
|
|
||||||
virtual void writeUUID(const UUID &) override { cannotConvertType("UUID"); }
|
|
||||||
virtual void writeDate(DayNum) override { cannotConvertType("Date"); }
|
|
||||||
virtual void writeDateTime(time_t) override { cannotConvertType("DateTime"); }
|
|
||||||
virtual void writeDateTime64(DateTime64, UInt32) override { cannotConvertType("DateTime64"); }
|
|
||||||
virtual void writeDecimal32(Decimal32, UInt32) override { cannotConvertType("Decimal32"); }
|
|
||||||
virtual void writeDecimal64(Decimal64, UInt32) override { cannotConvertType("Decimal64"); }
|
|
||||||
virtual void writeDecimal128(const Decimal128 &, UInt32) override { cannotConvertType("Decimal128"); }
|
|
||||||
virtual void writeDecimal256(const Decimal256 &, UInt32) override { cannotConvertType("Decimal256"); }
|
|
||||||
|
|
||||||
virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) override { cannotConvertType("AggregateFunction"); }
|
|
||||||
|
|
||||||
protected:
|
|
||||||
[[noreturn]] void cannotConvertType(const String & type_name)
|
|
||||||
{
|
|
||||||
throw Exception(
|
|
||||||
"Could not convert data type '" + type_name + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
|
|
||||||
ErrorCodes::PROTOBUF_BAD_CAST);
|
|
||||||
}
|
|
||||||
|
|
||||||
[[noreturn]] void cannotConvertValue(const String & value)
|
|
||||||
{
|
|
||||||
throw Exception(
|
|
||||||
"Could not convert value '" + value + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
|
|
||||||
ErrorCodes::PROTOBUF_BAD_CAST);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename To, typename From>
|
|
||||||
To numericCast(From value)
|
|
||||||
{
|
|
||||||
if constexpr (std::is_same_v<To, From>)
|
|
||||||
return value;
|
|
||||||
To result;
|
|
||||||
try
|
|
||||||
{
|
|
||||||
result = boost::numeric_cast<To>(value);
|
|
||||||
}
|
|
||||||
catch (boost::numeric::bad_numeric_cast &)
|
|
||||||
{
|
|
||||||
cannotConvertValue(toString(value));
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename To>
|
|
||||||
To parseFromString(const StringRef & str)
|
|
||||||
{
|
|
||||||
To result;
|
|
||||||
try
|
|
||||||
{
|
|
||||||
result = ::DB::parse<To>(str.data, str.size);
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
cannotConvertValue(str.toString());
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleWriter & simple_writer;
|
|
||||||
const google::protobuf::FieldDescriptor * field;
|
|
||||||
UInt32 field_number;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template <bool skip_null_value>
|
|
||||||
class ProtobufWriter::ConverterToString : public ConverterBaseImpl
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
using ConverterBaseImpl::ConverterBaseImpl;
|
|
||||||
|
|
||||||
void writeString(const StringRef & str) override { writeField(str); }
|
|
||||||
|
|
||||||
void writeInt8(Int8 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeUInt8(UInt8 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeInt16(Int16 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeUInt16(UInt16 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeInt32(Int32 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeUInt32(UInt32 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeInt64(Int64 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeUInt64(UInt64 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeFloat32(Float32 value) override { convertToStringAndWriteField(value); }
|
|
||||||
void writeFloat64(Float64 value) override { convertToStringAndWriteField(value); }
|
|
||||||
|
|
||||||
void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
|
|
||||||
{
|
|
||||||
prepareEnumValueToNameMap(name_value_pairs);
|
|
||||||
}
|
|
||||||
void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
|
|
||||||
{
|
|
||||||
prepareEnumValueToNameMap(name_value_pairs);
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeEnum8(Int8 value) override { writeEnum16(value); }
|
|
||||||
|
|
||||||
void writeEnum16(Int16 value) override
|
|
||||||
{
|
|
||||||
auto it = enum_value_to_name_map->find(value);
|
|
||||||
if (it == enum_value_to_name_map->end())
|
|
||||||
cannotConvertValue(toString(value));
|
|
||||||
writeField(it->second);
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeUUID(const UUID & uuid) override { convertToStringAndWriteField(uuid); }
|
|
||||||
void writeDate(DayNum date) override { convertToStringAndWriteField(date); }
|
|
||||||
|
|
||||||
void writeDateTime(time_t tm) override
|
|
||||||
{
|
|
||||||
writeDateTimeText(tm, text_buffer);
|
|
||||||
writeField(text_buffer.stringRef());
|
|
||||||
text_buffer.restart();
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeDateTime64(DateTime64 date_time, UInt32 scale) override
|
|
||||||
{
|
|
||||||
writeDateTimeText(date_time, scale, text_buffer);
|
|
||||||
writeField(text_buffer.stringRef());
|
|
||||||
text_buffer.restart();
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeDecimal32(Decimal32 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
|
||||||
void writeDecimal64(Decimal64 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
|
||||||
void writeDecimal128(const Decimal128 & decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
|
||||||
|
|
||||||
void writeAggregateFunction(const AggregateFunctionPtr & function, ConstAggregateDataPtr place) override
|
|
||||||
{
|
|
||||||
function->serialize(place, text_buffer);
|
|
||||||
writeField(text_buffer.stringRef());
|
|
||||||
text_buffer.restart();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
template <typename T>
|
|
||||||
void convertToStringAndWriteField(T value)
|
|
||||||
{
|
|
||||||
writeText(value, text_buffer);
|
|
||||||
writeField(text_buffer.stringRef());
|
|
||||||
text_buffer.restart();
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void writeDecimal(const Decimal<T> & decimal, UInt32 scale)
|
|
||||||
{
|
|
||||||
writeText(decimal, scale, text_buffer);
|
|
||||||
writeField(text_buffer.stringRef());
|
|
||||||
text_buffer.restart();
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void prepareEnumValueToNameMap(const std::vector<std::pair<String, T>> & name_value_pairs)
|
|
||||||
{
|
|
||||||
if (enum_value_to_name_map.has_value())
|
|
||||||
return;
|
|
||||||
enum_value_to_name_map.emplace();
|
|
||||||
for (const auto & name_value_pair : name_value_pairs)
|
|
||||||
enum_value_to_name_map->emplace(name_value_pair.second, name_value_pair.first);
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeField(const StringRef & str)
|
|
||||||
{
|
|
||||||
if constexpr (skip_null_value)
|
|
||||||
{
|
|
||||||
if (!str.size)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
simple_writer.writeString(field_number, str);
|
|
||||||
}
|
|
||||||
|
|
||||||
WriteBufferFromOwnString text_buffer;
|
|
||||||
std::optional<std::unordered_map<Int16, String>> enum_value_to_name_map;
|
|
||||||
};
|
|
||||||
|
|
||||||
# define PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(field_type_id) \
|
|
||||||
template <> \
|
|
||||||
std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<field_type_id>( \
|
|
||||||
const google::protobuf::FieldDescriptor * field) \
|
|
||||||
{ \
|
|
||||||
if (shouldSkipNullValue(field)) \
|
|
||||||
return std::make_unique<ConverterToString<true>>(simple_writer, field); \
|
|
||||||
else \
|
|
||||||
return std::make_unique<ConverterToString<false>>(simple_writer, field); \
|
|
||||||
}
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_STRING)
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_BYTES)
|
|
||||||
# undef PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS
|
|
||||||
|
|
||||||
|
|
||||||
template <int field_type_id, typename ToType, bool skip_null_value, bool pack_repeated>
|
|
||||||
class ProtobufWriter::ConverterToNumber : public ConverterBaseImpl
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
using ConverterBaseImpl::ConverterBaseImpl;
|
|
||||||
|
|
||||||
void writeString(const StringRef & str) override { writeField(parseFromString<ToType>(str)); }
|
|
||||||
|
|
||||||
void writeInt8(Int8 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeUInt8(UInt8 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeInt16(Int16 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeUInt16(UInt16 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeInt32(Int32 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeUInt32(UInt32 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeInt64(Int64 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeUInt64(UInt64 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeFloat32(Float32 value) override { castNumericAndWriteField(value); }
|
|
||||||
void writeFloat64(Float64 value) override { castNumericAndWriteField(value); }
|
|
||||||
|
|
||||||
void writeEnum8(Int8 value) override { writeEnum16(value); }
|
|
||||||
|
|
||||||
void writeEnum16(Int16 value) override
|
|
||||||
{
|
|
||||||
if constexpr (!is_integer_v<ToType>)
|
|
||||||
cannotConvertType("Enum"); // It's not correct to convert enum to floating point.
|
|
||||||
castNumericAndWriteField(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeDate(DayNum date) override { castNumericAndWriteField(static_cast<UInt16>(date)); }
|
|
||||||
void writeDateTime(time_t tm) override { castNumericAndWriteField(tm); }
|
|
||||||
void writeDateTime64(DateTime64 date_time, UInt32 scale) override { writeDecimal(date_time, scale); }
|
|
||||||
void writeDecimal32(Decimal32 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
|
||||||
void writeDecimal64(Decimal64 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
|
||||||
void writeDecimal128(const Decimal128 & decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
template <typename FromType>
|
|
||||||
void castNumericAndWriteField(FromType value)
|
|
||||||
{
|
|
||||||
writeField(numericCast<ToType>(value));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename S>
|
|
||||||
void writeDecimal(const Decimal<S> & decimal, UInt32 scale)
|
|
||||||
{
|
|
||||||
castNumericAndWriteField(DecimalUtils::convertTo<ToType>(decimal, scale));
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeField(ToType value)
|
|
||||||
{
|
|
||||||
if constexpr (skip_null_value)
|
|
||||||
{
|
|
||||||
if (value == 0)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT32) && std::is_same_v<ToType, Int32>)
|
|
||||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT64) && std::is_same_v<ToType, Int64>))
|
|
||||||
{
|
|
||||||
if constexpr (pack_repeated)
|
|
||||||
simple_writer.addIntToRepeatedPack(value);
|
|
||||||
else
|
|
||||||
simple_writer.writeInt(field_number, value);
|
|
||||||
}
|
|
||||||
else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT32) && std::is_same_v<ToType, Int32>)
|
|
||||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT64) && std::is_same_v<ToType, Int64>))
|
|
||||||
{
|
|
||||||
if constexpr (pack_repeated)
|
|
||||||
simple_writer.addSIntToRepeatedPack(value);
|
|
||||||
else
|
|
||||||
simple_writer.writeSInt(field_number, value);
|
|
||||||
}
|
|
||||||
else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT32) && std::is_same_v<ToType, UInt32>)
|
|
||||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT64) && std::is_same_v<ToType, UInt64>))
|
|
||||||
{
|
|
||||||
if constexpr (pack_repeated)
|
|
||||||
simple_writer.addUIntToRepeatedPack(value);
|
|
||||||
else
|
|
||||||
simple_writer.writeUInt(field_number, value);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
static_assert(((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED32) && std::is_same_v<ToType, UInt32>)
|
|
||||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED32) && std::is_same_v<ToType, Int32>)
|
|
||||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED64) && std::is_same_v<ToType, UInt64>)
|
|
||||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED64) && std::is_same_v<ToType, Int64>)
|
|
||||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FLOAT) && std::is_same_v<ToType, float>)
|
|
||||||
|| ((field_type_id == google::protobuf::FieldDescriptor::TYPE_DOUBLE) && std::is_same_v<ToType, double>));
|
|
||||||
if constexpr (pack_repeated)
|
|
||||||
simple_writer.addFixedToRepeatedPack(value);
|
|
||||||
else
|
|
||||||
simple_writer.writeFixed(field_number, value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
# define PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(field_type_id, field_type) \
|
|
||||||
template <> \
|
|
||||||
std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<field_type_id>( \
|
|
||||||
const google::protobuf::FieldDescriptor * field) \
|
|
||||||
{ \
|
|
||||||
if (shouldSkipNullValue(field)) \
|
|
||||||
return std::make_unique<ConverterToNumber<field_type_id, field_type, true, false>>(simple_writer, field); \
|
|
||||||
else if (shouldPackRepeated(field)) \
|
|
||||||
return std::make_unique<ConverterToNumber<field_type_id, field_type, false, true>>(simple_writer, field); \
|
|
||||||
else \
|
|
||||||
return std::make_unique<ConverterToNumber<field_type_id, field_type, false, false>>(simple_writer, field); \
|
|
||||||
}
|
|
||||||
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT32, Int32);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT32, Int32);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT32, UInt32);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT64, Int64);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT64, Int64);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT64, UInt64);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED32, UInt32);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED32, Int32);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED64, UInt64);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED64, Int64);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FLOAT, float);
|
|
||||||
PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_DOUBLE, double);
|
|
||||||
# undef PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS
|
|
||||||
|
|
||||||
|
|
||||||
template <bool skip_null_value, bool pack_repeated>
|
|
||||||
class ProtobufWriter::ConverterToBool : public ConverterBaseImpl
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
using ConverterBaseImpl::ConverterBaseImpl;
|
|
||||||
|
|
||||||
void writeString(const StringRef & str) override
|
|
||||||
{
|
|
||||||
if (str == "true")
|
|
||||||
writeField(true);
|
|
||||||
else if (str == "false")
|
|
||||||
writeField(false);
|
|
||||||
else
|
|
||||||
cannotConvertValue(str.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeInt8(Int8 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeUInt8(UInt8 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeInt16(Int16 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeUInt16(UInt16 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeInt32(Int32 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeUInt32(UInt32 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeInt64(Int64 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeUInt64(UInt64 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeFloat32(Float32 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeFloat64(Float64 value) override { convertToBoolAndWriteField(value); }
|
|
||||||
void writeDecimal32(Decimal32 decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
|
|
||||||
void writeDecimal64(Decimal64 decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
|
|
||||||
void writeDecimal128(const Decimal128 & decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
template <typename T>
|
|
||||||
void convertToBoolAndWriteField(T value)
|
|
||||||
{
|
|
||||||
writeField(static_cast<bool>(value));
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeField(bool b)
|
|
||||||
{
|
|
||||||
if constexpr (skip_null_value)
|
|
||||||
{
|
|
||||||
if (!b)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if constexpr (pack_repeated)
|
|
||||||
simple_writer.addUIntToRepeatedPack(b);
|
|
||||||
else
|
|
||||||
simple_writer.writeUInt(field_number, b);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <>
|
|
||||||
std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<google::protobuf::FieldDescriptor::TYPE_BOOL>(
|
|
||||||
const google::protobuf::FieldDescriptor * field)
|
|
||||||
{
|
|
||||||
if (shouldSkipNullValue(field))
|
|
||||||
return std::make_unique<ConverterToBool<true, false>>(simple_writer, field);
|
|
||||||
else if (shouldPackRepeated(field))
|
|
||||||
return std::make_unique<ConverterToBool<false, true>>(simple_writer, field);
|
|
||||||
else
|
|
||||||
return std::make_unique<ConverterToBool<false, false>>(simple_writer, field);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <bool skip_null_value, bool pack_repeated>
|
|
||||||
class ProtobufWriter::ConverterToEnum : public ConverterBaseImpl
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
using ConverterBaseImpl::ConverterBaseImpl;
|
|
||||||
|
|
||||||
void writeString(const StringRef & str) override
|
|
||||||
{
|
|
||||||
prepareEnumNameToPbNumberMap();
|
|
||||||
auto it = enum_name_to_pbnumber_map->find(str);
|
|
||||||
if (it == enum_name_to_pbnumber_map->end())
|
|
||||||
cannotConvertValue(str.toString());
|
|
||||||
writeField(it->second);
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeInt8(Int8 value) override { convertToEnumAndWriteField(value); }
|
|
||||||
void writeUInt8(UInt8 value) override { convertToEnumAndWriteField(value); }
|
|
||||||
void writeInt16(Int16 value) override { convertToEnumAndWriteField(value); }
|
|
||||||
void writeUInt16(UInt16 value) override { convertToEnumAndWriteField(value); }
|
|
||||||
void writeInt32(Int32 value) override { convertToEnumAndWriteField(value); }
|
|
||||||
void writeUInt32(UInt32 value) override { convertToEnumAndWriteField(value); }
|
|
||||||
void writeInt64(Int64 value) override { convertToEnumAndWriteField(value); }
|
|
||||||
void writeUInt64(UInt64 value) override { convertToEnumAndWriteField(value); }
|
|
||||||
|
|
||||||
void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
|
|
||||||
{
|
|
||||||
prepareEnumValueToPbNumberMap(name_value_pairs);
|
|
||||||
}
|
|
||||||
void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
|
|
||||||
{
|
|
||||||
prepareEnumValueToPbNumberMap(name_value_pairs);
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeEnum8(Int8 value) override { writeEnum16(value); }
|
|
||||||
|
|
||||||
void writeEnum16(Int16 value) override
|
|
||||||
{
|
|
||||||
int pbnumber;
|
|
||||||
if (enum_value_always_equals_pbnumber)
|
|
||||||
pbnumber = value;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto it = enum_value_to_pbnumber_map->find(value);
|
|
||||||
if (it == enum_value_to_pbnumber_map->end())
|
|
||||||
cannotConvertValue(toString(value));
|
|
||||||
pbnumber = it->second;
|
|
||||||
}
|
|
||||||
writeField(pbnumber);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
template <typename T>
|
|
||||||
void convertToEnumAndWriteField(T value)
|
|
||||||
{
|
|
||||||
const auto * enum_descriptor = field->enum_type()->FindValueByNumber(numericCast<int>(value));
|
|
||||||
if (!enum_descriptor)
|
|
||||||
cannotConvertValue(toString(value));
|
|
||||||
writeField(enum_descriptor->number());
|
|
||||||
}
|
|
||||||
|
|
||||||
void prepareEnumNameToPbNumberMap()
|
|
||||||
{
|
|
||||||
if (enum_name_to_pbnumber_map.has_value())
|
|
||||||
return;
|
|
||||||
enum_name_to_pbnumber_map.emplace();
|
|
||||||
const auto * enum_type = field->enum_type();
|
|
||||||
for (int i = 0; i != enum_type->value_count(); ++i)
|
|
||||||
{
|
|
||||||
const auto * enum_value = enum_type->value(i);
|
|
||||||
enum_name_to_pbnumber_map->emplace(enum_value->name(), enum_value->number());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void prepareEnumValueToPbNumberMap(const std::vector<std::pair<String, T>> & name_value_pairs)
|
|
||||||
{
|
|
||||||
if (enum_value_to_pbnumber_map.has_value())
|
|
||||||
return;
|
|
||||||
enum_value_to_pbnumber_map.emplace();
|
|
||||||
enum_value_always_equals_pbnumber = true;
|
|
||||||
for (const auto & name_value_pair : name_value_pairs)
|
|
||||||
{
|
|
||||||
Int16 value = name_value_pair.second; // NOLINT
|
|
||||||
const auto * enum_descriptor = field->enum_type()->FindValueByName(name_value_pair.first);
|
|
||||||
if (enum_descriptor)
|
|
||||||
{
|
|
||||||
enum_value_to_pbnumber_map->emplace(value, enum_descriptor->number());
|
|
||||||
if (value != enum_descriptor->number())
|
|
||||||
enum_value_always_equals_pbnumber = false;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
enum_value_always_equals_pbnumber = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeField(int enum_pbnumber)
|
|
||||||
{
|
|
||||||
if constexpr (skip_null_value)
|
|
||||||
{
|
|
||||||
if (!enum_pbnumber)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if constexpr (pack_repeated)
|
|
||||||
simple_writer.addUIntToRepeatedPack(enum_pbnumber);
|
|
||||||
else
|
|
||||||
simple_writer.writeUInt(field_number, enum_pbnumber);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::optional<std::unordered_map<StringRef, int>> enum_name_to_pbnumber_map;
|
|
||||||
std::optional<std::unordered_map<Int16, int>> enum_value_to_pbnumber_map;
|
|
||||||
bool enum_value_always_equals_pbnumber;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <>
|
|
||||||
std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<google::protobuf::FieldDescriptor::TYPE_ENUM>(
|
|
||||||
const google::protobuf::FieldDescriptor * field)
|
|
||||||
{
|
|
||||||
if (shouldSkipNullValue(field))
|
|
||||||
return std::make_unique<ConverterToEnum<true, false>>(simple_writer, field);
|
|
||||||
else if (shouldPackRepeated(field))
|
|
||||||
return std::make_unique<ConverterToEnum<false, true>>(simple_writer, field);
|
|
||||||
else
|
|
||||||
return std::make_unique<ConverterToEnum<false, false>>(simple_writer, field);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Matches the output columns against the fields of `message_type` and prepares the per-field
/// data needed for writing.
/// Throws NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD if a required protobuf field has no matching column.
ProtobufWriter::ProtobufWriter(
    WriteBuffer & out, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_)
    : simple_writer(out, use_length_delimiters_)
    , current_message(nullptr) /// endMessage() tests this pointer, so it must not be left indeterminate.
{
    std::vector<const google::protobuf::FieldDescriptor *> field_descriptors_without_match;
    root_message = ProtobufColumnMatcher::matchColumns<ColumnMatcherTraits>(column_names, message_type, field_descriptors_without_match);

    for (const auto * field_descriptor_without_match : field_descriptors_without_match)
    {
        /// Fields without a matching column are tolerated unless they are required.
        if (field_descriptor_without_match->is_required())
            throw Exception(
                "Output doesn't have a column named '" + field_descriptor_without_match->name()
                    + "' which is required to write the output in the protobuf format.",
                ErrorCodes::NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD);
    }

    setTraitsDataAfterMatchingColumns(root_message.get());
}
|
|
||||||
|
|
||||||
/// Compiler-generated destructor, defined out of the class body.
ProtobufWriter::~ProtobufWriter() = default;
|
|
||||||
|
|
||||||
/// Fills the traits data of `message` and of each of its fields, recursing into nested messages,
/// and creates a converter for every field which is not a nested message.
/// Throws NOT_IMPLEMENTED for a protobuf field type that has no converter.
void ProtobufWriter::setTraitsDataAfterMatchingColumns(Message * message)
{
    /// The field of the parent message which contains this message; null if there is no parent.
    Field * owner = nullptr;
    if (message->parent)
        owner = &message->parent->fields[message->index_in_parent];

    auto & traits = message->data;
    traits.parent_field_number = owner ? owner->field_number : 0;
    traits.is_required = owner && owner->data.is_required;
    traits.is_group = owner && (owner->field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP);

    /// A message held by a repeatable field is its own container,
    /// otherwise the container (if any) is inherited from the parent message.
    if (!owner)
        traits.repeatable_container_message = nullptr;
    else if (owner->data.is_repeatable)
        traits.repeatable_container_message = message;
    else
        traits.repeatable_container_message = message->parent->data.repeatable_container_message;

    for (auto & field : message->fields)
    {
        auto & field_traits = field.data;
        field_traits.is_repeatable = field.field_descriptor->is_repeated();
        field_traits.is_required = field.field_descriptor->is_required();
        field_traits.repeatable_container_message = traits.repeatable_container_message;
        field_traits.should_pack_repeated = shouldPackRepeated(field.field_descriptor);

        /// Nested messages get no converter of their own, their fields are processed recursively.
        if (field.nested_message)
        {
            setTraitsDataAfterMatchingColumns(field.nested_message.get());
            continue;
        }

        switch (field.field_descriptor->type())
        {
#    define PROTOBUF_WRITER_CONVERTER_CREATING_CASE(field_type_id) \
            case field_type_id: \
                field.data.converter = createConverter<field_type_id>(field.field_descriptor); \
                break
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_STRING);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BYTES);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT32);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT32);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT32);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED32);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED32);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT64);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT64);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT64);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED64);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED64);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FLOAT);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_DOUBLE);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BOOL);
            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_ENUM);
#    undef PROTOBUF_WRITER_CONVERTER_CREATING_CASE
            default:
                throw Exception(
                    String("Protobuf type '") + field.field_descriptor->type_name() + "' isn't supported", ErrorCodes::NOT_IMPLEMENTED);
        }
    }
}
|
|
||||||
|
|
||||||
void ProtobufWriter::startMessage()
|
|
||||||
{
|
|
||||||
current_message = root_message.get();
|
|
||||||
current_field_index = 0;
|
|
||||||
simple_writer.startMessage();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ProtobufWriter::endMessage()
|
|
||||||
{
|
|
||||||
if (!current_message)
|
|
||||||
return;
|
|
||||||
endWritingField();
|
|
||||||
while (current_message->parent)
|
|
||||||
{
|
|
||||||
simple_writer.endNestedMessage(
|
|
||||||
current_message->data.parent_field_number, current_message->data.is_group, !current_message->data.is_required);
|
|
||||||
current_message = current_message->parent;
|
|
||||||
}
|
|
||||||
simple_writer.endMessage();
|
|
||||||
current_message = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ProtobufWriter::writeField(size_t & column_index)
|
|
||||||
{
|
|
||||||
endWritingField();
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
if (current_field_index < current_message->fields.size())
|
|
||||||
{
|
|
||||||
Field & field = current_message->fields[current_field_index];
|
|
||||||
if (!field.nested_message)
|
|
||||||
{
|
|
||||||
current_field = ¤t_message->fields[current_field_index];
|
|
||||||
current_converter = current_field->data.converter.get();
|
|
||||||
column_index = current_field->column_index;
|
|
||||||
if (current_field->data.should_pack_repeated)
|
|
||||||
simple_writer.startRepeatedPack();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
simple_writer.startNestedMessage();
|
|
||||||
current_message = field.nested_message.get();
|
|
||||||
current_message->data.need_repeat = false;
|
|
||||||
current_field_index = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (current_message->parent)
|
|
||||||
{
|
|
||||||
simple_writer.endNestedMessage(
|
|
||||||
current_message->data.parent_field_number, current_message->data.is_group, !current_message->data.is_required);
|
|
||||||
if (current_message->data.need_repeat)
|
|
||||||
{
|
|
||||||
simple_writer.startNestedMessage();
|
|
||||||
current_message->data.need_repeat = false;
|
|
||||||
current_field_index = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
current_field_index = current_message->index_in_parent + 1;
|
|
||||||
current_message = current_message->parent;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ProtobufWriter::endWritingField()
|
|
||||||
{
|
|
||||||
if (!current_field)
|
|
||||||
return;
|
|
||||||
if (current_field->data.should_pack_repeated)
|
|
||||||
simple_writer.endRepeatedPack(current_field->field_number);
|
|
||||||
else if ((num_values == 0) && current_field->data.is_required)
|
|
||||||
throw Exception(
|
|
||||||
"No data for the required field '" + current_field->field_descriptor->name() + "'",
|
|
||||||
ErrorCodes::NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD);
|
|
||||||
|
|
||||||
current_field = nullptr;
|
|
||||||
current_converter = nullptr;
|
|
||||||
num_values = 0;
|
|
||||||
++current_field_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ProtobufWriter::setNestedMessageNeedsRepeat()
|
|
||||||
{
|
|
||||||
if (current_field->data.repeatable_container_message)
|
|
||||||
current_field->data.repeatable_container_message->data.need_repeat = true;
|
|
||||||
else
|
|
||||||
throw Exception(
|
|
||||||
"Cannot write more than single value to the non-repeated field '" + current_field->field_descriptor->name() + "'",
|
|
||||||
ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,290 +1,68 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Core/UUID.h>
|
|
||||||
#include <common/DayNum.h>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
#if !defined(ARCADIA_BUILD)
|
#if !defined(ARCADIA_BUILD)
|
||||||
# include "config_formats.h"
|
# include "config_formats.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if USE_PROTOBUF
|
#if USE_PROTOBUF
|
||||||
# include <IO/WriteBufferFromString.h>
|
# include <Core/Types.h>
|
||||||
# include <boost/noncopyable.hpp>
|
# include <Common/PODArray.h>
|
||||||
# include <Common/PODArray.h>
|
|
||||||
# include "ProtobufColumnMatcher.h"
|
|
||||||
|
|
||||||
|
|
||||||
namespace google
|
|
||||||
{
|
|
||||||
namespace protobuf
|
|
||||||
{
|
|
||||||
class Descriptor;
|
|
||||||
class FieldDescriptor;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
class IAggregateFunction;
|
|
||||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
|
||||||
using ConstAggregateDataPtr = const char *;
|
|
||||||
|
|
||||||
|
|
||||||
/** Serializes a protobuf, tries to cast types if necessarily.
|
|
||||||
*/
|
|
||||||
class ProtobufWriter : private boost::noncopyable
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ProtobufWriter(WriteBuffer & out, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_);
|
|
||||||
~ProtobufWriter();
|
|
||||||
|
|
||||||
/// Should be called at the beginning of writing a message.
|
|
||||||
void startMessage();
|
|
||||||
|
|
||||||
/// Should be called at the end of writing a message.
|
|
||||||
void endMessage();
|
|
||||||
|
|
||||||
/// Prepares for writing values of a field.
|
|
||||||
/// Returns true and sets 'column_index' to the corresponding column's index.
|
|
||||||
/// Returns false if there are no more fields to write in the message type (call endMessage() in this case).
|
|
||||||
bool writeField(size_t & column_index);
|
|
||||||
|
|
||||||
/// Writes a value. This function should be called one or multiple times after writeField().
|
|
||||||
/// Returns false if there are no more place for the values in the protobuf's field.
|
|
||||||
/// This can happen if the protobuf's field is not declared as repeated in the protobuf schema.
|
|
||||||
bool writeNumber(Int8 value) { return writeValueIfPossible(&IConverter::writeInt8, value); }
|
|
||||||
bool writeNumber(UInt8 value) { return writeValueIfPossible(&IConverter::writeUInt8, value); }
|
|
||||||
bool writeNumber(Int16 value) { return writeValueIfPossible(&IConverter::writeInt16, value); }
|
|
||||||
bool writeNumber(UInt16 value) { return writeValueIfPossible(&IConverter::writeUInt16, value); }
|
|
||||||
bool writeNumber(Int32 value) { return writeValueIfPossible(&IConverter::writeInt32, value); }
|
|
||||||
bool writeNumber(UInt32 value) { return writeValueIfPossible(&IConverter::writeUInt32, value); }
|
|
||||||
bool writeNumber(Int64 value) { return writeValueIfPossible(&IConverter::writeInt64, value); }
|
|
||||||
bool writeNumber(UInt64 value) { return writeValueIfPossible(&IConverter::writeUInt64, value); }
|
|
||||||
bool writeNumber(Int128 value) { return writeValueIfPossible(&IConverter::writeInt128, value); }
|
|
||||||
bool writeNumber(UInt128 value) { return writeValueIfPossible(&IConverter::writeUInt128, value); }
|
|
||||||
|
|
||||||
bool writeNumber(Int256 value) { return writeValueIfPossible(&IConverter::writeInt256, value); }
|
|
||||||
bool writeNumber(UInt256 value) { return writeValueIfPossible(&IConverter::writeUInt256, value); }
|
|
||||||
|
|
||||||
bool writeNumber(Float32 value) { return writeValueIfPossible(&IConverter::writeFloat32, value); }
|
|
||||||
bool writeNumber(Float64 value) { return writeValueIfPossible(&IConverter::writeFloat64, value); }
|
|
||||||
bool writeString(const StringRef & str) { return writeValueIfPossible(&IConverter::writeString, str); }
|
|
||||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & enum_values) { current_converter->prepareEnumMapping8(enum_values); }
|
|
||||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & enum_values) { current_converter->prepareEnumMapping16(enum_values); }
|
|
||||||
bool writeEnum(Int8 value) { return writeValueIfPossible(&IConverter::writeEnum8, value); }
|
|
||||||
bool writeEnum(Int16 value) { return writeValueIfPossible(&IConverter::writeEnum16, value); }
|
|
||||||
bool writeUUID(const UUID & uuid) { return writeValueIfPossible(&IConverter::writeUUID, uuid); }
|
|
||||||
bool writeDate(DayNum date) { return writeValueIfPossible(&IConverter::writeDate, date); }
|
|
||||||
bool writeDateTime(time_t tm) { return writeValueIfPossible(&IConverter::writeDateTime, tm); }
|
|
||||||
bool writeDateTime64(DateTime64 tm, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDateTime64, tm, scale); }
|
|
||||||
bool writeDecimal(Decimal32 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal32, decimal, scale); }
|
|
||||||
bool writeDecimal(Decimal64 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal64, decimal, scale); }
|
|
||||||
bool writeDecimal(const Decimal128 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal128, decimal, scale); }
|
|
||||||
bool writeDecimal(const Decimal256 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal256, decimal, scale); }
|
|
||||||
bool writeAggregateFunction(const AggregateFunctionPtr & function, ConstAggregateDataPtr place) { return writeValueIfPossible(&IConverter::writeAggregateFunction, function, place); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
class SimpleWriter
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimpleWriter(WriteBuffer & out_, const bool use_length_delimiters_);
|
|
||||||
~SimpleWriter();
|
|
||||||
|
|
||||||
void startMessage();
|
|
||||||
void endMessage();
|
|
||||||
|
|
||||||
void startNestedMessage();
|
|
||||||
void endNestedMessage(UInt32 field_number, bool is_group, bool skip_if_empty);
|
|
||||||
|
|
||||||
void writeInt(UInt32 field_number, Int64 value);
|
|
||||||
void writeUInt(UInt32 field_number, UInt64 value);
|
|
||||||
void writeSInt(UInt32 field_number, Int64 value);
|
|
||||||
template <typename T>
|
|
||||||
void writeFixed(UInt32 field_number, T value);
|
|
||||||
void writeString(UInt32 field_number, const StringRef & str);
|
|
||||||
|
|
||||||
void startRepeatedPack();
|
|
||||||
void addIntToRepeatedPack(Int64 value);
|
|
||||||
void addUIntToRepeatedPack(UInt64 value);
|
|
||||||
void addSIntToRepeatedPack(Int64 value);
|
|
||||||
template <typename T>
|
|
||||||
void addFixedToRepeatedPack(T value);
|
|
||||||
void endRepeatedPack(UInt32 field_number);
|
|
||||||
|
|
||||||
private:
|
|
||||||
struct Piece
|
|
||||||
{
|
|
||||||
size_t start;
|
|
||||||
size_t end;
|
|
||||||
Piece(size_t start_, size_t end_) : start(start_), end(end_) {}
|
|
||||||
Piece() = default;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct NestedInfo
|
|
||||||
{
|
|
||||||
size_t num_pieces_at_start;
|
|
||||||
size_t num_bytes_skipped_at_start;
|
|
||||||
NestedInfo(size_t num_pieces_at_start_, size_t num_bytes_skipped_at_start_)
|
|
||||||
: num_pieces_at_start(num_pieces_at_start_), num_bytes_skipped_at_start(num_bytes_skipped_at_start_)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
WriteBuffer & out;
|
|
||||||
PODArray<UInt8> buffer;
|
|
||||||
std::vector<Piece> pieces;
|
|
||||||
size_t current_piece_start;
|
|
||||||
size_t num_bytes_skipped;
|
|
||||||
std::vector<NestedInfo> nested_infos;
|
|
||||||
const bool use_length_delimiters;
|
|
||||||
};
|
|
||||||
|
|
||||||
class IConverter
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual ~IConverter() = default;
|
|
||||||
virtual void writeString(const StringRef &) = 0;
|
|
||||||
virtual void writeInt8(Int8) = 0;
|
|
||||||
virtual void writeUInt8(UInt8) = 0;
|
|
||||||
virtual void writeInt16(Int16) = 0;
|
|
||||||
virtual void writeUInt16(UInt16) = 0;
|
|
||||||
virtual void writeInt32(Int32) = 0;
|
|
||||||
virtual void writeUInt32(UInt32) = 0;
|
|
||||||
virtual void writeInt64(Int64) = 0;
|
|
||||||
virtual void writeUInt64(UInt64) = 0;
|
|
||||||
virtual void writeInt128(Int128) = 0;
|
|
||||||
virtual void writeUInt128(const UInt128 &) = 0;
|
|
||||||
|
|
||||||
virtual void writeInt256(const Int256 &) = 0;
|
|
||||||
virtual void writeUInt256(const UInt256 &) = 0;
|
|
||||||
|
|
||||||
virtual void writeFloat32(Float32) = 0;
|
|
||||||
virtual void writeFloat64(Float64) = 0;
|
|
||||||
virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) = 0;
|
|
||||||
virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) = 0;
|
|
||||||
virtual void writeEnum8(Int8) = 0;
|
|
||||||
virtual void writeEnum16(Int16) = 0;
|
|
||||||
virtual void writeUUID(const UUID &) = 0;
|
|
||||||
virtual void writeDate(DayNum) = 0;
|
|
||||||
virtual void writeDateTime(time_t) = 0;
|
|
||||||
virtual void writeDateTime64(DateTime64, UInt32 scale) = 0;
|
|
||||||
virtual void writeDecimal32(Decimal32, UInt32) = 0;
|
|
||||||
virtual void writeDecimal64(Decimal64, UInt32) = 0;
|
|
||||||
virtual void writeDecimal128(const Decimal128 &, UInt32) = 0;
|
|
||||||
virtual void writeDecimal256(const Decimal256 &, UInt32) = 0;
|
|
||||||
virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
class ConverterBaseImpl;
|
|
||||||
template <bool skip_null_value>
|
|
||||||
class ConverterToString;
|
|
||||||
template <int field_type_id, typename ToType, bool skip_null_value, bool pack_repeated>
|
|
||||||
class ConverterToNumber;
|
|
||||||
template <bool skip_null_value, bool pack_repeated>
|
|
||||||
class ConverterToBool;
|
|
||||||
template <bool skip_null_value, bool pack_repeated>
|
|
||||||
class ConverterToEnum;
|
|
||||||
|
|
||||||
struct ColumnMatcherTraits
|
|
||||||
{
|
|
||||||
struct FieldData
|
|
||||||
{
|
|
||||||
std::unique_ptr<IConverter> converter;
|
|
||||||
bool is_required;
|
|
||||||
bool is_repeatable;
|
|
||||||
bool should_pack_repeated;
|
|
||||||
ProtobufColumnMatcher::Message<ColumnMatcherTraits> * repeatable_container_message;
|
|
||||||
};
|
|
||||||
struct MessageData
|
|
||||||
{
|
|
||||||
UInt32 parent_field_number;
|
|
||||||
bool is_group;
|
|
||||||
bool is_required;
|
|
||||||
ProtobufColumnMatcher::Message<ColumnMatcherTraits> * repeatable_container_message;
|
|
||||||
bool need_repeat;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
using Message = ProtobufColumnMatcher::Message<ColumnMatcherTraits>;
|
|
||||||
using Field = ProtobufColumnMatcher::Field<ColumnMatcherTraits>;
|
|
||||||
|
|
||||||
void setTraitsDataAfterMatchingColumns(Message * message);
|
|
||||||
|
|
||||||
template <int field_type_id>
|
|
||||||
std::unique_ptr<IConverter> createConverter(const google::protobuf::FieldDescriptor * field);
|
|
||||||
|
|
||||||
template <typename... Params>
|
|
||||||
using WriteValueFunctionPtr = void (IConverter::*)(Params...);
|
|
||||||
|
|
||||||
template <typename... Params, typename... Args>
|
|
||||||
bool writeValueIfPossible(WriteValueFunctionPtr<Params...> func, Args &&... args)
|
|
||||||
{
|
|
||||||
if (num_values && !current_field->data.is_repeatable)
|
|
||||||
{
|
|
||||||
setNestedMessageNeedsRepeat();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
(current_converter->*func)(std::forward<Args>(args)...);
|
|
||||||
++num_values;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void setNestedMessageNeedsRepeat();
|
|
||||||
void endWritingField();
|
|
||||||
|
|
||||||
SimpleWriter simple_writer;
|
|
||||||
std::unique_ptr<Message> root_message;
|
|
||||||
|
|
||||||
Message * current_message;
|
|
||||||
size_t current_field_index = 0;
|
|
||||||
const Field * current_field = nullptr;
|
|
||||||
IConverter * current_converter = nullptr;
|
|
||||||
size_t num_values = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
# include <common/StringRef.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
class IAggregateFunction;
|
class WriteBuffer;
|
||||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
|
||||||
using ConstAggregateDataPtr = const char *;
|
|
||||||
|
|
||||||
|
/// Utility class for writing in the Protobuf format.
|
||||||
|
/// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
|
||||||
class ProtobufWriter
|
class ProtobufWriter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
bool writeNumber(Int8 /* value */) { return false; }
|
ProtobufWriter(WriteBuffer & out_);
|
||||||
bool writeNumber(UInt8 /* value */) { return false; }
|
~ProtobufWriter();
|
||||||
bool writeNumber(Int16 /* value */) { return false; }
|
|
||||||
bool writeNumber(UInt16 /* value */) { return false; }
|
void startMessage();
|
||||||
bool writeNumber(Int32 /* value */) { return false; }
|
void endMessage(bool with_length_delimiter);
|
||||||
bool writeNumber(UInt32 /* value */) { return false; }
|
|
||||||
bool writeNumber(Int64 /* value */) { return false; }
|
void startNestedMessage();
|
||||||
bool writeNumber(UInt64 /* value */) { return false; }
|
void endNestedMessage(int field_number, bool is_group, bool skip_if_empty);
|
||||||
bool writeNumber(Int128 /* value */) { return false; }
|
|
||||||
bool writeNumber(UInt128 /* value */) { return false; }
|
void writeInt(int field_number, Int64 value);
|
||||||
bool writeNumber(Int256 /* value */) { return false; }
|
void writeUInt(int field_number, UInt64 value);
|
||||||
bool writeNumber(UInt256 /* value */) { return false; }
|
void writeSInt(int field_number, Int64 value);
|
||||||
bool writeNumber(Float32 /* value */) { return false; }
|
template <typename T>
|
||||||
bool writeNumber(Float64 /* value */) { return false; }
|
void writeFixed(int field_number, T value);
|
||||||
bool writeString(const StringRef & /* value */) { return false; }
|
void writeString(int field_number, const std::string_view & str);
|
||||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & /* name_value_pairs */) {}
|
|
||||||
void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & /* name_value_pairs */) {}
|
void startRepeatedPack();
|
||||||
bool writeEnum(Int8 /* value */) { return false; }
|
void endRepeatedPack(int field_number, bool skip_if_empty);
|
||||||
bool writeEnum(Int16 /* value */) { return false; }
|
|
||||||
bool writeUUID(const UUID & /* value */) { return false; }
|
private:
|
||||||
bool writeDate(DayNum /* date */) { return false; }
|
struct Piece
|
||||||
bool writeDateTime(time_t /* tm */) { return false; }
|
{
|
||||||
bool writeDateTime64(DateTime64 /*tm*/, UInt32 /*scale*/) { return false; }
|
size_t start;
|
||||||
bool writeDecimal(Decimal32 /* decimal */, UInt32 /* scale */) { return false; }
|
size_t end;
|
||||||
bool writeDecimal(Decimal64 /* decimal */, UInt32 /* scale */) { return false; }
|
Piece(size_t start_, size_t end_) : start(start_), end(end_) {}
|
||||||
bool writeDecimal(const Decimal128 & /* decimal */, UInt32 /* scale */) { return false; }
|
Piece() = default;
|
||||||
bool writeDecimal(const Decimal256 & /* decimal */, UInt32 /* scale */) { return false; }
|
};
|
||||||
bool writeAggregateFunction(const AggregateFunctionPtr & /* function */, ConstAggregateDataPtr /* place */) { return false; }
|
|
||||||
|
struct NestedInfo
|
||||||
|
{
|
||||||
|
size_t num_pieces_at_start;
|
||||||
|
size_t num_bytes_skipped_at_start;
|
||||||
|
NestedInfo(size_t num_pieces_at_start_, size_t num_bytes_skipped_at_start_)
|
||||||
|
: num_pieces_at_start(num_pieces_at_start_), num_bytes_skipped_at_start(num_bytes_skipped_at_start_)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
WriteBuffer & out;
|
||||||
|
PODArray<UInt8> buffer;
|
||||||
|
std::vector<Piece> pieces;
|
||||||
|
size_t current_piece_start = 0;
|
||||||
|
size_t num_bytes_skipped = 0;
|
||||||
|
std::vector<NestedInfo> nested_infos;
|
||||||
|
bool in_repeated_pack = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -20,9 +20,9 @@ SRCS(
|
|||||||
NativeFormat.cpp
|
NativeFormat.cpp
|
||||||
NullFormat.cpp
|
NullFormat.cpp
|
||||||
ParsedTemplateFormatString.cpp
|
ParsedTemplateFormatString.cpp
|
||||||
ProtobufColumnMatcher.cpp
|
|
||||||
ProtobufReader.cpp
|
ProtobufReader.cpp
|
||||||
ProtobufSchemas.cpp
|
ProtobufSchemas.cpp
|
||||||
|
ProtobufSerializer.cpp
|
||||||
ProtobufWriter.cpp
|
ProtobufWriter.cpp
|
||||||
registerFormats.cpp
|
registerFormats.cpp
|
||||||
verbosePrintString.cpp
|
verbosePrintString.cpp
|
||||||
|
582
src/Functions/htmlOrXmlCoarseParse.cpp
Normal file
582
src/Functions/htmlOrXmlCoarseParse.cpp
Normal file
@ -0,0 +1,582 @@
|
|||||||
|
#include <Columns/ColumnString.h>
|
||||||
|
#include <Functions/FunctionFactory.h>
|
||||||
|
#include <Functions/FunctionHelpers.h>
|
||||||
|
#include <Functions/IFunctionImpl.h>
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#if USE_HYPERSCAN
|
||||||
|
# include <hs.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int ILLEGAL_COLUMN;
|
||||||
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||||
|
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||||
|
extern const int NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
struct HxCoarseParseImpl
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
/// A matched span: the id of the match together with its [first, second) position pair.
/// Copying is left to the compiler-generated operations (Rule of Zero).
struct SpanInfo
{
    SpanInfo() = default;
    SpanInfo(unsigned int matchId, std::pair<unsigned long long, unsigned long long> matchSpan): id(matchId), match_space(matchSpan) {} // NOLINT

    unsigned int id = 0;
    std::pair<unsigned long long, unsigned long long> match_space{}; // NOLINT
};
|
||||||
|
using SpanElement = std::vector<SpanInfo>;
|
||||||
|
struct Span
|
||||||
|
{
|
||||||
|
Span(): set_script(false), set_style(false), set_semi(false), is_finding_cdata(false) {}
|
||||||
|
|
||||||
|
SpanElement copy_stack; // copy area
|
||||||
|
SpanElement tag_stack; // regexp area
|
||||||
|
SpanInfo script_ptr; // script pointer
|
||||||
|
bool set_script; // whether set script
|
||||||
|
SpanInfo style_ptr; // style pointer
|
||||||
|
bool set_style; // whether set style
|
||||||
|
SpanInfo semi_ptr; // tag ptr
|
||||||
|
bool set_semi; // whether set semi
|
||||||
|
|
||||||
|
bool is_finding_cdata;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Copies `bytes_to_copy` bytes from `src_chars` (starting at `current_copy_loc`) to `dst_chars`
/// (at `current_dst_string_offset`), advancing the destination offset.
/// A leading space of the copied zone is dropped when the destination is empty or already ends
/// with a zero byte or a space. If `is_space` is non-zero, a single space is appended afterwards
/// unless the destination is empty or already ends with a zero byte or a space.
static inline void copyZone(
    ColumnString::Offset& current_dst_string_offset,
    ColumnString::Offset& current_copy_loc,
    ColumnString::Chars& dst_chars,
    const ColumnString::Chars& src_chars,
    size_t bytes_to_copy,
    unsigned is_space
)
{
    /// True when nothing has been written yet or the last written byte is a zero byte or a space.
    const auto dst_ends_with_blank = [&]
    {
        if (current_dst_string_offset == 0)
            return true;
        const auto last = dst_chars[current_dst_string_offset - 1];
        return last == 0 || last == ' ';
    };

    const bool is_last_space = dst_ends_with_blank();

    if (bytes_to_copy != 0)
    {
        /// Do not produce two separators in a row.
        if (is_last_space && src_chars[current_copy_loc] == ' ')
        {
            --bytes_to_copy;
            ++current_copy_loc;
        }

        if (bytes_to_copy > 0)
        {
            memcpySmallAllowReadWriteOverflow15(
                &dst_chars[current_dst_string_offset], &src_chars[current_copy_loc], bytes_to_copy);
            current_dst_string_offset += bytes_to_copy;
        }

        /// The separator is a space and the last character is not a space.
        if (is_space && !dst_ends_with_blank())
            dst_chars[current_dst_string_offset++] = ' ';

        return;
    }

    if (is_space && !is_last_space)
        dst_chars[current_dst_string_offset++] = ' ';
}
|
||||||
|
/// Pops spans from the top of `stack` while the end of the top span
/// lies strictly after `from` and strictly before `to`.
static inline void popArea(SpanElement& stack, unsigned long long from, unsigned long long to) //NOLINT
{
    while (!stack.empty()
        && to > stack.back().match_space.second
        && from < stack.back().match_space.second)
    {
        stack.pop_back();
    }
}
|
||||||
|
|
||||||
|
/// Rebuilds `copy_stack` from the matches buffered in `tag_stack`.
///
/// executeInternal() calls this when `is_finding_cdata` is still set after a row was scanned.
/// First, entries are removed from the top of `copy_stack` down to and including the most
/// recent entry with id 10 (the "<![CDATA[" pattern). Then every buffered match is replayed,
/// in order, through the tag pairing rules below. Unlike spanCollect(), id 10 is handled like
/// any other opening pattern here, and a completed opening tag is pushed with id 0.
static void dealCommonTag(Span* matches)
{
    auto & copy_stack = matches->copy_stack;

    /// Unwind the copy stack up to and including the last id-10 entry.
    while (!copy_stack.empty())
    {
        const bool reached_cdata_start = (copy_stack.back().id == 10);
        copy_stack.pop_back();
        if (reached_cdata_start)
            break;
    }

    for (const auto & tag : matches->tag_stack)
    {
        const unsigned long long from = tag.match_space.first; // NOLINT
        const unsigned long long to = tag.match_space.second; // NOLINT
        const unsigned id = tag.id;

        /// 12, 13: whitespace matches. Keep them unless they overlap the zone below them.
        if (id == 12 || id == 13)
        {
            popArea(copy_stack, from, to);
            if (copy_stack.empty() || from >= copy_stack.back().match_space.second)
                copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
            continue;
        }

        /// 0, 2..10: a tag start. Remember it as the pending start unless another one is
        /// already pending from a different position.
        if (id == 0 || (id >= 2 && id <= 10))
        {
            if (!matches->set_semi || from == matches->semi_ptr.match_space.first)
            {
                matches->set_semi = true;
                matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
            }
            continue;
        }

        /// Only ">" (id 1) with a pending tag start does anything from here on.
        if (id != 1 || !matches->set_semi)
            continue;

        const auto & pending = matches->semi_ptr;
        const bool closes_immediately = (pending.match_space.second == from);

        switch (pending.id)
        {
            case 0:
            case 2:
            case 3:
            case 6:
            case 7:
            case 10:
            {
                /// An opening <script ...> / <style ...> is remembered once, so that the
                /// matching closing tag can later cover the whole element.
                if (pending.id == 2 || (pending.id == 3 && closes_immediately))
                {
                    if (!matches->set_script)
                    {
                        matches->set_script = true;
                        matches->script_ptr = SpanInfo(pending.id, std::make_pair(pending.match_space.first, to));
                    }
                }
                else if (pending.id == 6 || (pending.id == 7 && closes_immediately))
                {
                    if (!matches->set_style)
                    {
                        matches->set_style = true;
                        matches->style_ptr = SpanInfo(pending.id, std::make_pair(pending.match_space.first, to));
                    }
                }

                popArea(copy_stack, pending.match_space.first, to);
                copy_stack.push_back(SpanInfo(0, std::make_pair(pending.match_space.first, to)));
                matches->set_semi = false;
                break;
            }
            case 4:
            case 5:
            case 8:
            case 9:
            {
                /// A closing </script> / </style>: when the matching opening tag was
                /// recorded, the zone starts there; otherwise it covers the closing tag only.
                SpanInfo complete_zone;
                complete_zone.match_space.second = to;

                if (matches->set_script && (pending.id == 4 || (pending.id == 5 && closes_immediately)))
                {
                    complete_zone.id = matches->script_ptr.id;
                    complete_zone.match_space.first = matches->script_ptr.match_space.first;
                    matches->set_script = false;
                }
                else if (matches->set_style && (pending.id == 8 || (pending.id == 9 && closes_immediately)))
                {
                    complete_zone.id = matches->style_ptr.id;
                    complete_zone.match_space.first = matches->style_ptr.match_space.first;
                    matches->set_style = false;
                }
                else
                {
                    complete_zone.id = pending.id;
                    complete_zone.match_space.first = pending.match_space.first;
                }

                popArea(copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
                copy_stack.push_back(complete_zone);
                matches->set_semi = false;
                break;
            }
        }
    }
}
|
||||||
|
static int spanCollect(unsigned int id,
|
||||||
|
unsigned long long from, // NOLINT
|
||||||
|
unsigned long long to, // NOLINT
|
||||||
|
unsigned int , void * ctx)
|
||||||
|
{
|
||||||
|
Span* matches = static_cast<Span*>(ctx);
|
||||||
|
from = id == 12 ? from : to - patterns_length[id];
|
||||||
|
|
||||||
|
if (matches->is_finding_cdata)
|
||||||
|
{
|
||||||
|
if (id == 11)
|
||||||
|
{
|
||||||
|
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||||
|
matches->is_finding_cdata = false;
|
||||||
|
matches->tag_stack.clear();
|
||||||
|
if (matches->semi_ptr.id == 10)
|
||||||
|
{
|
||||||
|
matches->set_semi = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (id == 12 || id == 13)
|
||||||
|
{
|
||||||
|
popArea(matches->copy_stack, from, to);
|
||||||
|
if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
|
||||||
|
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||||
|
|
||||||
|
popArea(matches->tag_stack, from, to);
|
||||||
|
if (matches->tag_stack.empty() || from >= matches->tag_stack.back().match_space.second)
|
||||||
|
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
popArea(matches->tag_stack, from, to);
|
||||||
|
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch (id)
|
||||||
|
{
|
||||||
|
case 12:
|
||||||
|
case 13:
|
||||||
|
{
|
||||||
|
popArea(matches->copy_stack, from, to);
|
||||||
|
if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
|
||||||
|
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 0:
|
||||||
|
case 2:
|
||||||
|
case 3:
|
||||||
|
case 4:
|
||||||
|
case 5:
|
||||||
|
case 6:
|
||||||
|
case 7:
|
||||||
|
case 8:
|
||||||
|
case 9:
|
||||||
|
{
|
||||||
|
if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
|
||||||
|
{
|
||||||
|
matches->set_semi = true;
|
||||||
|
matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 10:
|
||||||
|
{
|
||||||
|
if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
|
||||||
|
{
|
||||||
|
matches->set_semi = true;
|
||||||
|
matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
|
||||||
|
}
|
||||||
|
matches->is_finding_cdata = true;
|
||||||
|
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||||
|
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 1:
|
||||||
|
{
|
||||||
|
if (matches->set_semi)
|
||||||
|
{
|
||||||
|
switch (matches->semi_ptr.id)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
case 2:
|
||||||
|
case 3:
|
||||||
|
case 6:
|
||||||
|
case 7:
|
||||||
|
case 10:
|
||||||
|
{
|
||||||
|
if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
|
||||||
|
{
|
||||||
|
if (!matches->set_script)
|
||||||
|
{
|
||||||
|
matches->set_script = true;
|
||||||
|
matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
|
||||||
|
{
|
||||||
|
if (!matches->set_style)
|
||||||
|
{
|
||||||
|
matches->set_style = true;
|
||||||
|
matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
|
||||||
|
matches->copy_stack.push_back(SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to)));
|
||||||
|
matches->set_semi = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 4:
|
||||||
|
case 5:
|
||||||
|
case 8:
|
||||||
|
case 9:
|
||||||
|
{
|
||||||
|
SpanInfo complete_zone;
|
||||||
|
complete_zone.match_space.second = to;
|
||||||
|
if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
|
||||||
|
{
|
||||||
|
complete_zone.id = matches->script_ptr.id;
|
||||||
|
complete_zone.match_space.first = matches->script_ptr.match_space.first;
|
||||||
|
matches->set_script = false;
|
||||||
|
}
|
||||||
|
else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
|
||||||
|
{
|
||||||
|
complete_zone.id = matches->style_ptr.id;
|
||||||
|
complete_zone.match_space.first = matches->style_ptr.match_space.first;
|
||||||
|
matches->set_style = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
complete_zone.id = matches->semi_ptr.id;
|
||||||
|
complete_zone.match_space.first = matches->semi_ptr.match_space.first;
|
||||||
|
}
|
||||||
|
popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
|
||||||
|
matches->copy_stack.push_back(complete_zone);
|
||||||
|
matches->set_semi = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#if USE_HYPERSCAN
/// Compiles `expressions` into a single Hyperscan database.
///
/// `flags` and `id` hold the per-expression flags and ids and are passed to the compiler
/// together with `expressions`; `mode` is the Hyperscan compile mode.
/// Returns the compiled database; the caller owns it and must release it with hs_free_database().
/// Throws Exception (CANNOT_ALLOCATE_MEMORY) when compilation fails; the compiler's own
/// message, if it provided one, is appended to the exception text.
static hs_database_t* buildDatabase(const std::vector<const char* > &expressions,
                                    const std::vector<unsigned> &flags,
                                    const std::vector<unsigned> &id,
                                    unsigned int mode)
{
    /// Initialised so that nothing indeterminate is read or freed on the failure path.
    hs_database_t * db = nullptr;
    hs_compile_error_t * compile_err = nullptr;

    const hs_error_t err = hs_compile_multi(expressions.data(), flags.data(), id.data(),
                                            expressions.size(), mode, nullptr, &db, &compile_err);

    if (err != HS_SUCCESS)
    {
        std::string message = "Hyper scan database cannot be compiled.";
        if (compile_err)
        {
            /// The message has to be copied before the error object is released.
            if (compile_err->message)
            {
                message += ' ';
                message += compile_err->message;
            }
            hs_free_compile_error(compile_err);
        }
        throw Exception(message, ErrorCodes::CANNOT_ALLOCATE_MEMORY);
    }

    return db;
}
#endif
|
||||||
|
static std::vector<const char*> patterns;
|
||||||
|
static std::vector<std::size_t> patterns_length;
|
||||||
|
static std::vector<unsigned> patterns_flag;
|
||||||
|
static std::vector<unsigned> ids;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static void executeInternal(
|
||||||
|
const ColumnString::Chars & src_chars,
|
||||||
|
const ColumnString::Offsets & src_offsets,
|
||||||
|
ColumnString::Chars & dst_chars,
|
||||||
|
ColumnString::Offsets & dst_offsets)
|
||||||
|
{
|
||||||
|
#if USE_HYPERSCAN
|
||||||
|
hs_database_t * db = buildDatabase(patterns, patterns_flag, ids, HS_MODE_BLOCK);
|
||||||
|
hs_scratch_t* scratch = nullptr;
|
||||||
|
if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS)
|
||||||
|
{
|
||||||
|
hs_free_database(db);
|
||||||
|
throw Exception("Unable to allocate scratch space.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||||
|
}
|
||||||
|
dst_chars.resize(src_chars.size());
|
||||||
|
dst_offsets.resize(src_offsets.size());
|
||||||
|
|
||||||
|
ColumnString::Offset current_src_string_offset = 0;
|
||||||
|
ColumnString::Offset current_dst_string_offset = 0;
|
||||||
|
ColumnString::Offset current_copy_loc;
|
||||||
|
ColumnString::Offset current_copy_end;
|
||||||
|
unsigned is_space;
|
||||||
|
size_t bytes_to_copy;
|
||||||
|
Span match_zoneall;
|
||||||
|
|
||||||
|
for (size_t off = 0; off < src_offsets.size(); ++off)
|
||||||
|
{
|
||||||
|
hs_scan(db, reinterpret_cast<const char *>(&src_chars[current_src_string_offset]), src_offsets[off] - current_src_string_offset, 0, scratch, spanCollect, &match_zoneall);
|
||||||
|
if (match_zoneall.is_finding_cdata)
|
||||||
|
{
|
||||||
|
dealCommonTag(&match_zoneall);
|
||||||
|
}
|
||||||
|
SpanElement& match_zone = match_zoneall.copy_stack;
|
||||||
|
current_copy_loc = current_src_string_offset;
|
||||||
|
if (match_zone.empty())
|
||||||
|
{
|
||||||
|
current_copy_end = src_offsets[off];
|
||||||
|
is_space = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
current_copy_end = current_src_string_offset + match_zone.begin()->match_space.first;
|
||||||
|
is_space = (match_zone.begin()->id == 12 || match_zone.begin()->id == 13)?1:0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes_to_copy = current_copy_end - current_copy_loc;
|
||||||
|
copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
|
||||||
|
for (auto begin = match_zone.begin(); begin != match_zone.end(); ++begin)
|
||||||
|
{
|
||||||
|
current_copy_loc = current_src_string_offset + begin->match_space.second;
|
||||||
|
if (begin + 1 >= match_zone.end())
|
||||||
|
{
|
||||||
|
current_copy_end = src_offsets[off];
|
||||||
|
is_space = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
current_copy_end = current_src_string_offset + (begin+1)->match_space.first;
|
||||||
|
is_space = ((begin+1)->id == 12 || (begin+1)->id == 13)?1:0;
|
||||||
|
}
|
||||||
|
bytes_to_copy = current_copy_end - current_copy_loc;
|
||||||
|
copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
|
||||||
|
}
|
||||||
|
if (current_dst_string_offset > 1 && dst_chars[current_dst_string_offset - 2] == ' ')
|
||||||
|
{
|
||||||
|
dst_chars[current_dst_string_offset - 2] = 0;
|
||||||
|
--current_dst_string_offset;
|
||||||
|
}
|
||||||
|
dst_offsets[off] = current_dst_string_offset;
|
||||||
|
current_src_string_offset = src_offsets[off];
|
||||||
|
match_zoneall.copy_stack.clear();
|
||||||
|
match_zoneall.tag_stack.clear();
|
||||||
|
}
|
||||||
|
dst_chars.resize(dst_chars.size());
|
||||||
|
hs_free_scratch(scratch);
|
||||||
|
hs_free_database(db);
|
||||||
|
#else
|
||||||
|
(void)src_chars;
|
||||||
|
(void)src_offsets;
|
||||||
|
(void)dst_chars;
|
||||||
|
(void)dst_offsets;
|
||||||
|
throw Exception(
|
||||||
|
"htmlOrXmlCoarseParse is not implemented when hyperscan is off (is it x86 processor?)",
|
||||||
|
ErrorCodes::NOT_IMPLEMENTED);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<const char*> HxCoarseParseImpl::patterns =
|
||||||
|
{
|
||||||
|
"<[^\\s<>]", // 0 "<", except "< ", "<<", "<>"
|
||||||
|
">", // 1 ">"
|
||||||
|
"<script\\s", // 2 <script xxxxx>
|
||||||
|
"<script", // 3 <script>
|
||||||
|
"</script\\s", // 4 </script xxxx>
|
||||||
|
"</script", // 5 </script>
|
||||||
|
"<style\\s", // 6 <style xxxxxx>
|
||||||
|
"<style", // 7 <style>
|
||||||
|
"</style\\s", // 8 </style xxxxx>
|
||||||
|
"</style", // 9 </style>
|
||||||
|
"<!\\[CDATA\\[", // 10 <![CDATA[xxxxxx]]>
|
||||||
|
"\\]\\]>", // 11 ]]>
|
||||||
|
"\\s{2,}", // 12 " ", continuous blanks
|
||||||
|
"[^\\S ]" // 13 "\n", "\t" and other white space, it does not include single ' '.
|
||||||
|
};
|
||||||
|
std::vector<std::size_t> HxCoarseParseImpl::patterns_length =
|
||||||
|
{
|
||||||
|
2, 1, 8, 7, 9, 8, 7, 6, 8, 7, 9, 3, 0, 1
|
||||||
|
};
|
||||||
|
#if USE_HYPERSCAN
|
||||||
|
std::vector<unsigned> HxCoarseParseImpl::patterns_flag =
|
||||||
|
{
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, HS_FLAG_SOM_LEFTMOST, 0
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
std::vector<unsigned> HxCoarseParseImpl::ids =
|
||||||
|
{
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
|
||||||
|
};
|
||||||
|
|
||||||
|
class FunctionHtmlOrXmlCoarseParse : public IFunction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static constexpr auto name = "htmlOrXmlCoarseParse";
|
||||||
|
|
||||||
|
static FunctionPtr create(const Context &) {return std::make_shared<FunctionHtmlOrXmlCoarseParse>(); }
|
||||||
|
|
||||||
|
String getName() const override {return name;}
|
||||||
|
|
||||||
|
size_t getNumberOfArguments() const override {return 1;}
|
||||||
|
|
||||||
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||||
|
{
|
||||||
|
if (!isString(arguments[0]))
|
||||||
|
throw Exception(
|
||||||
|
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||||
|
return arguments[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
bool useDefaultImplementationForConstants() const override {return true;}
|
||||||
|
|
||||||
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & , size_t) const override
|
||||||
|
{
|
||||||
|
const auto & strcolumn = arguments[0].column;
|
||||||
|
if (const ColumnString* html_sentence = checkAndGetColumn<ColumnString>(strcolumn.get()))
|
||||||
|
{
|
||||||
|
auto col_res = ColumnString::create();
|
||||||
|
HxCoarseParseImpl::executeInternal(html_sentence->getChars(), html_sentence->getOffsets(), col_res->getChars(), col_res->getOffsets());
|
||||||
|
return col_res;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw Exception("First argument for function " + getName() + " must be string.", ErrorCodes::ILLEGAL_COLUMN);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Registers the htmlOrXmlCoarseParse function in the given factory.
void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory & factory)
{
    factory.registerFunction<FunctionHtmlOrXmlCoarseParse>();
}
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@ -6,7 +6,9 @@ namespace DB
|
|||||||
{
|
{
|
||||||
|
|
||||||
class FunctionFactory;
|
class FunctionFactory;
|
||||||
|
#if USE_HYPERSCAN
|
||||||
|
void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory &);
|
||||||
|
#endif
|
||||||
void registerFunctionRepeat(FunctionFactory &);
|
void registerFunctionRepeat(FunctionFactory &);
|
||||||
void registerFunctionEmpty(FunctionFactory &);
|
void registerFunctionEmpty(FunctionFactory &);
|
||||||
void registerFunctionNotEmpty(FunctionFactory &);
|
void registerFunctionNotEmpty(FunctionFactory &);
|
||||||
@ -45,6 +47,9 @@ void registerFunctionTryBase64Decode(FunctionFactory &);
|
|||||||
|
|
||||||
void registerFunctionsString(FunctionFactory & factory)
|
void registerFunctionsString(FunctionFactory & factory)
|
||||||
{
|
{
|
||||||
|
#if USE_HYPERSCAN
|
||||||
|
registerFunctionHtmlOrXmlCoarseParse(factory);
|
||||||
|
#endif
|
||||||
registerFunctionRepeat(factory);
|
registerFunctionRepeat(factory);
|
||||||
registerFunctionEmpty(factory);
|
registerFunctionEmpty(factory);
|
||||||
registerFunctionNotEmpty(factory);
|
registerFunctionNotEmpty(factory);
|
||||||
|
@ -291,6 +291,7 @@ SRCS(
|
|||||||
hasToken.cpp
|
hasToken.cpp
|
||||||
hasTokenCaseInsensitive.cpp
|
hasTokenCaseInsensitive.cpp
|
||||||
hostName.cpp
|
hostName.cpp
|
||||||
|
htmlOrXmlCoarseParse.cpp
|
||||||
hypot.cpp
|
hypot.cpp
|
||||||
identity.cpp
|
identity.cpp
|
||||||
if.cpp
|
if.cpp
|
||||||
|
@ -1,57 +1,48 @@
|
|||||||
#include "ProtobufRowInputFormat.h"
|
#include "ProtobufRowInputFormat.h"
|
||||||
|
|
||||||
#if USE_PROTOBUF
|
#if USE_PROTOBUF
|
||||||
#include <Core/Block.h>
|
# include <Core/Block.h>
|
||||||
#include <Formats/FormatFactory.h>
|
# include <Formats/FormatFactory.h>
|
||||||
#include <Formats/FormatSchemaInfo.h>
|
# include <Formats/FormatSchemaInfo.h>
|
||||||
#include <Formats/ProtobufSchemas.h>
|
# include <Formats/ProtobufReader.h>
|
||||||
#include <Interpreters/Context.h>
|
# include <Formats/ProtobufSchemas.h>
|
||||||
|
# include <Formats/ProtobufSerializer.h>
|
||||||
|
# include <Interpreters/Context.h>
|
||||||
|
# include <ext/range.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_)
|
||||||
ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_, const bool use_length_delimiters_)
|
|
||||||
: IRowInputFormat(header_, in_, params_)
|
: IRowInputFormat(header_, in_, params_)
|
||||||
, data_types(header_.getDataTypes())
|
, reader(std::make_unique<ProtobufReader>(in_))
|
||||||
, reader(in, ProtobufSchemas::instance().getMessageTypeForFormatSchema(info_), header_.getNames(), use_length_delimiters_)
|
, serializer(ProtobufSerializer::create(
|
||||||
|
header_.getNames(),
|
||||||
|
header_.getDataTypes(),
|
||||||
|
missing_column_indices,
|
||||||
|
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
|
||||||
|
with_length_delimiter_,
|
||||||
|
*reader))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
ProtobufRowInputFormat::~ProtobufRowInputFormat() = default;
|
ProtobufRowInputFormat::~ProtobufRowInputFormat() = default;
|
||||||
|
|
||||||
bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & extra)
|
bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & row_read_extension)
|
||||||
{
|
{
|
||||||
if (!reader.startMessage())
|
if (reader->eof())
|
||||||
return false; // EOF reached, no more messages.
|
return false;
|
||||||
|
|
||||||
// Set of columns for which the values were read. The rest will be filled with default values.
|
size_t row_num = columns.empty() ? 0 : columns[0]->size();
|
||||||
auto & read_columns = extra.read_columns;
|
if (!row_num)
|
||||||
read_columns.assign(columns.size(), false);
|
serializer->setColumns(columns.data(), columns.size());
|
||||||
|
|
||||||
// Read values from this message and put them to the columns while it's possible.
|
serializer->readRow(row_num);
|
||||||
size_t column_index;
|
|
||||||
while (reader.readColumnIndex(column_index))
|
|
||||||
{
|
|
||||||
bool allow_add_row = !static_cast<bool>(read_columns[column_index]);
|
|
||||||
do
|
|
||||||
{
|
|
||||||
bool row_added;
|
|
||||||
data_types[column_index]->deserializeProtobuf(*columns[column_index], reader, allow_add_row, row_added);
|
|
||||||
if (row_added)
|
|
||||||
{
|
|
||||||
read_columns[column_index] = true;
|
|
||||||
allow_add_row = false;
|
|
||||||
}
|
|
||||||
} while (reader.canReadMoreValues());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fill non-visited columns with the default values.
|
row_read_extension.read_columns.clear();
|
||||||
for (column_index = 0; column_index < read_columns.size(); ++column_index)
|
row_read_extension.read_columns.resize(columns.size(), true);
|
||||||
if (!read_columns[column_index])
|
for (size_t column_idx : missing_column_indices)
|
||||||
data_types[column_index]->insertDefaultInto(*columns[column_index]);
|
row_read_extension.read_columns[column_idx] = false;
|
||||||
|
|
||||||
reader.endMessage();
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,14 +53,14 @@ bool ProtobufRowInputFormat::allowSyncAfterError() const
|
|||||||
|
|
||||||
void ProtobufRowInputFormat::syncAfterError()
|
void ProtobufRowInputFormat::syncAfterError()
|
||||||
{
|
{
|
||||||
reader.endMessage(true);
|
reader->endMessage(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void registerInputFormatProcessorProtobuf(FormatFactory & factory)
|
void registerInputFormatProcessorProtobuf(FormatFactory & factory)
|
||||||
{
|
{
|
||||||
for (bool use_length_delimiters : {false, true})
|
for (bool with_length_delimiter : {false, true})
|
||||||
{
|
{
|
||||||
factory.registerInputFormatProcessor(use_length_delimiters ? "Protobuf" : "ProtobufSingle", [use_length_delimiters](
|
factory.registerInputFormatProcessor(with_length_delimiter ? "Protobuf" : "ProtobufSingle", [with_length_delimiter](
|
||||||
ReadBuffer & buf,
|
ReadBuffer & buf,
|
||||||
const Block & sample,
|
const Block & sample,
|
||||||
IRowInputFormat::Params params,
|
IRowInputFormat::Params params,
|
||||||
@ -78,7 +69,7 @@ void registerInputFormatProcessorProtobuf(FormatFactory & factory)
|
|||||||
return std::make_shared<ProtobufRowInputFormat>(buf, sample, std::move(params),
|
return std::make_shared<ProtobufRowInputFormat>(buf, sample, std::move(params),
|
||||||
FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true,
|
FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true,
|
||||||
settings.schema.is_server, settings.schema.format_schema_path),
|
settings.schema.is_server, settings.schema.format_schema_path),
|
||||||
use_length_delimiters);
|
with_length_delimiter);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,14 +5,14 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if USE_PROTOBUF
|
#if USE_PROTOBUF
|
||||||
# include <DataTypes/IDataType.h>
|
|
||||||
# include <Formats/ProtobufReader.h>
|
|
||||||
# include <Processors/Formats/IRowInputFormat.h>
|
# include <Processors/Formats/IRowInputFormat.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
class Block;
|
class Block;
|
||||||
class FormatSchemaInfo;
|
class FormatSchemaInfo;
|
||||||
|
class ProtobufReader;
|
||||||
|
class ProtobufSerializer;
|
||||||
|
|
||||||
|
|
||||||
/** Stream designed to deserialize data from the google protobuf format.
|
/** Stream designed to deserialize data from the google protobuf format.
|
||||||
@ -29,18 +29,19 @@ class FormatSchemaInfo;
|
|||||||
class ProtobufRowInputFormat : public IRowInputFormat
|
class ProtobufRowInputFormat : public IRowInputFormat
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_, const bool use_length_delimiters_);
|
ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_);
|
||||||
~ProtobufRowInputFormat() override;
|
~ProtobufRowInputFormat() override;
|
||||||
|
|
||||||
String getName() const override { return "ProtobufRowInputFormat"; }
|
String getName() const override { return "ProtobufRowInputFormat"; }
|
||||||
|
|
||||||
bool readRow(MutableColumns & columns, RowReadExtension & extra) override;
|
bool readRow(MutableColumns & columns, RowReadExtension &) override;
|
||||||
bool allowSyncAfterError() const override;
|
bool allowSyncAfterError() const override;
|
||||||
void syncAfterError() override;
|
void syncAfterError() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DataTypes data_types;
|
std::unique_ptr<ProtobufReader> reader;
|
||||||
ProtobufReader reader;
|
std::vector<size_t> missing_column_indices;
|
||||||
|
std::unique_ptr<ProtobufSerializer> serializer;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
#include <Formats/FormatFactory.h>
|
|
||||||
#include "ProtobufRowOutputFormat.h"
|
#include "ProtobufRowOutputFormat.h"
|
||||||
|
|
||||||
#if USE_PROTOBUF
|
#if USE_PROTOBUF
|
||||||
|
# include <Formats/FormatFactory.h>
|
||||||
#include <Core/Block.h>
|
# include <Core/Block.h>
|
||||||
#include <Formats/FormatSchemaInfo.h>
|
# include <Formats/FormatSchemaInfo.h>
|
||||||
#include <Formats/ProtobufSchemas.h>
|
# include <Formats/ProtobufSchemas.h>
|
||||||
#include <Interpreters/Context.h>
|
# include <Formats/ProtobufSerializer.h>
|
||||||
#include <google/protobuf/descriptor.h>
|
# include <Formats/ProtobufWriter.h>
|
||||||
|
# include <google/protobuf/descriptor.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -20,58 +20,55 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
ProtobufRowOutputFormat::ProtobufRowOutputFormat(
|
ProtobufRowOutputFormat::ProtobufRowOutputFormat(
|
||||||
WriteBuffer & out_,
|
WriteBuffer & out_,
|
||||||
const Block & header,
|
const Block & header_,
|
||||||
const RowOutputFormatParams & params_,
|
const RowOutputFormatParams & params_,
|
||||||
const FormatSchemaInfo & format_schema,
|
const FormatSchemaInfo & schema_info_,
|
||||||
const FormatSettings & settings)
|
const FormatSettings & settings_,
|
||||||
: IRowOutputFormat(header, out_, params_)
|
bool with_length_delimiter_)
|
||||||
, data_types(header.getDataTypes())
|
: IRowOutputFormat(header_, out_, params_)
|
||||||
, writer(out,
|
, writer(std::make_unique<ProtobufWriter>(out))
|
||||||
ProtobufSchemas::instance().getMessageTypeForFormatSchema(format_schema),
|
, serializer(ProtobufSerializer::create(
|
||||||
header.getNames(), settings.protobuf.write_row_delimiters)
|
header_.getNames(),
|
||||||
, allow_only_one_row(
|
header_.getDataTypes(),
|
||||||
!settings.protobuf.write_row_delimiters
|
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
|
||||||
&& !settings.protobuf.allow_many_rows_no_delimiters)
|
with_length_delimiter_,
|
||||||
|
*writer))
|
||||||
|
, allow_multiple_rows(with_length_delimiter_ || settings_.protobuf.allow_multiple_rows_without_delimiter)
|
||||||
{
|
{
|
||||||
value_indices.resize(header.columns());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num)
|
void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num)
|
||||||
{
|
{
|
||||||
if (allow_only_one_row && !first_row)
|
if (!allow_multiple_rows && !first_row)
|
||||||
{
|
throw Exception(
|
||||||
throw Exception("The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.", ErrorCodes::NO_ROW_DELIMITER);
|
"The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.",
|
||||||
}
|
ErrorCodes::NO_ROW_DELIMITER);
|
||||||
|
|
||||||
writer.startMessage();
|
if (!row_num)
|
||||||
std::fill(value_indices.begin(), value_indices.end(), 0);
|
serializer->setColumns(columns.data(), columns.size());
|
||||||
size_t column_index;
|
|
||||||
while (writer.writeField(column_index))
|
serializer->writeRow(row_num);
|
||||||
data_types[column_index]->serializeProtobuf(
|
|
||||||
*columns[column_index], row_num, writer, value_indices[column_index]);
|
|
||||||
writer.endMessage();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void registerOutputFormatProcessorProtobuf(FormatFactory & factory)
|
void registerOutputFormatProcessorProtobuf(FormatFactory & factory)
|
||||||
{
|
{
|
||||||
for (bool write_row_delimiters : {false, true})
|
for (bool with_length_delimiter : {false, true})
|
||||||
{
|
{
|
||||||
factory.registerOutputFormatProcessor(
|
factory.registerOutputFormatProcessor(
|
||||||
write_row_delimiters ? "Protobuf" : "ProtobufSingle",
|
with_length_delimiter ? "Protobuf" : "ProtobufSingle",
|
||||||
[write_row_delimiters](WriteBuffer & buf,
|
[with_length_delimiter](WriteBuffer & buf,
|
||||||
const Block & header,
|
const Block & header,
|
||||||
const RowOutputFormatParams & params,
|
const RowOutputFormatParams & params,
|
||||||
const FormatSettings & _settings)
|
const FormatSettings & settings)
|
||||||
{
|
{
|
||||||
FormatSettings settings = _settings;
|
|
||||||
settings.protobuf.write_row_delimiters = write_row_delimiters;
|
|
||||||
return std::make_shared<ProtobufRowOutputFormat>(
|
return std::make_shared<ProtobufRowOutputFormat>(
|
||||||
buf, header, params,
|
buf, header, params,
|
||||||
FormatSchemaInfo(settings.schema.format_schema, "Protobuf",
|
FormatSchemaInfo(settings.schema.format_schema, "Protobuf",
|
||||||
true, settings.schema.is_server,
|
true, settings.schema.is_server,
|
||||||
settings.schema.format_schema_path),
|
settings.schema.format_schema_path),
|
||||||
settings);
|
settings,
|
||||||
|
with_length_delimiter);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,21 +8,16 @@
|
|||||||
# include <Core/Block.h>
|
# include <Core/Block.h>
|
||||||
# include <Formats/FormatSchemaInfo.h>
|
# include <Formats/FormatSchemaInfo.h>
|
||||||
# include <Formats/FormatSettings.h>
|
# include <Formats/FormatSettings.h>
|
||||||
# include <Formats/ProtobufWriter.h>
|
|
||||||
# include <Processors/Formats/IRowOutputFormat.h>
|
# include <Processors/Formats/IRowOutputFormat.h>
|
||||||
|
|
||||||
|
|
||||||
namespace google
|
|
||||||
{
|
|
||||||
namespace protobuf
|
|
||||||
{
|
|
||||||
class Message;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
class ProtobufWriter;
|
||||||
|
class ProtobufSerializer;
|
||||||
|
class FormatSchemaInfo;
|
||||||
|
struct FormatSettings;
|
||||||
|
|
||||||
/** Stream designed to serialize data in the google protobuf format.
|
/** Stream designed to serialize data in the google protobuf format.
|
||||||
* Each row is written as a separated message.
|
* Each row is written as a separated message.
|
||||||
*
|
*
|
||||||
@ -38,10 +33,11 @@ class ProtobufRowOutputFormat : public IRowOutputFormat
|
|||||||
public:
|
public:
|
||||||
ProtobufRowOutputFormat(
|
ProtobufRowOutputFormat(
|
||||||
WriteBuffer & out_,
|
WriteBuffer & out_,
|
||||||
const Block & header,
|
const Block & header_,
|
||||||
const RowOutputFormatParams & params_,
|
const RowOutputFormatParams & params_,
|
||||||
const FormatSchemaInfo & format_schema,
|
const FormatSchemaInfo & schema_info_,
|
||||||
const FormatSettings & settings);
|
const FormatSettings & settings_,
|
||||||
|
bool with_length_delimiter_);
|
||||||
|
|
||||||
String getName() const override { return "ProtobufRowOutputFormat"; }
|
String getName() const override { return "ProtobufRowOutputFormat"; }
|
||||||
|
|
||||||
@ -50,10 +46,9 @@ public:
|
|||||||
std::string getContentType() const override { return "application/octet-stream"; }
|
std::string getContentType() const override { return "application/octet-stream"; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DataTypes data_types;
|
std::unique_ptr<ProtobufWriter> writer;
|
||||||
ProtobufWriter writer;
|
std::unique_ptr<ProtobufSerializer> serializer;
|
||||||
std::vector<size_t> value_indices;
|
const bool allow_multiple_rows;
|
||||||
const bool allow_only_one_row;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ void KafkaBlockOutputStream::writePrefix()
|
|||||||
buffer = storage.createWriteBuffer(getHeader());
|
buffer = storage.createWriteBuffer(getHeader());
|
||||||
|
|
||||||
auto format_settings = getFormatSettings(*context);
|
auto format_settings = getFormatSettings(*context);
|
||||||
format_settings.protobuf.allow_many_rows_no_delimiters = true;
|
format_settings.protobuf.allow_multiple_rows_without_delimiter = true;
|
||||||
|
|
||||||
child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
|
child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
|
||||||
getHeader(), *context,
|
getHeader(), *context,
|
||||||
|
@ -34,7 +34,7 @@ void RabbitMQBlockOutputStream::writePrefix()
|
|||||||
buffer->activateWriting();
|
buffer->activateWriting();
|
||||||
|
|
||||||
auto format_settings = getFormatSettings(context);
|
auto format_settings = getFormatSettings(context);
|
||||||
format_settings.protobuf.allow_many_rows_no_delimiters = true;
|
format_settings.protobuf.allow_multiple_rows_without_delimiter = true;
|
||||||
|
|
||||||
child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
|
child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
|
||||||
getHeader(), context,
|
getHeader(), context,
|
||||||
|
@ -342,3 +342,25 @@ def test_bridge_dies_with_parent(started_cluster):
|
|||||||
|
|
||||||
assert clickhouse_pid is None
|
assert clickhouse_pid is None
|
||||||
assert bridge_pid is None
|
assert bridge_pid is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_odbc_postgres_date_data_type(started_cluster):
    """Check that a PostgreSQL `date` column is read as ClickHouse `Date` through the ODBC table engine.

    Three rows are inserted on the PostgreSQL side, an ODBC-engine table is
    created on node1 pointing at them, and the SELECT result is compared with
    the expected tab-separated text.
    """
    conn = get_postgres_conn()
    cursor = conn.cursor()
    # Start from an empty table so that re-running the test (or running it after
    # an earlier failure) does not accumulate duplicate rows.
    cursor.execute("DROP TABLE IF EXISTS clickhouse.test_date")
    cursor.execute("CREATE TABLE IF NOT EXISTS clickhouse.test_date (column1 integer, column2 date)")

    cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, '2020-12-01')")
    cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, '2020-12-02')")
    cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, '2020-12-03')")
    conn.commit()

    # Same reasoning on the ClickHouse side: the CREATE below has no IF NOT EXISTS,
    # so a leftover table from a previous run would make it fail.
    node1.query("DROP TABLE IF EXISTS test_date")
    node1.query(
        '''
        CREATE TABLE test_date (column1 UInt64, column2 Date)
        ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')''')

    expected = '1\t2020-12-01\n2\t2020-12-02\n3\t2020-12-03\n'
    result = node1.query('SELECT * FROM test_date')
    assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
@ -0,0 +1,14 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
message ABC
|
||||||
|
{
|
||||||
|
message nested
|
||||||
|
{
|
||||||
|
message nested
|
||||||
|
{
|
||||||
|
repeated int32 c = 1;
|
||||||
|
}
|
||||||
|
repeated nested b = 1;
|
||||||
|
}
|
||||||
|
repeated nested a = 1;
|
||||||
|
}
|
@ -0,0 +1,52 @@
|
|||||||
|
[[],[[]],[[1]],[[2,3],[4]]]
|
||||||
|
[[[5,6,7]],[[8,9,10]]]
|
||||||
|
|
||||||
|
Binary representation:
|
||||||
|
00000000 1a 0a 00 0a 02 0a 00 0a 05 0a 03 0a 01 01 0a 0b |................|
|
||||||
|
00000010 0a 04 0a 02 02 03 0a 03 0a 01 04 12 0a 07 0a 05 |................|
|
||||||
|
00000020 0a 03 05 06 07 0a 07 0a 05 0a 03 08 09 0a |..............|
|
||||||
|
0000002e
|
||||||
|
|
||||||
|
MESSAGE #1 AT 0x00000001
|
||||||
|
a {
|
||||||
|
}
|
||||||
|
a {
|
||||||
|
b {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
a {
|
||||||
|
b {
|
||||||
|
c: 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
a {
|
||||||
|
b {
|
||||||
|
c: 2
|
||||||
|
c: 3
|
||||||
|
}
|
||||||
|
b {
|
||||||
|
c: 4
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MESSAGE #2 AT 0x0000001C
|
||||||
|
a {
|
||||||
|
b {
|
||||||
|
c: 5
|
||||||
|
c: 6
|
||||||
|
c: 7
|
||||||
|
}
|
||||||
|
}
|
||||||
|
a {
|
||||||
|
b {
|
||||||
|
c: 8
|
||||||
|
c: 9
|
||||||
|
c: 10
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Binary representation is as expected
|
||||||
|
|
||||||
|
[[],[[]],[[1]],[[2,3],[4]]]
|
||||||
|
[[[5,6,7]],[[8,9,10]]]
|
||||||
|
[[],[[]],[[1]],[[2,3],[4]]]
|
||||||
|
[[[5,6,7]],[[8,9,10]]]
|
35
tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
Executable file
35
tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
Executable file
@ -0,0 +1,35 @@
|
|||||||
|
#!/usr/bin/env bash

# Round-trip test for a three-dimensional array column in the Protobuf format:
# the table is written out as length-delimited protobuf, the binary is decoded
# and re-encoded by the helper script, and then inserted back into the table.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

set -eo pipefail

# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
DROP TABLE IF EXISTS array_3dim_protobuf_00825;

CREATE TABLE array_3dim_protobuf_00825
(
  `a_b_c` Array(Array(Array(Int32)))
) ENGINE = MergeTree ORDER BY tuple();

INSERT INTO array_3dim_protobuf_00825 VALUES ([[], [[]], [[1]], [[2,3],[4]]]), ([[[5, 6, 7]], [[8, 9, 10]]]);

SELECT * FROM array_3dim_protobuf_00825;
EOF

BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_3dim.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH"

# Check the output in the protobuf format
echo
# The helper path is quoted so that a checkout directory containing spaces does not split the command.
"$CURDIR"/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH"

# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825"

# Clean up: remove the temporary binary and the table created by this test.
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE array_3dim_protobuf_00825"
|
@ -0,0 +1,9 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
message AA {
|
||||||
|
message nested_array {
|
||||||
|
repeated double c = 2;
|
||||||
|
}
|
||||||
|
string a = 1;
|
||||||
|
repeated nested_array b = 2;
|
||||||
|
}
|
@ -0,0 +1,41 @@
|
|||||||
|
one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
|
||||||
|
|
||||||
|
Binary representation:
|
||||||
|
00000000 6b 0a 03 6f 6e 65 12 1a 12 18 00 00 00 00 00 00 |k..one..........|
|
||||||
|
00000010 f0 3f 00 00 00 00 00 00 00 40 00 00 00 00 00 00 |.?.......@......|
|
||||||
|
00000020 08 40 12 12 12 10 00 00 00 00 00 00 e0 3f 00 00 |.@...........?..|
|
||||||
|
00000030 00 00 00 00 d0 3f 12 00 12 12 12 10 00 00 00 00 |.....?..........|
|
||||||
|
00000040 00 00 10 40 00 00 00 00 00 00 14 40 12 12 12 10 |...@.......@....|
|
||||||
|
00000050 00 00 00 00 00 00 c0 3f 00 00 00 00 00 00 b0 3f |.......?.......?|
|
||||||
|
00000060 12 0a 12 08 00 00 00 00 00 00 18 40 |...........@|
|
||||||
|
0000006c
|
||||||
|
|
||||||
|
MESSAGE #1 AT 0x00000001
|
||||||
|
a: "one"
|
||||||
|
b {
|
||||||
|
c: 1
|
||||||
|
c: 2
|
||||||
|
c: 3
|
||||||
|
}
|
||||||
|
b {
|
||||||
|
c: 0.5
|
||||||
|
c: 0.25
|
||||||
|
}
|
||||||
|
b {
|
||||||
|
}
|
||||||
|
b {
|
||||||
|
c: 4
|
||||||
|
c: 5
|
||||||
|
}
|
||||||
|
b {
|
||||||
|
c: 0.125
|
||||||
|
c: 0.0625
|
||||||
|
}
|
||||||
|
b {
|
||||||
|
c: 6
|
||||||
|
}
|
||||||
|
|
||||||
|
Binary representation is as expected
|
||||||
|
|
||||||
|
one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
|
||||||
|
one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
|
38
tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
Executable file
38
tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
Executable file
@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env bash

# https://github.com/ClickHouse/ClickHouse/issues/9069
#
# Round-trip test for a Nested column holding arrays (i.e. an array of arrays)
# in the Protobuf format.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

set -eo pipefail

# Run the client.
# The leading DROP makes the test re-runnable when a previous run left the table behind.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
DROP TABLE IF EXISTS array_of_arrays_protobuf_00825;

CREATE TABLE array_of_arrays_protobuf_00825
(
  `a` String,
  `b` Nested (
  `c` Array(Float64)
  )
) ENGINE = MergeTree ORDER BY tuple();

INSERT INTO array_of_arrays_protobuf_00825 VALUES ('one', [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]);

SELECT * FROM array_of_arrays_protobuf_00825;
EOF

BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_of_arrays.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH"

# Check the output in the protobuf format
echo
# The helper path is quoted so that a checkout directory containing spaces does not split the command.
"$CURDIR"/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH"

# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825"

# Clean up: remove the temporary binary and the table created by this test.
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE array_of_arrays_protobuf_00825"
|
@ -0,0 +1,13 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
message Message
|
||||||
|
{
|
||||||
|
enum Enum
|
||||||
|
{
|
||||||
|
FIRST = 0;
|
||||||
|
SECOND = 1;
|
||||||
|
TEN = 10;
|
||||||
|
HUNDRED = 100;
|
||||||
|
};
|
||||||
|
Enum x = 1;
|
||||||
|
};
|
@ -0,0 +1,31 @@
|
|||||||
|
Second
|
||||||
|
Third
|
||||||
|
First
|
||||||
|
First
|
||||||
|
Second
|
||||||
|
|
||||||
|
Binary representation:
|
||||||
|
00000000 02 08 01 02 08 64 00 00 02 08 01 |.....d.....|
|
||||||
|
0000000b
|
||||||
|
|
||||||
|
MESSAGE #1 AT 0x00000001
|
||||||
|
x: SECOND
|
||||||
|
MESSAGE #2 AT 0x00000004
|
||||||
|
x: HUNDRED
|
||||||
|
MESSAGE #3 AT 0x00000007
|
||||||
|
MESSAGE #4 AT 0x00000008
|
||||||
|
MESSAGE #5 AT 0x00000009
|
||||||
|
x: SECOND
|
||||||
|
|
||||||
|
Binary representation is as expected
|
||||||
|
|
||||||
|
Second
|
||||||
|
Third
|
||||||
|
First
|
||||||
|
First
|
||||||
|
Second
|
||||||
|
Second
|
||||||
|
Third
|
||||||
|
First
|
||||||
|
First
|
||||||
|
Second
|
37
tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
Executable file
37
tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
Executable file
@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env bash

# https://github.com/ClickHouse/ClickHouse/issues/7438
#
# Round-trip test for an Enum16 column written to and read from a protobuf enum field.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

set -eo pipefail

# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
DROP TABLE IF EXISTS enum_mapping_protobuf_00825;

CREATE TABLE enum_mapping_protobuf_00825
(
  x Enum16('First'=-100, 'Second'=0, 'Third'=100)
) ENGINE = MergeTree ORDER BY tuple();

INSERT INTO enum_mapping_protobuf_00825 VALUES ('Second'), ('Third'), ('First'), ('First'), ('Second');

SELECT * FROM enum_mapping_protobuf_00825;
EOF

BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_enum_mapping.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH"

# Check the output in the protobuf format
echo
# The helper path is quoted so that a checkout directory containing spaces does not split the command.
"$CURDIR"/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH"

# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825"

# Clean up: remove the temporary binary and the table created by this test.
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE enum_mapping_protobuf_00825"
|
@ -0,0 +1,5 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
message Message {
|
||||||
|
map<string, uint32> a = 1;
|
||||||
|
};
|
@ -0,0 +1,19 @@
|
|||||||
|
{'x':5,'y':7}
|
||||||
|
{'z':11}
|
||||||
|
{'temp':0}
|
||||||
|
{'':0}
|
||||||
|
|
||||||
|
Binary representation:
|
||||||
|
00000000 0e 0a 05 0a 01 78 10 05 0a 05 0a 01 79 10 07 07 |.....x......y...|
|
||||||
|
00000010 0a 05 0a 01 7a 10 0b 0a 0a 08 0a 04 74 65 6d 70 |....z.......temp|
|
||||||
|
00000020 10 00 06 0a 04 0a 00 10 00 |.........|
|
||||||
|
00000029
|
||||||
|
|
||||||
|
{'x':5,'y':7}
|
||||||
|
{'z':11}
|
||||||
|
{'temp':0}
|
||||||
|
{'':0}
|
||||||
|
{'x':5,'y':7}
|
||||||
|
{'z':11}
|
||||||
|
{'temp':0}
|
||||||
|
{'':0}
|
40
tests/queries/0_stateless/00825_protobuf_format_map.sh
Executable file
40
tests/queries/0_stateless/00825_protobuf_format_map.sh
Executable file
@ -0,0 +1,40 @@
|
|||||||
|
#!/usr/bin/env bash

# https://github.com/ClickHouse/ClickHouse/issues/6497
#
# Round-trip test for a Map(String, UInt32) column written to and read from a protobuf map field.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

set -eo pipefail

# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
SET allow_experimental_map_type = 1;

DROP TABLE IF EXISTS map_00825;

CREATE TABLE map_00825
(
  a Map(String, UInt32)
) ENGINE = MergeTree ORDER BY tuple();

INSERT INTO map_00825 VALUES ({'x':5, 'y':7}), ({'z':11}), ({'temp':0}), ({'':0});

SELECT * FROM map_00825;
EOF

BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_map.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH"

# Check the output in the protobuf format
echo
echo "Binary representation:"
# Quoted so that a checkout directory containing spaces does not split the path into several arguments.
hexdump -C "$BINARY_FILE_PATH"

# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO map_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825"

# Clean up: remove the temporary binary and the table created by this test.
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE map_00825"
|
@ -0,0 +1,10 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
message Repeated {
|
||||||
|
string foo = 1;
|
||||||
|
int64 bar = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message Message {
|
||||||
|
repeated Repeated messages = 1;
|
||||||
|
};
|
@ -0,0 +1,25 @@
|
|||||||
|
['1'] [0]
|
||||||
|
['1',''] [0,1]
|
||||||
|
|
||||||
|
Binary representation:
|
||||||
|
00000000 05 0a 03 0a 01 31 09 0a 03 0a 01 31 0a 02 10 01 |.....1.....1....|
|
||||||
|
00000010
|
||||||
|
|
||||||
|
MESSAGE #1 AT 0x00000001
|
||||||
|
messages {
|
||||||
|
foo: "1"
|
||||||
|
}
|
||||||
|
MESSAGE #2 AT 0x00000007
|
||||||
|
messages {
|
||||||
|
foo: "1"
|
||||||
|
}
|
||||||
|
messages {
|
||||||
|
bar: 1
|
||||||
|
}
|
||||||
|
|
||||||
|
Binary representation is as expected
|
||||||
|
|
||||||
|
['1'] [0]
|
||||||
|
['1',''] [0,1]
|
||||||
|
['1'] [0]
|
||||||
|
['1',''] [0,1]
|
41
tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
Executable file
41
tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
Executable file
@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash

# https://github.com/ClickHouse/ClickHouse/issues/6497
#
# Round-trip test for a Nested column mapped to a repeated protobuf message
# whose fields may be left at their default values.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

set -eo pipefail

# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
DROP TABLE IF EXISTS nested_optional_protobuf_00825;

CREATE TABLE nested_optional_protobuf_00825
(
  messages Nested
  (
    foo String,
    bar Int64
  )
) ENGINE = MergeTree ORDER BY tuple();

INSERT INTO nested_optional_protobuf_00825 VALUES (['1'], [0]), (['1', ''], [0, 1]);

SELECT * FROM nested_optional_protobuf_00825;
EOF

BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_optional.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH"

# Check the output in the protobuf format
echo
# The helper path is quoted so that a checkout directory containing spaces does not split the command.
"$CURDIR"/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH"

# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825"

# Clean up: remove the temporary binary and the table created by this test.
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE nested_optional_protobuf_00825"
|
@ -0,0 +1,6 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
message Message {
|
||||||
|
sint32 x = 1;
|
||||||
|
sint32 z = 2;
|
||||||
|
};
|
@ -0,0 +1,37 @@
|
|||||||
|
0 0 0
|
||||||
|
2 4 8
|
||||||
|
3 9 27
|
||||||
|
5 25 125
|
||||||
|
101 102 103
|
||||||
|
|
||||||
|
Binary representation:
|
||||||
|
00000000 00 04 08 04 10 10 04 08 06 10 36 05 08 0a 10 fa |..........6.....|
|
||||||
|
00000010 01 06 08 ca 01 10 ce 01 |........|
|
||||||
|
00000018
|
||||||
|
|
||||||
|
MESSAGE #1 AT 0x00000001
|
||||||
|
MESSAGE #2 AT 0x00000002
|
||||||
|
x: 2
|
||||||
|
z: 8
|
||||||
|
MESSAGE #3 AT 0x00000007
|
||||||
|
x: 3
|
||||||
|
z: 27
|
||||||
|
MESSAGE #4 AT 0x0000000C
|
||||||
|
x: 5
|
||||||
|
z: 125
|
||||||
|
MESSAGE #5 AT 0x00000012
|
||||||
|
x: 101
|
||||||
|
z: 103
|
||||||
|
|
||||||
|
Binary representation is as expected
|
||||||
|
|
||||||
|
0 0 0
|
||||||
|
0 0 0
|
||||||
|
2 4 8
|
||||||
|
2 4 8
|
||||||
|
3 9 27
|
||||||
|
3 9 27
|
||||||
|
5 25 125
|
||||||
|
5 25 125
|
||||||
|
101 102 103
|
||||||
|
101 10201 103
|
38
tests/queries/0_stateless/00825_protobuf_format_table_default.sh
Executable file
38
tests/queries/0_stateless/00825_protobuf_format_table_default.sh
Executable file
@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env bash

# Round-trip test for a table with DEFAULT column expressions: the protobuf
# schema carries only `x` and `z`, so `y` has to be filled from its DEFAULT
# expression when the data is inserted back.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

set -eo pipefail

# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
DROP TABLE IF EXISTS table_default_protobuf_00825;

CREATE TABLE table_default_protobuf_00825
(
  x Int64,
  y Int64 DEFAULT x * x,
  z Int64 DEFAULT x * x * x
) ENGINE = MergeTree ORDER BY tuple();

INSERT INTO table_default_protobuf_00825 (x) VALUES (0), (2), (3), (5);
INSERT INTO table_default_protobuf_00825 VALUES (101, 102, 103);

SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z;
EOF

BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_table_default.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH"

# Check the output in the protobuf format
echo
# The helper path is quoted so that a checkout directory containing spaces does not split the command.
"$CURDIR"/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH"

# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z"

# Clean up: remove the temporary binary and the table created by this test.
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE table_default_protobuf_00825"
|
@ -0,0 +1,9 @@
|
|||||||
|
|
||||||
|
|
||||||
|
Here is CDTATA.
|
||||||
|
This is a white space test.
|
||||||
|
This is a complex test. <script type="text/javascript">Hello, world</script> world <style> hello
|
||||||
|
hello, world
|
||||||
|
|
||||||
|
hello, world
|
||||||
|
white space collapse
|
15
tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
Normal file
15
tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
SELECT htmlOrXmlCoarseParse('<script>Here is script.</script>');
|
||||||
|
SELECT htmlOrXmlCoarseParse('<style>Here is style.</style>');
|
||||||
|
SELECT htmlOrXmlCoarseParse('<![CDATA[Here is CDTATA.]]>');
|
||||||
|
SELECT htmlOrXmlCoarseParse('This is a white space test.');
|
||||||
|
SELECT htmlOrXmlCoarseParse('This is a complex test. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><![CDATA[<script type="text/javascript">Hello, world</script> ]]><hello />world<![CDATA[ <style> ]]> hello</style>\n<script><![CDATA[</script>]]>hello</script>\n</html>');
|
||||||
|
DROP TABLE IF EXISTS defaults;
|
||||||
|
CREATE TABLE defaults
|
||||||
|
(
|
||||||
|
stringColumn String
|
||||||
|
) ENGINE = Memory();
|
||||||
|
|
||||||
|
INSERT INTO defaults values ('<common tag>hello, world<tag>'), ('<script desc=content> some content </script>'), ('<![CDATA[hello, world]]>'), ('white space collapse');
|
||||||
|
|
||||||
|
SELECT htmlOrXmlCoarseParse(stringColumn) FROM defaults;
|
||||||
|
DROP table defaults;
|
@ -0,0 +1 @@
|
|||||||
|
1 First
|
@ -0,0 +1,28 @@
|
|||||||
|
DROP DATABASE IF EXISTS 01720_dictionary_db;
|
||||||
|
CREATE DATABASE 01720_dictionary_db;
|
||||||
|
|
||||||
|
CREATE TABLE 01720_dictionary_db.dictionary_source_table
|
||||||
|
(
|
||||||
|
key UInt8,
|
||||||
|
value String
|
||||||
|
)
|
||||||
|
ENGINE = TinyLog;
|
||||||
|
|
||||||
|
INSERT INTO 01720_dictionary_db.dictionary_source_table VALUES (1, 'First');
|
||||||
|
|
||||||
|
CREATE DICTIONARY 01720_dictionary_db.dictionary
|
||||||
|
(
|
||||||
|
key UInt64,
|
||||||
|
value String
|
||||||
|
)
|
||||||
|
PRIMARY KEY key
|
||||||
|
SOURCE(CLICKHOUSE(DB '01720_dictionary_db' TABLE 'dictionary_source_table' HOST hostName() PORT tcpPort()))
|
||||||
|
LIFETIME(0)
|
||||||
|
LAYOUT(FLAT());
|
||||||
|
|
||||||
|
SELECT * FROM 01720_dictionary_db.dictionary;
|
||||||
|
|
||||||
|
DROP DICTIONARY 01720_dictionary_db.dictionary;
|
||||||
|
DROP TABLE 01720_dictionary_db.dictionary_source_table;
|
||||||
|
|
||||||
|
DROP DATABASE 01720_dictionary_db;
|
@ -197,6 +197,7 @@
|
|||||||
01181_db_atomic_drop_on_cluster
|
01181_db_atomic_drop_on_cluster
|
||||||
01658_test_base64Encode_mysql_compatibility
|
01658_test_base64Encode_mysql_compatibility
|
||||||
01659_test_base64Decode_mysql_compatibility
|
01659_test_base64Decode_mysql_compatibility
|
||||||
|
01674_htm_xml_coarse_parse
|
||||||
01675_data_type_coroutine
|
01675_data_type_coroutine
|
||||||
01676_clickhouse_client_autocomplete
|
01676_clickhouse_client_autocomplete
|
||||||
01671_aggregate_function_group_bitmap_data
|
01671_aggregate_function_group_bitmap_data
|
||||||
|
180
tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py
Executable file
180
tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py
Executable file
@ -0,0 +1,180 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# The protobuf compiler protoc doesn't support encoding or decoding length-delimited protobuf messages.
|
||||||
|
# This script has been written to do that.
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os.path
|
||||||
|
import struct
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
def read_varint(input):
    """Read one protobuf base-128 varint from a binary stream.

    Each byte carries 7 payload bits, least-significant group first; the high
    bit (0x80) is set on every byte except the last one.

    :param input: binary file-like object supporting ``read(1)``.
    :return: the decoded non-negative integer, or ``None`` if the stream ends
             before a complete varint has been read (including when it is
             already at EOF).
    """
    res = 0
    shift = 0
    while True:
        c = input.read(1)
        if len(c) == 0:
            return None
        b = c[0]
        # Place this byte's 7 payload bits above the bits collected so far.
        res |= (b & 0x7F) << shift
        if b < 0x80:
            # Continuation bit is clear: this was the last byte.
            return res
        # Each further byte contributes the next 7 bits, so advance by 7
        # (shifting the shift amount itself would leave it stuck at 0).
        shift += 7
|
||||||
|
|
||||||
|
def write_varint(output, value):
    """Write ``value`` to ``output`` as a protobuf base-128 varint.

    The number is emitted 7 bits at a time, least-significant group first.
    Every byte except the final one has its high bit (0x80) set.

    :param output: binary file-like object supporting ``write``.
    :param value: integer to encode.
    """
    remaining = value
    # Emit continuation bytes while more than 7 bits are left.
    while remaining >= 0x80:
        output.write(bytes([(remaining & 0x7F) | 0x80]))
        remaining >>= 7
    # The final byte has the continuation bit clear.
    output.write(remaining.to_bytes(1, byteorder='little'))
|
||||||
|
|
||||||
|
def write_hexdump(output, data):
    """Write a ``hexdump -C`` rendering of ``data`` to ``output``.

    :param output: file object handed to the child process as its stdout.
    :param data: bytes fed to ``hexdump`` on its stdin.
    :raises RuntimeError: if ``hexdump`` exits with a non-zero code.
    """
    completed = subprocess.run(["hexdump", "-C"], input=data, stdout=output, shell=False)
    if completed.returncode != 0:
        raise RuntimeError("hexdump returned code " + str(completed.returncode))
    output.flush()
|
||||||
|
|
||||||
|
class FormatSchemaSplitted:
    """A format schema string of the form "schemafile:MessageType", split into its parts.

    Attributes:
        format_schema: the original, unsplit string.
        schemadir:     directory part of the schema file path.
        schemaname:    file name of the schema, with ".proto" appended when missing.
        message_type:  the text between the first and the second ':' (or the end).
    """

    def __init__(self, format_schema):
        self.format_schema = format_schema
        parts = self.format_schema.split(':')
        if len(parts) < 2:
            raise RuntimeError('The format schema must have the format "schemafile:MessageType"')
        schema_path, self.message_type = parts[0], parts[1]
        # os.path.split yields the same (dirname, basename) pair in a single call.
        self.schemadir, filename = os.path.split(schema_path)
        self.schemaname = filename if filename.endswith(".proto") else filename + ".proto"
|
||||||
|
|
||||||
|
def decode(input, output, format_schema):
    """Decode a stream of length-delimited protobuf messages into text.

    For every message a header line "MESSAGE #<index> AT 0x<offset>" is written
    to ``output``, followed by whatever ``protoc --decode`` prints for it.

    :param input: seekable binary stream holding varint-length-prefixed messages.
    :param output: binary file object; also handed to ``protoc`` as its stdout.
    :param format_schema: a FormatSchemaSplitted, or a "schemafile:MessageType" string.
    :raises EOFError: if a message is shorter than its length prefix says.
    :raises RuntimeError: if ``protoc`` exits with a non-zero code.
    """
    if type(format_schema) is not FormatSchemaSplitted:
        format_schema = FormatSchemaSplitted(format_schema)
    protoc_command = ["protoc", "--decode", format_schema.message_type, format_schema.schemaname]
    msgindex = 0
    # read_varint returns None when there is nothing more to read, which ends the loop.
    for sz in iter(lambda: read_varint(input), None):
        msgindex += 1
        # The offset reported is the position right after the length prefix.
        header = "MESSAGE #{msgindex} AT 0x{msgoffset:08X}\n".format(msgindex=msgindex, msgoffset=input.tell())
        output.write(header.encode())
        output.flush()
        msg = input.read(sz)
        if len(msg) < sz:
            raise EOFError('Unexpected end of file')
        with subprocess.Popen(protoc_command,
                              cwd=format_schema.schemadir,
                              stdin=subprocess.PIPE,
                              stdout=output,
                              shell=False) as proc:
            proc.communicate(msg)
            exit_code = proc.returncode
        if exit_code != 0:
            raise RuntimeError("protoc returned code " + str(exit_code))
        output.flush()
|
||||||
|
|
||||||
|
def encode(input, output, format_schema):
    """Encode text-format messages into length-delimited protobuf binary.

    The input consists of sections, each introduced by a line starting with
    "MESSAGE #"; the lines up to the next such header (or EOF) are passed to
    ``protoc --encode``. Each encoded message is written to ``output`` prefixed
    with its length as a varint.

    :param input: seekable binary stream with the text representation.
    :param output: binary file object receiving the encoded messages.
    :param format_schema: a FormatSchemaSplitted, or a "schemafile:MessageType" string.
    :raises RuntimeError: if a section does not start with a "MESSAGE #" line,
                          or if ``protoc`` exits with a non-zero code.
    """
    if type(format_schema) is not FormatSchemaSplitted:
        format_schema = FormatSchemaSplitted(format_schema)
    header_prefix = b"MESSAGE #"
    protoc_command = ["protoc", "--encode", format_schema.message_type, format_schema.schemaname]
    line_offset = input.tell()
    line = input.readline()
    while len(line) != 0:
        if not line.startswith(header_prefix):
            raise RuntimeError("The line at 0x{line_offset:08X} must start with the text 'MESSAGE #'".format(line_offset=line_offset))
        # Gather the body of this message: everything up to the next header or EOF.
        body_lines = []
        while True:
            line_offset = input.tell()
            line = input.readline()
            if line.startswith(header_prefix) or len(line) == 0:
                break
            body_lines.append(line)
        msg = b"".join(body_lines)
        with subprocess.Popen(protoc_command,
                              cwd=format_schema.schemadir,
                              stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              shell=False) as proc:
            msgbin, _ = proc.communicate(msg)
            exit_code = proc.returncode
        if exit_code != 0:
            raise RuntimeError("protoc returned code " + str(exit_code))
        write_varint(output, len(msgbin))
        output.write(msgbin)
        output.flush()
|
||||||
|
|
||||||
|
def decode_and_check(input, output, format_schema):
    """Decode the input, then re-encode it and verify the round trip.

    Writes a hexdump of the raw input followed by the decoded text to
    ``output``. The decoded text is then encoded again and compared with the
    original bytes; on a mismatch the re-encoded bytes are dumped and the
    process exits with status 1.

    Args:
        input: Binary stream holding the messages to decode.
        output: Binary stream receiving the report.
        format_schema: Schema specification forwarded to ``decode`` and
            ``encode``.
    """
    original_bytes = input.read()

    output.write(b"Binary representation:\n")
    output.flush()
    write_hexdump(output, original_bytes)
    output.write(b"\n")
    output.flush()

    with tempfile.TemporaryFile() as raw_file, \
            tempfile.TemporaryFile() as text_file, \
            tempfile.TemporaryFile() as reencoded_file:
        # Stage the input in a temporary file so it can be read from the start.
        raw_file.write(original_bytes)
        raw_file.flush()
        raw_file.seek(0)

        decode(raw_file, text_file, format_schema)
        text_file.seek(0)
        output.write(text_file.read())
        output.flush()

        # Feed the decoded text back through the encoder.
        text_file.seek(0)
        encode(text_file, reencoded_file, format_schema)
        reencoded_file.seek(0)
        roundtrip_bytes = reencoded_file.read()

    if roundtrip_bytes != original_bytes:
        output.write(b"\nBinary representation differs from the expected one (listed below):\n")
        output.flush()
        write_hexdump(output, roundtrip_bytes)
        sys.exit(1)

    output.write(b"\nBinary representation is as expected\n")
    output.flush()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: exactly one of --encode, --decode or
    # --decode_and_check must be given, together with --format_schema.
    parser = argparse.ArgumentParser(description='Encodes or decodes length-delimited protobuf messages.')
    parser.add_argument('--input', help='The input file, the standard input will be used if not specified.')
    parser.add_argument('--output', help='The output file, the standard output will be used if not specified.')
    parser.add_argument('--format_schema', required=True, help='Format schema in the format "schemafile:MessageType"')
    group = parser.add_mutually_exclusive_group(required=True)
    # Adjacent string literals are concatenated as-is, so each first part ends
    # with a single space to keep the sentences separated in the help output.
    group.add_argument('--encode', action='store_true', help='Specify to encode length-delimited messages. '
                       'The utility will read text-format messages of the given type from the input and write it in binary to the output.')
    group.add_argument('--decode', action='store_true', help='Specify to decode length-delimited messages. '
                       'The utility will read messages in binary from the input and write text-format messages to the output.')
    group.add_argument('--decode_and_check', action='store_true', help='The same as --decode, and the utility will then encode '
                       'the decoded data back to the binary form to check that the result of that encoding is the same as the input was.')
    args = parser.parse_args()

    custom_input_file = None
    custom_output_file = None
    try:
        if args.input:
            custom_input_file = open(args.input, "rb")
        if args.output:
            custom_output_file = open(args.output, "wb")
        # Fall back to the binary standard streams when no file was given.
        input = custom_input_file if custom_input_file else sys.stdin.buffer
        output = custom_output_file if custom_output_file else sys.stdout.buffer

        if args.encode:
            encode(input, output, args.format_schema)
        elif args.decode:
            decode(input, output, args.format_schema)
        elif args.decode_and_check:
            decode_and_check(input, output, args.format_schema)

    finally:
        # Close only the files opened here; the standard streams are left alone.
        if custom_input_file:
            custom_input_file.close()
        if custom_output_file:
            custom_output_file.close()
|
@ -122,6 +122,12 @@
|
|||||||
"00763_create_query_as_table_engine_bug",
|
"00763_create_query_as_table_engine_bug",
|
||||||
"00765_sql_compatibility_aliases",
|
"00765_sql_compatibility_aliases",
|
||||||
"00825_protobuf_format_input",
|
"00825_protobuf_format_input",
|
||||||
|
"00825_protobuf_format_nested_optional",
|
||||||
|
"00825_protobuf_format_array_3dim",
|
||||||
|
"00825_protobuf_format_map",
|
||||||
|
"00825_protobuf_format_array_of_arrays",
|
||||||
|
"00825_protobuf_format_table_default",
|
||||||
|
"00825_protobuf_format_enum_mapping",
|
||||||
"00826_cross_to_inner_join",
|
"00826_cross_to_inner_join",
|
||||||
"00834_not_between",
|
"00834_not_between",
|
||||||
"00909_kill_not_initialized_query",
|
"00909_kill_not_initialized_query",
|
||||||
|
Loading…
Reference in New Issue
Block a user