mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge pull request #44382 from Avogar/fix-bson-object-id
Fix reading ObjectId in BSON schema inference
This commit is contained in:
commit
09ab5832b1
@ -7,6 +7,8 @@ namespace DB
|
|||||||
{
|
{
|
||||||
|
|
||||||
static const uint8_t BSON_DOCUMENT_END = 0x00;
|
static const uint8_t BSON_DOCUMENT_END = 0x00;
|
||||||
|
static const size_t BSON_OBJECT_ID_SIZE = 12;
|
||||||
|
static const size_t BSON_DB_POINTER_SIZE = 12;
|
||||||
using BSONSizeT = uint32_t;
|
using BSONSizeT = uint32_t;
|
||||||
static const BSONSizeT MAX_BSON_SIZE = std::numeric_limits<BSONSizeT>::max();
|
static const BSONSizeT MAX_BSON_SIZE = std::numeric_limits<BSONSizeT>::max();
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include <Columns/ColumnMap.h>
|
#include <Columns/ColumnMap.h>
|
||||||
|
|
||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
|
#include <DataTypes/DataTypeFixedString.h>
|
||||||
#include <DataTypes/DataTypeUUID.h>
|
#include <DataTypes/DataTypeUUID.h>
|
||||||
#include <DataTypes/DataTypeDateTime64.h>
|
#include <DataTypes/DataTypeDateTime64.h>
|
||||||
#include <DataTypes/DataTypeLowCardinality.h>
|
#include <DataTypes/DataTypeLowCardinality.h>
|
||||||
@ -282,7 +283,7 @@ static void readAndInsertString(ReadBuffer & in, IColumn & column, BSONType bson
|
|||||||
}
|
}
|
||||||
else if (bson_type == BSONType::OBJECT_ID)
|
else if (bson_type == BSONType::OBJECT_ID)
|
||||||
{
|
{
|
||||||
readAndInsertStringImpl<is_fixed_string>(in, column, 12);
|
readAndInsertStringImpl<is_fixed_string>(in, column, BSON_OBJECT_ID_SIZE);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -664,7 +665,7 @@ static void skipBSONField(ReadBuffer & in, BSONType type)
|
|||||||
}
|
}
|
||||||
case BSONType::OBJECT_ID:
|
case BSONType::OBJECT_ID:
|
||||||
{
|
{
|
||||||
in.ignore(12);
|
in.ignore(BSON_OBJECT_ID_SIZE);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case BSONType::REGEXP:
|
case BSONType::REGEXP:
|
||||||
@ -677,7 +678,7 @@ static void skipBSONField(ReadBuffer & in, BSONType type)
|
|||||||
{
|
{
|
||||||
BSONSizeT size;
|
BSONSizeT size;
|
||||||
readBinary(size, in);
|
readBinary(size, in);
|
||||||
in.ignore(size + 12);
|
in.ignore(size + BSON_DB_POINTER_SIZE);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case BSONType::JAVA_SCRIPT_CODE_W_SCOPE:
|
case BSONType::JAVA_SCRIPT_CODE_W_SCOPE:
|
||||||
@ -796,7 +797,6 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo
|
|||||||
}
|
}
|
||||||
case BSONType::SYMBOL: [[fallthrough]];
|
case BSONType::SYMBOL: [[fallthrough]];
|
||||||
case BSONType::JAVA_SCRIPT_CODE: [[fallthrough]];
|
case BSONType::JAVA_SCRIPT_CODE: [[fallthrough]];
|
||||||
case BSONType::OBJECT_ID: [[fallthrough]];
|
|
||||||
case BSONType::STRING:
|
case BSONType::STRING:
|
||||||
{
|
{
|
||||||
BSONSizeT size;
|
BSONSizeT size;
|
||||||
@ -804,6 +804,11 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo
|
|||||||
in.ignore(size);
|
in.ignore(size);
|
||||||
return std::make_shared<DataTypeString>();
|
return std::make_shared<DataTypeString>();
|
||||||
}
|
}
|
||||||
|
case BSONType::OBJECT_ID:;
|
||||||
|
{
|
||||||
|
in.ignore(BSON_OBJECT_ID_SIZE);
|
||||||
|
return makeNullable(std::make_shared<DataTypeFixedString>(BSON_OBJECT_ID_SIZE));
|
||||||
|
}
|
||||||
case BSONType::DOCUMENT:
|
case BSONType::DOCUMENT:
|
||||||
{
|
{
|
||||||
auto nested_names_and_types = getDataTypesFromBSONDocument(false);
|
auto nested_names_and_types = getDataTypesFromBSONDocument(false);
|
||||||
@ -954,6 +959,7 @@ void registerInputFormatBSONEachRow(FormatFactory & factory)
|
|||||||
"BSONEachRow",
|
"BSONEachRow",
|
||||||
[](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings)
|
[](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings)
|
||||||
{ return std::make_shared<BSONEachRowRowInputFormat>(buf, sample, std::move(params), settings); });
|
{ return std::make_shared<BSONEachRowRowInputFormat>(buf, sample, std::move(params), settings); });
|
||||||
|
factory.registerFileExtension("bson", "BSONEachRow");
|
||||||
}
|
}
|
||||||
|
|
||||||
void registerFileSegmentationEngineBSONEachRow(FormatFactory & factory)
|
void registerFileSegmentationEngineBSONEachRow(FormatFactory & factory)
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
_id Nullable(FixedString(12))
|
||||||
|
name Nullable(String)
|
||||||
|
email Nullable(String)
|
||||||
|
movie_id Nullable(FixedString(12))
|
||||||
|
text Nullable(String)
|
||||||
|
date Nullable(DateTime64(6, \'UTC\'))
|
10
tests/queries/0_stateless/02500_bson_read_object_id.sh
Executable file
10
tests/queries/0_stateless/02500_bson_read_object_id.sh
Executable file
@ -0,0 +1,10 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Tags: no-fasttest
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
# shellcheck source=../shell_config.sh
|
||||||
|
. "$CURDIR"/../shell_config.sh
|
||||||
|
|
||||||
|
$CLICKHOUSE_LOCAL -q "desc file('$CURDIR/data_bson/comments.bson')"
|
||||||
|
$CLICKHOUSE_LOCAL -q "select _id from file('$CURDIR/data_bson/comments.bson') format Null"
|
||||||
|
|
BIN
tests/queries/0_stateless/data_bson/comments.bson
Normal file
BIN
tests/queries/0_stateless/data_bson/comments.bson
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user