Support true UUID type for MongoDB engine

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2024-07-17 20:34:15 +02:00
parent 6013e4b81d
commit 2f00c96271
4 changed files with 82 additions and 6 deletions

View File

@ -19,6 +19,7 @@
#include "Poco/Foundation.h"
#include <Poco/Types.h>
namespace Poco
@ -135,6 +136,12 @@ public:
static const UUID & x500();
/// Returns the namespace identifier for the X500 namespace.
UInt32 getTimeLow() const { return _timeLow; }
UInt16 getTimeMid() const { return _timeMid; }
UInt16 getTimeHiAndVersion() const { return _timeHiAndVersion; }
UInt16 getClockSeq() const { return _clockSeq; }
std::array<UInt8, 6> getNode() const { return std::array<UInt8, 6>{_node[0], _node[1], _node[2], _node[3], _node[4], _node[5]}; }
protected:
UUID(UInt32 timeLow, UInt32 timeMid, UInt32 timeHiAndVersion, UInt16 clockSeq, UInt8 node[]);
UUID(const char * bytes, Version version);

View File

@ -82,7 +82,7 @@ UUID Binary::uuid() const
uuid.copyFrom((const char*) _buffer.begin());
return uuid;
}
throw BadCastException("Invalid subtype");
throw BadCastException("Invalid subtype: " + std::to_string(_subtype) + ", size: " + std::to_string(_buffer.size()));
}

View File

@ -4,6 +4,7 @@
#include <vector>
#include <Poco/MongoDB/Array.h>
#include <Poco/MongoDB/Binary.h>
#include <Poco/MongoDB/Database.h>
#include <Poco/MongoDB/Connection.h>
#include <Poco/MongoDB/Cursor.h>
@ -17,6 +18,7 @@
#include <IO/ReadHelpers.h>
#include <Common/assert_cast.h>
#include <Common/quoteString.h>
#include "base/types.h"
#include <base/range.h>
#include <Poco/URI.h>
@ -45,8 +47,28 @@ namespace
using ValueType = ExternalResultDescription::ValueType;
using ObjectId = Poco::MongoDB::ObjectId;
using MongoArray = Poco::MongoDB::Array;
using MongoUUID = Poco::MongoDB::Binary::Ptr;
UUID parsePocoUUID(const Poco::UUID & src)
{
UUID uuid;
std::array<Poco::UInt8, 6> src_node = src.getNode();
UInt64 node = 0;
node |= UInt64(src_node[0]) << 40;
node |= UInt64(src_node[1]) << 32;
node |= UInt64(src_node[2]) << 24;
node |= UInt64(src_node[3]) << 16;
node |= UInt64(src_node[4]) << 8;
node |= src_node[5];
UUIDHelpers::getHighBytes(uuid) = UInt64(src.getTimeLow()) << 32 | UInt32(src.getTimeMid() << 16 | src.getTimeHiAndVersion());
UUIDHelpers::getLowBytes(uuid) = UInt64(src.getClockSeq()) << 48 | node;
return uuid;
}
template <typename T>
Field getNumber(const Poco::MongoDB::Element & value, const std::string & name)
{
@ -149,12 +171,20 @@ namespace
else if (which.isUUID())
parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field
{
if (value.type() != Poco::MongoDB::ElementTraits<String>::TypeId)
throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String (UUID), got type id = {} for column {}",
if (value.type() == Poco::MongoDB::ElementTraits<String>::TypeId)
{
String string = static_cast<const Poco::MongoDB::ConcreteElement<String> &>(value).value();
return parse<UUID>(string);
}
else if (value.type() == Poco::MongoDB::ElementTraits<MongoUUID>::TypeId)
{
const Poco::UUID & poco_uuid = static_cast<const Poco::MongoDB::ConcreteElement<MongoUUID> &>(value).value()->uuid();
return parsePocoUUID(poco_uuid);
}
else
throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String/UUID, got type id = {} for column {}",
toString(value.type()), name);
String string = static_cast<const Poco::MongoDB::ConcreteElement<String> &>(value).value();
return parse<UUID>(string);
};
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName());
@ -286,8 +316,14 @@ namespace
String string = static_cast<const Poco::MongoDB::ConcreteElement<String> &>(value).value();
assert_cast<ColumnUUID &>(column).getData().push_back(parse<UUID>(string));
}
else if (value.type() == Poco::MongoDB::ElementTraits<MongoUUID>::TypeId)
{
const Poco::UUID & poco_uuid = static_cast<const Poco::MongoDB::ConcreteElement<MongoUUID> &>(value).value()->uuid();
UUID uuid = parsePocoUUID(poco_uuid);
assert_cast<ColumnUUID &>(column).getData().push_back(uuid);
}
else
throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String (UUID), got type id = {} for column {}",
throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String/UUID, got type id = {} for column {}",
toString(value.type()), name);
break;
}

View File

@ -1,4 +1,5 @@
import pymongo
from uuid import UUID
import pytest
from helpers.client import QueryRuntimeException
@ -72,6 +73,28 @@ def test_simple_select(started_cluster):
simple_mongo_table.drop()
@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"])
def test_uuid(started_cluster):
mongo_connection = get_mongo_connection(started_cluster)
db = mongo_connection["test"]
db.add_user("root", "clickhouse")
mongo_table = db["uuid_table"]
mongo_table.insert({"key": 0, "data": UUID("f0e77736-91d1-48ce-8f01-15123ca1c7ed")})
node = started_cluster.instances["node"]
node.query(
"CREATE TABLE uuid_mongo_table(key UInt64, data UUID) ENGINE = MongoDB('mongo1:27017', 'test', 'uuid_table', 'root', 'clickhouse')"
)
assert node.query("SELECT COUNT() FROM uuid_mongo_table") == "1\n"
assert (
node.query("SELECT data from uuid_mongo_table where key = 0")
== "f0e77736-91d1-48ce-8f01-15123ca1c7ed\n"
)
node.query("DROP TABLE uuid_mongo_table")
mongo_table.drop()
@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"])
def test_simple_select_from_view(started_cluster):
mongo_connection = get_mongo_connection(started_cluster)
@ -140,6 +163,10 @@ def test_arrays(started_cluster):
"f0e77736-91d1-48ce-8f01-15123ca1c7ed",
"93376a07-c044-4281-a76e-ad27cf6973c5",
],
"arr_mongo_uuid": [
UUID("f0e77736-91d1-48ce-8f01-15123ca1c7ed"),
UUID("93376a07-c044-4281-a76e-ad27cf6973c5"),
],
"arr_arr_bool": [
[True, False, True],
[True],
@ -174,6 +201,7 @@ def test_arrays(started_cluster):
"arr_datetime Array(DateTime),"
"arr_string Array(String),"
"arr_uuid Array(UUID),"
"arr_mongo_uuid Array(UUID),"
"arr_arr_bool Array(Array(Bool)),"
"arr_empty Array(UInt64),"
"arr_null Array(UInt64),"
@ -222,6 +250,11 @@ def test_arrays(started_cluster):
== "['f0e77736-91d1-48ce-8f01-15123ca1c7ed','93376a07-c044-4281-a76e-ad27cf6973c5']\n"
)
assert (
node.query(f"SELECT arr_mongo_uuid FROM arrays_mongo_table WHERE key = 42")
== "['f0e77736-91d1-48ce-8f01-15123ca1c7ed','93376a07-c044-4281-a76e-ad27cf6973c5']\n"
)
assert (
node.query(f"SELECT arr_arr_bool FROM arrays_mongo_table WHERE key = 42")
== "[[true,false,true],[true],[],[],[false],[false]]\n"