ClickHouse/dbms/Dictionaries/MongoDBBlockInputStream.cpp

239 lines
9.1 KiB
C++
Raw Normal View History

#include "config_core.h"
#if USE_POCO_MONGODB
#include <sstream>
#include <string>
#include <vector>
#include <Poco/MongoDB/Connection.h>
#include <Poco/MongoDB/Cursor.h>
#include <Poco/MongoDB/Element.h>
#include <Poco/MongoDB/ObjectId.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/FieldVisitors.h>
#include <Common/assert_cast.h>
#include <ext/range.h>
#include "DictionaryStructure.h"
#include "MongoDBBlockInputStream.h"
2016-12-08 02:49:04 +00:00
namespace DB
{
2018-06-05 19:46:49 +00:00
namespace ErrorCodes
{
/// Code attached to the exceptions this file throws when a MongoDB element's type
/// does not fit the target column. Only declared here (extern); the definition is
/// expected to be provided elsewhere in the project.
extern const int TYPE_MISMATCH;
}
2016-12-08 02:49:04 +00:00
/** Creates a stream that reads documents through `cursor_` over `connection_`.
  *
  * @param connection_      connection handle; copied into the stream.
  * @param cursor_          cursor to iterate; ownership is moved into the stream.
  * @param sample_block     block used to initialise the result description.
  * @param max_block_size_  row threshold checked by readImpl() between cursor responses.
  */
MongoDBBlockInputStream::MongoDBBlockInputStream(
    std::shared_ptr<Poco::MongoDB::Connection> & connection_,
    std::unique_ptr<Poco::MongoDB::Cursor> cursor_,
    const Block & sample_block,
    const UInt64 max_block_size_)
    : connection(connection_)
    , cursor(std::move(cursor_))
    , max_block_size(max_block_size_)
{
    /// readImpl() later consults description.sample_block and description.types.
    description.init(sample_block);
}
/// Compiler-generated destructor, defined out of line in this translation unit.
/// NOTE(review): presumably placed here so that member types which are only
/// forward-declared in the header are complete at this point — confirm against the header.
MongoDBBlockInputStream::~MongoDBBlockInputStream() = default;
namespace
{
/// Shorthand for the column value-type tag that insertValue() switches on.
using ValueType = ExternalResultDescription::ValueType;
/// Shorthand for the MongoDB ObjectId type; insertValue() accepts it for string columns.
using ObjectId = Poco::MongoDB::ObjectId;
template <typename T>
void insertNumber(IColumn & column, const Poco::MongoDB::Element & value, const std::string & name)
{
switch (value.type())
{
case Poco::MongoDB::ElementTraits<Int32>::TypeId:
assert_cast<ColumnVector<T> &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<Int32> &>(value).value());
break;
Squashed commit of the following: commit 2722e540abfee4a527d716474c4ca582eceeee08 Merge: b4f3af824 aebddd550 Author: proller <proller@github.com> Date: Tue Aug 21 18:34:18 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit b4f3af824b88a8b6826583bb483730827574e8ad Author: proller <proller@github.com> Date: Tue Aug 21 17:00:20 2018 +0300 fix commit 3a18fa7ded2a7d6b7a0883a1b9c2d6b99360837d Author: proller <proller@github.com> Date: Tue Aug 21 16:57:17 2018 +0300 fix commit 5d42220c2cf47e1a86bdc73dbfc24b68f8626891 Author: proller <proller@github.com> Date: Tue Aug 21 16:50:21 2018 +0300 freebsd fixes commit 7baf4fb5ba4575f79b0d12a9ffaaabd34e1006da Author: proller <proller@github.com> Date: Tue Aug 21 16:17:19 2018 +0300 fix commit e1fe707fd765b841a8d0952d91a980128cbf91d0 Author: proller <proller@github.com> Date: Tue Aug 21 15:35:21 2018 +0300 fix commit 027887c71b3ffa98d9473d50d8c3b79cbf3304ac Author: proller <proller@github.com> Date: Tue Aug 21 15:25:57 2018 +0300 fix commit 81af41bfcfff7c02fe4060196cf03c2d2aab416e Author: proller <proller@github.com> Date: Tue Aug 21 15:20:41 2018 +0300 fix commit 93d572c85d9c7e331254999a614c3b22b5573b02 Author: proller <proller@github.com> Date: Tue Aug 21 14:58:07 2018 +0300 SPLIT_SHARED commit dd5b8990d8527d59b1e890943d80c2bf27c613ce Author: proller <proller@github.com> Date: Tue Aug 21 14:52:39 2018 +0300 fix commit 4840ca12ab752a38c1ef482e8ec59c5859bb48d7 Author: proller <proller@github.com> Date: Tue Aug 21 14:46:31 2018 +0300 fix commit abde633beb86f2a0d025d6fcf079965dbd827b92 Author: proller <proller@github.com> Date: Tue Aug 21 14:25:32 2018 +0300 fix commit 0d94a5476a5ba8ba5e88638d58f2cfbf2b4b662d Author: proller <proller@github.com> Date: Tue Aug 21 14:45:18 2018 +0300 Travis: try fail on ninja fail commit c2686f90b68255c2beb0a708804aef404e80a6d2 Merge: 2c3427bbd 2aa7eb463 Author: proller <proller@github.com> Date: Tue Aug 21 14:25:15 2018 +0300 Merge remote-tracking branch 'upstream/master' into 
fix3 commit 2c3427bbdb861edbb188ed4621e8a05cafaedafb Author: proller <proller@github.com> Date: Mon Aug 20 23:57:24 2018 +0300 fix commit 530170c6a81d31dcfa81230e48520383234df4bc Merge: 9abec162c f6e4ec970 Author: proller <proller@github.com> Date: Mon Aug 20 23:57:03 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 9abec162cb2e09bbc2f33cbe80fe76791f6e5a77 Author: proller <proller@github.com> Date: Mon Aug 20 23:49:58 2018 +0300 Apple fixes commit 36d05e8217440fbc8ae21571b06d4eb6d679d538 Author: proller <proller@github.com> Date: Mon Aug 20 23:25:05 2018 +0300 apple fix commit aeec3e845e4456e89fbb1b1af6f9f36820a46e33 Author: proller <proller@github.com> Date: Mon Aug 20 23:20:06 2018 +0300 fixes commit 427961d916a5954981e47d94733996deb2a616ce Author: proller <proller@github.com> Date: Mon Aug 20 23:11:11 2018 +0300 fix commit a7dd55ff8f653624c0f3dbcbc54defd3b3ae97af Author: proller <proller@github.com> Date: Mon Aug 20 22:41:53 2018 +0300 fix commit 6200e0d315c7a62bae63a8de0fc32f7937770ad2 Merge: 8a541d7e6 21cedbe46 Author: proller <proller@github.com> Date: Mon Aug 20 22:35:49 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 8a541d7e64c89e2c16af6c909e0353361153aaa3 Author: proller <proller@github.com> Date: Mon Aug 20 22:34:32 2018 +0300 Do not use poco types commit fd560f43d048b7e3307c6c6b9c9d9918230014d8 Author: proller <proller@github.com> Date: Mon Aug 20 22:20:42 2018 +0300 Try fix apple build commit cfb2eba07ac06f19e822d3474341d800b1f98cf1 Merge: 8d2e31c90 5b81fdfc0 Author: proller <proller@github.com> Date: Mon Aug 20 22:03:24 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 8d2e31c908be2e99d09e2a9dde2414ab82a5e93c Author: proller <proller@github.com> Date: Mon Aug 20 21:56:06 2018 +0300 travis: TEST_SERVER_STARTUP_WAIT=10
2018-08-21 15:56:50 +00:00
case Poco::MongoDB::ElementTraits<Poco::Int64>::TypeId:
assert_cast<ColumnVector<T> &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<Poco::Int64> &>(value).value());
break;
case Poco::MongoDB::ElementTraits<Float64>::TypeId:
assert_cast<ColumnVector<T> &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<Float64> &>(value).value());
break;
case Poco::MongoDB::ElementTraits<bool>::TypeId:
assert_cast<ColumnVector<T> &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<bool> &>(value).value());
break;
case Poco::MongoDB::ElementTraits<Poco::MongoDB::NullValue>::TypeId:
assert_cast<ColumnVector<T> &>(column).getData().emplace_back();
break;
case Poco::MongoDB::ElementTraits<String>::TypeId:
assert_cast<ColumnVector<T> &>(column).getData().push_back(
parse<T>(static_cast<const Poco::MongoDB::ConcreteElement<String> &>(value).value()));
break;
default:
throw Exception(
"Type mismatch, expected a number, got type id = " + toString(value.type()) + " for column " + name,
ErrorCodes::TYPE_MISMATCH);
}
}
/** Appends one MongoDB element to `column`, converting it according to `type`.
  *
  * Numeric types are delegated to insertNumber<T>. String columns accept ObjectId
  * (via its toString()) and String elements. Date and DateTime columns accept only
  * Timestamp elements. UUID columns accept only String elements, parsed with parse<UUID>.
  *
  * @param column  destination column; its concrete class must match `type`.
  * @param type    value-type tag of the destination column.
  * @param value   MongoDB element to convert.
  * @param name    column name, used only in error messages.
  * @throws Exception with TYPE_MISMATCH when the element type is not accepted for `type`.
  */
void insertValue(IColumn & column, const ValueType type, const Poco::MongoDB::Element & value, const std::string & name)
{
    namespace Mongo = Poco::MongoDB;

    switch (type)
    {
        case ValueType::vtUInt8:
            return insertNumber<UInt8>(column, value, name);
        case ValueType::vtUInt16:
            return insertNumber<UInt16>(column, value, name);
        case ValueType::vtUInt32:
            return insertNumber<UInt32>(column, value, name);
        case ValueType::vtUInt64:
            return insertNumber<UInt64>(column, value, name);
        case ValueType::vtInt8:
            return insertNumber<Int8>(column, value, name);
        case ValueType::vtInt16:
            return insertNumber<Int16>(column, value, name);
        case ValueType::vtInt32:
            return insertNumber<Int32>(column, value, name);
        case ValueType::vtInt64:
            return insertNumber<Int64>(column, value, name);
        case ValueType::vtFloat32:
            return insertNumber<Float32>(column, value, name);
        case ValueType::vtFloat64:
            return insertNumber<Float64>(column, value, name);

        case ValueType::vtString:
        {
            /// Obtain the textual form first; both accepted element types end up as a plain string.
            String text;
            if (value.type() == Mongo::ElementTraits<ObjectId::Ptr>::TypeId)
                text = value.toString();
            else if (value.type() == Mongo::ElementTraits<String>::TypeId)
                text = static_cast<const Mongo::ConcreteElement<String> &>(value).value();
            else
                throw Exception("Type mismatch, expected String, got type id = " + toString(value.type()) + " for column " + name,
                    ErrorCodes::TYPE_MISMATCH);

            /// size() + 1 so that the terminating zero of the string buffer is included.
            assert_cast<ColumnString &>(column).insertDataWithTerminatingZero(text.data(), text.size() + 1);
            return;
        }

        case ValueType::vtDate:
        {
            if (value.type() != Mongo::ElementTraits<Poco::Timestamp>::TypeId)
                throw Exception("Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name,
                    ErrorCodes::TYPE_MISMATCH);

            /// Stored as a day number obtained from the epoch seconds of the timestamp.
            const auto epoch_seconds = static_cast<const Mongo::ConcreteElement<Poco::Timestamp> &>(value).value().epochTime();
            assert_cast<ColumnUInt16 &>(column).getData().push_back(UInt16{DateLUT::instance().toDayNum(epoch_seconds)});
            return;
        }

        case ValueType::vtDateTime:
        {
            if (value.type() != Mongo::ElementTraits<Poco::Timestamp>::TypeId)
                throw Exception("Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name,
                    ErrorCodes::TYPE_MISMATCH);

            /// Stored as epoch seconds.
            const auto epoch_seconds = static_cast<const Mongo::ConcreteElement<Poco::Timestamp> &>(value).value().epochTime();
            assert_cast<ColumnUInt32 &>(column).getData().push_back(epoch_seconds);
            return;
        }

        case ValueType::vtUUID:
        {
            if (value.type() != Mongo::ElementTraits<String>::TypeId)
                throw Exception("Type mismatch, expected String (UUID), got type id = " + toString(value.type()) + " for column "
                        + name,
                    ErrorCodes::TYPE_MISMATCH);

            const String text = static_cast<const Mongo::ConcreteElement<String> &>(value).value();
            assert_cast<ColumnUInt128 &>(column).getData().push_back(parse<UUID>(text));
            return;
        }
    }
}
/// Appends to `column` a copy of row 0 of `sample_column`; that row serves as the default value.
void insertDefaultValue(IColumn & column, const IColumn & sample_column)
{
    constexpr size_t default_row = 0;
    column.insertFrom(sample_column, default_row);
}
2016-12-08 02:49:04 +00:00
}
/** Reads the next block of rows from the MongoDB cursor.
  *
  * Responses are pulled from the cursor until at least max_block_size rows have been
  * collected or the server reports cursor id 0. The row limit is only checked between
  * responses, so a returned block may hold more than max_block_size rows.
  *
  * For each document, every column of the sample block is looked up by name. A missing
  * field or an explicit null inserts row 0 of the sample column; otherwise the element
  * is converted with insertValue(), going through the nested column and null map when
  * the description marks the column as nullable.
  *
  * @return a block shaped like the sample block, or an empty block when the stream is
  *         already exhausted or no rows were read.
  */
Block MongoDBBlockInputStream::readImpl()
{
    if (all_read)
        return {};

    const Block & header = description.sample_block;
    const size_t num_columns = header.columns();

    MutableColumns columns(num_columns);
    for (size_t pos = 0; pos < num_columns; ++pos)
        columns[pos] = header.getByPosition(pos).column->cloneEmpty();

    size_t num_rows = 0;
    while (num_rows < max_block_size)
    {
        Poco::MongoDB::ResponseMessage & response = cursor->next(*connection);

        for (const auto & document : response.documents())
        {
            ++num_rows;

            for (size_t pos = 0; pos < num_columns; ++pos)
            {
                const auto & sample = header.getByPosition(pos);
                const auto & name = sample.name;
                const Poco::MongoDB::Element::Ptr value = document->get(name);

                const bool missing_or_null
                    = value.isNull() || value->type() == Poco::MongoDB::ElementTraits<Poco::MongoDB::NullValue>::TypeId;
                if (missing_or_null)
                {
                    insertDefaultValue(*columns[pos], *sample.column);
                    continue;
                }

                const bool is_nullable = description.types[pos].second;
                if (!is_nullable)
                {
                    insertValue(*columns[pos], description.types[pos].first, *value, name);
                    continue;
                }

                /// Nullable column: write into the nested column and mark the row as not null.
                ColumnNullable & nullable = assert_cast<ColumnNullable &>(*columns[pos]);
                insertValue(nullable.getNestedColumn(), description.types[pos].first, *value, name);
                nullable.getNullMapData().emplace_back(0);
            }
        }

        /// Cursor id 0 is treated as "no more data": remember it so the next call returns immediately.
        if (response.cursorID() == 0)
        {
            all_read = true;
            break;
        }
    }

    if (num_rows == 0)
        return {};

    return header.cloneWithColumns(std::move(columns));
}
}
#endif