mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
dbms: external dictionaries: store null_values in sample_block, use them instead of IColumn::insertDefault for MySQL and MongoDB [#METR-17854]
This commit is contained in:
parent
c469b726c1
commit
4355b32890
@ -9,6 +9,8 @@
|
|||||||
#include <DB/DataTypes/DataTypesNumberFixed.h>
|
#include <DB/DataTypes/DataTypesNumberFixed.h>
|
||||||
#include <common/singleton.h>
|
#include <common/singleton.h>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <DB/Core/FieldVisitors.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -19,25 +21,21 @@ namespace
|
|||||||
Block createSampleBlock(const DictionaryStructure & dict_struct)
|
Block createSampleBlock(const DictionaryStructure & dict_struct)
|
||||||
{
|
{
|
||||||
Block block{
|
Block block{
|
||||||
ColumnWithTypeAndName{
|
ColumnWithTypeAndName{new ColumnUInt64{1}, new DataTypeUInt64, dict_struct.id.name}
|
||||||
new ColumnUInt64,
|
|
||||||
new DataTypeUInt64,
|
|
||||||
dict_struct.id.name
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
if (dict_struct.range_min)
|
if (dict_struct.range_min)
|
||||||
for (const auto & attribute : { dict_struct.range_min, dict_struct.range_max })
|
for (const auto & attribute : { dict_struct.range_min, dict_struct.range_max })
|
||||||
block.insert(ColumnWithTypeAndName{
|
block.insert(
|
||||||
new ColumnUInt16,
|
ColumnWithTypeAndName{new ColumnUInt16{1}, new DataTypeDate, attribute->name});
|
||||||
new DataTypeDate,
|
|
||||||
attribute->name
|
|
||||||
});
|
|
||||||
|
|
||||||
for (const auto & attribute : dict_struct.attributes)
|
for (const auto & attribute : dict_struct.attributes)
|
||||||
block.insert(ColumnWithTypeAndName{
|
{
|
||||||
attribute.type->createColumn(), attribute.type, attribute.name
|
auto column = attribute.type->createColumn();
|
||||||
});
|
column->insert(attribute.null_value);
|
||||||
|
|
||||||
|
block.insert(ColumnWithTypeAndName{column, attribute.type, attribute.name});
|
||||||
|
}
|
||||||
|
|
||||||
return block;
|
return block;
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <DB/Core/Block.h>
|
#include <DB/Core/Block.h>
|
||||||
|
#include <DB/Dictionaries/DictionaryStructure.h>
|
||||||
#include <DB/DataStreams/IProfilingBlockInputStream.h>
|
#include <DB/DataStreams/IProfilingBlockInputStream.h>
|
||||||
#include <DB/DataTypes/DataTypesNumberFixed.h>
|
#include <DB/DataTypes/DataTypesNumberFixed.h>
|
||||||
#include <DB/DataTypes/DataTypeString.h>
|
#include <DB/DataTypes/DataTypeString.h>
|
||||||
@ -11,6 +12,8 @@
|
|||||||
#include <mongo/client/dbclient.h>
|
#include <mongo/client/dbclient.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <DB/Core/FieldVisitors.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -37,17 +40,20 @@ class MongoDBBlockInputStream final : public IProfilingBlockInputStream
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
MongoDBBlockInputStream(
|
MongoDBBlockInputStream(
|
||||||
std::unique_ptr<mongo::DBClientCursor> cursor_, const Block & sample_block, const std::size_t max_block_size)
|
std::unique_ptr<mongo::DBClientCursor> cursor_, const Block & sample_block_, const std::size_t max_block_size)
|
||||||
: cursor{std::move(cursor_)}, sample_block{sample_block}, max_block_size{max_block_size}
|
: cursor{std::move(cursor_)}, sample_block{sample_block_}, max_block_size{max_block_size}
|
||||||
{
|
{
|
||||||
/// do nothing if cursor has no data
|
/// do nothing if cursor has no data
|
||||||
if (!cursor->more())
|
if (!cursor->more())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
types.reserve(sample_block.columns());
|
const auto num_columns = sample_block.columns();
|
||||||
|
types.reserve(num_columns);
|
||||||
|
names.reserve(num_columns);
|
||||||
|
sample_columns.reserve(num_columns);
|
||||||
|
|
||||||
/// save types of each column to eliminate subsequent typeid_cast<> invocations
|
/// save types of each column to eliminate subsequent typeid_cast<> invocations
|
||||||
for (const auto idx : ext::range(0, sample_block.columns()))
|
for (const auto idx : ext::range(0, num_columns))
|
||||||
{
|
{
|
||||||
const auto & column = sample_block.getByPosition(idx);
|
const auto & column = sample_block.getByPosition(idx);
|
||||||
const auto type = column.type.get();
|
const auto type = column.type.get();
|
||||||
@ -85,6 +91,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
names.emplace_back(column.name);
|
names.emplace_back(column.name);
|
||||||
|
sample_columns.emplace_back(column.column.get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -124,7 +131,7 @@ private:
|
|||||||
if (value.ok())
|
if (value.ok())
|
||||||
insertValue(columns[idx], types[idx], value);
|
insertValue(columns[idx], types[idx], value);
|
||||||
else
|
else
|
||||||
insertDefaultValue(columns[idx], types[idx]);
|
insertDefaultValue(columns[idx], *sample_columns[idx]);
|
||||||
}
|
}
|
||||||
|
|
||||||
++num_rows;
|
++num_rows;
|
||||||
@ -287,25 +294,9 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @todo insert default value from the dictionary attribute definition
|
static void insertDefaultValue(IColumn * const column, const IColumn & sample_column)
|
||||||
static void insertDefaultValue(IColumn * const column, const value_type_t type)
|
|
||||||
{
|
{
|
||||||
switch (type)
|
column->insertFrom(sample_column, 0);
|
||||||
{
|
|
||||||
case value_type_t::UInt8: static_cast<ColumnUInt8 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::UInt16: static_cast<ColumnUInt16 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::UInt32: static_cast<ColumnUInt32 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::UInt64: static_cast<ColumnUInt64 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Int8: static_cast<ColumnInt8 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Int16: static_cast<ColumnInt16 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Int32: static_cast<ColumnInt32 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Int64: static_cast<ColumnInt64 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Float32: static_cast<ColumnFloat32 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Float64: static_cast<ColumnFloat64 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::String: static_cast<ColumnString *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Date: static_cast<ColumnUInt16 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::DateTime: static_cast<ColumnUInt32 *>(column)->insertDefault(); break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<mongo::DBClientCursor> cursor;
|
std::unique_ptr<mongo::DBClientCursor> cursor;
|
||||||
@ -313,6 +304,7 @@ private:
|
|||||||
const std::size_t max_block_size;
|
const std::size_t max_block_size;
|
||||||
std::vector<value_type_t> types;
|
std::vector<value_type_t> types;
|
||||||
std::vector<mongo::StringData> names;
|
std::vector<mongo::StringData> names;
|
||||||
|
std::vector<const IColumn *> sample_columns;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -107,6 +107,7 @@ public:
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// @todo: for MongoDB, modification date can somehow be determined from the `_id` object field
|
||||||
bool isModified() const override { return false; }
|
bool isModified() const override { return false; }
|
||||||
|
|
||||||
DictionarySourcePtr clone() const override { return std::make_unique<MongoDBDictionarySource>(*this); }
|
DictionarySourcePtr clone() const override { return std::make_unique<MongoDBDictionarySource>(*this); }
|
||||||
|
@ -37,12 +37,11 @@ class MySQLBlockInputStream final : public IProfilingBlockInputStream
|
|||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MySQLBlockInputStream(const mysqlxx::PoolWithFailover::Entry & entry,
|
MySQLBlockInputStream(
|
||||||
const std::string & query_str,
|
const mysqlxx::PoolWithFailover::Entry & entry, const std::string & query_str, const Block & sample_block_,
|
||||||
const Block & sample_block,
|
|
||||||
const std::size_t max_block_size)
|
const std::size_t max_block_size)
|
||||||
: entry{entry}, query{this->entry->query(query_str)}, result{query.use()},
|
: entry{entry}, query{this->entry->query(query_str)}, result{query.use()},
|
||||||
sample_block{sample_block}, max_block_size{max_block_size}
|
sample_block{sample_block_}, max_block_size{max_block_size}
|
||||||
{
|
{
|
||||||
if (sample_block.columns() != result.getNumFields())
|
if (sample_block.columns() != result.getNumFields())
|
||||||
throw Exception{
|
throw Exception{
|
||||||
@ -51,11 +50,15 @@ public:
|
|||||||
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH
|
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH
|
||||||
};
|
};
|
||||||
|
|
||||||
types.reserve(sample_block.columns());
|
const auto num_columns = sample_block.columns();
|
||||||
|
types.reserve(num_columns);
|
||||||
|
sample_columns.reserve(num_columns);
|
||||||
|
|
||||||
for (const auto idx : ext::range(0, sample_block.columns()))
|
for (const auto idx : ext::range(0, num_columns))
|
||||||
{
|
{
|
||||||
const auto type = sample_block.getByPosition(idx).type.get();
|
const auto & column = sample_block.getByPosition(idx);
|
||||||
|
const auto type = column.type.get();
|
||||||
|
|
||||||
if (typeid_cast<const DataTypeUInt8 *>(type))
|
if (typeid_cast<const DataTypeUInt8 *>(type))
|
||||||
types.push_back(value_type_t::UInt8);
|
types.push_back(value_type_t::UInt8);
|
||||||
else if (typeid_cast<const DataTypeUInt16 *>(type))
|
else if (typeid_cast<const DataTypeUInt16 *>(type))
|
||||||
@ -87,6 +90,8 @@ public:
|
|||||||
"Unsupported type " + type->getName(),
|
"Unsupported type " + type->getName(),
|
||||||
ErrorCodes::UNKNOWN_TYPE
|
ErrorCodes::UNKNOWN_TYPE
|
||||||
};
|
};
|
||||||
|
|
||||||
|
sample_columns.emplace_back(column.column.get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -120,7 +125,7 @@ private:
|
|||||||
if (!value.isNull())
|
if (!value.isNull())
|
||||||
insertValue(columns[idx], types[idx], value);
|
insertValue(columns[idx], types[idx], value);
|
||||||
else
|
else
|
||||||
insertDefaultValue(columns[idx], types[idx]);
|
insertDefaultValue(columns[idx], *sample_columns[idx]);
|
||||||
}
|
}
|
||||||
|
|
||||||
++num_rows;
|
++num_rows;
|
||||||
@ -158,24 +163,9 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void insertDefaultValue(IColumn * const column, const value_type_t type)
|
static void insertDefaultValue(IColumn * const column, const IColumn & sample_column)
|
||||||
{
|
{
|
||||||
switch (type)
|
column->insertFrom(sample_column, 0);
|
||||||
{
|
|
||||||
case value_type_t::UInt8: static_cast<ColumnUInt8 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::UInt16: static_cast<ColumnUInt16 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::UInt32: static_cast<ColumnUInt32 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::UInt64: static_cast<ColumnUInt64 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Int8: static_cast<ColumnInt8 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Int16: static_cast<ColumnInt16 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Int32: static_cast<ColumnInt32 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Int64: static_cast<ColumnInt64 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Float32: static_cast<ColumnFloat32 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Float64: static_cast<ColumnFloat64 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::String: static_cast<ColumnString *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::Date: static_cast<ColumnUInt16 *>(column)->insertDefault(); break;
|
|
||||||
case value_type_t::DateTime: static_cast<ColumnUInt32 *>(column)->insertDefault(); break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mysqlxx::PoolWithFailover::Entry entry;
|
mysqlxx::PoolWithFailover::Entry entry;
|
||||||
@ -184,6 +174,7 @@ private:
|
|||||||
Block sample_block;
|
Block sample_block;
|
||||||
const std::size_t max_block_size;
|
const std::size_t max_block_size;
|
||||||
std::vector<value_type_t> types;
|
std::vector<value_type_t> types;
|
||||||
|
std::vector<const IColumn *> sample_columns;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user