This commit is contained in:
artpaul 2017-04-24 15:04:51 +05:00 committed by alexey-milovidov
parent 323a9aff6d
commit fd9d613dc9
7 changed files with 157 additions and 77 deletions

View File

@ -0,0 +1,86 @@
#include <DataStreams/CastEnumBlockInputStream.h>
#include <DataTypes/DataTypeEnum.h>
namespace DB
{
/** Wraps `input_` so that blocks read from it get their columns converted to Enum where needed.
  *
  * @param input_       stream to read blocks from; stored as the only child added here.
  * @param in_sample_   sample block describing the column types produced by `input_`.
  * @param out_sample_  sample block describing the desired column types.
  */
CastEnumBlockInputStream::CastEnumBlockInputStream(
    BlockInputStreamPtr input_, const Block & in_sample_, const Block & out_sample_)
{
    /// Work out, per column position, whether a conversion to Enum is required.
    collectEnums(in_sample_, out_sample_);

    /// Register the source stream; readImpl() and getID() access it through children.back().
    children.push_back(input_);
}
/// Returns the fixed name of this stream type.
String CastEnumBlockInputStream::getName() const
{
    return String("CastEnumBlockInputStream");
}
/// Builds the stream identifier: the stream name followed by the ID of the
/// wrapped (last child) stream in parentheses.
String CastEnumBlockInputStream::getID() const
{
    return "CastEnumBlockInputStream(" + children.back()->getID() + ")";
}
/** Reads the next block from the wrapped stream and returns a block in which every column
  * marked for conversion by collectEnums() is replaced by a newly built Enum column.
  * Columns without a conversion entry are passed through unchanged.
  */
Block CastEnumBlockInputStream::readImpl()
{
    Block source = children.back()->read();

    /// Nothing was read: hand the (empty) block back as is.
    if (!source)
        return source;

    /// No conversion table at all: nothing to rebuild.
    if (enum_types.empty())
        return source;

    Block converted;
    const size_t num_columns = source.columns();

    for (size_t pos = 0; pos < num_columns; ++pos)
    {
        const auto & src_elem = source.getByPosition(pos);
        const auto & target = enum_types[pos];

        if (!target)
        {
            /// No conversion requested for this position - keep the column untouched.
            converted.insert(src_elem);
            continue;
        }

        /// collectEnums() only stores entries whose type passed the IDataTypeEnum check.
        const auto * enum_type = static_cast<const IDataTypeEnum *>(target->type.get());
        ColumnPtr enum_column = enum_type->createColumn();

        /// Convert the source column value by value.
        const size_t num_rows = src_elem.column->size();
        for (size_t row = 0; row < num_rows; ++row)
            enum_column->insert(enum_type->castToValue((*src_elem.column)[row]));

        converted.insert({
            enum_column,
            target->type,
            target->name}
        );
    }

    return converted;
}
void CastEnumBlockInputStream::collectEnums(const Block & in_sample, const Block & out_sample)
{
size_t in_size = in_sample.columns();
for (size_t i = 0; i < in_size; ++i)
{
const auto & in_elem = in_sample.getByPosition(i);
const auto & out_elem = out_sample.getByPosition(i);
/// Force conversion only if source type is not Enum.
if ( dynamic_cast<IDataTypeEnum*>(out_elem.type.get()) &&
!dynamic_cast<IDataTypeEnum*>(in_elem.type.get()))
{
enum_types.push_back(NameAndTypePair(out_elem.name, out_elem.type));
}
else
{
enum_types.push_back(std::experimental::nullopt);
}
}
}
}

View File

@ -1,7 +1,6 @@
#pragma once
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataTypes/DataTypeEnum.h>
#include <experimental/optional>
#include <vector>
@ -9,85 +8,23 @@
namespace DB
{
/// Implicitly converts string and numeric values to Enum.
class CastEnumBlockInputStream : public IProfilingBlockInputStream
{
public:
CastEnumBlockInputStream(BlockInputStreamPtr input_,
const Block & in_sample_,
const Block & out_sample_)
{
collectEnums(in_sample_, out_sample_);
children.push_back(input_);
}
const Block & out_sample_);
String getName() const override { return "CastEnumBlockInputStream"; }
String getName() const override;
String getID() const override
{
std::stringstream res;
res << "CastEnumBlockInputStream(" << children.back()->getID() << ")";
return res.str();
}
String getID() const override;
protected:
Block readImpl() override
{
Block block = children.back()->read();
if (!block || enum_types.empty())
return block;
Block res;
size_t s = block.columns();
for (size_t i = 0; i < s; ++i)
{
const auto & elem = block.getByPosition(i);
if (bool(enum_types[i]))
{
const auto & type = static_cast<const IDataTypeEnum*>(enum_types[i]->type.get());
ColumnPtr new_column = type->createColumn();
for (size_t j = 0; j < elem.column->size(); ++j)
new_column->insert(type->castToValue((*elem.column)[j]));
res.insert({
new_column,
enum_types[i]->type,
enum_types[i]->name}
);
}
else
{
res.insert(elem);
}
}
return res;
}
Block readImpl() override;
private:
void collectEnums(const Block & in_sample, const Block & out_sample)
{
size_t in_size = in_sample.columns();
for (size_t i = 0; i < in_size; ++i)
{
const auto & in_elem = in_sample.getByPosition(i);
const auto & out_elem = out_sample.getByPosition(i);
/// Force conversion only if source type is not Enum.
if ( dynamic_cast<IDataTypeEnum*>(out_elem.type.get()) &&
!dynamic_cast<IDataTypeEnum*>(in_elem.type.get()))
{
enum_types.push_back(NameAndTypePair(out_elem.name, out_elem.type));
}
else
{
enum_types.push_back(std::experimental::nullopt);
}
}
}
void collectEnums(const Block & in_sample, const Block & out_sample);
private:
std::vector<std::experimental::optional<NameAndTypePair>> enum_types;

View File

@ -1,6 +1,7 @@
#include <DataStreams/NullableAdapterBlockInputStream.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataStreams/isConvertableTypes.h>
namespace DB
{
@ -13,13 +14,6 @@ extern const int TYPE_MISMATCH;
}
/// Unwraps nested Nullable layers and returns the innermost non-Nullable type.
static DataTypePtr removeNullable(DataTypePtr type)
{
    while (type->isNullable())
    {
        auto * nullable_type = typeid_cast<DataTypeNullable *>(type.get());
        type = nullable_type->getNestedType();
    }

    return type;
}
NullableAdapterBlockInputStream::NullableAdapterBlockInputStream(
BlockInputStreamPtr input_,
const Block & in_sample_, const Block & out_sample_)
@ -109,7 +103,7 @@ void NullableAdapterBlockInputStream::buildActions(
const auto & in_elem = in_sample.getByPosition(i);
const auto & out_elem = out_sample.getByPosition(i);
if (removeNullable(in_elem.type)->getName() == removeNullable(out_elem.type)->getName())
if (isConvertableTypes(in_elem.type, out_elem.type))
{
bool is_in_nullable = in_elem.type->isNullable();
bool is_out_nullable = out_elem.type->isNullable();

View File

@ -0,0 +1,34 @@
#include <DataStreams/isConvertableTypes.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
/// Strips every Nullable wrapper from `type` and returns the first nested type
/// that is not Nullable. A type that is not Nullable is returned unchanged.
static DataTypePtr removeNullable(DataTypePtr type)
{
    while (true)
    {
        if (!type->isNullable())
            return type;

        auto * wrapper = typeid_cast<DataTypeNullable *>(type.get());
        type = wrapper->getNestedType();
    }
}
bool isConvertableTypes(const DataTypePtr & from, const DataTypePtr & to)
{
auto from_nn = removeNullable(from);
auto to_nn = removeNullable(to);
if ( dynamic_cast<IDataTypeEnum*>(to_nn.get()) &&
!dynamic_cast<IDataTypeEnum*>(from_nn.get()))
{
if (dynamic_cast<DataTypeString*>(from_nn.get()))
return true;
if (from_nn->isNumeric())
return true;
}
return from_nn->getName() == to_nn->getName();
}
}

View File

@ -0,0 +1,11 @@
#pragma once
#include <DataTypes/IDataType.h>
namespace DB
{
/// Check that type 'from' can be implicitly converted to type 'to'.
/// Nullable wrappers around either type are ignored for the comparison.
/// Returns true when 'to' is an Enum and 'from' is a String or a numeric type,
/// or when both types have the same name.
bool isConvertableTypes(const DataTypePtr & from, const DataTypePtr & to);
}

View File

@ -0,0 +1,4 @@
session 2017-01-01 0
session 2017-01-01 1
pageview 2017-01-01 0
pageview 2017-01-01 1

View File

@ -0,0 +1,14 @@
-- Regression test: INSERT ... SELECT into an Enum8 column from String and numeric sources.
-- Start from a clean state in case a previous run left the table behind.
DROP TABLE IF EXISTS test.cast_enums;
CREATE TABLE test.cast_enums
(
type Enum8('session' = 1, 'pageview' = 2, 'click' = 3),
date Date,
id UInt64
) ENGINE = MergeTree(date, (type, date, id), 8192);
-- The source column is the string literal 'session'; the target column is Enum8.
INSERT INTO test.cast_enums SELECT 'session' AS type, toDate('2017-01-01') AS date, number AS id FROM system.numbers LIMIT 2;
-- The source column is the number 2, which the Enum8 definition above maps to 'pageview'.
INSERT INTO test.cast_enums SELECT 2 AS type, toDate('2017-01-01') AS date, number AS id FROM system.numbers LIMIT 2;
-- Read everything back in a deterministic order for comparison with the reference output.
SELECT type, date, id FROM test.cast_enums ORDER BY type, id;
-- Clean up.
DROP TABLE IF EXISTS test.cast_enums;