Merge pull request #4629 from bgranvea/simple_aggregate_function

SimpleAggregateFunction data type
This commit is contained in:
alexey-milovidov 2019-05-09 18:45:59 +03:00 committed by GitHub
commit 8f8d2c048e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 486 additions and 115 deletions

View File

@ -1,6 +1,8 @@
#include <DataStreams/AggregatingSortedBlockInputStream.h>
#include <Common/typeid_cast.h>
#include <Common/StringUtils/StringUtils.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
namespace DB
@ -22,7 +24,7 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
ColumnWithTypeAndName & column = header.safeGetByPosition(i);
/// We leave only states of aggregate functions.
if (!startsWith(column.type->getName(), "AggregateFunction"))
if (!dynamic_cast<const DataTypeAggregateFunction *>(column.type.get()) && !dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName()))
{
column_numbers_not_to_aggregate.push_back(i);
continue;
@ -40,7 +42,17 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
continue;
}
column_numbers_to_aggregate.push_back(i);
if (auto simple_aggr = dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName()))
{
// simple aggregate function
SimpleAggregateDescription desc{simple_aggr->getFunction(), i};
columns_to_simple_aggregate.emplace_back(std::move(desc));
}
else
{
// standard aggregate function
column_numbers_to_aggregate.push_back(i);
}
}
}
@ -91,7 +103,11 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
/// if there are enough rows accumulated and the last one is calculated completely
if (key_differs && merged_rows >= max_block_size)
{
/// Write the simple aggregation result for the previous group.
insertSimpleAggregationResult(merged_columns);
return;
}
queue.pop();
@ -110,6 +126,14 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
for (auto & column_to_aggregate : columns_to_aggregate)
column_to_aggregate->insertDefault();
/// Write the simple aggregation result for the previous group.
if (merged_rows > 0)
insertSimpleAggregationResult(merged_columns);
/// Reset simple aggregation states for next row
for (auto & desc : columns_to_simple_aggregate)
desc.createState();
++merged_rows;
}
@ -127,6 +151,9 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
}
}
/// Write the simple aggregation result for the previous group.
insertSimpleAggregationResult(merged_columns);
finished = true;
}
@ -138,6 +165,21 @@ void AggregatingSortedBlockInputStream::addRow(SortCursor & cursor)
size_t j = column_numbers_to_aggregate[i];
columns_to_aggregate[i]->insertMergeFrom(*cursor->all_columns[j], cursor->pos);
}
for (auto & desc : columns_to_simple_aggregate)
{
auto & col = cursor->all_columns[desc.column_number];
desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, nullptr);
}
}
void AggregatingSortedBlockInputStream::insertSimpleAggregationResult(MutableColumns & merged_columns)
{
for (auto & desc : columns_to_simple_aggregate)
{
desc.function->insertResultInto(desc.state.data(), *merged_columns[desc.column_number]);
desc.destroyState();
}
}
}

View File

@ -7,6 +7,7 @@
#include <DataStreams/MergingSortedBlockInputStream.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Common/AlignedBuffer.h>
namespace DB
@ -38,10 +39,13 @@ private:
/// Read finished.
bool finished = false;
struct SimpleAggregateDescription;
/// Columns with which numbers should be aggregated.
ColumnNumbers column_numbers_to_aggregate;
ColumnNumbers column_numbers_not_to_aggregate;
std::vector<ColumnAggregateFunction *> columns_to_aggregate;
std::vector<SimpleAggregateDescription> columns_to_simple_aggregate;
RowRef current_key; /// The current primary key.
RowRef next_key; /// The primary key of the next row.
@ -54,6 +58,53 @@ private:
/** Extract all states of aggregate functions and merge them with the current group.
*/
void addRow(SortCursor & cursor);
/** Insert all values of current row for simple aggregate functions
*/
void insertSimpleAggregationResult(MutableColumns & merged_columns);
/// Stores information for aggregation of SimpleAggregateFunction columns
struct SimpleAggregateDescription
{
/// An aggregate function 'anyLast', 'sum'...
AggregateFunctionPtr function;
IAggregateFunction::AddFunc add_function;
size_t column_number;
AlignedBuffer state;
bool created = false;
SimpleAggregateDescription(const AggregateFunctionPtr & function_, const size_t column_number_) : function(function_), column_number(column_number_)
{
add_function = function->getAddressOfAddFunction();
state.reset(function->sizeOfData(), function->alignOfData());
}
void createState()
{
if (created)
return;
function->create(state.data());
created = true;
}
void destroyState()
{
if (!created)
return;
function->destroy(state.data());
created = false;
}
/// Explicitly destroy aggregation state if the stream is terminated
~SimpleAggregateDescription()
{
destroyState();
}
SimpleAggregateDescription() = default;
SimpleAggregateDescription(SimpleAggregateDescription &&) = default;
SimpleAggregateDescription(const SimpleAggregateDescription &) = delete;
};
};
}

View File

@ -1,6 +1,8 @@
#pragma once
#include <memory>
#include <cstddef>
#include <Core/Types.h>
namespace DB
{
@ -10,21 +12,21 @@ class WriteBuffer;
struct FormatSettings;
class IColumn;
/** Further refinment of the properties of data type.
*
* Contains methods for serialization/deserialization.
* Implementations of this interface represent a data type domain (example: IPv4)
* which is a refinement of the exsitgin type with a name and specific text
* representation.
*
* IDataTypeDomain is totally immutable object. You can always share them.
/** Allow to customize an existing data type and set a different name and/or text serialization/deserialization methods.
* See use in IPv4 and IPv6 data types, and also in SimpleAggregateFunction.
*/
class IDataTypeDomain
class IDataTypeCustomName
{
public:
virtual ~IDataTypeDomain() {}
virtual ~IDataTypeCustomName() {}
virtual const char* getName() const = 0;
virtual String getName() const = 0;
};
class IDataTypeCustomTextSerialization
{
public:
virtual ~IDataTypeCustomTextSerialization() {}
/** Text serialization for displaying on a terminal or saving into a text file, and the like.
* Without escaping or quoting.
@ -56,4 +58,31 @@ public:
virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const = 0;
};
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
/** Describe a data type customization
*/
struct DataTypeCustomDesc
{
DataTypeCustomNamePtr name;
DataTypeCustomTextSerializationPtr text_serialization;
DataTypeCustomDesc(DataTypeCustomNamePtr name_, DataTypeCustomTextSerializationPtr text_serialization_)
: name(std::move(name_)), text_serialization(std::move(text_serialization_)) {}
};
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
/** A simple implementation of IDataTypeCustomName
*/
class DataTypeCustomFixedName : public IDataTypeCustomName
{
private:
String name;
public:
DataTypeCustomFixedName(String name_) : name(name_) {}
String getName() const override { return name; }
};
} // namespace DB

View File

@ -1,9 +1,9 @@
#include <Columns/ColumnsNumber.h>
#include <Common/Exception.h>
#include <Common/formatIPv6.h>
#include <DataTypes/DataTypeDomainWithSimpleSerialization.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/IDataTypeDomain.h>
#include <DataTypes/DataTypeCustom.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsCoding.h>
@ -20,20 +20,15 @@ namespace ErrorCodes
namespace
{
class DataTypeDomainIPv4 : public DataTypeDomainWithSimpleSerialization
class DataTypeCustomIPv4Serialization : public DataTypeCustomSimpleTextSerialization
{
public:
const char * getName() const override
{
return "IPv4";
}
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
const auto col = checkAndGetColumn<ColumnUInt32>(&column);
if (!col)
{
throw Exception(String(getName()) + " domain can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("IPv4 type can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
@ -48,7 +43,7 @@ public:
ColumnUInt32 * col = typeid_cast<ColumnUInt32 *>(&column);
if (!col)
{
throw Exception(String(getName()) + " domain can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("IPv4 type can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
@ -63,20 +58,16 @@ public:
}
};
class DataTypeDomainIPv6 : public DataTypeDomainWithSimpleSerialization
class DataTypeCustomIPv6Serialization : public DataTypeCustomSimpleTextSerialization
{
public:
const char * getName() const override
{
return "IPv6";
}
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
const auto col = checkAndGetColumn<ColumnFixedString>(&column);
if (!col)
{
throw Exception(String(getName()) + " domain can only serialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("IPv6 type domain can only serialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
@ -91,7 +82,7 @@ public:
ColumnFixedString * col = typeid_cast<ColumnFixedString *>(&column);
if (!col)
{
throw Exception(String(getName()) + " domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("IPv6 type domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
@ -100,7 +91,7 @@ public:
std::string ipv6_value(IPV6_BINARY_LENGTH, '\0');
if (!parseIPv6(buffer, reinterpret_cast<unsigned char *>(ipv6_value.data())))
{
throw Exception(String("Invalid ") + getName() + " value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
throw Exception("Invalid IPv6 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insertString(ipv6_value);
@ -111,8 +102,17 @@ public:
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
{
factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomainIPv4>());
factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomainIPv6>());
factory.registerSimpleDataTypeCustom("IPv4", []
{
return std::make_pair(DataTypeFactory::instance().get("UInt32"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<DataTypeCustomIPv4Serialization>()));
});
factory.registerSimpleDataTypeCustom("IPv6", []
{
return std::make_pair(DataTypeFactory::instance().get("FixedString(16)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<DataTypeCustomIPv6Serialization>()));
});
}
} // namespace DB

View File

@ -0,0 +1,137 @@
#include <Common/FieldVisitors.h>
#include <Common/typeid_cast.h>
#include <IO/ReadHelpers.h>
#include <Columns/ColumnAggregateFunction.h>
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <boost/algorithm/string/join.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int SYNTAX_ERROR;
extern const int BAD_ARGUMENTS;
extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
}
static const std::vector<String> supported_functions{"any", "anyLast", "min", "max", "sum"};
String DataTypeCustomSimpleAggregateFunction::getName() const
{
std::stringstream stream;
stream << "SimpleAggregateFunction(" << function->getName();
if (!parameters.empty())
{
stream << "(";
for (size_t i = 0; i < parameters.size(); ++i)
{
if (i)
stream << ", ";
stream << applyVisitor(DB::FieldVisitorToString(), parameters[i]);
}
stream << ")";
}
for (const auto & argument_type : argument_types)
stream << ", " << argument_type->getName();
stream << ")";
return stream.str();
}
static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
{
String function_name;
AggregateFunctionPtr function;
DataTypes argument_types;
Array params_row;
if (!arguments || arguments->children.empty())
throw Exception("Data type SimpleAggregateFunction requires parameters: "
"name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (const ASTFunction * parametric = arguments->children[0]->as<ASTFunction>())
{
if (parametric->parameters)
throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR);
function_name = parametric->name;
const ASTs & parameters = parametric->arguments->as<ASTExpressionList &>().children;
params_row.resize(parameters.size());
for (size_t i = 0; i < parameters.size(); ++i)
{
const ASTLiteral * lit = parameters[i]->as<ASTLiteral>();
if (!lit)
throw Exception("Parameters to aggregate functions must be literals",
ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS);
params_row[i] = lit->value;
}
}
else if (auto opt_name = getIdentifierName(arguments->children[0]))
{
function_name = *opt_name;
}
else if (arguments->children[0]->as<ASTLiteral>())
{
throw Exception("Aggregate function name for data type SimpleAggregateFunction must be passed as identifier (without quotes) or function",
ErrorCodes::BAD_ARGUMENTS);
}
else
throw Exception("Unexpected AST element passed as aggregate function name for data type SimpleAggregateFunction. Must be identifier or function.",
ErrorCodes::BAD_ARGUMENTS);
for (size_t i = 1; i < arguments->children.size(); ++i)
argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));
if (function_name.empty())
throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR);
function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row);
// check function
if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions))
{
throw Exception("Unsupported aggregate function " + function->getName() + ", supported functions are " + boost::algorithm::join(supported_functions, ","),
ErrorCodes::BAD_ARGUMENTS);
}
DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName());
if (!function->getReturnType()->equals(*removeLowCardinality(storage_type)))
{
throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(),
ErrorCodes::BAD_ARGUMENTS);
}
DataTypeCustomNamePtr custom_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, argument_types, params_row);
return std::make_pair(storage_type, std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
}
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory)
{
factory.registerDataTypeCustom("SimpleAggregateFunction", create);
}
}

View File

@ -0,0 +1,42 @@
#pragma once
#include <DataTypes/DataTypeCustom.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/FieldVisitors.h>
#include <IO/ReadHelpers.h>
namespace DB
{
/** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard
* data type but when rows are merged, an aggregation function is applied.
*
* The aggregation function is limited to simple functions whose merge state is the final result:
* any, anyLast, min, max, sum
*
* Examples:
*
* SimpleAggregateFunction(sum, Nullable(Float64))
* SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String)))
* SimpleAggregateFunction(anyLast, IPv4)
*
* Technically, a standard IDataType is instanciated and customized with IDataTypeCustomName and DataTypeCustomDesc.
*/
class DataTypeCustomSimpleAggregateFunction : public IDataTypeCustomName
{
private:
const AggregateFunctionPtr function;
const DataTypes argument_types;
const Array parameters;
public:
DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_)
: function(function_), argument_types(argument_types_), parameters(parameters_) {}
const AggregateFunctionPtr getFunction() const { return function; }
String getName() const override;
};
}

View File

@ -1,4 +1,4 @@
#include <DataTypes/DataTypeDomainWithSimpleSerialization.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
@ -9,7 +9,7 @@ namespace
{
using namespace DB;
static String serializeToString(const DataTypeDomainWithSimpleSerialization & domain, const IColumn & column, size_t row_num, const FormatSettings & settings)
static String serializeToString(const DataTypeCustomSimpleTextSerialization & domain, const IColumn & column, size_t row_num, const FormatSettings & settings)
{
WriteBufferFromOwnString buffer;
domain.serializeText(column, row_num, buffer, settings);
@ -17,7 +17,7 @@ static String serializeToString(const DataTypeDomainWithSimpleSerialization & do
return buffer.str();
}
static void deserializeFromString(const DataTypeDomainWithSimpleSerialization & domain, IColumn & column, const String & s, const FormatSettings & settings)
static void deserializeFromString(const DataTypeCustomSimpleTextSerialization & domain, IColumn & column, const String & s, const FormatSettings & settings)
{
ReadBufferFromString istr(s);
domain.deserializeText(column, istr, settings);
@ -28,59 +28,59 @@ static void deserializeFromString(const DataTypeDomainWithSimpleSerialization &
namespace DB
{
DataTypeDomainWithSimpleSerialization::~DataTypeDomainWithSimpleSerialization()
DataTypeCustomSimpleTextSerialization::~DataTypeCustomSimpleTextSerialization()
{
}
void DataTypeDomainWithSimpleSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeEscapedString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeDomainWithSimpleSerialization::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readEscapedString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeDomainWithSimpleSerialization::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeQuotedString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeDomainWithSimpleSerialization::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readQuotedString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeDomainWithSimpleSerialization::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeCSVString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeDomainWithSimpleSerialization::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readCSVString(str, istr, settings.csv);
deserializeFromString(*this, column, str, settings);
}
void DataTypeDomainWithSimpleSerialization::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(serializeToString(*this, column, row_num, settings), ostr, settings);
}
void DataTypeDomainWithSimpleSerialization::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readJSONString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeDomainWithSimpleSerialization::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeXMLString(serializeToString(*this, column, row_num, settings), ostr);
}

View File

@ -1,6 +1,6 @@
#pragma once
#include <DataTypes/IDataTypeDomain.h>
#include <DataTypes/DataTypeCustom.h>
namespace DB
{
@ -10,12 +10,12 @@ class WriteBuffer;
struct FormatSettings;
class IColumn;
/** Simple DataTypeDomain that uses serializeText/deserializeText
/** Simple IDataTypeCustomTextSerialization that uses serializeText/deserializeText
* for all serialization and deserialization. */
class DataTypeDomainWithSimpleSerialization : public IDataTypeDomain
class DataTypeCustomSimpleTextSerialization : public IDataTypeCustomTextSerialization
{
public:
virtual ~DataTypeDomainWithSimpleSerialization() override;
virtual ~DataTypeCustomSimpleTextSerialization() override;
// Methods that subclasses must override in order to get full serialization/deserialization support.
virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0;

View File

@ -1,5 +1,5 @@
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/IDataTypeDomain.h>
#include <DataTypes/DataTypeCustom.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ASTFunction.h>
@ -115,19 +115,23 @@ void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator
}, case_sensitiveness);
}
void DataTypeFactory::registerDataTypeDomain(const String & type_name, DataTypeDomainPtr domain, CaseSensitiveness case_sensitiveness)
void DataTypeFactory::registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness)
{
all_domains.reserve(all_domains.size() + 1);
auto data_type = get(type_name);
setDataTypeDomain(*data_type, *domain);
registerDataType(domain->getName(), [data_type](const ASTPtr & /*ast*/)
registerDataType(family_name, [creator](const ASTPtr & ast)
{
return data_type;
}, case_sensitiveness);
auto res = creator(ast);
res.first->setCustomization(std::move(res.second));
all_domains.emplace_back(std::move(domain));
return res.first;
}, case_sensitiveness);
}
void DataTypeFactory::registerSimpleDataTypeCustom(const String &name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness)
{
registerDataTypeCustom(name, [creator](const ASTPtr & /*ast*/)
{
return creator();
}, case_sensitiveness);
}
const DataTypeFactory::Creator& DataTypeFactory::findCreatorByName(const String & family_name) const
@ -153,11 +157,6 @@ const DataTypeFactory::Creator& DataTypeFactory::findCreatorByName(const String
throw Exception("Unknown data type family: " + family_name, ErrorCodes::UNKNOWN_TYPE);
}
void DataTypeFactory::setDataTypeDomain(const IDataType & data_type, const IDataTypeDomain & domain)
{
data_type.setDomain(&domain);
}
void registerDataTypeNumbers(DataTypeFactory & factory);
void registerDataTypeDecimal(DataTypeFactory & factory);
void registerDataTypeDate(DataTypeFactory & factory);
@ -175,6 +174,7 @@ void registerDataTypeNested(DataTypeFactory & factory);
void registerDataTypeInterval(DataTypeFactory & factory);
void registerDataTypeLowCardinality(DataTypeFactory & factory);
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
DataTypeFactory::DataTypeFactory()
@ -196,6 +196,7 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeInterval(*this);
registerDataTypeLowCardinality(*this);
registerDataTypeDomainIPv4AndIPv6(*this);
registerDataTypeDomainSimpleAggregateFunction(*this);
}
DataTypeFactory::~DataTypeFactory()

View File

@ -17,9 +17,6 @@ namespace DB
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
class IDataTypeDomain;
using DataTypeDomainPtr = std::unique_ptr<const IDataTypeDomain>;
/** Creates a data type by name of data type family and parameters.
*/
@ -28,6 +25,8 @@ class DataTypeFactory final : public ext::singleton<DataTypeFactory>, public IFa
private:
using SimpleCreator = std::function<DataTypePtr()>;
using DataTypesDictionary = std::unordered_map<String, Creator>;
using CreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>(const ASTPtr & parameters)>;
using SimpleCreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>()>;
public:
DataTypePtr get(const String & full_name) const;
@ -40,11 +39,13 @@ public:
/// Register a simple data type, that have no parameters.
void registerSimpleDataType(const String & name, SimpleCreator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
// Register a domain - a refinement of existing type.
void registerDataTypeDomain(const String & type_name, DataTypeDomainPtr domain, CaseSensitiveness case_sensitiveness = CaseSensitive);
/// Register a customized type family
void registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
/// Register a simple customized data type
void registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
private:
static void setDataTypeDomain(const IDataType & data_type, const IDataTypeDomain & domain);
const Creator& findCreatorByName(const String & family_name) const;
private:
@ -53,9 +54,6 @@ private:
/// Case insensitive data types will be additionally added here with lowercased name.
DataTypesDictionary case_insensitive_data_types;
// All domains are owned by factory and shared amongst DataType instances.
std::vector<DataTypeDomainPtr> all_domains;
DataTypeFactory();
~DataTypeFactory() override;

View File

@ -9,7 +9,7 @@
#include <IO/WriteHelpers.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/IDataTypeDomain.h>
#include <DataTypes/DataTypeCustom.h>
#include <DataTypes/NestedUtils.h>
@ -23,8 +23,7 @@ namespace ErrorCodes
extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
}
IDataType::IDataType()
: domain(nullptr)
IDataType::IDataType() : custom_name(nullptr), custom_text_serialization(nullptr)
{
}
@ -34,9 +33,9 @@ IDataType::~IDataType()
String IDataType::getName() const
{
if (domain)
if (custom_name)
{
return domain->getName();
return custom_name->getName();
}
else
{
@ -142,9 +141,9 @@ void IDataType::insertDefaultInto(IColumn & column) const
void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextEscaped(column, row_num, ostr, settings);
custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings);
}
else
{
@ -154,9 +153,9 @@ void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, W
void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->deserializeTextEscaped(column, istr, settings);
custom_text_serialization->deserializeTextEscaped(column, istr, settings);
}
else
{
@ -166,9 +165,9 @@ void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, co
void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextQuoted(column, row_num, ostr, settings);
custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings);
}
else
{
@ -178,9 +177,9 @@ void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, Wr
void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->deserializeTextQuoted(column, istr, settings);
custom_text_serialization->deserializeTextQuoted(column, istr, settings);
}
else
{
@ -190,9 +189,9 @@ void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, con
void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextCSV(column, row_num, ostr, settings);
custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings);
}
else
{
@ -202,9 +201,9 @@ void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, Write
void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->deserializeTextCSV(column, istr, settings);
custom_text_serialization->deserializeTextCSV(column, istr, settings);
}
else
{
@ -214,9 +213,9 @@ void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const
void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeText(column, row_num, ostr, settings);
custom_text_serialization->serializeText(column, row_num, ostr, settings);
}
else
{
@ -226,9 +225,9 @@ void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuf
void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextJSON(column, row_num, ostr, settings);
custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings);
}
else
{
@ -238,9 +237,9 @@ void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, Writ
void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->deserializeTextJSON(column, istr, settings);
custom_text_serialization->deserializeTextJSON(column, istr, settings);
}
else
{
@ -250,9 +249,9 @@ void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const
void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextXML(column, row_num, ostr, settings);
custom_text_serialization->serializeTextXML(column, row_num, ostr, settings);
}
else
{
@ -260,13 +259,14 @@ void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, Write
}
}
void IDataType::setDomain(const IDataTypeDomain* const new_domain) const
void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
{
if (domain != nullptr)
{
throw Exception("Type " + getName() + " already has a domain.", ErrorCodes::LOGICAL_ERROR);
}
domain = new_domain;
/// replace only if not null
if (custom_desc_->name)
custom_name = std::move(custom_desc_->name);
if (custom_desc_->text_serialization)
custom_text_serialization = std::move(custom_desc_->text_serialization);
}
}

View File

@ -4,6 +4,7 @@
#include <Common/COW.h>
#include <boost/noncopyable.hpp>
#include <Core/Field.h>
#include <DataTypes/DataTypeCustom.h>
namespace DB
@ -12,7 +13,6 @@ namespace DB
class ReadBuffer;
class WriteBuffer;
class IDataTypeDomain;
class IDataType;
struct FormatSettings;
@ -459,18 +459,19 @@ public:
private:
friend class DataTypeFactory;
/** Sets domain on existing DataType, can be considered as second phase
* of construction explicitly done by DataTypeFactory.
* Will throw an exception if domain is already set.
/** Customize this DataType
*/
void setDomain(const IDataTypeDomain* newDomain) const;
void setCustomization(DataTypeCustomDescPtr custom_desc_) const;
private:
/** This is mutable to allow setting domain on `const IDataType` post construction,
* simplifying creation of domains for all types, without them even knowing
* of domain existence.
/** This is mutable to allow setting custom name and serialization on `const IDataType` post construction.
*/
mutable IDataTypeDomain const* domain;
mutable DataTypeCustomNamePtr custom_name;
mutable DataTypeCustomTextSerializationPtr custom_text_serialization;
public:
const IDataTypeCustomName * getCustomName() const { return custom_name.get(); }
const IDataTypeCustomTextSerialization * getCustomTextSerialization() const { return custom_text_serialization.get(); }
};

View File

@ -0,0 +1,43 @@
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
SimpleAggregateFunction(sum, Float64)
0 0
1 2
2 4
3 6
4 8
5 10
6 12
7 14
8 16
9 18
0 0
1 2
2 4
3 6
4 8
5 10
6 12
7 14
8 16
9 18
1 1 2 2.2.2.2
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4)

View File

@ -0,0 +1,27 @@
-- basic test
drop table if exists test.simple;
create table test.simple (id UInt64,val SimpleAggregateFunction(sum,Double)) engine=AggregatingMergeTree order by id;
insert into test.simple select number,number from system.numbers limit 10;
select * from test.simple;
select * from test.simple final;
select toTypeName(val) from test.simple limit 1;
-- merge
insert into test.simple select number,number from system.numbers limit 10;
select * from test.simple final;
optimize table test.simple final;
select * from test.simple;
-- complex types
drop table if exists test.simple;
create table test.simple (id UInt64,nullable_str SimpleAggregateFunction(anyLast,Nullable(String)),low_str SimpleAggregateFunction(anyLast,LowCardinality(Nullable(String))),ip SimpleAggregateFunction(anyLast,IPv4)) engine=AggregatingMergeTree order by id;
insert into test.simple values(1,'1','1','1.1.1.1');
insert into test.simple values(1,null,'2','2.2.2.2');
select * from test.simple final;
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip) from test.simple limit 1;