Merge pull request #57887 from canhld94/ch_fast_enum_default_serialization

More efficient constructor for SerializationEnum
This commit is contained in:
Robert Schulze 2023-12-19 12:51:23 +01:00 committed by GitHub
commit f503aa7838
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 36 additions and 17 deletions

View File

@ -170,7 +170,7 @@ bool DataTypeEnum<Type>::contains(const IDataType & rhs) const
/// Creates the default serialization object (a SerializationEnum<Type>) for this Enum data type.
/// NOTE(review): two return statements appear back to back; they look like the before/after lines
/// of a diff (the first passes a copy source via getValues(), the second passes a shared pointer
/// to this data type obtained from shared_from_this()). Confirm which one the real file keeps.
template <typename Type>
SerializationPtr DataTypeEnum<Type>::doGetDefaultSerialization() const
{
return std::make_shared<SerializationEnum<Type>>(this->getValues());
return std::make_shared<SerializationEnum<Type>>(std::static_pointer_cast<const DataTypeEnum<Type>>(shared_from_this()));
}

View File

@ -11,13 +11,13 @@ namespace DB
/// Writes the name of the enum value stored at `row_num` of `column` to `ostr` via writeString.
/// The FormatSettings argument is unused.
/// NOTE(review): the two statements below differ only in `this->` vs. `ref_enum_values.` and look
/// like the before/after lines of a diff; confirm which one the real file keeps.
template <typename Type>
void SerializationEnum<Type>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
writeString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
/// Writes the name of the enum value stored at `row_num` of `column` to `ostr` via
/// writeEscapedString (the name is passed as a view). The FormatSettings argument is unused.
/// NOTE(review): the two statements below differ only in `this->` vs. `ref_enum_values.` and look
/// like the before/after lines of a diff; confirm which one the real file keeps.
template <typename Type>
void SerializationEnum<Type>::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeEscapedString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
writeEscapedString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
}
template <typename Type>
@ -30,14 +30,14 @@ void SerializationEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffe
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
std::string field_name;
readEscapedString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name), true));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true));
}
}
/// Writes the name of the enum value stored at `row_num` of `column` to `ostr` via
/// writeQuotedString. The FormatSettings argument is unused.
/// NOTE(review): the two statements below differ only in `this->` vs. `ref_enum_values.` and look
/// like the before/after lines of a diff; confirm which one the real file keeps.
template <typename Type>
void SerializationEnum<Type>::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeQuotedString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
writeQuotedString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
@ -45,7 +45,7 @@ void SerializationEnum<Type>::deserializeTextQuoted(IColumn & column, ReadBuffer
{
std::string field_name;
readQuotedStringWithSQLStyle(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name)));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name)));
}
template <typename Type>
@ -61,20 +61,20 @@ void SerializationEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer
{
std::string field_name;
readStringUntilEOF(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name), true));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true));
}
}
/// Writes the name of the enum value stored at `row_num` of `column` to `ostr` via
/// writeJSONString (the name is passed as a view), forwarding `settings` to that call.
/// NOTE(review): the two statements below differ only in `this->` vs. `ref_enum_values.` and look
/// like the before/after lines of a diff; confirm which one the real file keeps.
template <typename Type>
void SerializationEnum<Type>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr, settings);
writeJSONString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr, settings);
}
/// Writes the name of the enum value stored at `row_num` of `column` to `ostr` via
/// writeXMLStringForTextElement (the name is passed as a view). The FormatSettings argument is unused.
/// NOTE(review): the two statements below differ only in `this->` vs. `ref_enum_values.` and look
/// like the before/after lines of a diff; confirm which one the real file keeps.
template <typename Type>
void SerializationEnum<Type>::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeXMLStringForTextElement(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
writeXMLStringForTextElement(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
}
template <typename Type>
@ -86,14 +86,14 @@ void SerializationEnum<Type>::deserializeTextJSON(IColumn & column, ReadBuffer &
{
std::string field_name;
readJSONString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name)));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name)));
}
}
/// Writes the name of the enum value stored at `row_num` of `column` to `ostr` via writeCSVString.
/// The FormatSettings argument is unused.
/// NOTE(review): the two statements below differ only in `this->` vs. `ref_enum_values.` and look
/// like the before/after lines of a diff; confirm which one the real file keeps.
template <typename Type>
void SerializationEnum<Type>::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeCSVString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
writeCSVString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
@ -105,7 +105,7 @@ void SerializationEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer &
{
std::string field_name;
readCSVString(field_name, istr, settings.csv);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name), true));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true));
}
}
@ -114,7 +114,7 @@ void SerializationEnum<Type>::serializeTextMarkdown(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (settings.markdown.escape_special_characters)
writeMarkdownEscapedString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
writeMarkdownEscapedString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
else
serializeTextEscaped(column, row_num, ostr, settings);
}

View File

@ -1,20 +1,35 @@
#pragma once
#include <memory>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/EnumValues.h>
#include <DataTypes/DataTypeEnum.h>
namespace DB
{
template <typename Type>
class SerializationEnum : public SerializationNumber<Type>, public EnumValues<Type>
class SerializationEnum : public SerializationNumber<Type>
{
public:
using typename SerializationNumber<Type>::FieldType;
using typename SerializationNumber<Type>::ColumnType;
using typename EnumValues<Type>::Values;
using Values = EnumValues<Type>::Values;
explicit SerializationEnum(const Values & values_) : EnumValues<Type>(values_) {}
/// SerializationEnum can be constructed in two ways:
/// - Make a copy of the Enum name-to-value mapping.
/// - Only store a reference to an existing mapping. This is faster if the Enum has a lot of different values or if SerializationEnum is
/// constructed very frequently. Make sure that the pointed-to mapping has a longer lifespan than SerializationEnum!
/// Copying constructor: builds an EnumValues<Type> from `values_` inside own_enum_values and binds
/// ref_enum_values to that internally owned object.
/// NOTE(review): ref_enum_values then refers to a member of this very object, so a
/// compiler-generated copy of SerializationEnum would presumably keep referring to the source
/// object's mapping — confirm that instances are never copied.
explicit SerializationEnum(const Values & values_)
: own_enum_values(values_), ref_enum_values(own_enum_values.value())
{
}
/// Referencing constructor: stores a copy of the shared pointer in own_enum_type and binds
/// ref_enum_values to the pointed-to data type object, so no mapping is copied here.
/// `enum_type` is dereferenced without a null check, so it must not be null.
explicit SerializationEnum(const std::shared_ptr<const DataTypeEnum<Type>> & enum_type)
: own_enum_type(enum_type), ref_enum_values(*enum_type)
{
}
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
@ -35,8 +50,12 @@ public:
{
FieldType x;
readText(x, istr);
return this->findByValue(x)->first;
return ref_enum_values.findByValue(x)->first;
}
/// Owned mapping; initialized only by the constructor taking `const Values &`.
std::optional<EnumValues<Type>> own_enum_values;
/// Shared pointer to the Enum data type; initialized only by the constructor taking a shared pointer.
std::shared_ptr<const DataTypeEnum<Type>> own_enum_type;
/// The mapping used for lookups; bound by the constructors either to own_enum_values or to the object behind own_enum_type.
/// Declared after the two members above, so they are initialized before this reference is bound.
const EnumValues<Type> & ref_enum_values;
};
}