#include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int SYNTAX_ERROR; extern const int EMPTY_DATA_PASSED; extern const int UNEXPECTED_AST_STRUCTURE; extern const int ARGUMENT_OUT_OF_BOUND; } template struct EnumName; template <> struct EnumName { static constexpr auto value = "Enum8"; }; template <> struct EnumName { static constexpr auto value = "Enum16"; }; template const char * DataTypeEnum::getFamilyName() const { return EnumName::value; } template std::string DataTypeEnum::generateName(const Values & values) { WriteBufferFromOwnString out; writeString(EnumName::value, out); writeChar('(', out); auto first = true; for (const auto & name_and_value : values) { if (!first) writeString(", ", out); first = false; writeQuotedString(name_and_value.first, out); writeString(" = ", out); writeText(name_and_value.second, out); } writeChar(')', out); return out.str(); } template void DataTypeEnum::fillMaps() { for (const auto & name_and_value : values) { const auto name_to_value_pair = name_to_value_map.insert( { StringRef{name_and_value.first}, name_and_value.second }); if (!name_to_value_pair.second) throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second) + " and '" + name_to_value_pair.first->getFirst().toString() + "' = " + toString(name_to_value_pair.first->getSecond()), ErrorCodes::SYNTAX_ERROR}; const auto value_to_name_pair = value_to_name_map.insert( { name_and_value.second, StringRef{name_and_value.first} }); if (!value_to_name_pair.second) throw Exception{"Duplicate values in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second) + " and '" + value_to_name_pair.first->second.toString() + "' = " + toString(value_to_name_pair.first->first), ErrorCodes::SYNTAX_ERROR}; } } template DataTypeEnum::DataTypeEnum(const Values & values_) : values{values_} { if (values.empty()) throw Exception{"DataTypeEnum enumeration cannot be empty", ErrorCodes::EMPTY_DATA_PASSED}; std::sort(std::begin(values), std::end(values), [] (auto & left, auto & right) { return left.second < right.second; }); fillMaps(); type_name = generateName(values); } template void DataTypeEnum::serializeBinary(const Field & field, WriteBuffer & ostr) const { const FieldType x = get>(field); writeBinary(x, ostr); } template void DataTypeEnum::deserializeBinary(Field & field, ReadBuffer & istr) const { FieldType x; readBinary(x, istr); field = nearestFieldType(x); } template void DataTypeEnum::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const { writeBinary(static_cast(column).getData()[row_num], ostr); } template void DataTypeEnum::deserializeBinary(IColumn & column, ReadBuffer & istr) const { typename ColumnType::value_type x; readBinary(x, istr); static_cast(column).getData().push_back(x); } template void DataTypeEnum::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeEscapedString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { /// NOTE It would be nice to do without creating a temporary object - at least extract std::string out. std::string field_name; readEscapedString(field_name, istr); static_cast(column).getData().push_back(getValue(StringRef(field_name))); } template void DataTypeEnum::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeQuotedString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { std::string field_name; readQuotedStringWithSQLStyle(field_name, istr); static_cast(column).getData().push_back(getValue(StringRef(field_name))); } template void DataTypeEnum::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { std::string field_name; readString(field_name, istr); static_cast(column).getData().push_back(getValue(StringRef(field_name))); } template void DataTypeEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeJSONString(getNameForValue(static_cast(column).getData()[row_num]), ostr, settings); } template void DataTypeEnum::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeXMLString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { std::string field_name; readJSONString(field_name, istr); static_cast(column).getData().push_back(getValue(StringRef(field_name))); } template void DataTypeEnum::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeCSVString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { std::string field_name; readCSVString(field_name, istr, settings.csv); static_cast(column).getData().push_back(getValue(StringRef(field_name))); } template void DataTypeEnum::serializeBinaryBulk( const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const { const auto & x = typeid_cast(column).getData(); const auto size = x.size(); if (limit == 0 || offset + limit > size) limit = size - offset; ostr.write(reinterpret_cast(&x[offset]), sizeof(FieldType) * limit); } template void DataTypeEnum::deserializeBinaryBulk( IColumn & column, ReadBuffer & istr, const size_t limit, const double /*avg_value_size_hint*/) const { auto & x = typeid_cast(column).getData(); const auto initial_size = x.size(); x.resize(initial_size + limit); const auto size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(FieldType) * limit); x.resize(initial_size + size / sizeof(FieldType)); } template void DataTypeEnum::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const { if (value_index) return; protobuf.prepareEnumMapping(values); value_index = static_cast(protobuf.writeEnum(static_cast(column).getData()[row_num])); } template void DataTypeEnum::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const { protobuf.prepareEnumMapping(values); row_added = false; Type value; if (!protobuf.readEnum(value)) return; auto & container = static_cast(column).getData(); if (allow_add_row) { container.emplace_back(value); row_added = true; } else container.back() = value; } template Field DataTypeEnum::getDefault() const { return values.front().second; } template void DataTypeEnum::insertDefaultInto(IColumn & column) const { static_cast(column).getData().push_back(values.front().second); } template bool DataTypeEnum::equals(const IDataType & rhs) const { return typeid(rhs) == typeid(*this) && type_name == static_cast &>(rhs).type_name; } template bool DataTypeEnum::textCanContainOnlyValidUTF8() const { for (const auto & elem : values) { const char * pos = elem.first.data(); const char * end = pos + elem.first.size(); while (pos < end) { size_t length = UTF8::seqLength(*pos); if (pos + length > end) return false; if (Poco::UTF8Encoding::isLegal(reinterpret_cast(pos), length)) pos += length; else return false; } } return true; } template static void checkOverflow(Int64 value) { if (!(std::numeric_limits::min() <= value && value <= std::numeric_limits::max())) throw Exception("DataTypeEnum: Unexpected value " + toString(value), ErrorCodes::BAD_TYPE_OF_FIELD); } template Field DataTypeEnum::castToName(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { getValue(value_or_name.get()); /// Check correctness return value_or_name.get(); } else if (value_or_name.getType() == Field::Types::Int64) { Int64 value = value_or_name.get(); checkOverflow(value); return getNameForValue(static_cast(value)).toString(); } else throw Exception(String("DataTypeEnum: Unsupported type of field ") + value_or_name.getTypeName(), ErrorCodes::BAD_TYPE_OF_FIELD); } template Field DataTypeEnum::castToValue(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { return getValue(value_or_name.get()); } else if (value_or_name.getType() == Field::Types::Int64 || value_or_name.getType() == Field::Types::UInt64) { Int64 value = value_or_name.get(); checkOverflow(value); getNameForValue(static_cast(value)); /// Check correctness return value; } else throw Exception(String("DataTypeEnum: Unsupported type of field ") + value_or_name.getTypeName(), ErrorCodes::BAD_TYPE_OF_FIELD); } /// Explicit instantiations. template class DataTypeEnum; template class DataTypeEnum; template static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) throw Exception("Enum data type cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); typename DataTypeEnum::Values values; values.reserve(arguments->children.size()); using FieldType = typename DataTypeEnum::FieldType; /// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument. for (const ASTPtr & child : arguments->children) { const auto * func = child->as(); if (!func || func->name != "equals" || func->parameters || !func->arguments || func->arguments->children.size() != 2) throw Exception("Elements of Enum data type must be of form: 'name' = number, where name is string literal and number is an integer", ErrorCodes::UNEXPECTED_AST_STRUCTURE); const auto * name_literal = func->arguments->children[0]->as(); const auto * value_literal = func->arguments->children[1]->as(); if (!name_literal || !value_literal || name_literal->value.getType() != Field::Types::String || (value_literal->value.getType() != Field::Types::UInt64 && value_literal->value.getType() != Field::Types::Int64)) throw Exception("Elements of Enum data type must be of form: 'name' = number, where name is string literal and number is an integer", ErrorCodes::UNEXPECTED_AST_STRUCTURE); const String & field_name = name_literal->value.get(); const auto value = value_literal->value.get>(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) throw Exception{"Value " + toString(value) + " for element '" + field_name + "' exceeds range of " + EnumName::value, ErrorCodes::ARGUMENT_OUT_OF_BOUND}; values.emplace_back(field_name, value); } return std::make_shared(values); } static DataTypePtr createNotExect(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) throw Exception("Enum data type cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); /// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument. for (const ASTPtr & child : arguments->children) { const auto *func = child->as(); if (!func || func->name != "equals" || func->parameters || !func->arguments || func->arguments->children.size() != 2) throw Exception( "Elements of Enum data type must be of form: 'name' = number, where name is string literal and number is an integer", ErrorCodes::UNEXPECTED_AST_STRUCTURE); const auto *value_literal = func->arguments->children[1]->as(); if (!value_literal) throw Exception( "Elements of Enum data type must be of form: 'name' = number, where name is string literal and number is an integer", ErrorCodes::UNEXPECTED_AST_STRUCTURE); Int64 value = value_literal->value.get(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) return create(arguments); } return create(arguments); } void registerDataTypeEnum(DataTypeFactory & factory) { factory.registerDataType("Enum8", create>); factory.registerDataType("Enum16", create>); factory.registerDataType("Enum", createNotExect); } }