#include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int SYNTAX_ERROR; extern const int EMPTY_DATA_PASSED; extern const int UNEXPECTED_AST_STRUCTURE; extern const int ARGUMENT_OUT_OF_BOUND; } template struct EnumName; template <> struct EnumName { static constexpr auto value = "Enum8"; }; template <> struct EnumName { static constexpr auto value = "Enum16"; }; template const char * DataTypeEnum::getFamilyName() const { return EnumName::value; } template std::string DataTypeEnum::generateName(const Values & values) { WriteBufferFromOwnString out; writeString(EnumName::value, out); writeChar('(', out); auto first = true; for (const auto & name_and_value : values) { if (!first) writeString(", ", out); first = false; writeQuotedString(name_and_value.first, out); writeString(" = ", out); writeText(name_and_value.second, out); } writeChar(')', out); return out.str(); } template void DataTypeEnum::fillMaps() { for (const auto & name_and_value : values) { const auto name_to_value_pair = name_to_value_map.insert( { StringRef{name_and_value.first}, name_and_value.second }); if (!name_to_value_pair.second) throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second) + " and '" + name_to_value_pair.first->first.toString() + "' = " + toString(name_to_value_pair.first->second), ErrorCodes::SYNTAX_ERROR}; const auto value_to_name_pair = value_to_name_map.insert( { name_and_value.second, StringRef{name_and_value.first} }); if (!value_to_name_pair.second) throw Exception{"Duplicate values in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second) + " and '" + value_to_name_pair.first->second.toString() + "' = " + toString(value_to_name_pair.first->first), ErrorCodes::SYNTAX_ERROR}; } } template DataTypeEnum::DataTypeEnum(const Values & values_) : values{values_} { if (values.empty()) throw Exception{"DataTypeEnum enumeration cannot be empty", ErrorCodes::EMPTY_DATA_PASSED}; std::sort(std::begin(values), std::end(values), [] (auto & left, auto & right) { return left.second < right.second; }); fillMaps(); name = generateName(values); } template void DataTypeEnum::serializeBinary(const Field & field, WriteBuffer & ostr) const { const FieldType x = get::Type>(field); writeBinary(x, ostr); } template void DataTypeEnum::deserializeBinary(Field & field, ReadBuffer & istr) const { FieldType x; readBinary(x, istr); field = nearestFieldType(x); } template void DataTypeEnum::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const { writeBinary(static_cast(column).getData()[row_num], ostr); } template void DataTypeEnum::deserializeBinary(IColumn & column, ReadBuffer & istr) const { typename ColumnType::value_type x; readBinary(x, istr); static_cast(column).getData().push_back(x); } template void DataTypeEnum::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeEscapedString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { /// NOTE It would be nice to do without creating a temporary object - at least extract std::string out. std::string name; readEscapedString(name, istr); static_cast(column).getData().push_back(getValue(StringRef(name))); } template void DataTypeEnum::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeQuotedString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { std::string name; readQuotedStringWithSQLStyle(name, istr); static_cast(column).getData().push_back(getValue(StringRef(name))); } template void DataTypeEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeJSONString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeXMLString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { std::string name; readJSONString(name, istr); static_cast(column).getData().push_back(getValue(StringRef(name))); } template void DataTypeEnum::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeCSVString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } template void DataTypeEnum::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { std::string name; readCSVString(name, istr, settings.csv.delimiter); static_cast(column).getData().push_back(getValue(StringRef(name))); } template void DataTypeEnum::serializeBinaryBulk( const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const { const auto & x = typeid_cast(column).getData(); const auto size = x.size(); if (limit == 0 || offset + limit > size) limit = size - offset; ostr.write(reinterpret_cast(&x[offset]), sizeof(FieldType) * limit); } template void DataTypeEnum::deserializeBinaryBulk( IColumn & column, ReadBuffer & istr, const size_t limit, const double /*avg_value_size_hint*/) const { auto & x = typeid_cast(column).getData(); const auto initial_size = x.size(); x.resize(initial_size + limit); const auto size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(FieldType) * limit); x.resize(initial_size + size / sizeof(FieldType)); } template Field DataTypeEnum::getDefault() const { return typename NearestFieldType::Type(values.front().second); } template void DataTypeEnum::insertDefaultInto(IColumn & column) const { static_cast(column).getData().push_back(values.front().second); } template bool DataTypeEnum::equals(const IDataType & rhs) const { return typeid(rhs) == typeid(*this) && name == static_cast &>(rhs).name; } template bool DataTypeEnum::textCanContainOnlyValidUTF8() const { for (const auto & elem : values) { const char * pos = elem.first.data(); const char * end = pos + elem.first.size(); while (pos < end) { size_t length = UTF8::seqLength(*pos); if (pos + length > end) return false; if (Poco::UTF8Encoding::isLegal(reinterpret_cast(pos), length)) pos += length; else return false; } } return true; } template static void checkOverflow(Int64 value) { if (!(std::numeric_limits::min() <= value && value <= std::numeric_limits::max())) throw Exception("DataTypeEnum: Unexpected value " + toString(value), ErrorCodes::BAD_TYPE_OF_FIELD); } template Field DataTypeEnum::castToName(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { getValue(value_or_name.get()); /// Check correctness return value_or_name.get(); } else if (value_or_name.getType() == Field::Types::Int64) { Int64 value = value_or_name.get(); checkOverflow(value); return getNameForValue(static_cast(value)).toString(); } else throw Exception(String("DataTypeEnum: Unsupported type of field ") + value_or_name.getTypeName(), ErrorCodes::BAD_TYPE_OF_FIELD); } template Field DataTypeEnum::castToValue(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { return static_cast(getValue(value_or_name.get())); } else if (value_or_name.getType() == Field::Types::Int64 || value_or_name.getType() == Field::Types::UInt64) { Int64 value = value_or_name.get(); checkOverflow(value); getNameForValue(static_cast(value)); /// Check correctness return value; } else throw Exception(String("DataTypeEnum: Unsupported type of field ") + value_or_name.getTypeName(), ErrorCodes::BAD_TYPE_OF_FIELD); } /// Explicit instantiations. template class DataTypeEnum; template class DataTypeEnum; template static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) throw Exception("Enum data type cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); typename DataTypeEnum::Values values; values.reserve(arguments->children.size()); using FieldType = typename DataTypeEnum::FieldType; /// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument. for (const ASTPtr & child : arguments->children) { const ASTFunction * func = typeid_cast(child.get()); if (!func || func->name != "equals" || func->parameters || !func->arguments || func->arguments->children.size() != 2) throw Exception("Elements of Enum data type must be of form: 'name' = number, where name is string literal and number is an integer", ErrorCodes::UNEXPECTED_AST_STRUCTURE); const ASTLiteral * name_literal = typeid_cast(func->arguments->children[0].get()); const ASTLiteral * value_literal = typeid_cast(func->arguments->children[1].get()); if (!name_literal || !value_literal || name_literal->value.getType() != Field::Types::String || (value_literal->value.getType() != Field::Types::UInt64 && value_literal->value.getType() != Field::Types::Int64)) throw Exception("Elements of Enum data type must be of form: 'name' = number, where name is string literal and number is an integer", ErrorCodes::UNEXPECTED_AST_STRUCTURE); const String & name = name_literal->value.get(); const auto value = value_literal->value.get::Type>(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) throw Exception{"Value " + toString(value) + " for element '" + name + "' exceeds range of " + EnumName::value, ErrorCodes::ARGUMENT_OUT_OF_BOUND}; values.emplace_back(name, value); } return std::make_shared(values); } void registerDataTypeEnum(DataTypeFactory & factory) { factory.registerDataType("Enum8", create>); factory.registerDataType("Enum16", create>); } }