Merge pull request #48122 from Avogar/bson-more-types

Support Enum output/input in BSONEachRow, allow all map key types and avoid extra calculations
Kruglov Pavel 2023-04-05 18:26:19 +02:00 committed by GitHub
commit 9331c6c260
9 changed files with 133 additions and 98 deletions

View File

@ -1235,8 +1235,8 @@ For output it uses the following correspondence between ClickHouse types and BSO
| ClickHouse type | BSON Type |
|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
@ -1255,30 +1255,30 @@ For output it uses the following correspondence between ClickHouse types and BSO
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
| [Map](/docs/en/sql-reference/data-types/map.md) | `\x03` document |
| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |
For input it uses the following correspondence between BSON types and ClickHouse types:
| BSON Type | ClickHouse Type |
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| BSON Type | ClickHouse Type |
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) |
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
Other BSON types are not supported. The format also performs conversion between different integer types (for example, you can insert a BSON int32 value into a ClickHouse UInt8 column).
Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from a BSON Binary value with the `\x00` binary subtype. In this case, the format validates that the size of the binary data equals the size of the expected value.
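The round trip below is a minimal sketch of the new behaviour using `clickhouse-local` (column names and values are illustrative, not taken from the PR): Enum values travel as BSON int32, and a map with non-String keys becomes a regular BSON document whose keys are the serialized key values.

```bash
# Write Enum8/Enum16 and a Map with UInt32 keys as BSONEachRow,
# then read the same bytes back with an explicit structure.
clickhouse-local -q "select 'a'::Enum8('a' = 1) as e8, 'b'::Enum16('b' = 1) as e16, map(1::UInt32, 42::UInt32) as m format BSONEachRow" \
  | clickhouse-local --input-format BSONEachRow --table t \
      --structure "e8 Enum8('a' = 1), e16 Enum16('b' = 1), m Map(UInt32, UInt32)" \
      -q "select * from t"
# Expected row: a   b   {1:42}
```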

View File

@ -446,11 +446,6 @@ void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & da
const auto * data_type_map = assert_cast<const DataTypeMap *>(data_type.get());
const auto & key_data_type = data_type_map->getKeyType();
if (!isStringOrFixedString(key_data_type))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Only maps with String key type are supported in BSON, got key type: {}",
key_data_type->getName());
const auto & value_data_type = data_type_map->getValueType();
auto & column_map = assert_cast<ColumnMap &>(column);
auto & key_column = column_map.getNestedData().getColumn(0);
@ -464,7 +459,8 @@ void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & da
{
auto nested_bson_type = getBSONType(readBSONType(*in));
auto name = readBSONKeyName(*in, current_key_name);
key_column.insertData(name.data, name.size);
ReadBufferFromMemory buf(name.data, name.size);
key_data_type->getDefaultSerialization()->deserializeWholeText(key_column, buf, format_settings);
readField(value_column, value_data_type, nested_bson_type);
}
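In user-visible terms, sub-document keys are no longer copied verbatim into the key column; they are parsed with the key type's whole-text deserialization, so non-String key types round-trip. A sketch of the effect (names and values are illustrative):

```bash
# BSON stores map keys as document field names (text); on input they are now
# parsed back into the declared key type, here UInt64.
clickhouse-local -q "select map(10::UInt64, 'x', 20::UInt64, 'y') as m format BSONEachRow" \
  | clickhouse-local --input-format BSONEachRow --table t \
      --structure "m Map(UInt64, String)" -q "select m[20] from t"
# Expected to print: y
```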
@ -511,6 +507,7 @@ bool BSONEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr &
lc_column.insertFromFullColumn(*tmp_column, 0);
return res;
}
case TypeIndex::Enum8: [[fallthrough]];
case TypeIndex::Int8:
{
readAndInsertInteger<Int8>(*in, column, data_type, bson_type);
@ -521,6 +518,7 @@ bool BSONEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr &
readAndInsertInteger<UInt8>(*in, column, data_type, bson_type);
return true;
}
case TypeIndex::Enum16: [[fallthrough]];
case TypeIndex::Int16:
{
readAndInsertInteger<Int16>(*in, column, data_type, bson_type);
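With Enum8 and Enum16 falling through to the integer readers, a BSON int32 produced from a plain integer column can be read straight into an Enum column, relying on the usual integer conversion. A small sketch (the enum definition is illustrative):

```bash
# The value 1 is written as BSON int32 and then interpreted as Enum8('a' = 1) on input.
clickhouse-local -q "select 1::Int8 as e format BSONEachRow" \
  | clickhouse-local --input-format BSONEachRow --table t \
      --structure "e Enum8('a' = 1, 'b' = 2)" -q "select * from t"
# Expected to print: a
```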
@ -1008,6 +1006,9 @@ fileSegmentationEngineBSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t
"the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely BSON is malformed",
min_bytes, document_size);
if (document_size < sizeof(document_size))
throw ParsingException(ErrorCodes::INCORRECT_DATA, "Size of BSON document is invalid");
size_t old_size = memory.size();
memory.resize(old_size + document_size);
unalignedStore<BSONSizeT>(memory.data() + old_size, document_size);
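The added check guards the segmentation engine against a size prefix smaller than the prefix itself. A quick way to poke at it (a sketch; depending on whether the parallel-parsing path is taken, the exact error code may differ, but the malformed input is rejected either way):

```bash
# Four bytes claiming a 2-byte BSON document: impossible, since the size field
# alone occupies 4 bytes, so parsing must fail.
printf '\x02\x00\x00\x00' \
  | clickhouse-local --input-format BSONEachRow --table t --structure "x Int32" \
      -q "select * from t" 2>&1 | grep -q 'Exception' && echo "rejected"
```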

View File

@ -33,13 +33,14 @@ namespace ErrorCodes
}
/// In BSON all names should be valid UTF8 sequences
static String toValidUTF8String(const String & name)
static String toValidUTF8String(const String & name, const FormatSettings & settings)
{
WriteBufferFromOwnString buf;
WriteBufferValidUTF8 validating_buf(buf);
writeString(name, validating_buf);
writeJSONString(name, validating_buf, settings);
validating_buf.finalize();
return buf.str();
/// Return value without quotes
return buf.str().substr(1, buf.str().size() - 2);
}
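The switch from `writeString` to `writeJSONString` matters because BSON field names are null-terminated cstrings: a raw `\0` inside a key would cut the name short and corrupt the document. Escaping the name JSON-style (and stripping the surrounding quotes) keeps such keys representable, which is exactly what the new test at the bottom of this PR exercises:

```bash
# A map key containing an embedded NUL survives the round trip as the
# escaped sequence \u0000 (same command as the new test below).
clickhouse-local -q "select map('a\0b', 42) as m format BSONEachRow" \
  | clickhouse-local --input-format BSONEachRow --table t \
      --structure "m Map(String, UInt32)" -q "select * from t"
# Expected to print: {'a\u0000b':42}
```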
BSONEachRowRowOutputFormat::BSONEachRowRowOutputFormat(
@ -49,7 +50,7 @@ BSONEachRowRowOutputFormat::BSONEachRowRowOutputFormat(
const auto & sample = getPort(PortKind::Main).getHeader();
fields.reserve(sample.columns());
for (const auto & field : sample.getNamesAndTypes())
fields.emplace_back(toValidUTF8String(field.name), field.type);
fields.emplace_back(toValidUTF8String(field.name, settings), field.type);
}
static void writeBSONSize(size_t size, WriteBuffer & buf)
@ -112,7 +113,7 @@ static void writeBSONBigInteger(const IColumn & column, size_t row_num, const St
buf.write(data.data, data.size);
}
size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name)
size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name, const String & path, std::unordered_map<String, size_t> & nested_document_sizes)
{
size_t size = 1; // Field type
size += name.size() + 1; // Field name and \0
@ -125,6 +126,8 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
case TypeIndex::Date32: [[fallthrough]];
case TypeIndex::Decimal32: [[fallthrough]];
case TypeIndex::IPv4: [[fallthrough]];
case TypeIndex::Enum8: [[fallthrough]];
case TypeIndex::Enum16: [[fallthrough]];
case TypeIndex::Int32:
{
return size + sizeof(Int32);
@ -183,7 +186,7 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
auto dict_type = assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType();
auto dict_column = lc_column.getDictionary().getNestedColumn();
size_t index = lc_column.getIndexAt(row_num);
return countBSONFieldSize(*dict_column, dict_type, index, name);
return countBSONFieldSize(*dict_column, dict_type, index, name, path, nested_document_sizes);
}
case TypeIndex::Nullable:
{
@ -191,11 +194,11 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
const ColumnNullable & column_nullable = assert_cast<const ColumnNullable &>(column);
if (column_nullable.isNullAt(row_num))
return size; /// Null has no value, just type
return countBSONFieldSize(column_nullable.getNestedColumn(), nested_type, row_num, name);
return countBSONFieldSize(column_nullable.getNestedColumn(), nested_type, row_num, name, path, nested_document_sizes);
}
case TypeIndex::Array:
{
size += sizeof(BSONSizeT); // Size of a document
size_t document_size = sizeof(BSONSizeT); // Size of a document
const auto & nested_type = assert_cast<const DataTypeArray *>(data_type.get())->getNestedType();
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
@ -204,39 +207,41 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
size_t offset = offsets[row_num - 1];
size_t array_size = offsets[row_num] - offset;
String current_path = path + "." + name;
for (size_t i = 0; i < array_size; ++i)
size += countBSONFieldSize(nested_column, nested_type, offset + i, std::to_string(i)); // Add size of each value from array
document_size += countBSONFieldSize(nested_column, nested_type, offset + i, std::to_string(i), current_path, nested_document_sizes); // Add size of each value from array
return size + sizeof(BSON_DOCUMENT_END); // Add final \0
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
nested_document_sizes[current_path] = document_size;
return size + document_size;
}
case TypeIndex::Tuple:
{
size += sizeof(BSONSizeT); // Size of a document
size_t document_size = sizeof(BSONSizeT); // Size of a document
const auto * tuple_type = assert_cast<const DataTypeTuple *>(data_type.get());
const auto & nested_types = tuple_type->getElements();
bool have_explicit_names = tuple_type->haveExplicitNames();
const auto & nested_names = tuple_type->getElementNames();
const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
const auto & nested_columns = tuple_column.getColumns();
String current_path = path + "." + name;
for (size_t i = 0; i < nested_columns.size(); ++i)
{
String key_name = have_explicit_names ? toValidUTF8String(nested_names[i]) : std::to_string(i);
size += countBSONFieldSize(*nested_columns[i], nested_types[i], row_num, key_name); // Add size of each value from tuple
String key_name = toValidUTF8String(nested_names[i], settings);
document_size += countBSONFieldSize(*nested_columns[i], nested_types[i], row_num, key_name, current_path, nested_document_sizes); // Add size of each value from tuple
}
return size + sizeof(BSON_DOCUMENT_END); // Add final \0
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
nested_document_sizes[current_path] = document_size;
return size + document_size;
}
case TypeIndex::Map:
{
size += sizeof(BSONSizeT); // Size of a document
size_t document_size = sizeof(BSONSizeT); // Size of a document
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
if (!isStringOrFixedString(map_type.getKeyType()))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Only maps with String key type are supported in BSON, got key type: {}",
map_type.getKeyType()->getName());
const auto & key_type = map_type.getKeyType();
const auto & value_type = map_type.getValueType();
const auto & map_column = assert_cast<const ColumnMap &>(column);
@ -248,20 +253,26 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
size_t offset = offsets[row_num - 1];
size_t map_size = offsets[row_num] - offset;
WriteBufferFromOwnString buf;
String current_path = path + "." + name;
for (size_t i = 0; i < map_size; ++i)
{
String key = toValidUTF8String(key_column->getDataAt(offset + i).toString());
size += countBSONFieldSize(*value_column, value_type, offset + i, key);
key_type->getDefaultSerialization()->serializeText(*key_column, offset + i, buf, settings);
auto s = countBSONFieldSize(*value_column, value_type, offset + i, toValidUTF8String(buf.str(), settings), current_path, nested_document_sizes);
document_size += s;
buf.restart();
}
return size + sizeof(BSON_DOCUMENT_END); // Add final \0
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
nested_document_sizes[current_path] = document_size;
return size + document_size;
}
default:
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported in BSON output format", data_type->getName());
}
}
void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name)
void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name, const String & path, std::unordered_map<String, size_t> & nested_document_sizes)
{
switch (data_type->getTypeId())
{
@ -275,6 +286,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
writeBSONNumber<ColumnFloat64, double>(BSONType::DOUBLE, column, row_num, name, out);
break;
}
case TypeIndex::Enum8: [[fallthrough]];
case TypeIndex::Int8:
{
writeBSONNumber<ColumnInt8, Int32>(BSONType::INT32, column, row_num, name, out);
@ -288,6 +300,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
writeBSONNumber<ColumnUInt8, Int32>(BSONType::INT32, column, row_num, name, out);
break;
}
case TypeIndex::Enum16: [[fallthrough]];
case TypeIndex::Int16:
{
writeBSONNumber<ColumnInt16, Int32>(BSONType::INT32, column, row_num, name, out);
@ -403,7 +416,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
auto dict_type = assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType();
auto dict_column = lc_column.getDictionary().getNestedColumn();
size_t index = lc_column.getIndexAt(row_num);
serializeField(*dict_column, dict_type, index, name);
serializeField(*dict_column, dict_type, index, name, path, nested_document_sizes);
break;
}
case TypeIndex::Nullable:
@ -411,7 +424,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
auto nested_type = removeNullable(data_type);
const ColumnNullable & column_nullable = assert_cast<const ColumnNullable &>(column);
if (!column_nullable.isNullAt(row_num))
serializeField(column_nullable.getNestedColumn(), nested_type, row_num, name);
serializeField(column_nullable.getNestedColumn(), nested_type, row_num, name, path, nested_document_sizes);
else
writeBSONTypeAndKeyName(BSONType::NULL_VALUE, name, out);
break;
@ -427,15 +440,12 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
writeBSONTypeAndKeyName(BSONType::ARRAY, name, out);
size_t document_size = sizeof(BSONSizeT);
for (size_t i = 0; i < array_size; ++i)
document_size += countBSONFieldSize(nested_column, nested_type, offset + i, std::to_string(i)); // Add size of each value from array
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
String current_path = path + "." + name;
size_t document_size = nested_document_sizes[current_path];
writeBSONSize(document_size, out);
for (size_t i = 0; i < array_size; ++i)
serializeField(nested_column, nested_type, offset + i, std::to_string(i));
serializeField(nested_column, nested_type, offset + i, std::to_string(i), current_path, nested_document_sizes);
writeChar(BSON_DOCUMENT_END, out);
break;
@ -444,26 +454,19 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
{
const auto * tuple_type = assert_cast<const DataTypeTuple *>(data_type.get());
const auto & nested_types = tuple_type->getElements();
bool have_explicit_names = tuple_type->haveExplicitNames();
const auto & nested_names = tuple_type->getElementNames();
const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
const auto & nested_columns = tuple_column.getColumns();
BSONType bson_type = have_explicit_names ? BSONType::DOCUMENT : BSONType::ARRAY;
BSONType bson_type = tuple_type->haveExplicitNames() ? BSONType::DOCUMENT : BSONType::ARRAY;
writeBSONTypeAndKeyName(bson_type, name, out);
size_t document_size = sizeof(BSONSizeT);
for (size_t i = 0; i < nested_columns.size(); ++i)
{
String key_name = have_explicit_names ? toValidUTF8String(nested_names[i]) : std::to_string(i);
document_size += countBSONFieldSize(*nested_columns[i], nested_types[i], row_num, key_name); // Add size of each value from tuple
}
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
String current_path = path + "." + name;
size_t document_size = nested_document_sizes[current_path];
writeBSONSize(document_size, out);
for (size_t i = 0; i < nested_columns.size(); ++i)
serializeField(*nested_columns[i], nested_types[i], row_num, have_explicit_names ? toValidUTF8String(nested_names[i]) : std::to_string(i));
serializeField(*nested_columns[i], nested_types[i], row_num, toValidUTF8String(nested_names[i], settings), current_path, nested_document_sizes);
writeChar(BSON_DOCUMENT_END, out);
break;
@ -471,10 +474,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
case TypeIndex::Map:
{
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
if (!isStringOrFixedString(map_type.getKeyType()))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Only maps with String key type are supported in BSON, got key type: {}",
map_type.getKeyType()->getName());
const auto & key_type = map_type.getKeyType();
const auto & value_type = map_type.getValueType();
const auto & map_column = assert_cast<const ColumnMap &>(column);
@ -488,20 +488,16 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
writeBSONTypeAndKeyName(BSONType::DOCUMENT, name, out);
size_t document_size = sizeof(BSONSizeT);
for (size_t i = 0; i < map_size; ++i)
{
String key = toValidUTF8String(key_column->getDataAt(offset + i).toString());
document_size += countBSONFieldSize(*value_column, value_type, offset + i, key);
}
document_size += sizeof(BSON_DOCUMENT_END);
String current_path = path + "." + name;
size_t document_size = nested_document_sizes[current_path];
writeBSONSize(document_size, out);
WriteBufferFromOwnString buf;
for (size_t i = 0; i < map_size; ++i)
{
String key = toValidUTF8String(key_column->getDataAt(offset + i).toString());
serializeField(*value_column, value_type, offset + i, key);
key_type->getDefaultSerialization()->serializeText(*key_column, offset + i, buf, settings);
serializeField(*value_column, value_type, offset + i, toValidUTF8String(buf.str(), settings), current_path, nested_document_sizes);
buf.restart();
}
writeChar(BSON_DOCUMENT_END, out);
@ -516,15 +512,18 @@ void BSONEachRowRowOutputFormat::write(const Columns & columns, size_t row_num)
{
/// We should calculate and write document size before its content
size_t document_size = sizeof(BSONSizeT);
/// Remember calculated sizes for nested documents (map document path -> size), so we won't need
/// to recalculate them while serializing.
std::unordered_map<String, size_t> nested_document_sizes;
for (size_t i = 0; i != columns.size(); ++i)
document_size += countBSONFieldSize(*columns[i], fields[i].type, row_num, fields[i].name);
document_size += countBSONFieldSize(*columns[i], fields[i].type, row_num, fields[i].name, "$", nested_document_sizes);
document_size += sizeof(BSON_DOCUMENT_END);
size_t document_start = out.count();
writeBSONSize(document_size, out);
for (size_t i = 0; i != columns.size(); ++i)
serializeField(*columns[i], fields[i].type, row_num, fields[i].name);
serializeField(*columns[i], fields[i].type, row_num, fields[i].name, "$", nested_document_sizes);
writeChar(BSON_DOCUMENT_END, out);
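This is the "avoid extra calculations" part of the PR: previously `serializeField` recomputed the size of every nested document that `countBSONFieldSize` had already measured. Now the counting pass records each nested document's size under its path (the root is `"$"`, children append `"." + name`), and serialization only looks the sizes up. For a column `m Map(String, Array(UInt8))`, for example, the sizes land under `$.m` (the map document) and `$.m.k` (the array behind key `'k'`). A round-trip sketch with such a nested value (names and values are illustrative):

```bash
# Nested documents (the map and the array inside it) are sized once in the
# counting pass and reused during serialization.
clickhouse-local -q "select map('k', [1, 2, 3]) as m format BSONEachRow" \
  | clickhouse-local --input-format BSONEachRow --table t \
      --structure "m Map(String, Array(UInt8))" -q "select * from t"
# Expected to print: {'k':[1,2,3]}
```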

View File

@ -17,8 +17,8 @@ namespace DB
*
* ClickHouse type | BSON Type
* Bool | \x08 boolean
* Int8/UInt8 | \x10 int32
* Int16UInt16 | \x10 int32
* Int8/UInt8/Enum8 | \x10 int32
* Int16/UInt16/Enum16 | \x10 int32
* Int32 | \x10 int32
* UInt32 | \x12 int64
* Int64 | \x12 int64
@ -38,7 +38,7 @@ namespace DB
* Array | \x04 array
* Tuple | \x04 array
* Named Tuple | \x03 document
* Map (with String keys) | \x03 document
* Map | \x03 document
*
* Note: on Big-Endian platforms this format will not work properly.
*/
@ -55,12 +55,24 @@ private:
void write(const Columns & columns, size_t row_num) override;
void writeField(const IColumn &, const ISerialization &, size_t) override { }
void serializeField(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name);
void serializeField(
const IColumn & column,
const DataTypePtr & data_type,
size_t row_num,
const String & name,
const String & path,
std::unordered_map<String, size_t> & nested_document_sizes);
/// Count field size in bytes that we will get after serialization in BSON format.
/// It's needed to calculate document size before actual serialization,
/// because in BSON format we should write the size of the document before its content.
size_t countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name);
size_t countBSONFieldSize(
const IColumn & column,
const DataTypePtr & data_type,
size_t row_num,
const String & name,
const String & path,
std::unordered_map<String, size_t> & nested_document_sizes);
NamesAndTypes fields;
FormatSettings settings;
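As the comment above notes, BSON puts each document's total byte size before its content, which is why the output format needs a counting pass before it can emit anything. The size prefix is easy to see directly (a sketch; the byte values assume a single non-nullable Int32 column named `x`):

```bash
# The first four bytes are the little-endian int32 document size.
clickhouse-local -q "select 1::Int32 as x format BSONEachRow" | od -An -tu1 | head -n 1
# Expected to start with: 12 0 0 0 (a 12-byte document), then 16 (\x10 int32),
# 120 ('x'), 0 (name terminator), the value 1 0 0 0, and a trailing 0.
```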

View File

@ -121,7 +121,7 @@ Nullable
2
0
4
FAIL
OK
null Nullable(Int64)
0
\N
@ -191,8 +191,11 @@ tuple Tuple(Nullable(Int64), Nullable(String))
(3,'Hello')
(4,'Hello')
Map
OK
OK
{1:0,2:1}
{1:1,2:2}
{1:2,2:3}
{1:3,2:4}
{1:4,2:5}
{'a':0,'b':1}
{'a':1,'b':2}
{'a':2,'b':3}

View File

@ -88,7 +88,7 @@ echo "Nullable"
$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'null Nullable(UInt32)') select number % 2 ? NULL : number from numbers(5) settings engine_file_truncate_on_insert=1"
$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null Nullable(UInt32)')"
$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32')"
$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32') settings input_format_null_as_default=0" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL"
$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32') settings input_format_null_as_default=0" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL"
$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)"
$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)"
@ -132,10 +132,10 @@ $CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)"
echo "Map"
$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)') select map(1, number, 2, number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL"
$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)') select map('a', number, 'b', number + 1) from numbers(5) settings engine_file_truncate_on_insert=1"
$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)') select map(1, number, 2, number + 1) from numbers(5) settings engine_file_truncate_on_insert=1"
$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)')"
$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)')" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL"
$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)') select map('a', number, 'b', number + 1) from numbers(5) settings engine_file_truncate_on_insert=1"
$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)')"
$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)"

View File

@ -0,0 +1,5 @@
{'a\\u0000b':42}
c1 Nullable(Int32)
c2 Nullable(Int32)
c3 Map(String, Nullable(Int32))
a b {42:42}

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_LOCAL -q "select map('a\0b', 42) as c1 format BSONEachRow" | $CLICKHOUSE_LOCAL --input-format BSONEachRow --table test --structure "c1 Map(String, UInt32)" -q "select * from test"
$CLICKHOUSE_LOCAL -q "select 'a'::Enum8('a' = 1) as c1, 'b'::Enum16('b' = 1) as c2, map(42, 42) as c3 format BSONEachRow" | $CLICKHOUSE_LOCAL --input-format BSONEachRow --table test -q "desc test"
$CLICKHOUSE_LOCAL -q "select 'a'::Enum8('a' = 1) as c1, 'b'::Enum16('b' = 1) as c2, map(42, 42) as c3 format BSONEachRow" | $CLICKHOUSE_LOCAL --input-format BSONEachRow --table test --structure "c1 Enum8('a' = 1), c2 Enum16('b' = 1), c3 Map(UInt32, UInt32)" -q "select * from test"