diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs
index ef83c201091..66ae990e5bb 100644
--- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs
+++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs
@@ -14,6 +14,7 @@
 */
 
 using System;
+using System.Buffers.Binary;
 using System.Collections.Generic;
 using System.Linq;
 using System.Runtime.InteropServices;
@@ -26,7 +27,6 @@ public static BinaryVector ReadBinaryVector(ReadOnlyMemory v
             where TItem : struct
         {
             var (items, padding, vectorDataType) = ReadBinaryVectorAsArray<TItem>(vectorData);
-
             return CreateBinaryVector(items, padding, vectorDataType);
         }
 
@@ -41,29 +41,39 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType)
             switch (vectorDataType)
             {
                 case BinaryVectorDataType.Float32:
-
                     if ((vectorDataBytes.Span.Length & 3) != 0)
                     {
                         throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes.");
                     }
 
-                    if (BitConverter.IsLittleEndian)
+                    if (typeof(TItem) != typeof(float))
                     {
-                        var singles = MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span);
-                        items = (TItem[])(object)singles.ToArray();
+                        throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}.");
                     }
-                    else
+
+                    int count = vectorDataBytes.Length / 4; // 4 bytes per float
+                    float[] floatArray = new float[count];
+
+                    for (int i = 0; i < count; i++)
                     {
-                        throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet.");
+                        // Each float32 occupies 4 bytes at offset i * 4. The serialized payload is always little-endian, so only a big-endian host needs an explicit byte-order conversion.
+                        floatArray[i] = BitConverter.IsLittleEndian
+                            ? MemoryMarshal.Read<float>(vectorDataBytes.Span.Slice(i * 4, 4)) // host order already matches the little-endian payload: plain unaligned read
+                            : BinaryPrimitives.ReadSingleLittleEndian(vectorDataBytes.Span.Slice(i * 4, 4)); // big-endian host: decode the 4 bytes as a little-endian float
                     }
+
+                    items = (TItem[])(object)floatArray;
                     break;
+
                 case BinaryVectorDataType.Int8:
                     var itemsSpan = MemoryMarshal.Cast<byte, TItem>(vectorDataBytes.Span);
-                    items = (TItem[])(object)itemsSpan.ToArray();
+                    items = itemsSpan.ToArray();
                     break;
+
                 case BinaryVectorDataType.PackedBit:
                     items = (TItem[])(object)vectorDataBytes.ToArray();
                     break;
+
                 default:
                     throw new NotSupportedException($"Binary vector data type {vectorDataType} is not supported.");
             }
@@ -149,3 +159,4 @@ private static void ValidateItemTypeForBinaryVector(BinaryVector binaryVector)
         public static byte[] WriteToBytes<TItem>(ReadOnlySpan<TItem> vectorData, BinaryVectorDataType binaryVectorDataType, byte padding)
             where TItem : struct
         {
-            if (!BitConverter.IsLittleEndian)
+            if (BitConverter.IsLittleEndian)
             {
-                throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet.");
+                var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
+                byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
+                return result;
             }
 
-            var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
-            byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
+            byte[] resultBytes;
+            switch (binaryVectorDataType)
+            {
+                case BinaryVectorDataType.Float32:
+                    int length = vectorData.Length * sizeof(float);
+                    resultBytes = new byte[2 + length];            // Allocate output buffer:
+                    resultBytes[0] = (byte)binaryVectorDataType;   // - [0]: vector type
+                    resultBytes[1] = padding;                      // - [1]: padding
+                    var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData);
+                    Span<byte> floatOutput = resultBytes.AsSpan(2); // - [2...]: actual float data, skipping header
+                    foreach (var value in floatSpan)
+                    {
+                        // Each float is 4 bytes - always emit little-endian so the payload is identical on every host
+                        BinaryPrimitives.WriteSingleLittleEndian(floatOutput, value);
+                        floatOutput = floatOutput.Slice(4); // advance to next 4-byte block
+                    }
+                    return resultBytes;
+
+                case BinaryVectorDataType.Int8:
+                case BinaryVectorDataType.PackedBit:
+                    var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
+                    resultBytes = new byte[2 + vectorDataBytes.Length];
+                    resultBytes[0] = (byte)binaryVectorDataType;
+                    resultBytes[1] = padding;
+                    vectorDataBytes.CopyTo(resultBytes.AsSpan(2));
+                    return resultBytes;
 
-            return result;
+                default:
+                    throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType} on Big Endian architecture yet.");
+            }
         }
     }
 }
+