From ff8936865e7ca12a45c5e7fd1ad77bf1c2ff5114 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 5 May 2025 12:42:06 +0200 Subject: [PATCH 1/2] Add Big Endian Support for Float32 in BinaryVectorWriter.WriteToBytes() Signed-off-by: Medha Tiwari --- .../Serialization/BinaryVectorReader.cs | 25 ++++++++---- .../Serialization/BinaryVectorWriter.cs | 38 ++++++++++++++++--- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs index ef83c201091..bc89ead7ee6 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs @@ -14,6 +14,7 @@ */ using System; +using System.Buffers.Binary; using System.Collections.Generic; using System.Linq; using System.Runtime.InteropServices; @@ -26,7 +27,6 @@ public static BinaryVector ReadBinaryVector(ReadOnlyMemory v where TItem : struct { var (items, padding, vectorDataType) = ReadBinaryVectorAsArray(vectorData); - return CreateBinaryVector(items, padding, vectorDataType); } @@ -41,29 +41,38 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) switch (vectorDataType) { case BinaryVectorDataType.Float32: - if ((vectorDataBytes.Span.Length & 3) != 0) { throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); } - if (BitConverter.IsLittleEndian) + if (typeof(TItem) != typeof(float)) { - var singles = MemoryMarshal.Cast(vectorDataBytes.Span); - items = (TItem[])(object)singles.ToArray(); + throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}."); } - else + + int count = vectorDataBytes.Length / 4; + float[] floatArray = new float[count]; + + for (int i = 0; i < count; i++) { - throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet."); + floatArray[i] = BitConverter.IsLittleEndian + ? MemoryMarshal.Read(vectorDataBytes.Span.Slice(i * 4, 4)) + : BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); } + + items = (TItem[])(object)floatArray; break; + case BinaryVectorDataType.Int8: var itemsSpan = MemoryMarshal.Cast(vectorDataBytes.Span); - items = (TItem[])(object)itemsSpan.ToArray(); + items = itemsSpan.ToArray(); break; + case BinaryVectorDataType.PackedBit: items = (TItem[])(object)vectorDataBytes.ToArray(); break; + default: throw new NotSupportedException($"Binary vector data type {vectorDataType} is not supported."); } diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs index 0e9d5e74f6d..beea9cabe92 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs @@ -14,6 +14,7 @@ */ using System; +using System.Buffers.Binary; using System.Runtime.InteropServices; namespace MongoDB.Bson.Serialization @@ -35,15 +36,42 @@ public static byte[] WriteToBytes(BinaryVector binaryVector) public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryVectorDataType binaryVectorDataType, byte padding) where TItem : struct { - if (!BitConverter.IsLittleEndian) + if (BitConverter.IsLittleEndian) { - throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet."); + var vectorDataBytes = MemoryMarshal.Cast(vectorData); + byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; + return result; } - var vectorDataBytes = MemoryMarshal.Cast(vectorData); - byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; + byte[] resultBytes; + switch (binaryVectorDataType) + { + case BinaryVectorDataType.Float32: + int length = vectorData.Length * sizeof(float); + resultBytes = new byte[2 + length]; + resultBytes[0] = (byte)binaryVectorDataType; + resultBytes[1] = padding; + var floatSpan = MemoryMarshal.Cast(vectorData); + Span floatOutput = resultBytes.AsSpan(2); + foreach (var value in floatSpan) + { + BinaryPrimitives.WriteSingleBigEndian(floatOutput, value); + floatOutput = floatOutput.Slice(4); + } + return resultBytes; + + case BinaryVectorDataType.Int8: + case BinaryVectorDataType.PackedBit: + var vectorDataBytes = MemoryMarshal.Cast(vectorData); + resultBytes = new byte[2 + vectorDataBytes.Length]; + resultBytes[0] = (byte)binaryVectorDataType; + resultBytes[1] = padding; + vectorDataBytes.CopyTo(resultBytes.AsSpan(2)); + return resultBytes; - return result; + default: + throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType} on Big Endian architecture yet."); + } } } } From 2c2cae1964050a6cf39fdd1e2d21ffb9751119f0 Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Tue, 6 May 2025 10:50:42 +0200 Subject: [PATCH 2/2] Added comments for clarity Signed-off-by: Medha Tiwari --- .../Serialization/BinaryVectorReader.cs | 8 +++++--- .../Serialization/BinaryVectorWriter.cs | 14 ++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs index bc89ead7ee6..66ae990e5bb 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs @@ -51,14 +51,15 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}."); } - int count = vectorDataBytes.Length / 4; + int count = vectorDataBytes.Length / 4; // 4 bytes per float float[] floatArray = new float[count]; for (int i = 0; i < count; i++) { + // Each float32 is 4 bytes. So to extract the i-th float, we slice 4 bytes from offset i * 4. Use little-endian or big-endian decoding based on platform. floatArray[i] = BitConverter.IsLittleEndian - ? MemoryMarshal.Read(vectorDataBytes.Span.Slice(i * 4, 4)) - : BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); + ? MemoryMarshal.Read(vectorDataBytes.Span.Slice(i * 4, 4)) // fast, unaligned read on little endian + : BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); // correctly reassemble 4 bytes as big-endian float } items = (TItem[])(object)floatArray; @@ -158,3 +159,4 @@ private static void ValidateItemTypeForBinaryVector(ReadOnlySpan vectorData, BinaryV { case BinaryVectorDataType.Float32: int length = vectorData.Length * sizeof(float); - resultBytes = new byte[2 + length]; - resultBytes[0] = (byte)binaryVectorDataType; - resultBytes[1] = padding; - var floatSpan = MemoryMarshal.Cast(vectorData); - Span floatOutput = resultBytes.AsSpan(2); + resultBytes = new byte[2 + length]; // Allocate output buffer: + resultBytes[0] = (byte)binaryVectorDataType; // - [0]: vector type + resultBytes[1] = padding; // - [1]: padding + var floatSpan = MemoryMarshal.Cast(vectorData); + Span floatOutput = resultBytes.AsSpan(2); // - [2...]: actual float data , skipping header foreach (var value in floatSpan) { + // Each float is 4 bytes - write in Big Endian format BinaryPrimitives.WriteSingleBigEndian(floatOutput, value); - floatOutput = floatOutput.Slice(4); + floatOutput = floatOutput.Slice(4); // advance to next 4-byte block } return resultBytes; @@ -75,3 +76,4 @@ public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryV } } } +