Skip to content

CSHARP-5603: Add Big Endian support in BinaryVectorReader and BinaryVectorWriter #1682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
30 changes: 30 additions & 0 deletions src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,35 @@ public static void WriteDoubleLittleEndian(Span<byte> destination, double value)
{
BinaryPrimitives.WriteInt64LittleEndian(destination, BitConverter.DoubleToInt64Bits(value));
}
public static float ReadSingleLittleEndian(ReadOnlySpan<byte> source)
{
if (source.Length < 4)
{
throw new ArgumentOutOfRangeException(nameof(source), "Source span is too small to contain a float.");
}

int intValue =
source[0] |
(source[1] << 8) |
(source[2] << 16) |
(source[3] << 24);

return BitConverter.Int32BitsToSingle(intValue);
}

public static void WriteSingleLittleEndian(Span<byte> destination, float value)
{
if (destination.Length < 4)
{
throw new ArgumentOutOfRangeException(nameof(destination), "Destination span is too small to hold a float.");
}

int intValue = BitConverter.SingleToInt32Bits(value);
destination[0] = (byte)(intValue);
destination[1] = (byte)(intValue >> 8);
destination[2] = (byte)(intValue >> 16);
destination[3] = (byte)(intValue >> 24);
}

}
}
24 changes: 15 additions & 9 deletions src/MongoDB.Bson/Serialization/BinaryVectorReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using MongoDB.Bson.IO;

namespace MongoDB.Bson.Serialization
{
Expand Down Expand Up @@ -47,15 +48,10 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType)
throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes.");
}

if (BitConverter.IsLittleEndian)
{
var singles = MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span);
items = (TItem[])(object)singles.ToArray();
}
else
{
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet.");
}
var floatArray = BitConverter.IsLittleEndian // We need not to use this condition here, just doing to keep the little endian logic intact
? MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span).ToArray()
: ToFloatArrayBigEndian(vectorDataBytes.Span);
items = (TItem[])(object)floatArray;
break;
case BinaryVectorDataType.Int8:
var itemsSpan = MemoryMarshal.Cast<byte, TItem>(vectorDataBytes.Span);
Expand Down Expand Up @@ -147,5 +143,15 @@ private static void ValidateItemTypeForBinaryVector<TItem, TItemExpectedType, TB
throw new NotSupportedException($"Expected {typeof(TItemExpectedType)} for {typeof(TBinaryVectorType)}, but found {typeof(TItem)}.");
}
}
private static float[] ToFloatArrayBigEndian(ReadOnlySpan<byte> span)
{
var count = span.Length / 4;
var result = new float[count];
for (int i = 0; i < count; i++)
{
result[i] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(i * 4, 4));
}
return result;
}
}
}
33 changes: 27 additions & 6 deletions src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

using System;
using System.Runtime.InteropServices;
using MongoDB.Bson.IO;

namespace MongoDB.Bson.Serialization
{
Expand All @@ -35,15 +36,35 @@ public static byte[] WriteToBytes<TItem>(BinaryVector<TItem> binaryVector)
public static byte[] WriteToBytes<TItem>(ReadOnlySpan<TItem> vectorData, BinaryVectorDataType binaryVectorDataType, byte padding)
where TItem : struct
{
if (!BitConverter.IsLittleEndian)
byte[] resultBytes;

switch (binaryVectorDataType)
{
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet.");
}
case BinaryVectorDataType.Float32:
int length = vectorData.Length * sizeof(float);
resultBytes = new byte[2 + length];
resultBytes[0] = (byte)binaryVectorDataType;
resultBytes[1] = padding;

var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData);
Span<byte> floatOutput = resultBytes.AsSpan(2);

var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
for (int i = 0; i < floatSpan.Length; i++)
{
BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput, floatSpan[i]);
floatOutput = floatOutput.Slice(4);
}

return result;
return resultBytes;

case BinaryVectorDataType.Int8:
case BinaryVectorDataType.PackedBit:
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
return [(byte)binaryVectorDataType, padding, .. vectorDataBytes];

default:
throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType}.");
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -365,10 +365,35 @@ private BsonBinaryData SerializeToBinaryData<TCollection>(TCollection collection
private static (T[], byte[] VectorBson) GetTestData<T>(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding)
where T : struct
{
var elementsSpan = new ReadOnlySpan<T>(Enumerable.Range(0, elementsCount).Select(i => Convert.ChangeType(i, typeof(T)).As<T>()).ToArray());
byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. MemoryMarshal.Cast<T, byte>(elementsSpan)];

return (elementsSpan.ToArray(), vectorBsonData);
var elementsSpan = new ReadOnlySpan<T>(
Enumerable.Range(0, elementsCount)
.Select(i => Convert.ChangeType(i, typeof(T)).As<T>())
.ToArray());
if (typeof(T) == typeof(float) && dataType == BinaryVectorDataType.Float32)
{
var buffer = new byte[2 + elementsCount * 4]; // 4 bytes per float
buffer[0] = (byte)dataType;
buffer[1] = bitsPadding;
for (int i = 0; i < elementsCount; i++)
{
var floatBytes = BitConverter.GetBytes((float)(object)elementsSpan[i]);
if (!BitConverter.IsLittleEndian)
{
Array.Reverse(floatBytes);
}
Buffer.BlockCopy(floatBytes, 0, buffer, 2 + i * 4, 4);
}
return (elementsSpan.ToArray(), buffer);
}
else if ((typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)) && (dataType == BinaryVectorDataType.Int8 || dataType == BinaryVectorDataType.PackedBit))
{
byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. MemoryMarshal.Cast<T, byte>(elementsSpan)];
return (elementsSpan.ToArray(), vectorBsonData);
}
else
{
throw new NotSupportedException($"Type {typeof(T)} is not supported for data type {dataType}.");
}
}

private static (BinaryVector<T>, byte[] VectorBson) GetTestDataBinaryVector<T>(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding)
Expand Down