openxla
diff --git a/xla/array2d_test.cc b/xla/array2d_test.cc
Lines changed: 14 additions & 0 deletions
diff --git a/xla/fp_util_test.cc b/xla/fp_util_test.cc
Lines changed: 53 additions & 0 deletions
diff --git a/xla/hlo/builder/lib/math.cc b/xla/hlo/builder/lib/math.cc
Lines changed: 7 additions & 4 deletions
diff --git a/xla/hlo/builder/lib/math_test.cc b/xla/hlo/builder/lib/math_test.cc
Lines changed: 24 additions & 10 deletions
diff --git a/xla/hlo/transforms/simplifiers/float_normalization.cc b/xla/hlo/transforms/simplifiers/float_normalization.cc
Lines changed: 3 additions & 0 deletions
diff --git a/xla/hlo/transforms/simplifiers/float_normalization_test.cc b/xla/hlo/transforms/simplifiers/float_normalization_test.cc
Lines changed: 3 additions & 1 deletion
diff --git a/xla/mlir/utils/type_util.cc b/xla/mlir/utils/type_util.cc
Lines changed: 5 additions & 1 deletion
diff --git a/xla/mlir/utils/type_util_test.cc b/xla/mlir/utils/type_util_test.cc
Lines changed: 1 addition & 0 deletions
diff --git a/xla/mlir_hlo/tests/Dialect/mhlo/ops.mlir b/xla/mlir_hlo/tests/Dialect/mhlo/ops.mlir
Lines changed: 7 additions & 0 deletions
diff --git a/xla/python/ifrt/dtype_test.cc b/xla/python/ifrt/dtype_test.cc
Lines changed: 15 additions & 30 deletions
diff --git a/xla/service/cpu/cpu_compiler.cc b/xla/service/cpu/cpu_compiler.cc
Lines changed: 2 additions & 0 deletions
diff --git a/xla/service/cpu/onednn_memory_util.h b/xla/service/cpu/onednn_memory_util.h
Lines changed: 1 addition & 1 deletion
@@ -219,6 +219,20 @@ TEST(Array2dTest, LinspaceF8E3M4) {
   EXPECT_FLOAT_EQ(static_cast<float>((*arr)(2, 1)), 3.5);
 }
 
+TEST(Array2dTest, LinspaceF4E2M1FN) {
+  auto arr = MakeLinspaceArray2D<tsl::float4_e2m1fn>(1.0, 3.5, 3, 2);
+
+  EXPECT_EQ(arr->n1(), 3);
+  EXPECT_EQ(arr->n2(), 2);
+
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(0, 0)), 1.0);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(0, 1)), 1.5);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(1, 0)), 2.0);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(1, 1)), 2.0);  // 2.5 rounded down
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(2, 0)), 3.0);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(2, 1)), 4.0);  // 3.5 rounded up
+}
+
 TEST(Array2dTest, Stringification) {
   auto arr = MakeLinspaceArray2D(1.0, 3.5, 3, 2);
   const std::string expected = R"([[1, 1.5],
 
@@ -119,6 +119,59 @@ class FP8E4M3DistanceTest : public ::testing::Test {};
 using F8E4M3Types = ::testing::Types<tsl::float8_e4m3, tsl::float8_e4m3fn>;
 TYPED_TEST_SUITE(FP8E4M3DistanceTest, F8E4M3Types);
 
+TEST(FPDistanceTest, F4E2M1FNDistance) {
+  // a & b are equal, so the distance is 0
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                tsl::float4_e2m1fn(4.0), tsl::float4_e2m1fn(4.0)),
+            0);
+
+  // a & b have the same exponent
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                tsl::float4_e2m1fn(4.0), tsl::float4_e2m1fn(6.0)),
+            1);
+
+  // a & b have different exponents
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                tsl::float4_e2m1fn(2.0), tsl::float4_e2m1fn(4.0)),
+            2);
+
+  // denorm_min() is 1 away from 0 in the positive direction
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                std::numeric_limits<tsl::float4_e2m1fn>::denorm_min(),
+                tsl::float4_e2m1fn(0)),
+            1);
+
+  // -denorm_min() is 1 away from 0 in the negative direction
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                -std::numeric_limits<tsl::float4_e2m1fn>::denorm_min(),
+                tsl::float4_e2m1fn(0)),
+            1);
+
+  // a & b are -denorm_min() and denorm_min(): different signs, 1 + 1 apart
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                -std::numeric_limits<tsl::float4_e2m1fn>::denorm_min(),
+                std::numeric_limits<tsl::float4_e2m1fn>::denorm_min()),
+            2);
+
+  // min() (1 non denorm from 0) is 2 away in the positive direction
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                std::numeric_limits<tsl::float4_e2m1fn>::min(),
+                tsl::float4_e2m1fn(0)),
+            2);
+
+  // -min() (1 non denorm from 0) is 2 away in the negative direction
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                -std::numeric_limits<tsl::float4_e2m1fn>::min(),
+                tsl::float4_e2m1fn(0)),
+            2);
+
+  // a & b are -min() and min(): different signs, 2 + 2 apart
+  EXPECT_EQ(CalculateDistanceInFloats<tsl::float4_e2m1fn>(
+                -std::numeric_limits<tsl::float4_e2m1fn>::min(),
+                std::numeric_limits<tsl::float4_e2m1fn>::min()),
+            4);
+}
+
 TEST(FPDistanceTest, F8E3M4Distance) {
   // a & b are equal
   EXPECT_EQ(CalculateDistanceInFloats<tsl::float8_e3m4>(tsl::float8_e3m4(8.0),
 
@@ -184,6 +184,7 @@ XlaOp IsNegZero(XlaOp operand) {
       case F32:
         return Eq(BitcastConvertType(operand, U32),
                   ConstantR0WithType(&b, U32, uint32_t{1} << 31));
+      case F4E2M1FN:
       case F8E3M4:
       case F8E4M3:
       case F8E5M2:
@@ -971,8 +972,9 @@ XlaOp Igamma(XlaOp a, XlaOp x) {
     TF_RETURN_IF_ERROR(EnsureOperandIsRealFp("Igamma", a));
     PrimitiveType a_x_type = a_shape.element_type();
     bool needs_upcast = false;
-    for (PrimitiveType type : {BF16, F16, F8E3M4, F8E4M3, F8E5M2, F8E4M3FN,
-                               F8E4M3B11FNUZ, F8E5M2FNUZ, F8E4M3FNUZ}) {
+    for (PrimitiveType type :
+         {BF16, F16, F4E2M1FN, F8E3M4, F8E4M3, F8E4M3B11FNUZ, F8E4M3FN,
+          F8E4M3FNUZ, F8E5M2, F8E5M2FNUZ}) {
       if (a_shape.element_type() == type) {
         needs_upcast = true;
         break;
@@ -1024,8 +1026,9 @@ XlaOp IgammaGradA(XlaOp a, XlaOp x) {
     }
     TF_RETURN_IF_ERROR(EnsureOperandIsRealFp("IgammaGradA", a));
     bool needs_upcast = false;
-    for (PrimitiveType type : {BF16, F16, F8E3M4, F8E4M3, F8E5M2, F8E4M3FN,
-                               F8E4M3B11FNUZ, F8E5M2FNUZ, F8E4M3FNUZ}) {
+    for (PrimitiveType type :
+         {BF16, F16, F4E2M1FN, F8E3M4, F8E4M3, F8E4M3B11FNUZ, F8E4M3FN,
+          F8E4M3FNUZ, F8E5M2, F8E5M2FNUZ}) {
       if (a_shape.element_type() == type) {
         needs_upcast = true;
         break;
 
@@ -95,17 +95,22 @@ class MathTypedTest : public MathTest {
     Tuple(&b, {IsFinite(x), IsInf(x), IsPosInf(x), IsNegInf(x), IsNan(x)});
 
     bool has_inf = std::numeric_limits<T>::has_infinity;
+    bool has_nan = std::numeric_limits<T>::has_quiet_NaN;
+    bool is_finite = !has_inf && !has_nan;
+    bool is_nan_only = !has_inf && has_nan;
+
     auto expected = LiteralUtil::MakeTupleOwned(
-        LiteralUtil::CreateR1<bool>(
-            {true, true, true, true, true, false, false, false, false}),
+        LiteralUtil::CreateR1<bool>({true, true, true, true, true, is_finite,
+                                     is_finite, is_finite, is_finite}),
         LiteralUtil::CreateR1<bool>({false, false, false, false, false, has_inf,
                                      has_inf, false, false}),
         LiteralUtil::CreateR1<bool>(
             {false, false, false, false, false, has_inf, false, false, false}),
         LiteralUtil::CreateR1<bool>(
             {false, false, false, false, false, false, has_inf, false, false}),
         LiteralUtil::CreateR1<bool>({false, false, false, false, false,
-                                     !has_inf, !has_inf, true, true}));
+                                     is_nan_only, is_nan_only, has_nan,
+                                     has_nan}));
     ComputeAndCompareLiteral(&b, expected, {});
   }
 
@@ -118,10 +123,11 @@ class MathTypedTest : public MathTest {
         LiteralUtil::CreateR1<T>({T{-0.0}, T{0}, T{1}, T{-1}, inf, -inf, nan}),
         &b));
 
+    bool is_mx = std::is_same_v<T, tsl::float4_e2m1fn>;
     ComputeAndCompareLiteral(
         &b,
         LiteralUtil::CreateR1<bool>(
-            {has_negative_zero_v<T>, false, false, false, false, false, false}),
+            {has_negative_zero_v<T>, false, false, false, false, false, is_mx}),
         {}, error_spec_);
   }
 
@@ -136,6 +142,9 @@ class MathTypedTest : public MathTest {
   // For good measure, we also check pow with an exponent other than 0.5.
   void TestSqrtPowInequivalence() {
     SetFastMathDisabled(true);
+    if (std::is_same_v<T, tsl::float4_e2m1fn>) {
+      GTEST_SKIP() << "Skipping due to low precision";
+    }
 
     // Tests disable constant folding by default, but this test needs it
     // enabled, otherwise we don't tickle the bug we're trying to catch.
@@ -181,19 +190,24 @@ class MathTypedTest : public MathTest {
                       &b);
     Erf(x);
 
-    bool has_inf = std::numeric_limits<T>::has_infinity;
-    std::vector<T> expected = {
-        has_inf ? T(-1) : nan, has_inf ? T(1) : nan, T(-0), T(0), T(-1), T(1)};
+    bool inf_as_nan = !std::numeric_limits<T>::has_infinity &&
+                      std::numeric_limits<T>::has_quiet_NaN;
+    std::vector<T> expected = {inf_as_nan ? nan : T(-1),
+                               inf_as_nan ? nan : T(1),
+                               T(-0),
+                               T(0),
+                               T(-1),
+                               T(1)};
 
     ComputeAndCompareR1<T>(&b, expected, {}, error_spec_);
   }
 };
 
 // TODO(b/123355973): Add bfloat16 to TestTypes once it's working.
 using TestTypes =
-    ::testing::Types<tsl::float8_e3m4, tsl::float8_e4m3, tsl::float8_e4m3fnuz,
-                     tsl::float8_e4m3b11fnuz, tsl::float8_e5m2,
-                     tsl::float8_e5m2fnuz,
+    ::testing::Types<tsl::float4_e2m1fn, tsl::float8_e3m4, tsl::float8_e4m3,
+                     tsl::float8_e4m3fnuz, tsl::float8_e4m3b11fnuz,
+                     tsl::float8_e5m2, tsl::float8_e5m2fnuz,
 #ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16
                      Eigen::half,
 #endif
 
@@ -217,6 +217,9 @@ absl::Status FloatNormalizationVisitor::ChangeOutputTypeThenInsertConvertBack(
       hlo->mutable_shape(), [&](Shape* subshape, const xla::ShapeIndex& index) {
         if (subshape->element_type() == from) {
           subshape->set_element_type(to);
+          if (subshape->has_layout() && from == F4E2M1FN) {
+            subshape->mutable_layout()->set_element_size_in_bits(0);
+          }
         }
       });
   float_normalization_->UpdateLayout(hlo->mutable_shape());
 
@@ -150,7 +150,9 @@ class FloatNormalizationF8Test
       public ::testing::WithParamInterface<PrimitiveType> {};
 
 INSTANTIATE_TEST_SUITE_P(FloatNormalizationF8Suite, FloatNormalizationF8Test,
-                         ::testing::Values(F8E3M4, F8E4M3, F8E5M2));
+                         ::testing::Values(F4E2M1FN, F8E3M4, F8E4M3,
+                                           F8E4M3B11FNUZ, F8E4M3FN, F8E4M3FNUZ,
+                                           F8E5M2, F8E5M2FNUZ));
 
 TEST_F(FloatNormalizationTest, NoopIfSupported) {
   auto builder = HloComputation::Builder(TestName());
 
@@ -32,6 +32,8 @@ absl::StatusOr<mlir::Type> ConvertPrimitiveTypeToMlirType(
   switch (type) {
     case xla::PrimitiveType::PRED:
       return b.getI1Type();
+    case xla::PrimitiveType::F4E2M1FN:
+      return b.getFloat4E2M1FNType();
     case xla::PrimitiveType::F8E5M2:
       return b.getFloat8E5M2Type();
     case xla::PrimitiveType::F8E4M3:
@@ -78,7 +80,9 @@ absl::StatusOr<mlir::Type> ConvertPrimitiveTypeToMlirType(
 }
 
 xla::PrimitiveType ConvertMlirTypeToPrimitiveType(mlir::Type type) {
-  if (type.isFloat8E5M2()) {
+  if (type.isFloat4E2M1FN()) {
+    return xla::PrimitiveType::F4E2M1FN;
+  } else if (type.isFloat8E5M2()) {
     return xla::PrimitiveType::F8E5M2;
   } else if (type.isFloat8E4M3()) {
     return xla::PrimitiveType::F8E4M3;
 
@@ -101,6 +101,7 @@ INSTANTIATE_TEST_SUITE_P(
     Execute, TypeUtilTest,
     ::testing::ValuesIn(std::vector<TypeUtilTestParam>(
         {{PRED, [](mlir::Builder b) { return b.getI1Type(); }},
+         {F4E2M1FN, [](mlir::Builder b) { return b.getFloat4E2M1FNType(); }},
          {F8E5M2, [](mlir::Builder b) { return b.getFloat8E5M2Type(); }},
          {F8E4M3, [](mlir::Builder b) { return b.getFloat8E4M3Type(); }},
          {F8E4M3FN, [](mlir::Builder b) { return b.getFloat8E4M3FNType(); }},
 
@@ -6832,6 +6832,13 @@ func.func @invalid_dimension_attr(%arg0: tensor<?x?xf32, #mhlo.type_extensions<b
 
 // -----
 
+func.func @f4e2m1fn(%arg0: tensor<f16>) -> tensor<f4E2M1FN> {
+  %0 = "mhlo.convert"(%arg0) : (tensor<f16>) -> tensor<f4E2M1FN>
+  func.return %0 : tensor<f4E2M1FN>
+}
+
+// -----
+
 func.func @f8e3m4(%arg0: tensor<f16>) -> tensor<f8E3M4> {
   %0 = "mhlo.convert"(%arg0) : (tensor<f16>) -> tensor<f8E3M4>
   func.return %0 : tensor<f8E3M4>
 
@@ -66,36 +66,21 @@ TEST(DTypeTest, ByteSize) {
 TEST(DTypeTest, BitSize) {
   for (const auto& [kind, bit_size] :
        std::vector<std::tuple<DType::Kind, int>>({
-           {DType::kS2, 2},
-           {DType::kU2, 2},
-           {DType::kS4, 4},
-           {DType::kU4, 4},
-           {DType::kPred, 8},
-           {DType::kS8, 8},
-           {DType::kU8, 8},
-           {DType::kF4E2M1FN, 4},
-           {DType::kF8E3M4, 8},
-           {DType::kF8E4M3, 8},
-           {DType::kF8E4M3FN, 8},
-           {DType::kF8E4M3B11FNUZ, 8},
-           {DType::kF8E4M3FNUZ, 8},
-           {DType::kF8E5M2, 8},
-           {DType::kF8E5M2FNUZ, 8},
-           {DType::kS16, 16},
-           {DType::kU16, 16},
-           {DType::kF16, 16},
-           {DType::kBF16, 16},
-           {DType::kS32, 32},
-           {DType::kU32, 32},
-           {DType::kF32, 32},
-           {DType::kS64, 64},
-           {DType::kU64, 64},
-           {DType::kF64, 64},
-           {DType::kC64, 64},
-           {DType::kC128, 128},
-           {DType::kToken, -1},
-           {DType::kInvalid, -1},
-           {DType::kString, -1},
+           {DType::kS2, 2},         {DType::kU2, 2},
+           {DType::kS4, 4},         {DType::kU4, 4},
+           {DType::kPred, 8},       {DType::kS8, 8},
+           {DType::kU8, 8},         {DType::kF4E2M1FN, 4},
+           {DType::kF8E3M4, 8},     {DType::kF8E4M3, 8},
+           {DType::kF8E4M3FN, 8},   {DType::kF8E4M3B11FNUZ, 8},
+           {DType::kF8E4M3FNUZ, 8}, {DType::kF8E5M2, 8},
+           {DType::kF8E5M2FNUZ, 8}, {DType::kS16, 16},
+           {DType::kU16, 16},       {DType::kF16, 16},
+           {DType::kBF16, 16},      {DType::kS32, 32},
+           {DType::kU32, 32},       {DType::kF32, 32},
+           {DType::kS64, 64},       {DType::kU64, 64},
+           {DType::kF64, 64},       {DType::kC64, 64},
+           {DType::kC128, 128},     {DType::kToken, -1},
+           {DType::kInvalid, -1},   {DType::kString, -1},
        })) {
     EXPECT_EQ(DType(kind).bit_size(),
               bit_size == -1 ? std::nullopt : std::make_optional(bit_size));
 
@@ -607,6 +607,8 @@ absl::Status CpuCompiler::RunHloPassesThroughLayoutAssn(
   pipeline.AddPass<FloatNormalization>(&f8e4m3fnuz_support);
   FloatSupport f8e3m4_support(F8E3M4, F16);
   pipeline.AddPass<FloatNormalization>(&f8e3m4_support);
+  FloatSupport f4e2m1fn_support(F4E2M1FN, F16);
+  pipeline.AddPass<FloatNormalization>(&f4e2m1fn_support);
   // After canonicalization, there may be more batch dots that can be
   // simplified.
   pipeline.AddPass<BatchDotSimplification>();
 
@@ -73,7 +73,7 @@ inline dnnl::memory::data_type ToOneDnnDataType(PrimitiveType ptype) {
 
     // TODO(intel-tf): properly handle not supported types:
     // S16, S64, U16, U32, U64, C64, C128, F8E5M2, F8E4M3FN, S4, U4,
-    // F8E4M3B11FNUZ, F8E4M3, F8E3M4
+    // F8E4M3B11FNUZ, F8E4M3, F8E3M4, F4E2M1FN
     default:
       return dt::undef;
   }
Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,7 @@ inline dnnl::memory::data_type ToOneDnnDataType(PrimitiveType ptype) {`
`73`	`73`
`74`	`74`	`// TODO(intel-tf): properly handle not supported types:`
`75`	`75`	`// S16, S64, U16, U32, U64, C64, C128, F8E5M2, F8E4M3FN, S4, U4,`
`76`		`- // F8E4M3B11FNUZ, F8E4M3, F8E3M4`
	`76`	`+ // F8E4M3B11FNUZ, F8E4M3, F8E3M4, F4E2M1FN`
`77`	`77`	`default:`
`78`	`78`	`return dt::undef;`
`79`	`79`	`}`