Add comments

moskvax · moskvax · commit 07d7f2abc9cf · 2020-06-10T21:56:43.000+10:00
thanks @viirya
diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py
@@ -398,6 +398,8 @@ def _create_from_pandas_with_arrow(self, pdf, schema, timezone):
                               for s in (pdf[c] for c in pdf)]
             struct = StructType()
             for name, t in zip(schema, inferred_types):
+                # nullability is not determined on types inferred by Arrow or
+                # by the non-Arrow conversion path, so default to nullable
                 struct.add(name, from_arrow_type(t), nullable=True)
             schema = struct
 
diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py
@@ -164,7 +164,8 @@ def create_array(s, t):
                 s = s.astype(s.dtypes.categories.dtype)
             try:
                 mask = s.isnull()
-                # pass _ndarray_values to avoid potential failed type checks from pandas array types
+                # pass _ndarray_values to avoid erroneous failed type checks from pandas array types
+                # that do not implement __arrow_array__ (e.g. pre-1.0.0 IntegerArray)
                 array = pa.Array.from_pandas(s._ndarray_values, mask=mask, type=t,
                                              safe=self._safecheck)
             except pa.ArrowException as e: