Don't localize TIMESTAMP columns if they are already tz-aware.

tswast · tswast · commit 65d31ebccffa · 2019-04-03T15:47:51.000-07:00
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
@@ -12,7 +12,6 @@
 BIGQUERY_INSTALLED_VERSION = None
 SHOW_VERBOSE_DEPRECATION = False
 SHOW_PRIVATE_KEY_DEPRECATION = False
-USE_TZAWARE_TIMESTAMP = False
 PRIVATE_KEY_DEPRECATION_MESSAGE = (
     "private_key is deprecated and will be removed in a future version."
     "Use the credentials argument instead. See "
@@ -27,7 +26,7 @@
 
 
 def _check_google_client_version():
-    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION, USE_TZAWARE_TIMESTAMP
+    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION
 
     try:
         import pkg_resources
@@ -62,12 +61,6 @@ def _check_google_client_version():
     SHOW_PRIVATE_KEY_DEPRECATION = (
         pandas_installed_version >= pandas_version_with_credentials_arg
     )
-    pandas_version_supporting_tzaware_dtype = pkg_resources.parse_version(
-        "0.24.0"
-    )
-    USE_TZAWARE_TIMESTAMP = (
-        pandas_installed_version >= pandas_version_supporting_tzaware_dtype
-    )
 
 
 def _test_google_api_imports():
@@ -501,8 +494,8 @@ def run_query(self, query, **kwargs):
         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)
 
-        if not USE_TZAWARE_TIMESTAMP:
-            df = _localize_df(schema_fields, df)
+        # Ensure any TIMESTAMP columns are tz-aware.
+        df = _localize_df(schema_fields, df)
 
         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
@@ -662,20 +655,14 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     #missing-data-casting-rules-and-indexing
     """
-    import pandas.api.types
-
-    # pandas doesn't support timezone-aware dtype in DataFrame/Series
-    # constructors until 0.24.0. See:
-    # https://github.com/pandas-dev/pandas/issues/25843#issuecomment-479656947
-    timestamp_dtype = "datetime64[ns]"
-    if USE_TZAWARE_TIMESTAMP:
-        timestamp_dtype = pandas.api.types.DatetimeTZDtype(unit="ns", tz="UTC")
-
     # If you update this mapping, also update the table at
     # `docs/source/reading.rst`.
     dtype_map = {
         "FLOAT": np.dtype(float),
-        "TIMESTAMP": timestamp_dtype,
+        # pandas doesn't support timezone-aware dtype in DataFrame/Series
+        # constructors. It's more idiomatic to localize after construction.
+        # https://github.com/pandas-dev/pandas/issues/25843
+        "TIMESTAMP": "datetime64[ns]",
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
@@ -734,7 +721,7 @@ def _localize_df(schema_fields, df):
         if field["mode"].upper() == "REPEATED":
             continue
 
-        if field["type"].upper() == "TIMESTAMP":
+        if field["type"].upper() == "TIMESTAMP" and df[column].dt.tz is None:
             df[column] = df[column].dt.tz_localize("UTC")
 
     return df
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
@@ -329,9 +329,10 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
             dtype="datetime64[ns]",
         )
-        expected["valid_timestamp"] = expected[
-            "valid_timestamp"
-        ].dt.tz_localize("UTC")
+        if expected["valid_timestamp"].dt.tz is None:
+            expected["valid_timestamp"] = expected[
+                "valid_timestamp"
+            ].dt.tz_localize("UTC")
         tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_datetime_unix_epoch(self, project_id):