googleapis
diff --git a/‎bigframes/core/blocks.py
Lines changed: 60 additions & 12 deletions b/‎bigframes/core/blocks.py
Lines changed: 60 additions & 12 deletions
diff --git a/‎bigframes/core/local_data.py
Lines changed: 3 additions & 1 deletion b/‎bigframes/core/local_data.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎bigframes/core/pyformat.py
Lines changed: 59 additions & 5 deletions b/‎bigframes/core/pyformat.py
Lines changed: 59 additions & 5 deletions
diff --git a/‎bigframes/core/tools/bigquery_schema.py
Lines changed: 48 additions & 0 deletions b/‎bigframes/core/tools/bigquery_schema.py
Lines changed: 48 additions & 0 deletions
diff --git a/‎bigframes/dataframe.py
Lines changed: 5 additions & 3 deletions b/‎bigframes/dataframe.py
Lines changed: 5 additions & 3 deletions
diff --git a/‎bigframes/dtypes.py
Lines changed: 28 additions & 1 deletion b/‎bigframes/dtypes.py
Lines changed: 28 additions & 1 deletion
@@ -154,6 +154,7 @@ def __init__(
         self._stats_cache[" ".join(self.index_columns)] = {}
         self._transpose_cache: Optional[Block] = transpose_cache
         self._view_ref: Optional[bigquery.TableReference] = None
+        self._view_ref_dry_run: Optional[bigquery.TableReference] = None
 
     @classmethod
     def from_local(
@@ -2459,19 +2460,19 @@ def is_monotonic_decreasing(
     ) -> bool:
         return self._is_monotonic(column_id, increasing=False)
 
-    def to_sql_query(
-        self, include_index: bool, enable_cache: bool = True
-    ) -> typing.Tuple[str, list[str], list[Label]]:
+    def _array_value_for_output(
+        self, *, include_index: bool
+    ) -> Tuple[bigframes.core.ArrayValue, list[str], list[Label]]:
         """
-        Compiles this DataFrame's expression tree to SQL, optionally
-        including index columns.
+        Creates the expression tree with user-visible column names, such as for
+        SQL output.
 
         Args:
             include_index (bool):
                 whether to include index columns.
 
         Returns:
-            a tuple of (sql_string, index_column_id_list, index_column_label_list).
+            a tuple of (ArrayValue, index_column_id_list, index_column_label_list).
                 If include_index is set to False, index_column_id_list and index_column_label_list
                 return empty lists.
         """
@@ -2494,25 +2495,72 @@ def to_sql_query(
             # the BigQuery unicode column name feature?
             substitutions[old_id] = new_id
 
+        return (
+            array_value.rename_columns(substitutions),
+            new_ids[: len(idx_labels)],
+            idx_labels,
+        )
+
+    def to_sql_query(
+        self, include_index: bool, enable_cache: bool = True
+    ) -> Tuple[str, list[str], list[Label]]:
+        """
+        Compiles this DataFrame's expression tree to SQL, optionally
+        including index columns.
+
+        Args:
+            include_index (bool):
+                whether to include index columns.
+            enable_cache (bool):
+                forwarded unchanged to the session executor's ``to_sql`` call.
+
+        Returns:
+            a tuple of (sql_string, index_column_id_list, index_column_label_list).
+                If include_index is set to False, index_column_id_list and index_column_label_list
+                return empty lists.
+        """
+        array_value, idx_ids, idx_labels = self._array_value_for_output(
+            include_index=include_index
+        )
+
         # Note: this uses the sql from the executor, so is coupled tightly to execution
         # implementaton. It will reference cached tables instead of original data sources.
         # Maybe should just compile raw BFET? Depends on user intent.
-        sql = self.session._executor.to_sql(
-            array_value.rename_columns(substitutions), enable_cache=enable_cache
-        )
+        sql = self.session._executor.to_sql(array_value, enable_cache=enable_cache)
         return (
             sql,
-            new_ids[: len(idx_labels)],
+            idx_ids,
             idx_labels,
         )
 
-    def to_view(self, include_index: bool) -> bigquery.TableReference:
+    def to_placeholder_table(
+        self, include_index: bool, *, dry_run: bool = False
+    ) -> bigquery.TableReference:
         """
-        Creates a temporary BigQuery VIEW with the SQL corresponding to this block.
+        Creates a temporary BigQuery VIEW (or empty table if dry_run) with the
+        SQL corresponding to this block.
+
+        Args:
+            include_index (bool):
+                whether to include index columns.
+            dry_run (bool):
+                if True and no real view has been cached yet, return an empty
+                temporary table with this block's output schema instead of
+                creating the view.
+
+        Returns:
+            a reference to the cached view if one exists; otherwise a
+            reference to the (possibly newly created) dry run table when
+            dry_run is True, or to a newly created view.
+
+        Note: both cached references are returned without checking
+        include_index, so for each kind of placeholder the value passed on
+        the call that created it is the one that sticks.
         """
         if self._view_ref is not None:
             return self._view_ref
 
+        # Prefer the real view if it exists, but since dry_run might be called
+        # many times before the real query, we cache that empty table reference
+        # with the correct schema too.
+        if dry_run:
+            if self._view_ref_dry_run is not None:
+                return self._view_ref_dry_run
+
+            # Create empty temp table with the right schema.
+            array_value, _, _ = self._array_value_for_output(
+                include_index=include_index
+            )
+            temp_table_schema = array_value.schema.to_bigquery()
+            self._view_ref_dry_run = self.session._create_temp_table(
+                schema=temp_table_schema
+            )
+            return self._view_ref_dry_run
+
+        # We shouldn't run `to_sql_query` if we have a `dry_run`, because it
+        # could cause us to make unnecessary API calls to upload local node
+        # data.
         sql, _, _ = self.to_sql_query(include_index=include_index)
         self._view_ref = self.session._create_temp_view(sql)
         return self._view_ref
 
@@ -336,7 +336,9 @@ def _adapt_arrow_array(array: pa.Array) -> tuple[pa.Array, bigframes.dtypes.Dtyp
     if target_type != array.type:
         # TODO: Maybe warn if lossy conversion?
         array = array.cast(target_type)
-    bf_type = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(target_type)
+    bf_type = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
+        target_type, allow_lossless_cast=True
+    )
 
     storage_type = _get_managed_storage_type(bf_type)
     if storage_type != array.type:
 
@@ -21,10 +21,15 @@
 
 import string
 import typing
-from typing import Any, Union
+from typing import Any, Optional, Union
 
 import google.cloud.bigquery
-import google.cloud.bigquery.table
+import pandas
+
+from bigframes.core import utils
+import bigframes.core.local_data
+from bigframes.core.tools import bigquery_schema
+import bigframes.session
 
 _BQ_TABLE_TYPES = Union[
     google.cloud.bigquery.Table,
@@ -37,9 +42,51 @@ def _table_to_sql(table: _BQ_TABLE_TYPES) -> str:
     return f"`{table.project}`.`{table.dataset_id}`.`{table.table_id}`"
 
 
+def _pandas_df_to_sql_dry_run(pd_df: pandas.DataFrame) -> str:
+    """Render an empty table expression matching the schema of ``pd_df``.
+
+    Column and index labels are replaced with the ids returned by
+    ``utils.get_standardized_ids`` to ensure there are no duplicate column
+    labels. The relabeling is applied to a copy of ``pd_df``.
+
+    Please make sure this stays in sync with the logic used to_gbq(). See
+    bigframes.dataframe.DataFrame._prepare_export().
+    """
+    column_ids, index_ids = utils.get_standardized_ids(
+        pd_df.columns, pd_df.index.names
+    )
+
+    relabeled = pd_df.copy()
+    relabeled.columns = pandas.Index(column_ids)
+    relabeled.index.names = index_ids
+
+    local_table = bigframes.core.local_data.ManagedArrowTable.from_pandas(relabeled)
+    return bigquery_schema.to_sql_dry_run(local_table.schema.to_bigquery())
+
+
+def _pandas_df_to_sql(
+    df_pd: pandas.DataFrame,
+    *,
+    name: str,
+    session: Optional[bigframes.session.Session] = None,
+    dry_run: bool = False,
+) -> str:
+    """Convert a pandas DataFrame to something embeddable in a SQL string.
+
+    Args:
+        df_pd: The pandas DataFrame to embed.
+        name: Name of the template field; only used in the error message.
+        session: Session used to read the DataFrame, if any.
+        dry_run: Passed on to the placeholder table creation when a session
+            is given; without a session it selects the schema-only output.
+
+    Raises:
+        ValueError: If there is no session and this is not a dry run.
+    """
+    if session is not None:
+        # Use the _deferred engine to avoid loading data too often during dry run.
+        bf_df = session.read_pandas(df_pd, write_engine="_deferred")
+        placeholder = bf_df._to_placeholder_table(dry_run=dry_run)
+        return _table_to_sql(placeholder)
+
+    if dry_run:
+        # No session: fall back to an expression built from the schema alone.
+        return _pandas_df_to_sql_dry_run(df_pd)
+
+    raise ValueError(
+        f"Can't embed pandas DataFrame {name} in a SQL "
+        "string without a bigframes session except if for a dry run."
+    )
+
+
 def _field_to_template_value(
     name: str,
     value: Any,
+    *,
+    session: Optional[bigframes.session.Session] = None,
+    dry_run: bool = False,
 ) -> str:
     """Convert value to something embeddable in a SQL string."""
     import bigframes.core.sql  # Avoid circular imports
@@ -51,9 +98,11 @@ def _field_to_template_value(
     if isinstance(value, table_types):
         return _table_to_sql(value)
 
-    # TODO(tswast): convert pandas DataFrame objects to gbq tables or a literals subquery.
+    if isinstance(value, pandas.DataFrame):
+        return _pandas_df_to_sql(value, session=session, dry_run=dry_run, name=name)
+
     if isinstance(value, bigframes.dataframe.DataFrame):
-        return _table_to_sql(value._to_view())
+        return _table_to_sql(value._to_placeholder_table(dry_run=dry_run))
 
     return bigframes.core.sql.simple_literal(value)
 
@@ -70,6 +119,7 @@ def _validate_type(name: str, value: Any):
         typing.get_args(_BQ_TABLE_TYPES)
         + typing.get_args(bigframes.core.sql.SIMPLE_LITERAL_TYPES)
         + (bigframes.dataframe.DataFrame,)
+        + (pandas.DataFrame,)
     )
 
     if not isinstance(value, supported_types):
@@ -91,6 +141,8 @@ def pyformat(
     sql_template: str,
     *,
     pyformat_args: dict,
+    session: Optional[bigframes.session.Session] = None,
+    dry_run: bool = False,
 ) -> str:
     """Unsafe Python-style string formatting of SQL string.
 
@@ -115,6 +167,8 @@ def pyformat(
     format_kwargs = {}
     for name in fields:
         value = pyformat_args[name]
-        format_kwargs[name] = _field_to_template_value(name, value)
+        format_kwargs[name] = _field_to_template_value(
+            name, value, session=session, dry_run=dry_run
+        )
 
     return sql_template.format(**format_kwargs)
@@ -0,0 +1,48 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Helpers for working with BigQuery SchemaFields."""
+
+from typing import Tuple
+
+import google.cloud.bigquery
+
+
+def _type_to_sql(field: google.cloud.bigquery.SchemaField):
+    """Return the SQL spelling of the field's type.
+
+    The mode is not consulted here, since _field_to_sql has already dealt
+    with it. RECORD / STRUCT fields are expanded from their sub-fields; any
+    other field type is returned as-is.
+    """
+    is_struct = field.field_type.casefold() in ("record", "struct")
+    return _to_struct(field.fields) if is_struct else field.field_type
+
+
+def _field_to_sql(field: google.cloud.bigquery.SchemaField):
+    """Render one field as its backtick-quoted name followed by its SQL type."""
+    sql_type = _type_to_sql(field)
+    if field.mode == "REPEATED":
+        # Unlike other types, ARRAY is represented as mode="REPEATED" rather
+        # than as a field type, so wrap the element type computed above.
+        sql_type = f"ARRAY<{sql_type}>"
+
+    return f"`{field.name}` {sql_type}"
+
+
+def _to_struct(bqschema: Tuple[google.cloud.bigquery.SchemaField, ...]):
+    """Render the given fields as a ``STRUCT<...>`` type expression."""
+    members = ", ".join(map(_field_to_sql, bqschema))
+    return f"STRUCT<{members}>"
+
+
+def to_sql_dry_run(bqschema: Tuple[google.cloud.bigquery.SchemaField, ...]):
+    """Create an empty table expression with the correct schema.
+
+    The result unnests an empty array literal whose element type is the
+    STRUCT built from ``bqschema``.
+    """
+    row_type = _to_struct(bqschema)
+    return "UNNEST(ARRAY<" + row_type + ">[])"
@@ -404,11 +404,13 @@ def _should_sql_have_index(self) -> bool:
             self.index.name is not None or len(self.index.names) > 1
         )
 
-    def _to_view(self) -> bigquery.TableReference:
+    def _to_placeholder_table(self, dry_run: bool = False) -> bigquery.TableReference:
         """Compiles this DataFrame's expression tree to SQL and saves it to a
-        (temporary) view.
+        (temporary) view or table (in the case of a dry run).
+
+        Whether index columns are included is decided by
+        ``_should_sql_have_index()``.
+
+        Args:
+            dry_run (bool):
+                forwarded to the underlying block's ``to_placeholder_table``.
+
+        Returns:
+            the table reference returned by the block.
         """
-        return self._block.to_view(include_index=self._should_sql_have_index())
+        return self._block.to_placeholder_table(
+            include_index=self._should_sql_have_index(), dry_run=dry_run
+        )
 
     def _to_sql_query(
         self, include_index: bool, enable_cache: bool = True
 
@@ -444,8 +444,35 @@ def dtype_for_etype(etype: ExpressionType) -> Dtype:
     if mapping.arrow_dtype is not None
 }
 
+# Arrow types that aren't 1:1 to BigQuery but are allowed to be loaded in to
+# BigQuery, keyed to the BigFrames dtype they are represented as.
+_ARROW_TO_BIGFRAMES_LOSSLESS = {
+    # uint64 is omitted because uint64 -> BigQuery INT64 is a lossy conversion.
+    **dict.fromkeys(
+        (pa.int8(), pa.int16(), pa.int32(), pa.uint8(), pa.uint16(), pa.uint32()),
+        INT_DTYPE,
+    ),
+    **dict.fromkeys((pa.float16(), pa.float32()), FLOAT_DTYPE),
+    # TODO(tswast): Can we support datetime/timestamp/time with units larger
+    # than microseconds?
+}
+
+
+def arrow_dtype_to_bigframes_dtype(
+    arrow_dtype: pa.DataType, allow_lossless_cast: bool = False
+) -> Dtype:
+    """
+    Convert an arrow type into the pandas-y type used to represent it in BigFrames.
+
+    Args:
+        arrow_dtype: Arrow data type.
+        allow_lossless_cast: Allow lossless conversions, such as int32 to int64.
+    """
+    if allow_lossless_cast and arrow_dtype in _ARROW_TO_BIGFRAMES_LOSSLESS:
+        return _ARROW_TO_BIGFRAMES_LOSSLESS[arrow_dtype]
 
-def arrow_dtype_to_bigframes_dtype(arrow_dtype: pa.DataType) -> Dtype:
     if arrow_dtype in _ARROW_TO_BIGFRAMES:
         return _ARROW_TO_BIGFRAMES[arrow_dtype]