From d268a624d58f0988499eba30b791b3e3acc1f009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?= Date: Thu, 6 Nov 2025 10:21:30 +0100 Subject: [PATCH 1/2] feat: add dry run to the read_gbq function --- pandas_gbq/gbq.py | 5 ++++- pandas_gbq/gbq_connector.py | 14 +++++++++++++- tests/system/test_gbq.py | 13 +++++++++++++ tests/unit/test_gbq.py | 7 +++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 880dcef9..40480a2b 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -119,6 +119,7 @@ def read_gbq( *, col_order=None, bigquery_client=None, + dry_run: bool = False, ): r"""Read data from Google BigQuery to a pandas DataFrame. @@ -269,7 +270,8 @@ def read_gbq( bigquery_client : google.cloud.bigquery.Client, optional A Google Cloud BigQuery Python Client instance. If provided, it will be used for reading data, while the project and credentials parameters will be ignored. - + dry_run : bool, default False + If True, dry-run the query and return the estimated GiB processed (float).
Returns ------- df: DataFrame @@ -328,6 +330,7 @@ def read_gbq( max_results=max_results, progress_bar_type=progress_bar_type, dtypes=dtypes, + dry_run=dry_run, ) else: final_df = connector.download_table( diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py index 2b3b716e..518de452 100644 --- a/pandas_gbq/gbq_connector.py +++ b/pandas_gbq/gbq_connector.py @@ -199,7 +199,14 @@ def download_table( user_dtypes=dtypes, ) - def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): + def run_query( + self, + query, + max_results=None, + progress_bar_type=None, + dry_run: bool = False, + **kwargs, + ): from google.cloud import bigquery job_config_dict = { @@ -235,6 +242,7 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): self._start_timer() job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict) + job_config.dry_run = dry_run if FEATURES.bigquery_has_query_and_wait: rows_iter = pandas_gbq.query.query_and_wait_via_client_library( @@ -260,6 +268,10 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): ) dtypes = kwargs.get("dtypes") + + if dry_run: + return rows_iter + return self._download_results( rows_iter, max_results=max_results, diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index 1457ec30..355ee68e 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -656,6 +656,19 @@ def test_columns_and_col_order_raises_error(self, project_id): dialect="standard", ) + def test_read_gbq_with_dry_run(self, project_id): + query = "SELECT 1" + job = gbq.read_gbq( + query, + project_id=project_id, + credentials=self.credentials, + dialect="standard", + dry_run=True, + ) + assert job.dry_run + assert job.state == "DONE" + assert job.total_bytes_processed > 0 + class TestToGBQIntegration(object): @pytest.fixture(autouse=True, scope="function") diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 75574820..fcbacc2a 100644 --- 
a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -937,3 +937,10 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job): type(mock_query_job).destination = mock.PropertyMock(return_value=None) connector.run_query("UPDATE tablename SET value = '';") mock_bigquery_client.list_rows.assert_not_called() + + +def test_read_gbq_with_dry_run(mock_bigquery_client): + gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True) + _, kwargs = mock_bigquery_client.query.call_args + job_config = kwargs["job_config"] + assert job_config.dry_run is True From 13fbf92276b496ef1c39b12ddb21f546bc348d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?= Date: Thu, 6 Nov 2025 10:27:30 +0100 Subject: [PATCH 2/2] return the estimated bytes processed (in GiB) if dry run is set to True --- pandas_gbq/gbq_connector.py | 2 +- tests/system/test_gbq.py | 8 ++++---- tests/unit/test_gbq.py | 6 ++++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py index 518de452..cc83e8df 100644 --- a/pandas_gbq/gbq_connector.py +++ b/pandas_gbq/gbq_connector.py @@ -270,7 +270,7 @@ def run_query( dtypes = kwargs.get("dtypes") if dry_run: - return rows_iter + return rows_iter.total_bytes_processed / 1024**3 return self._download_results( rows_iter, diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index 355ee68e..3764cc8b 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -658,16 +658,16 @@ def test_columns_and_col_order_raises_error(self, project_id): def test_read_gbq_with_dry_run(self, project_id): query = "SELECT 1" - job = gbq.read_gbq( + cost = gbq.read_gbq( query, project_id=project_id, credentials=self.credentials, dialect="standard", dry_run=True, ) - assert job.dry_run - assert job.state == "DONE" - assert job.total_bytes_processed > 0 + assert isinstance(cost, float) + assert cost > 0 + class TestToGBQIntegration(object): diff --git a/tests/unit/test_gbq.py
b/tests/unit/test_gbq.py index fcbacc2a..621a2448 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -939,8 +939,10 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job): mock_bigquery_client.list_rows.assert_not_called() -def test_read_gbq_with_dry_run(mock_bigquery_client): - gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True) +def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job): + type(mock_query_job).total_bytes_processed = mock.PropertyMock(return_value=12345) + cost = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True) _, kwargs = mock_bigquery_client.query.call_args job_config = kwargs["job_config"] assert job_config.dry_run is True + assert cost == 12345 / 1024**3