Skip to content

Commit 8068e86

Browse files
committed
Make kwarg-only, update docs
changelog
1 parent 32bc8e5 commit 8068e86

File tree

5 files changed

+31
-12
lines changed

5 files changed

+31
-12
lines changed

CHANGES.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ mast
5252

5353
- Raise informative error if ``MastMissions`` query radius is too large. [#3447]
5454

55+
- Add ``batch_size`` parameter to ``MastMissions.get_product_list``, ``Observations.get_product_list``,
56+
and ``utils.resolve_object`` to allow controlling the number of items sent in each batch request to the server.
57+
This can help avoid timeouts or connection errors for large requests. [#3454]
58+
5559
jplspec
5660
^^^^^^^
5761

astroquery/mast/missions.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ def query_object_async(self, objectname, *, radius=3*u.arcmin, limit=5000, offse
409409
select_cols=select_cols, **criteria)
410410

411411
@class_or_instance
412-
def get_product_list_async(self, datasets, batch_size=1000):
412+
def get_product_list_async(self, datasets, *, batch_size=1000):
413413
"""
414414
Given a dataset ID or list of dataset IDs, returns a list of associated data products.
415415
@@ -434,9 +434,8 @@ def get_product_list_async(self, datasets, batch_size=1000):
434434
if isinstance(datasets, Table) or isinstance(datasets, Row):
435435
dataset_kwd = self.get_dataset_kwd()
436436
if not dataset_kwd:
437-
error_msg = (f'Dataset keyword not found for mission "{self.mission}". '
438-
'Please input dataset IDs as a string, list of strings, or `~astropy.table.Column`.')
439-
raise InvalidQueryError(error_msg)
437+
raise InvalidQueryError(f'Dataset keyword not found for mission "{self.mission}". Please input '
438+
'dataset IDs as a string, list of strings, or `~astropy.table.Column`.')
440439

441440
# Extract dataset IDs based on input type and mission
442441
if isinstance(datasets, Table):
@@ -472,7 +471,7 @@ def get_product_list_async(self, datasets, batch_size=1000):
472471
# Return a list of responses
473472
return results
474473

475-
def get_unique_product_list(self, datasets):
474+
def get_unique_product_list(self, datasets, *, batch_size=1000):
476475
"""
477476
Given a dataset ID or list of dataset IDs, returns a list of associated data products with unique
478477
filenames.
@@ -482,13 +481,16 @@ def get_unique_product_list(self, datasets):
482481
datasets : str, list, `~astropy.table.Row`, `~astropy.table.Column`, `~astropy.table.Table`
483482
Row/Table of MastMissions query results (e.g. output from `query_object`)
484483
or single/list of dataset ID(s).
484+
batch_size : int, optional
485+
Default 1000. Number of dataset IDs to include in each batch request to the server.
486+
If you experience timeouts or connection errors, consider lowering this value.
485487
486488
Returns
487489
-------
488490
unique_products : `~astropy.table.Table`
489491
Table containing products with unique URIs.
490492
"""
491-
products = self.get_product_list(datasets)
493+
products = self.get_product_list(datasets, batch_size=batch_size)
492494
unique_products = utils.remove_duplicate_products(products, 'filename')
493495
if len(unique_products) < len(products):
494496
log.info("To return all products, use `MastMissions.get_product_list`")

astroquery/mast/observations.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,7 @@ def _filter_ffi_observations(self, observations):
504504
return obs_table[mask]
505505

506506
@class_or_instance
507-
def get_product_list_async(self, observations, batch_size=500):
507+
def get_product_list_async(self, observations, *, batch_size=500):
508508
"""
509509
Given a "Product Group Id" (column name obsid), returns a list of associated data products.
510510
Note that obsid is NOT the same as obs_id, and inputting obs_id values will result in
@@ -1048,7 +1048,7 @@ def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
10481048
# Query for product URIs
10491049
return self._cloud_connection.get_cloud_uri(data_product, include_bucket, full_url)
10501050

1051-
def get_unique_product_list(self, observations):
1051+
def get_unique_product_list(self, observations, *, batch_size=500):
10521052
"""
10531053
Given a "Product Group Id" (column name obsid), returns a list of associated data products with
10541054
unique dataURIs. Note that obsid is NOT the same as obs_id, and inputting obs_id values will result in
@@ -1060,13 +1060,16 @@ def get_unique_product_list(self, observations):
10601060
Row/Table of MAST query results (e.g. output from `query_object`)
10611061
or single/list of MAST Product Group Id(s) (obsid).
10621062
See description `here <https://mast.stsci.edu/api/v0/_c_a_o_mfields.html>`__.
1063+
batch_size : int, optional
1064+
Default 500. Number of obsids to include in each batch request to the server.
1065+
If you experience timeouts or connection errors, consider lowering this value.
10631066
10641067
Returns
10651068
-------
10661069
unique_products : `~astropy.table.Table`
10671070
Table containing products with unique dataURIs.
10681071
"""
1069-
products = self.get_product_list(observations)
1072+
products = self.get_product_list(observations, batch_size=batch_size)
10701073
unique_products = utils.remove_duplicate_products(products, 'dataURI')
10711074
if len(unique_products) < len(products):
10721075
log.info("To return all products, use `Observations.get_product_list`")

docs/mast/mast_missions.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,16 @@ Each observation returned from a MAST query can have one or more associated data
203203
one or more datasets or dataset IDs, the `~astroquery.mast.MastMissionsClass.get_product_list` function
204204
will return a `~astropy.table.Table` containing the associated data products.
205205

206+
`~astroquery.mast.MastMissionsClass.get_product_list` also includes an optional ``batch_size`` parameter,
207+
which controls how many datasets are sent to the MAST service per request. This can be useful for managing
208+
memory usage or avoiding timeouts when requesting product lists for large numbers of datasets.
209+
If not provided, ``batch_size`` defaults to 1000.
210+
206211
.. doctest-remote-data::
207212
>>> datasets = missions.query_criteria(sci_pep_id=12451,
208213
... sci_instrume='ACS',
209214
... sci_hlsp='>1')
210-
>>> products = missions.get_product_list(datasets[:2])
215+
>>> products = missions.get_product_list(datasets[:2], batch_size=1000)
211216
>>> print(products[:5]) # doctest: +IGNORE_OUTPUT
212217
product_key access dataset ... category size type
213218
---------------------------- ------ --------- ... ---------- --------- -------

docs/mast/mast_obsquery.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,17 +214,22 @@ Getting Product Lists
214214
---------------------
215215

216216
Each observation returned from a MAST query can have one or more associated data products.
217-
Given one or more observations or MAST Product Group IDs ("obsid")
217+
Given one or more observations or MAST Product Group IDs ("obsid"),
218218
`~astroquery.mast.ObservationsClass.get_product_list` will return
219219
a `~astropy.table.Table` containing the associated data products.
220220
The product fields are documented `here <https://mast.stsci.edu/api/v0/_productsfields.html>`__.
221221

222+
`~astroquery.mast.ObservationsClass.get_product_list` also includes an optional ``batch_size`` parameter,
223+
which controls how many observations are sent to the MAST service per request. This can be useful for managing
224+
memory usage or avoiding timeouts when requesting product lists for large numbers of observations.
225+
If not provided, ``batch_size`` defaults to 500.
226+
222227
.. doctest-remote-data::
223228

224229
>>> from astroquery.mast import Observations
225230
...
226231
>>> obs_table = Observations.query_criteria(objectname="M8", obs_collection=["K2", "IUE"])
227-
>>> data_products_by_obs = Observations.get_product_list(obs_table[0:2])
232+
>>> data_products_by_obs = Observations.get_product_list(obs_table[0:2], batch_size=500)
228233
>>> print(data_products_by_obs) # doctest: +IGNORE_OUTPUT
229234
obsID obs_collection dataproduct_type ... dataRights calib_level filters
230235
------ -------------- ---------------- ... ---------- ----------- -------

0 commit comments

Comments
 (0)