1212# import configurable items declared in __init__.py
1313from astroquery .linelists .cdms import conf
1414from astroquery .exceptions import InvalidQueryError , EmptyResponseError
15+ from astroquery import log
1516
1617import re
1718import string
@@ -31,7 +32,7 @@ class CDMSClass(BaseQuery):
3132 SERVER = conf .server
3233 CLASSIC_URL = conf .classic_server
3334 TIMEOUT = conf .timeout
34- MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS' , '028582 H2NC' , '058501 H2C2S' , '064527 HC3HCN' ]
35+ MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS' , '028528 H2NC' , '058501 H2C2S' , '064527 HC3HCN' ]
3536
3637 def query_lines_async (self , min_frequency , max_frequency , * ,
3738 min_strength = - 500 , molecule = 'All' ,
@@ -54,7 +55,8 @@ def query_lines_async(self, min_frequency, max_frequency, *,
5455 min_strength : int, optional
5556 Minimum strength in catalog units, the default is -500
5657
57- molecule : list, string of regex if parse_name_locally=True, optional
58+ molecule : list or string if parse_name_locally=False,
59+ string of regex if parse_name_locally=True, optional
5860 Identifiers of the molecules to search for. If this parameter
5961 is not provided the search will match any species. Default is 'All'.
6062 As a first pass, the molecule will be searched for with a direct
@@ -134,18 +136,21 @@ def query_lines_async(self, min_frequency, max_frequency, *,
134136 # changes interpretation of query
135137 self ._last_query_temperature = temperature_for_intensity
136138
137- if molecule is not None :
138- if parse_name_locally :
139- self .lookup_ids = build_lookup ()
140- luts = self .lookup_ids .find (molecule , flags )
141- if len (luts ) == 0 :
142- raise InvalidQueryError ('No matching species found. Please '
143- 'refine your search or read the Docs '
144- 'for pointers on how to search.' )
145- payload ['Molecules' ] = tuple (f"{ val :06d} { key } "
146- for key , val in luts .items ())[0 ]
147- else :
148- payload ['Molecules' ] = molecule
139+ if molecule == 'All' :
140+ payload ['Moleculesgrp' ] = 'all species'
141+ else :
142+ if molecule is not None :
143+ if parse_name_locally :
144+ self .lookup_ids = build_lookup ()
145+ luts = self .lookup_ids .find (molecule , flags )
146+ if len (luts ) == 0 :
147+ raise InvalidQueryError ('No matching species found. Please '
148+ 'refine your search or read the Docs '
149+ 'for pointers on how to search.' )
150+ payload ['Molecules' ] = tuple (f"{ val :06d} { key } "
151+ for key , val in luts .items ())[0 ]
152+ else :
153+ payload ['Molecules' ] = molecule
149154
150155 if get_query_payload :
151156 return payload
@@ -180,7 +185,7 @@ def query_lines_async(self, min_frequency, max_frequency, *,
180185 # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
181186 badlist = (self .MALFORMATTED_MOLECULE_LIST + # noqa
182187 [y for x in self .MALFORMATTED_MOLECULE_LIST for y in x .split ()])
183- if payload ['Molecules' ] in badlist :
188+ if 'Moleculesgrp' not in payload . keys () and payload ['Molecules' ] in badlist :
184189 raise ValueError (f"Molecule { payload ['Molecules' ]} is known not to comply with standard CDMS format. "
185190 f"Try get_molecule({ payload ['Molecules' ]} ) instead." )
186191
@@ -233,15 +238,32 @@ def _parse_result(self, response, *, verbose=False):
233238 soup = BeautifulSoup (response .text , 'html.parser' )
234239 text = soup .find ('pre' ).text
235240
241+ need_to_filter_bad_molecules = False
242+ for bad_molecule in self .MALFORMATTED_MOLECULE_LIST :
243+ if text .find (bad_molecule .split ()[1 ]) > - 1 :
244+ need_to_filter_bad_molecules = True
245+ break
246+ if need_to_filter_bad_molecules :
247+ text_new = ''
248+ text = text .split ('\n ' )
249+ for line in text :
250+ need_to_include_line = True
251+ for bad_molecule in self .MALFORMATTED_MOLECULE_LIST :
252+ if line .find (bad_molecule .split ()[1 ]) > - 1 :
253+ need_to_include_line = False
254+ break
255+ if need_to_include_line :
256+ text_new = text_new + '\n ' + line
257+ text = text_new
258+
236259 starts = {'FREQ' : 0 ,
237260 'ERR' : 14 ,
238261 'LGINT' : 25 ,
239262 'DR' : 36 ,
240263 'ELO' : 38 ,
241264 'GUP' : 47 ,
242- 'MOLWT' : 51 ,
243- 'TAG' : 54 ,
244- 'QNFMT' : 58 ,
265+ 'TAG' : 50 ,
266+ 'QNFMT' : 57 ,
245267 'Ju' : 61 ,
246268 'Ku' : 63 ,
247269 'vu' : 65 ,
@@ -256,39 +278,47 @@ def _parse_result(self, response, *, verbose=False):
256278 'F3l' : 83 ,
257279 'name' : 89 }
258280
259- result = ascii .read (text , header_start = None , data_start = 0 ,
260- comment = r'THIS|^\s{12,14}\d{4,6}.*' ,
261- names = list (starts .keys ()),
262- col_starts = list (starts .values ()),
263- format = 'fixed_width' , fast_reader = False )
264-
265- result ['FREQ' ].unit = u .MHz
266- result ['ERR' ].unit = u .MHz
267-
268- result ['Lab' ] = result ['MOLWT' ] < 0
269- result ['MOLWT' ] = np .abs (result ['MOLWT' ])
270- result ['MOLWT' ].unit = u .Da
271-
272- fix_keys = ['GUP' ]
273- for suf in 'ul' :
274- for qn in ('J' , 'v' , 'K' , 'F1' , 'F2' , 'F3' ):
275- qnind = qn + suf
276- fix_keys .append (qnind )
277- for key in fix_keys :
278- if not np .issubdtype (result [key ].dtype , np .integer ):
279- intcol = np .array (list (map (parse_letternumber , result [key ])),
280- dtype = int )
281- result [key ] = intcol
282-
283- # if there is a crash at this step, something went wrong with the query
284- # and the _last_query_temperature was not set. This shouldn't ever
285- # happen, but, well, I anticipate it will.
286- if self ._last_query_temperature == 0 :
287- result .rename_column ('LGINT' , 'LGAIJ' )
288- result ['LGAIJ' ].unit = u .s ** - 1
289- else :
290- result ['LGINT' ].unit = u .nm ** 2 * u .MHz
291- result ['ELO' ].unit = u .cm ** (- 1 )
281+ try :
282+ result = ascii .read (text , header_start = None , data_start = 0 ,
283+ comment = r'THIS|^\s{12,14}\d{4,6}.*' ,
284+ names = list (starts .keys ()),
285+ col_starts = list (starts .values ()),
286+ format = 'fixed_width' , fast_reader = False )
287+
288+ result ['FREQ' ].unit = u .MHz
289+ result ['ERR' ].unit = u .MHz
290+
291+ result ['MOLWT' ] = [int (x / 1e3 ) for x in result ['TAG' ]]
292+ result ['Lab' ] = result ['MOLWT' ] < 0
293+ result ['MOLWT' ] = np .abs (result ['MOLWT' ])
294+ result ['MOLWT' ].unit = u .Da
295+
296+ fix_keys = ['GUP' ]
297+ for suf in 'ul' :
298+ for qn in ('J' , 'v' , 'K' , 'F1' , 'F2' , 'F3' ):
299+ qnind = qn + suf
300+ fix_keys .append (qnind )
301+ for key in fix_keys :
302+ if not np .issubdtype (result [key ].dtype , np .integer ):
303+ intcol = np .array (list (map (parse_letternumber , result [key ])),
304+ dtype = int )
305+ result [key ] = intcol
306+
307+ # if there is a crash at this step, something went wrong with the query
308+ # and the _last_query_temperature was not set. This shouldn't ever
309+ # happen, but, well, I anticipate it will.
310+ if self ._last_query_temperature == 0 :
311+ result .rename_column ('LGINT' , 'LGAIJ' )
312+ result ['LGAIJ' ].unit = u .s ** - 1
313+ else :
314+ result ['LGINT' ].unit = u .nm ** 2 * u .MHz
315+ result ['ELO' ].unit = u .cm ** (- 1 )
316+ except ValueError as ex :
317+ # Give users a more helpful exception when parsing fails
318+ new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. "
319+ "You can check this by running `CDMS.get_molecule('<id>')` instead; if it works, the "
320+ "problem is caused by the CDMS search interface and cannot be worked around." )
321+ raise ValueError (new_message ) from ex
292322
293323 return result
294324
@@ -387,35 +417,50 @@ def tryfloat(x):
387417
388418 return result
389419
def get_molecule(self, molecule_id, *, cache=True, return_response=False):
    """
    Retrieve the whole molecule table for a given molecule id

    Parameters
    ----------
    molecule_id : str
        The 6-digit molecule identifier as a string
    cache : bool
        Defaults to True. If set overrides global caching behavior.
        See :ref:`caching documentation <astroquery_cache>`.
    return_response : bool, optional
        If True, return the raw `requests.Response` object instead of parsing
        the response. If this is set, the response will be returned whether
        or not it was successful. Default is False.

    Returns
    -------
    result
        The response object itself when ``return_response`` is True;
        otherwise the table produced by ``self._parse_cat`` with its
        ``meta`` set from the matching row of the species table.

    Raises
    ------
    ValueError
        If ``molecule_id`` is not a string of exactly six ASCII digits.
    EmptyResponseError
        If the response text contains 'Zero lines were found'.
    """
    # Validate the full contract stated in the error message up front.  A
    # length-only check lets ids such as 'abcdef' through, which costs a
    # network round trip and then fails on ``int(molecule_id)`` below with
    # a much less helpful message.
    is_six_digit_string = (
        isinstance(molecule_id, str)
        and len(molecule_id) == 6
        and molecule_id.isascii()
        and molecule_id.isdigit()
    )
    if not is_six_digit_string:
        raise ValueError("molecule_id should be a length-6 string of numbers")

    url = f'{self.CLASSIC_URL}/entries/c{molecule_id}.cat'
    response = self._request(method='GET', url=url,
                             timeout=self.TIMEOUT, cache=cache)

    # Hand back the raw response before any status check so that callers
    # can inspect failed requests too.
    if return_response:
        return response

    response.raise_for_status()

    if 'Zero lines were found' in response.text:
        raise EmptyResponseError(f"Response was empty; message was '{response.text}'.")

    result = self._parse_cat(response.text)

    # Attach the species-table row for this molecule as table metadata.
    species_table = self.get_species_table()
    result.meta = dict(species_table.loc[int(molecule_id)])

    return result
405456
406- def _parse_cat (self , response , * , verbose = False ):
457+ def _parse_cat (self , text , * , verbose = False ):
407458 """
408459 Parse a catalog response into an `~astropy.table.Table`
409460
410461 See details in _parse_response; this is a very similar function,
411462 but the catalog responses have a slightly different format.
412463 """
413-
414- if 'Zero lines were found' in response .text :
415- raise EmptyResponseError (f"Response was empty; message was '{ response .text } '." )
416-
417- text = response .text
418-
419464 # notes about the format
420465 # [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa
421466 # 13 21 29 31 41 44 51 55 57 59 61 63 65 67 69 71 73 75 77 79 noqa
@@ -426,21 +471,21 @@ def _parse_cat(self, response, *, verbose=False):
426471 'ELO' : 32 ,
427472 'GUP' : 42 ,
428473 'TAG' : 44 ,
429- 'QNFMT' : 52 ,
430- 'Q1' : 56 ,
431- 'Q2' : 58 ,
432- 'Q3' : 60 ,
433- 'Q4' : 62 ,
434- 'Q5' : 64 ,
435- 'Q6' : 66 ,
436- 'Q7' : 68 ,
437- 'Q8' : 70 ,
438- 'Q9' : 72 ,
439- 'Q10' : 74 ,
440- 'Q11' : 76 ,
441- 'Q12' : 78 ,
442- 'Q13' : 80 ,
443- 'Q14' : 82 ,
474+ 'QNFMT' : 51 ,
475+ 'Q1' : 55 ,
476+ 'Q2' : 57 ,
477+ 'Q3' : 59 ,
478+ 'Q4' : 61 ,
479+ 'Q5' : 63 ,
480+ 'Q6' : 65 ,
481+ 'Q7' : 67 ,
482+ 'Q8' : 69 ,
483+ 'Q9' : 71 ,
484+ 'Q10' : 73 ,
485+ 'Q11' : 75 ,
486+ 'Q12' : 77 ,
487+ 'Q13' : 79 ,
488+ 'Q14' : 81 ,
444489 }
445490
446491 result = ascii .read (text , header_start = None , data_start = 0 ,
@@ -450,7 +495,7 @@ def _parse_cat(self, response, *, verbose=False):
450495 format = 'fixed_width' , fast_reader = False )
451496
452497 # int truncates - which is what we want
453- result ['MOLWT' ] = [int (x / 1e4 ) for x in result ['TAG' ]]
498+ result ['MOLWT' ] = [int (x / 1e3 ) for x in result ['TAG' ]]
454499
455500 result ['FREQ' ].unit = u .MHz
456501 result ['ERR' ].unit = u .MHz
@@ -460,15 +505,18 @@ def _parse_cat(self, response, *, verbose=False):
460505 result ['MOLWT' ].unit = u .Da
461506
462507 fix_keys = ['GUP' ]
463- for suf in '' :
464- for qn in (f'Q{ ii } ' for ii in range (1 , 15 )):
465- qnind = qn + suf
466- fix_keys .append (qnind )
508+ for qn in (f'Q{ ii } ' for ii in range (1 , 15 )):
509+ fix_keys .append (qn )
510+ log .debug (f"fix_keys: { fix_keys } should include Q1, Q2, ..., Q14 and GUP" )
467511 for key in fix_keys :
468512 if not np .issubdtype (result [key ].dtype , np .integer ):
469513 intcol = np .array (list (map (parse_letternumber , result [key ])),
470514 dtype = int )
515+ if any (intcol == - 999999 ):
516+ intcol = np .ma .masked_where (intcol == - 999999 , intcol )
471517 result [key ] = intcol
518+ if not np .issubdtype (result [key ].dtype , np .integer ):
519+ raise ValueError (f"Failed to parse { key } as integer" )
472520
473521 result ['LGINT' ].unit = u .nm ** 2 * u .MHz
474522 result ['ELO' ].unit = u .cm ** (- 1 )
@@ -481,18 +529,23 @@ def _parse_cat(self, response, *, verbose=False):
481529
def parse_letternumber(st):
    """
    Parse CDMS's two-letter QNs into integers.

    Masked values are converted to -999999.

    From the CDMS docs:
    "Exactly two characters are available for each quantum number. Therefore, half
    integer quanta are rounded up ! In addition, capital letters are used to
    indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters
    are used similarly to signal negative quantum numbers smaller than -9. e. g., a0 is -10, b0 is -20, etc."

    Parameters
    ----------
    st : str or masked value
        The quantum-number field as read from the catalog.

    Returns
    -------
    int
        The decoded quantum number, or -999999 when ``st`` is masked.

    Raises
    ------
    ValueError
        If the expanded string is not a valid integer literal.
    """
    if np.ma.is_masked(st):
        return -999999

    asc = string.ascii_lowercase
    ASC = string.ascii_uppercase

    def expand(char):
        # Lower case encodes negative tens: a -> -1, b -> -2, ... so that
        # 'a0' becomes '-10'.  (Offsetting by 10 here, as for upper case,
        # would wrongly turn 'a0' into -100.)
        if char in asc:
            return '-' + str(asc.index(char) + 1)
        # Upper case encodes tens from 10 upward: A -> 10, ..., Z -> 35, so
        # that 'A0' becomes '100' and 'Z9' becomes '359'.
        if char in ASC:
            return str(ASC.index(char) + 10)
        # Digits, signs and padding pass through for int() to handle.
        return char

    return int(''.join(expand(char) for char in st))
498551
0 commit comments