@@ -80,6 +80,7 @@ from pandas._libs.tslibs.timestamps import Timestamp
8080
8181cnp.import_array()
8282
83+
8384cdef bint format_is_iso(f: str ):
8485 """
8586 Does format match the iso8601 set that can be handled by the C parser?
@@ -154,6 +155,77 @@ cdef dict _parse_code_table = {"y": 0,
154155 " u" : 22 }
155156
156157
cdef _validate_fmt(str fmt):
    """
    Reject format strings whose directive combination cannot be parsed.

    Parameters
    ----------
    fmt : str
        strftime-style format string to check.

    Raises
    ------
    ValueError
        If ``fmt`` uses ``%W``/``%U`` without a year and a weekday directive,
        combines ``%Z`` with ``%z``, combines ``%j`` with ``%G``, or uses the
        ISO directives ``%G``/``%V`` without their required companions.
    """
    has_iso_year = "%G" in fmt
    has_iso_week = "%V" in fmt
    # %U/%W only accept %A, %a or %w; the ISO directives additionally accept %u.
    has_basic_weekday = "%A" in fmt or "%a" in fmt or "%w" in fmt
    has_any_weekday = has_basic_weekday or "%u" in fmt

    # The branches are mutually exclusive and checked in this order: only the
    # first matching rule is applied, so a format containing %W/%U is never
    # tested against the rules that follow.
    if "%W" in fmt or "%U" in fmt:
        if ("%Y" not in fmt and "%y" not in fmt) or not has_basic_weekday:
            raise ValueError("Cannot use '%W' or '%U' without day and year")
    elif "%Z" in fmt and "%z" in fmt:
        raise ValueError("Cannot parse both %Z and %z")
    elif "%j" in fmt and has_iso_year:
        raise ValueError("Day of the year directive '%j' is not "
                         "compatible with ISO year directive '%G'. "
                         "Use '%Y' instead.")
    elif has_iso_year and not (has_iso_week and has_any_weekday):
        raise ValueError("ISO year directive '%G' must be used with "
                         "the ISO week directive '%V' and a weekday "
                         "directive '%A', '%a', '%w', or '%u'.")
    elif has_iso_week and "%Y" in fmt:
        raise ValueError("ISO week directive '%V' is incompatible with "
                         "the year directive '%Y'. Use the ISO year "
                         "'%G' instead.")
    elif has_iso_week and not (has_iso_year and has_any_weekday):
        raise ValueError("ISO week directive '%V' must be used with "
                         "the ISO year directive '%G' and a weekday "
                         "directive '%A', '%a', '%w', or '%u'.")
198+
199+
cdef _get_format_regex(str fmt):
    """
    Return the compiled regex for ``fmt`` and the locale data used to build it.

    Compiled patterns are looked up in, and stored to, the module-level
    ``_regex_cache`` while holding ``_cache_lock``.

    Parameters
    ----------
    fmt : str
        strftime-style format string.

    Returns
    -------
    tuple
        ``(format_regex, locale_time)``.

    Raises
    ------
    ValueError
        If ``fmt`` contains a bad directive or a stray ``%``.
    """
    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        # A language change invalidates both the TimeRE instance and every
        # pattern that was compiled from it.
        if _getlang() != _TimeRE_cache.locale_time.lang:
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        # Keep the cache bounded by dropping everything once it grows too big.
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(fmt)
        if format_regex is None:
            try:
                format_regex = _TimeRE_cache.compile(fmt)
            except KeyError as err:
                # KeyError raised when a bad format is found; can be specified
                # as \\, in which case it was a stray % but with a space
                # after it
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                raise ValueError(f"'{bad_directive}' is a bad directive "
                                 f"in format '{fmt}'")
            except IndexError:
                # IndexError only occurs when the format string is "%"
                raise ValueError(f"stray % in format '{fmt}'")
            _regex_cache[fmt] = format_regex
    return format_regex, locale_time
227+
228+
157229cdef class DatetimeParseState:
158230 def __cinit__ (self ):
159231 self .found_tz = False
@@ -221,71 +293,8 @@ def array_strptime(
221293
222294 assert is_raise or is_ignore or is_coerce
223295
224- if " %W " in fmt or " %U " in fmt:
225- if " %Y " not in fmt and " %y " not in fmt:
226- raise ValueError (" Cannot use '%W ' or '%U ' without day and year" )
227- if " %A " not in fmt and " %a " not in fmt and " %w " not in fmt:
228- raise ValueError (" Cannot use '%W ' or '%U ' without day and year" )
229- elif " %Z " in fmt and " %z " in fmt:
230- raise ValueError (" Cannot parse both %Z and %z " )
231- elif " %j " in fmt and " %G " in fmt:
232- raise ValueError (" Day of the year directive '%j ' is not "
233- " compatible with ISO year directive '%G '. "
234- " Use '%Y ' instead." )
235- elif " %G " in fmt and (
236- " %V " not in fmt
237- or not (
238- " %A " in fmt
239- or " %a " in fmt
240- or " %w " in fmt
241- or " %u " in fmt
242- )
243- ):
244- raise ValueError (" ISO year directive '%G ' must be used with "
245- " the ISO week directive '%V ' and a weekday "
246- " directive '%A ', '%a ', '%w ', or '%u '." )
247- elif " %V " in fmt and " %Y " in fmt:
248- raise ValueError (" ISO week directive '%V ' is incompatible with "
249- " the year directive '%Y '. Use the ISO year "
250- " '%G ' instead." )
251- elif " %V " in fmt and (
252- " %G " not in fmt
253- or not (
254- " %A " in fmt
255- or " %a " in fmt
256- or " %w " in fmt
257- or " %u " in fmt
258- )
259- ):
260- raise ValueError (" ISO week directive '%V ' must be used with "
261- " the ISO year directive '%G ' and a weekday "
262- " directive '%A ', '%a ', '%w ', or '%u '." )
263-
264- global _TimeRE_cache, _regex_cache
265- with _cache_lock:
266- if _getlang() != _TimeRE_cache.locale_time.lang:
267- _TimeRE_cache = TimeRE()
268- _regex_cache.clear()
269- if len (_regex_cache) > _CACHE_MAX_SIZE:
270- _regex_cache.clear()
271- locale_time = _TimeRE_cache.locale_time
272- format_regex = _regex_cache.get(fmt)
273- if not format_regex:
274- try :
275- format_regex = _TimeRE_cache.compile(fmt)
276- # KeyError raised when a bad format is found; can be specified as
277- # \\, in which case it was a stray % but with a space after it
278- except KeyError , err:
279- bad_directive = err.args[0 ]
280- if bad_directive == " \\ " :
281- bad_directive = " %"
282- del err
283- raise ValueError (f" '{bad_directive}' is a bad directive "
284- f" in format '{fmt}'" )
285- # IndexError only occurs when the format string is "%"
286- except IndexError :
287- raise ValueError (f" stray % i n format '{fmt}'" )
288- _regex_cache[fmt] = format_regex
296+ _validate_fmt(fmt)
297+ format_regex, locale_time = _get_format_regex(fmt)
289298
290299 result = np.empty(n, dtype = " M8[ns]" )
291300 iresult = result.view(" i8" )
@@ -366,8 +375,10 @@ def array_strptime(
366375 raise ValueError (f" Time data {val} is not ISO8601 format" )
367376
368377 tz = _parse_with_format(
369- val, fmt, exact, format_regex, locale_time, & iresult[i]
378+ val, fmt, exact, format_regex, locale_time, & dts
370379 )
380+ iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, & dts)
381+ check_dts_bounds(& dts)
371382 result_timezone[i] = tz
372383
373384 except (ValueError , OutOfBoundsDatetime) as ex:
@@ -391,10 +402,10 @@ def array_strptime(
391402
392403
393404cdef tzinfo _parse_with_format(
394- str val, str fmt, bint exact, format_regex, locale_time, int64_t * iresult
405+ str val, str fmt, bint exact, format_regex, locale_time, npy_datetimestruct * dts
395406):
407+ # Based on https://github.com/python/cpython/blob/main/Lib/_strptime.py#L293
396408 cdef:
397- npy_datetimestruct dts
398409 int year, month, day, minute, hour, second, weekday, julian
399410 int week_of_year, week_of_year_start, parse_code, ordinal
400411 int iso_week, iso_year
@@ -452,24 +463,32 @@ cdef tzinfo _parse_with_format(
452463 # value in the range of [00, 68] is in the century 2000, while
453464 # [69,99] is in the century 1900
454465 if year <= 68 :
466+ # e.g. val='May 04'; fmt='%b %y'
455467 year += 2000
456468 else :
457469 year += 1900
470+ # TODO: not reached in tests 2023-10-28
458471 elif parse_code == 1 :
472+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
459473 year = int (found_dict[" Y" ])
460474 elif parse_code == 2 :
475+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
461476 month = int (found_dict[" m" ])
462477 # elif group_key == 'B':
463478 elif parse_code == 3 :
479+ # e.g. val='30/December/2011'; fmt='%d/%B/%Y'
464480 month = locale_time.f_month.index(found_dict[" B" ].lower())
465481 # elif group_key == 'b':
466482 elif parse_code == 4 :
483+ # e.g. val='30/Dec/2011 00:00:00'; fmt='%d/%b/%Y %H:%M:%S'
467484 month = locale_time.a_month.index(found_dict[" b" ].lower())
468485 # elif group_key == 'd':
469486 elif parse_code == 5 :
487+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
470488 day = int (found_dict[" d" ])
471489 # elif group_key == 'H':
472490 elif parse_code == 6 :
491+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
473492 hour = int (found_dict[" H" ])
474493 elif parse_code == 7 :
475494 hour = int (found_dict[" I" ])
@@ -481,71 +500,101 @@ cdef tzinfo _parse_with_format(
481500 # 12 midnight == 12 AM == hour 0
482501 if hour == 12 :
483502 hour = 0
503+ # TODO: not reached in tests 2023-10-28; the implicit `else`
504+ # branch is tested with e.g.
505+ # val='Tuesday 24 Aug 2021 01:30:48 AM'
506+ # fmt='%A %d %b %Y %I:%M:%S %p'
484507 elif ampm == locale_time.am_pm[1 ]:
485508 # We're in PM so we need to add 12 to the hour unless
486509 # we're looking at 12 noon.
487510 # 12 noon == 12 PM == hour 12
488511 if hour != 12 :
512+ # e.g. val='01/10/2010 08:14 PM'; fmt='%m/%d/%Y %I:%M %p'
489513 hour += 12
514+ # TODO: the implicit `else` branch is not tested 2023-10-28
515+ # TODO: the implicit `else` branch is not reached 2023-10-28; possible?
490516 elif parse_code == 8 :
517+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
491518 minute = int (found_dict[" M" ])
492519 elif parse_code == 9 :
520+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
493521 second = int (found_dict[" S" ])
494522 elif parse_code == 10 :
523+ # e.g. val='10:10:10.100'; fmt='%H:%M:%S.%f'
495524 s = found_dict[" f" ]
496525 # Pad to always return nanoseconds
497526 s += " 0" * (9 - len (s))
498527 us = long (s)
499528 ns = us % 1000
500529 us = us // 1000
501530 elif parse_code == 11 :
531+ # e.g. val='Tuesday 24 Aug 2021 01:30:48 AM'; fmt='%A %d %b %Y %I:%M:%S %p'
502532 weekday = locale_time.f_weekday.index(found_dict[" A" ].lower())
503533 elif parse_code == 12 :
534+ # e.g. val='Tue 24 Aug 2021 01:30:48 AM'; fmt='%a %d %b %Y %I:%M:%S %p'
504535 weekday = locale_time.a_weekday.index(found_dict[" a" ].lower())
505536 elif parse_code == 13 :
506537 weekday = int (found_dict[" w" ])
507538 if weekday == 0 :
539+ # e.g. val='2013020'; fmt='%Y%U%w'
508540 weekday = 6
509541 else :
542+ # e.g. val='2009324'; fmt='%Y%W%w'
510543 weekday -= 1
511544 elif parse_code == 14 :
545+ # e.g. val='2009164202000'; fmt='%Y%j%H%M%S'
512546 julian = int (found_dict[" j" ])
513547 elif parse_code == 15 or parse_code == 16 :
514548 week_of_year = int (found_dict[group_key])
515549 if group_key == " U" :
550+ # e.g. val='2013020'; fmt='%Y%U%w'
516551 # U starts week on Sunday.
517552 week_of_year_start = 6
518553 else :
554+ # e.g. val='2009324'; fmt='%Y%W%w'
519555 # W starts week on Monday.
520556 week_of_year_start = 0
521557 elif parse_code == 17 :
558+ # e.g. val='2011-12-30T00:00:00.000000UTC'; fmt='%Y-%m-%dT%H:%M:%S.%f%Z'
522559 tz = pytz.timezone(found_dict[" Z" ])
523560 elif parse_code == 19 :
561+ # e.g. val='March 1, 2018 12:00:00+0400'; fmt='%B %d, %Y %H:%M:%S%z'
524562 tz = parse_timezone_directive(found_dict[" z" ])
525563 elif parse_code == 20 :
564+ # e.g. val='2015-1-7'; fmt='%G-%V-%u'
526565 iso_year = int (found_dict[" G" ])
527566 elif parse_code == 21 :
567+ # e.g. val='2015-1-7'; fmt='%G-%V-%u'
528568 iso_week = int (found_dict[" V" ])
529569 elif parse_code == 22 :
570+ # e.g. val='2015-1-7'; fmt='%G-%V-%u'
530571 weekday = int (found_dict[" u" ])
531572 weekday -= 1
532573
533574 # If we know the wk of the year and what day of that wk, we can figure
534575 # out the Julian day of the year.
535576 if julian == - 1 and weekday != - 1 :
536577 if week_of_year != - 1 :
578+ # e.g. val='2013020'; fmt='%Y%U%w'
537579 week_starts_Mon = week_of_year_start == 0
538580 julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
539581 week_starts_Mon)
540582 elif iso_year != - 1 and iso_week != - 1 :
583+ # e.g. val='2015-1-7'; fmt='%G-%V-%u'
541584 year, julian = _calc_julian_from_V(iso_year, iso_week,
542585 weekday + 1 )
586+ # else:
587+ # # e.g. val='Thu Sep 25 2003'; fmt='%a %b %d %Y'
588+ # pass
589+
543590 # Cannot pre-calculate date() since can change in Julian
544591 # calculation and thus could have different value for the day of the wk
545592 # calculation.
546593 if julian == - 1 :
547594 # Need to add 1 to result since first day of the year is 1, not
548595 # 0.
596+ # We don't actually need ordinal/julian here, but need to raise
597+ # on e.g. val='2015-04-31'; fmt='%Y-%m-%d'
549598 ordinal = date(year, month, day).toordinal()
550599 julian = ordinal - date(year, 1 , 1 ).toordinal() + 1
551600 else :
@@ -557,6 +606,9 @@ cdef tzinfo _parse_with_format(
557606 month = datetime_result.month
558607 day = datetime_result.day
559608 if weekday == - 1 :
609+ # We don't actually use weekday here, but need to do this in order to
610+ # raise on y/m/d combinations
611+ # TODO: not reached in tests 2023-10-28; necessary?
560612 weekday = date(year, month, day).weekday()
561613
562614 dts.year = year
@@ -567,10 +619,6 @@ cdef tzinfo _parse_with_format(
567619 dts.sec = second
568620 dts.us = us
569621 dts.ps = ns * 1000
570-
571- iresult[0 ] = npy_datetimestruct_to_datetime(NPY_FR_ns, & dts)
572- check_dts_bounds(& dts)
573-
574622 return tz
575623
576624
0 commit comments