# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
# Licensed to PSF under a Contributor Agreement.
from __future__ import annotations

from datetime import date, datetime, time, timedelta, timezone, tzinfo
from functools import lru_cache
import re

TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Any, Final

    from ._types import ParseFloat
  12. _TIME_RE_STR: Final = r"""
  13. ([01][0-9]|2[0-3]) # hours
  14. :([0-5][0-9]) # minutes
  15. (?:
  16. :([0-5][0-9]) # optional seconds
  17. (?:\.([0-9]{1,6})[0-9]*)? # optional fractions of a second
  18. )?
  19. """
  20. RE_NUMBER: Final = re.compile(
  21. r"""
  22. 0
  23. (?:
  24. x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
  25. |
  26. b[01](?:_?[01])* # bin
  27. |
  28. o[0-7](?:_?[0-7])* # oct
  29. )
  30. |
  31. [+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
  32. (?P<floatpart>
  33. (?:\.[0-9](?:_?[0-9])*)? # optional fractional part
  34. (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
  35. )
  36. """,
  37. flags=re.VERBOSE,
  38. )
  39. RE_LOCALTIME: Final = re.compile(_TIME_RE_STR, flags=re.VERBOSE)
  40. RE_DATETIME: Final = re.compile(
  41. rf"""
  42. ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
  43. (?:
  44. [Tt ]
  45. {_TIME_RE_STR}
  46. (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
  47. )?
  48. """,
  49. flags=re.VERBOSE,
  50. )
  51. def match_to_datetime(match: re.Match[str]) -> datetime | date:
  52. """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
  53. Raises ValueError if the match does not correspond to a valid date
  54. or datetime.
  55. """
  56. (
  57. year_str,
  58. month_str,
  59. day_str,
  60. hour_str,
  61. minute_str,
  62. sec_str,
  63. micros_str,
  64. zulu_time,
  65. offset_sign_str,
  66. offset_hour_str,
  67. offset_minute_str,
  68. ) = match.groups()
  69. year, month, day = int(year_str), int(month_str), int(day_str)
  70. if hour_str is None:
  71. return date(year, month, day)
  72. hour, minute = int(hour_str), int(minute_str)
  73. sec = int(sec_str) if sec_str else 0
  74. micros = int(micros_str.ljust(6, "0")) if micros_str else 0
  75. if offset_sign_str:
  76. tz: tzinfo | None = cached_tz(
  77. offset_hour_str, offset_minute_str, offset_sign_str
  78. )
  79. elif zulu_time:
  80. tz = timezone.utc
  81. else: # local date-time
  82. tz = None
  83. return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
  84. # No need to limit cache size. This is only ever called on input
  85. # that matched RE_DATETIME, so there is an implicit bound of
  86. # 24 (hours) * 60 (minutes) * 2 (offset direction) = 2880.
  87. @lru_cache(maxsize=None)
  88. def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone:
  89. sign = 1 if sign_str == "+" else -1
  90. return timezone(
  91. timedelta(
  92. hours=sign * int(hour_str),
  93. minutes=sign * int(minute_str),
  94. )
  95. )
  96. def match_to_localtime(match: re.Match[str]) -> time:
  97. hour_str, minute_str, sec_str, micros_str = match.groups()
  98. sec = int(sec_str) if sec_str else 0
  99. micros = int(micros_str.ljust(6, "0")) if micros_str else 0
  100. return time(int(hour_str), int(minute_str), sec, micros)
  101. def match_to_number(match: re.Match[str], parse_float: ParseFloat) -> Any:
  102. if match.group("floatpart"):
  103. return parse_float(match.group())
  104. return int(match.group(), 0)