_re.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. # SPDX-License-Identifier: MIT
  2. # SPDX-FileCopyrightText: 2021 Taneli Hukkinen
  3. # Licensed to PSF under a Contributor Agreement.
  4. from __future__ import annotations
  5. from datetime import date, datetime, time, timedelta, timezone, tzinfo
  6. from functools import lru_cache
  7. import re
  8. TYPE_CHECKING = False
  9. if TYPE_CHECKING:
  10. from typing import Any, Final
  11. from ._types import ParseFloat
# Regex source text for a time of day, shared by RE_LOCALTIME and RE_DATETIME.
# It defines four capture groups: hour (00-23), minute (00-59), second (00-59)
# and an optional fractional-second group. At most six fractional digits are
# captured; any further digits are matched but left out of the group.
# E.g.
# - 00:32:00.999999
# - 00:32:00
_TIME_RE_STR: Final = (
    r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?"
)
# Matches a numeric literal in one of two shapes:
# - "0" followed by an "x", "b" or "o" prefix and hex, binary or octal digits, or
# - an optionally signed decimal integer followed by the named group
#   "floatpart" (an optional fraction and an optional exponent).
# A single underscore is accepted between digits in every form.
# "floatpart" is the empty string for a plain decimal integer and does not
# participate in the match for the prefixed forms; match_to_number relies on
# its truthiness to tell floats from integers.
RE_NUMBER: Final = re.compile(
    r"""
0
(?:
x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
|
b[01](?:_?[01])* # bin
|
o[0-7](?:_?[0-7])* # oct
)
|
[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
(?P<floatpart>
(?:\.[0-9](?:_?[0-9])*)? # optional fractional part
(?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
)
""",
    flags=re.VERBOSE,
)
# Compiled form of the time-of-day pattern on its own. Its four groups
# (hour, minute, second, fraction) are what match_to_localtime unpacks.
RE_LOCALTIME: Final = re.compile(_TIME_RE_STR)
# Matches a date, optionally followed by a time and an optional UTC offset.
# Capture groups, in order (the order match_to_datetime unpacks them in):
#   1-3   year, month, day
#   4-7   hour, minute, second, fractional second (from _TIME_RE_STR)
#   8     "Z" / "z" marker
#   9-11  offset sign, offset hour, offset minute
# Groups 4-11 are None when the corresponding part is absent.
# The pattern is an f-string, so literal regex braces are doubled ("{{4}}").
# The interpolated time pattern contains no whitespace or "#", so re.VERBOSE
# does not alter its meaning.
RE_DATETIME: Final = re.compile(
    rf"""
([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
(?:
[Tt ]
{_TIME_RE_STR}
(?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
)?
""",
    flags=re.VERBOSE,
)
  49. def match_to_datetime(match: re.Match[str]) -> datetime | date:
  50. """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
  51. Raises ValueError if the match does not correspond to a valid date
  52. or datetime.
  53. """
  54. (
  55. year_str,
  56. month_str,
  57. day_str,
  58. hour_str,
  59. minute_str,
  60. sec_str,
  61. micros_str,
  62. zulu_time,
  63. offset_sign_str,
  64. offset_hour_str,
  65. offset_minute_str,
  66. ) = match.groups()
  67. year, month, day = int(year_str), int(month_str), int(day_str)
  68. if hour_str is None:
  69. return date(year, month, day)
  70. hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
  71. micros = int(micros_str.ljust(6, "0")) if micros_str else 0
  72. if offset_sign_str:
  73. tz: tzinfo | None = cached_tz(
  74. offset_hour_str, offset_minute_str, offset_sign_str
  75. )
  76. elif zulu_time:
  77. tz = timezone.utc
  78. else: # local date-time
  79. tz = None
  80. return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
  81. # No need to limit cache size. This is only ever called on input
  82. # that matched RE_DATETIME, so there is an implicit bound of
  83. # 24 (hours) * 60 (minutes) * 2 (offset direction) = 2880.
  84. @lru_cache(maxsize=None)
  85. def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone:
  86. sign = 1 if sign_str == "+" else -1
  87. return timezone(
  88. timedelta(
  89. hours=sign * int(hour_str),
  90. minutes=sign * int(minute_str),
  91. )
  92. )
  93. def match_to_localtime(match: re.Match[str]) -> time:
  94. hour_str, minute_str, sec_str, micros_str = match.groups()
  95. micros = int(micros_str.ljust(6, "0")) if micros_str else 0
  96. return time(int(hour_str), int(minute_str), int(sec_str), micros)
  97. def match_to_number(match: re.Match[str], parse_float: ParseFloat) -> Any:
  98. if match.group("floatpart"):
  99. return parse_float(match.group())
  100. return int(match.group(), 0)