_parsing.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. # Copyright 2025-present, the HuggingFace Inc. team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Parsing helpers shared across modules."""
  15. import re
  16. import time
  17. RE_NUMBER_WITH_UNIT = re.compile(r"(\d+)([a-z]+)", re.IGNORECASE)
  18. BYTE_UNITS: dict[str, int] = {
  19. "k": 1_000,
  20. "m": 1_000_000,
  21. "g": 1_000_000_000,
  22. "t": 1_000_000_000_000,
  23. "p": 1_000_000_000_000_000,
  24. }
  25. TIME_UNITS: dict[str, int] = {
  26. "s": 1,
  27. "m": 60,
  28. "h": 60 * 60,
  29. "d": 24 * 60 * 60,
  30. "w": 7 * 24 * 60 * 60,
  31. "mo": 30 * 24 * 60 * 60,
  32. "y": 365 * 24 * 60 * 60,
  33. }
  34. def parse_size(value: str) -> int:
  35. """Parse a size expressed as a string with digits and unit (like `"10MB"`) to an integer (in bytes)."""
  36. return _parse_with_unit(value, BYTE_UNITS)
  37. def parse_duration(value: str) -> int:
  38. """Parse a duration expressed as a string with digits and unit (like `"10s"`) to an integer (in seconds)."""
  39. return _parse_with_unit(value, TIME_UNITS)
  40. def _parse_with_unit(value: str, units: dict[str, int]) -> int:
  41. """Parse a numeric value with optional unit."""
  42. stripped = value.strip()
  43. if not stripped:
  44. raise ValueError("Value cannot be empty.")
  45. try:
  46. return int(value)
  47. except ValueError:
  48. pass
  49. match = RE_NUMBER_WITH_UNIT.fullmatch(stripped)
  50. if not match:
  51. raise ValueError(f"Invalid value '{value}'. Must match pattern '\\d+[a-z]+' or be a plain number.")
  52. number = int(match.group(1))
  53. unit = match.group(2).lower()
  54. if unit not in units:
  55. raise ValueError(f"Unknown unit '{unit}'. Must be one of {list(units.keys())}.")
  56. return number * units[unit]
  57. def format_timesince(ts: float) -> str:
  58. """Format timestamp in seconds into a human-readable string, relative to now.
  59. Vaguely inspired by Django's `timesince` formatter.
  60. """
  61. _TIMESINCE_CHUNKS = (
  62. # Label, divider, max value
  63. ("second", 1, 60),
  64. ("minute", 60, 60),
  65. ("hour", 60 * 60, 24),
  66. ("day", 60 * 60 * 24, 6),
  67. ("week", 60 * 60 * 24 * 7, 6),
  68. ("month", 60 * 60 * 24 * 30, 11),
  69. ("year", 60 * 60 * 24 * 365, None),
  70. )
  71. delta = time.time() - ts
  72. if delta < 20:
  73. return "a few seconds ago"
  74. for label, divider, max_value in _TIMESINCE_CHUNKS: # noqa: B007
  75. value = round(delta / divider)
  76. if max_value is not None and value <= max_value:
  77. break
  78. return f"{value} {label}{'s' if value > 1 else ''} ago"