monitor.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. import os
  2. import time
  3. from threading import Thread, Lock
  4. import sentry_sdk
  5. from sentry_sdk.utils import logger
  6. from typing import TYPE_CHECKING
  7. if TYPE_CHECKING:
  8. from typing import Optional
# Upper bound for Monitor._downsample_factor: the factor is only incremented
# after a failed health check while it is still below this value.
MAX_DOWNSAMPLE_FACTOR = 10
  10. class Monitor:
  11. """
  12. Performs health checks in a separate thread once every interval seconds
  13. and updates the internal state. Other parts of the SDK only read this state
  14. and act accordingly.
  15. """
  16. name = "sentry.monitor"
  17. def __init__(
  18. self, transport: "sentry_sdk.transport.Transport", interval: float = 10
  19. ) -> None:
  20. self.transport: "sentry_sdk.transport.Transport" = transport
  21. self.interval: float = interval
  22. self._healthy = True
  23. self._downsample_factor: int = 0
  24. self._thread: "Optional[Thread]" = None
  25. self._thread_lock = Lock()
  26. self._thread_for_pid: "Optional[int]" = None
  27. self._running = True
  28. def _ensure_running(self) -> None:
  29. """
  30. Check that the monitor has an active thread to run in, or create one if not.
  31. Note that this might fail (e.g. in Python 3.12 it's not possible to
  32. spawn new threads at interpreter shutdown). In that case self._running
  33. will be False after running this function.
  34. """
  35. if self._thread_for_pid == os.getpid() and self._thread is not None:
  36. return None
  37. with self._thread_lock:
  38. if self._thread_for_pid == os.getpid() and self._thread is not None:
  39. return None
  40. def _thread() -> None:
  41. while self._running:
  42. time.sleep(self.interval)
  43. if self._running:
  44. self.run()
  45. thread = Thread(name=self.name, target=_thread)
  46. thread.daemon = True
  47. try:
  48. thread.start()
  49. except RuntimeError:
  50. # Unfortunately at this point the interpreter is in a state that no
  51. # longer allows us to spawn a thread and we have to bail.
  52. self._running = False
  53. return None
  54. self._thread = thread
  55. self._thread_for_pid = os.getpid()
  56. return None
  57. def run(self) -> None:
  58. self.check_health()
  59. self.set_downsample_factor()
  60. def set_downsample_factor(self) -> None:
  61. if self._healthy:
  62. if self._downsample_factor > 0:
  63. logger.debug(
  64. "[Monitor] health check positive, reverting to normal sampling"
  65. )
  66. self._downsample_factor = 0
  67. else:
  68. if self.downsample_factor < MAX_DOWNSAMPLE_FACTOR:
  69. self._downsample_factor += 1
  70. logger.debug(
  71. "[Monitor] health check negative, downsampling with a factor of %d",
  72. self._downsample_factor,
  73. )
  74. def check_health(self) -> None:
  75. """
  76. Perform the actual health checks,
  77. currently only checks if the transport is rate-limited.
  78. TODO: augment in the future with more checks.
  79. """
  80. self._healthy = self.transport.is_healthy()
  81. def is_healthy(self) -> bool:
  82. self._ensure_running()
  83. return self._healthy
  84. @property
  85. def downsample_factor(self) -> int:
  86. self._ensure_running()
  87. return self._downsample_factor
  88. def kill(self) -> None:
  89. self._running = False