_telemetry.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. from queue import Queue
  2. from threading import Lock, Thread
  3. from urllib.parse import quote
  4. from .. import constants, logging
  5. from . import build_hf_headers, get_session, hf_raise_for_status
  6. logger = logging.get_logger(__name__)
  7. # Telemetry is sent by a separate thread to avoid blocking the main thread.
  8. # A daemon thread is started once and consume tasks from the _TELEMETRY_QUEUE.
  9. # If the thread stops for some reason -shouldn't happen-, we restart a new one.
  10. _TELEMETRY_THREAD: Thread | None = None
  11. _TELEMETRY_THREAD_LOCK = Lock() # Lock to avoid starting multiple threads in parallel
  12. _TELEMETRY_QUEUE: Queue = Queue()
  13. def send_telemetry(
  14. topic: str,
  15. *,
  16. library_name: str | None = None,
  17. library_version: str | None = None,
  18. user_agent: dict | str | None = None,
  19. ) -> None:
  20. """
  21. Sends telemetry that helps track usage of different HF libraries.
  22. This usage data helps us debug issues and prioritize new features. However, we understand that not everyone wants
  23. to share additional information, and we respect your privacy. You can disable telemetry collection by setting the
  24. `HF_HUB_DISABLE_TELEMETRY=1` as environment variable. Telemetry is also disabled in offline mode (i.e. when setting
  25. `HF_HUB_OFFLINE=1`).
  26. Telemetry collection is run in a separate thread to minimize impact for the user.
  27. Args:
  28. topic (`str`):
  29. Name of the topic that is monitored. The topic is directly used to build the URL. If you want to monitor
  30. subtopics, just use "/" separation. Examples: "gradio", "transformers/examples",...
  31. library_name (`str`, *optional*):
  32. The name of the library that is making the HTTP request. Will be added to the user-agent header.
  33. library_version (`str`, *optional*):
  34. The version of the library that is making the HTTP request. Will be added to the user-agent header.
  35. user_agent (`str`, `dict`, *optional*):
  36. The user agent info in the form of a dictionary or a single string. It will be completed with information about the installed packages.
  37. Example:
  38. ```py
  39. >>> from huggingface_hub.utils import send_telemetry
  40. # Send telemetry without library information
  41. >>> send_telemetry("ping")
  42. # Send telemetry to subtopic with library information
  43. >>> send_telemetry("gradio/local_link", library_name="gradio", library_version="3.22.1")
  44. # Send telemetry with additional data
  45. >>> send_telemetry(
  46. ... topic="examples",
  47. ... library_name="transformers",
  48. ... library_version="4.26.0",
  49. ... user_agent={"pipeline": "text_classification", "framework": "flax"},
  50. ... )
  51. ```
  52. """
  53. if constants.is_offline_mode() or constants.HF_HUB_DISABLE_TELEMETRY:
  54. return
  55. _start_telemetry_thread() # starts thread only if doesn't exist yet
  56. _TELEMETRY_QUEUE.put(
  57. {"topic": topic, "library_name": library_name, "library_version": library_version, "user_agent": user_agent}
  58. )
  59. def _start_telemetry_thread():
  60. """Start a daemon thread to consume tasks from the telemetry queue.
  61. If the thread is interrupted, start a new one.
  62. """
  63. with _TELEMETRY_THREAD_LOCK: # avoid to start multiple threads if called concurrently
  64. global _TELEMETRY_THREAD
  65. if _TELEMETRY_THREAD is None or not _TELEMETRY_THREAD.is_alive():
  66. _TELEMETRY_THREAD = Thread(target=_telemetry_worker, daemon=True)
  67. _TELEMETRY_THREAD.start()
  68. def _telemetry_worker():
  69. """Wait for a task and consume it."""
  70. while True:
  71. kwargs = _TELEMETRY_QUEUE.get()
  72. _send_telemetry_in_thread(**kwargs)
  73. _TELEMETRY_QUEUE.task_done()
  74. def _send_telemetry_in_thread(
  75. topic: str,
  76. *,
  77. library_name: str | None = None,
  78. library_version: str | None = None,
  79. user_agent: dict | str | None = None,
  80. ) -> None:
  81. """Contains the actual data sending data to the Hub.
  82. This function is called directly in gradio's analytics because
  83. it is not possible to send telemetry from a daemon thread.
  84. See here: https://github.com/gradio-app/gradio/pull/8180
  85. Please do not rename or remove this function.
  86. """
  87. path = "/".join(quote(part) for part in topic.split("/") if len(part) > 0)
  88. try:
  89. r = get_session().head(
  90. f"{constants.ENDPOINT}/api/telemetry/{path}",
  91. headers=build_hf_headers(
  92. token=False, # no need to send a token for telemetry
  93. library_name=library_name,
  94. library_version=library_version,
  95. user_agent=user_agent,
  96. ),
  97. )
  98. hf_raise_for_status(r)
  99. except Exception as e:
  100. # We don't want to error in case of connection errors of any kind.
  101. logger.debug(f"Error while sending telemetry: {e}")