| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125 |
- from queue import Queue
- from threading import Lock, Thread
- from urllib.parse import quote
- from .. import constants, logging
- from . import build_hf_headers, get_session, hf_raise_for_status
logger = logging.get_logger(__name__)

# Telemetry is sent from a separate thread so that the caller is not blocked.
# A single daemon thread is started lazily and consumes tasks from `_TELEMETRY_QUEUE`.
# If that thread ever stops (it is not expected to), `_start_telemetry_thread` creates
# a replacement the next time it is called.
_TELEMETRY_QUEUE: Queue = Queue()
_TELEMETRY_THREAD_LOCK = Lock()  # guards thread creation so concurrent callers start only one
_TELEMETRY_THREAD: Thread | None = None
def send_telemetry(
    topic: str,
    *,
    library_name: str | None = None,
    library_version: str | None = None,
    user_agent: dict | str | None = None,
) -> None:
    """
    Queue a telemetry event that helps track usage of the different HF libraries.

    The collected usage data is used to debug issues and prioritize new features. Not everyone wants to share
    additional information, so telemetry can be turned off by setting `HF_HUB_DISABLE_TELEMETRY=1` as environment
    variable. It is also turned off in offline mode (i.e. when setting `HF_HUB_OFFLINE=1`). In both cases this
    function returns immediately without doing anything.

    The event is only enqueued here; it is sent from a separate thread to minimize impact for the user.

    Args:
        topic (`str`):
            Name of the monitored topic. The topic is directly used to build the URL. Use "/" as a separator to
            monitor subtopics. Examples: "gradio", "transformers/examples",...
        library_name (`str`, *optional*):
            The name of the library that is making the HTTP request. Will be added to the user-agent header.
        library_version (`str`, *optional*):
            The version of the library that is making the HTTP request. Will be added to the user-agent header.
        user_agent (`str`, `dict`, *optional*):
            The user agent info in the form of a dictionary or a single string. It will be completed with
            information about the installed packages.

    Example:
    ```py
    >>> from huggingface_hub.utils import send_telemetry

    # Send telemetry without library information
    >>> send_telemetry("ping")

    # Send telemetry to subtopic with library information
    >>> send_telemetry("gradio/local_link", library_name="gradio", library_version="3.22.1")

    # Send telemetry with additional data
    >>> send_telemetry(
    ...     topic="examples",
    ...     library_name="transformers",
    ...     library_version="4.26.0",
    ...     user_agent={"pipeline": "text_classification", "framework": "flax"},
    ... )
    ```
    """
    # Telemetry is opt-out: bail out early in offline mode or when explicitly disabled.
    if constants.is_offline_mode():
        return
    if constants.HF_HUB_DISABLE_TELEMETRY:
        return

    # Keyword arguments that the worker thread forwards to `_send_telemetry_in_thread`.
    task = {
        "topic": topic,
        "library_name": library_name,
        "library_version": library_version,
        "user_agent": user_agent,
    }

    _start_telemetry_thread()  # does nothing when a live worker thread already exists
    _TELEMETRY_QUEUE.put(task)
def _start_telemetry_thread():
    """Make sure a daemon thread is consuming the telemetry queue.

    Does nothing when the current worker thread is alive. Otherwise (no thread was
    started yet, or the previous one stopped) a new daemon thread running
    `_telemetry_worker` is created and started.
    """
    global _TELEMETRY_THREAD

    # The lock prevents concurrent callers from each starting their own worker.
    with _TELEMETRY_THREAD_LOCK:
        if _TELEMETRY_THREAD is not None and _TELEMETRY_THREAD.is_alive():
            return

        worker = Thread(target=_telemetry_worker, daemon=True)
        _TELEMETRY_THREAD = worker
        worker.start()
def _telemetry_worker():
    """Wait for telemetry tasks and consume them, forever.

    Each task taken from `_TELEMETRY_QUEUE` is a dict of keyword arguments that is
    forwarded to `_send_telemetry_in_thread`. Every `get()` is matched by exactly one
    `task_done()`, including when sending raises unexpectedly.
    """
    while True:
        kwargs = _TELEMETRY_QUEUE.get()  # blocks until a task is available
        try:
            _send_telemetry_in_thread(**kwargs)
        finally:
            # Balance the `get()` above even on an unexpected error, otherwise the
            # queue's unfinished-task count never reaches zero and
            # `_TELEMETRY_QUEUE.join()` would block forever. The exception itself is
            # not swallowed: it still ends this thread, and `_start_telemetry_thread`
            # starts a new one the next time it is called.
            _TELEMETRY_QUEUE.task_done()
def _send_telemetry_in_thread(
    topic: str,
    *,
    library_name: str | None = None,
    library_version: str | None = None,
    user_agent: dict | str | None = None,
) -> None:
    """Perform the actual telemetry request to the Hub.

    This function is called directly in gradio's analytics because
    it is not possible to send telemetry from a daemon thread.
    See here: https://github.com/gradio-app/gradio/pull/8180

    Please do not rename or remove this function.

    Any exception raised while building or sending the request is caught and only
    logged at debug level: telemetry must never cause an error for the user.
    """
    # URL-quote each segment of the topic; empty segments (leading, trailing or
    # doubled "/") are dropped.
    segments = [quote(segment) for segment in topic.split("/") if segment]
    path = "/".join(segments)

    try:
        session = get_session()
        url = f"{constants.ENDPOINT}/api/telemetry/{path}"
        headers = build_hf_headers(
            token=False,  # no need to send a token for telemetry
            library_name=library_name,
            library_version=library_version,
            user_agent=user_agent,
        )
        response = session.head(url, headers=headers)
        hf_raise_for_status(response)
    except Exception as e:
        # We don't want to error in case of connection errors of any kind.
        logger.debug(f"Error while sending telemetry: {e}")
|