| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
- # Copyright 2022-present, the HuggingFace Inc. team.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """Contains utilities to handle headers to send in calls to Huggingface Hub."""
- from huggingface_hub.errors import LocalTokenNotFoundError
- from .. import constants
- from ._auth import get_token
- from ._detect_agent import detect_agent
- from ._runtime import (
- get_hf_hub_version,
- get_python_version,
- get_torch_version,
- is_torch_available,
- )
- from ._validators import validate_hf_hub_args
@validate_hf_hub_args
def build_hf_headers(
    *,
    token: bool | str | None = None,
    library_name: str | None = None,
    library_version: str | None = None,
    user_agent: dict | str | None = None,
    headers: dict[str, str] | None = None,
) -> dict[str, str]:
    """
    Assemble the headers dictionary to send with a call to the HF Hub.

    The result always contains a `"user-agent"` entry built by `_http_user_agent`.
    An `"authorization"` entry (`"Bearer <token>"`) is added whenever
    `get_token_to_send` resolves a token: either the one passed explicitly, or the
    locally available one. To avoid sending a token, pass `token=False` or set the
    `HF_HUB_DISABLE_IMPLICIT_TOKEN` environment variable.

    Args:
        token (`str`, `bool`, *optional*):
            The token to be sent in the authorization header for the Hub call:
                - if a string, it is used as the Hugging Face token
                - if `True`, the locally available token is used and is required
                - if `False`, the authorization header is not set
                - if `None`, the locally available token is used, unless
                  `HF_HUB_DISABLE_IMPLICIT_TOKEN` is set.
        library_name (`str`, *optional*):
            The name of the library that is making the HTTP request. Will be added to
            the user-agent header.
        library_version (`str`, *optional*):
            The version of the library that is making the HTTP request. Will be added
            to the user-agent header.
        user_agent (`str`, `dict`, *optional*):
            Extra user agent info, as a dictionary or a single string. It is appended
            to the generated user-agent.
        headers (`dict`, *optional*):
            Additional headers to include in the request. They are applied last and
            therefore take precedence over the ones generated by this function.

    Returns:
        A `dict` of headers to pass in your API call.

    Example:
    ```py
        >>> build_hf_headers(token="hf_***") # explicit token
        {"user-agent": ..., "authorization": "Bearer hf_***"}

        >>> build_hf_headers(token=True) # explicitly use the local token
        {"user-agent": ..., "authorization": "Bearer hf_***"}

        >>> build_hf_headers(token=False) # explicitly don't send any token
        {"user-agent": ...}

        >>> build_hf_headers() # implicit use of the local token
        {"user-agent": ..., "authorization": "Bearer hf_***"}

        # HF_HUB_DISABLE_IMPLICIT_TOKEN=True # to set as env variable
        >>> build_hf_headers() # token is not sent
        {"user-agent": ...}

        >>> build_hf_headers(library_name="transformers", library_version="1.2.3")
        {"user-agent": "transformers/1.2.3; hf_hub/...; python/...", ...}
    ```

    Raises:
        [`LocalTokenNotFoundError`]
            If `token=True` but no token is available locally.
    """
    # Resolve the token first: a missing required token must fail before anything else.
    bearer_token = get_token_to_send(token)

    agent_string = _http_user_agent(
        library_name=library_name,
        library_version=library_version,
        user_agent=user_agent,
    )
    result: dict[str, str] = {"user-agent": agent_string}

    if bearer_token is not None:
        result["authorization"] = f"Bearer {bearer_token}"

    # Caller-supplied headers go last so that they override the generated entries.
    if headers is not None:
        result.update(headers)

    return result
- def get_token_to_send(token: bool | str | None) -> str | None:
- """Select the token to send from either `token` or the cache."""
- # Case token is explicitly provided
- if isinstance(token, str):
- return token
- # Case token is explicitly forbidden
- if token is False:
- return None
- # Token is not provided: we get it from local cache
- cached_token = get_token()
- # Case token is explicitly required
- if token is True:
- if cached_token is None:
- raise LocalTokenNotFoundError(
- "Token is required (`token=True`), but no token found. You"
- " need to provide a token or be logged in to Hugging Face with"
- " `hf auth login` or `huggingface_hub.login`. See"
- " https://huggingface.co/settings/tokens."
- )
- return cached_token
- # Case implicit use of the token is forbidden by env variable
- if constants.HF_HUB_DISABLE_IMPLICIT_TOKEN:
- return None
- # Otherwise: we use the cached token as the user has not explicitly forbidden it
- return cached_token
- def _http_user_agent(
- *,
- library_name: str | None = None,
- library_version: str | None = None,
- user_agent: dict | str | None = None,
- ) -> str:
- """Format a user-agent string containing information about the installed packages.
- The string is a sequence of `name/value` entries separated by `"; "`. It starts
- with the calling library (or `unknown/None` when `library_name` is `None`),
- followed by the `hf_hub` and `python` versions. Optional entries (`torch`,
- `agent`, the caller-provided `user_agent`, `origin`) are appended after them.
- Redundant entries are removed with `_deduplicate_user_agent` before returning.
- Args:
- library_name (`str`, *optional*):
- The name of the library that is making the HTTP request.
- library_version (`str`, *optional*):
- The version of the library that is making the HTTP request. It is formatted
- as-is, so `None` yields `<library_name>/None`.
- user_agent (`str`, `dict`, *optional*):
- The user agent info in the form of a dictionary or a single string. A dict
- is rendered as `key/value` entries; a string is appended unchanged.
- Returns:
- The formatted user-agent string.
- """
- # First entry: the calling library, or a placeholder when no name is given.
- if library_name is not None:
- ua = f"{library_name}/{library_version}"
- else:
- ua = "unknown/None"
- # Versions of huggingface_hub and of the Python interpreter.
- ua += f"; hf_hub/{get_hf_hub_version()}"
- ua += f"; python/{get_python_version()}"
- # The torch version is only reported when telemetry is not disabled.
- # NOTE(review): indentation is lost in this copy of the file, so it cannot be told
- # from here whether the agent detection below is also covered by the telemetry
- # guard. Confirm against the original source before restructuring this function.
- if not constants.HF_HUB_DISABLE_TELEMETRY:
- if is_torch_available():
- ua += f"; torch/{get_torch_version()}"
- agent = detect_agent()
- if agent:
- ua += f"; agent/{agent}"
- # Caller-provided info: dict items become "key/value" entries, a string is appended as-is.
- # Any other type (including None) adds nothing.
- if isinstance(user_agent, dict):
- ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items())
- elif isinstance(user_agent, str):
- ua += "; " + user_agent
- # Retrieve user-agent origin headers from environment variable
- origin = constants.HF_HUB_USER_AGENT_ORIGIN
- if origin is not None:
- ua += "; origin/" + origin
- # Entries may repeat (e.g. the caller's user_agent can restate a generated one): keep one of each.
- return _deduplicate_user_agent(ua)
- def _deduplicate_user_agent(user_agent: str) -> str:
- """Deduplicate redundant information in the generated user-agent."""
- # Split around ";" > Strip whitespaces > Store as dict keys (ensure unicity) > format back as string
- # Order is implicitly preserved by dictionary structure (see https://stackoverflow.com/a/53657523).
- return "; ".join({key.strip(): None for key in user_agent.split(";")}.keys())
|