download.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. # Copyright 202-present, the HuggingFace Inc. team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Contains command to download files from the Hub with the CLI.
  15. Usage:
  16. hf download --help
  17. # Download file
  18. hf download gpt2 config.json
  19. # Download entire repo
  20. hf download fffiloni/zeroscope --repo-type=space --revision=refs/pr/78
  21. # Download repo with filters
  22. hf download gpt2 --include="*.safetensors"
  23. # Download with token
  24. hf download Wauplin/private-model --token=hf_***
  25. # Download quietly (no progress bar, no warnings, only the returned path)
  26. hf download gpt2 config.json --quiet
  27. # Download to local dir
  28. hf download gpt2 --local-dir=./models/gpt2
  29. # Download a subfolder
  30. hf download HuggingFaceM4/FineVision art/ --repo-type=dataset
  31. """
  32. import warnings
  33. from typing import Annotated
  34. import typer
  35. from huggingface_hub._snapshot_download import snapshot_download
  36. from huggingface_hub.errors import CLIError
  37. from huggingface_hub.file_download import DryRunFileInfo, hf_hub_download
  38. from huggingface_hub.utils import _format_size
  39. from ._cli_utils import FormatWithAutoOpt, RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt
  40. from ._output import OutputFormatWithAuto, out
  41. DOWNLOAD_EXAMPLES = [
  42. "hf download meta-llama/Llama-3.2-1B-Instruct",
  43. "hf download meta-llama/Llama-3.2-1B-Instruct config.json tokenizer.json",
  44. 'hf download meta-llama/Llama-3.2-1B-Instruct --include "*.safetensors" --exclude "*.bin"',
  45. "hf download meta-llama/Llama-3.2-1B-Instruct --local-dir ./models/llama",
  46. "hf download HuggingFaceM4/FineVision art/ --repo-type dataset",
  47. ]
  48. def download(
  49. repo_id: RepoIdArg,
  50. filenames: Annotated[
  51. list[str] | None,
  52. typer.Argument(
  53. help="Files to download (e.g. `config.json`, `data/metadata.jsonl`).",
  54. ),
  55. ] = None,
  56. repo_type: RepoTypeOpt = RepoTypeOpt.model,
  57. revision: RevisionOpt = None,
  58. include: Annotated[
  59. list[str] | None,
  60. typer.Option(
  61. help="Glob patterns to include from files to download. eg: *.json",
  62. ),
  63. ] = None,
  64. exclude: Annotated[
  65. list[str] | None,
  66. typer.Option(
  67. help="Glob patterns to exclude from files to download.",
  68. ),
  69. ] = None,
  70. cache_dir: Annotated[
  71. str | None,
  72. typer.Option(
  73. help="Directory where to save files.",
  74. ),
  75. ] = None,
  76. local_dir: Annotated[
  77. str | None,
  78. typer.Option(
  79. help="If set, the downloaded file will be placed under this directory. Check out https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-a-local-folder for more details.",
  80. ),
  81. ] = None,
  82. force_download: Annotated[
  83. bool,
  84. typer.Option(
  85. help="If True, the files will be downloaded even if they are already cached.",
  86. ),
  87. ] = False,
  88. dry_run: Annotated[
  89. bool,
  90. typer.Option(
  91. help="If True, perform a dry run without actually downloading the file.",
  92. ),
  93. ] = False,
  94. token: TokenOpt = None,
  95. max_workers: Annotated[
  96. int,
  97. typer.Option(
  98. help="Maximum number of workers to use for downloading files. Default is 8.",
  99. ),
  100. ] = 8,
  101. format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
  102. ) -> None:
  103. """Download files from the Hub."""
  104. def run_download() -> str | DryRunFileInfo | list[DryRunFileInfo]:
  105. filenames_list = filenames if filenames is not None else []
  106. # Separate subfolder patterns (ending with '/') from regular filenames
  107. # Subfolders like "art/" are converted to include patterns like "art/**"
  108. subfolders = [f for f in filenames_list if f.endswith("/")]
  109. subfolder_patterns = [f"{f.rstrip('/')}/**" for f in subfolders]
  110. regular_filenames = [f for f in filenames_list if not f.endswith("/")]
  111. # Error if subfolder patterns are combined with --include/--exclude
  112. # Guide user to use --include instead of subfolder argument
  113. if len(subfolder_patterns) > 0:
  114. if include is not None and len(include) > 0:
  115. raise CLIError(
  116. f"Cannot combine subfolder argument ('{subfolders[0]}') with `--include`. "
  117. f'Please use `--include "{subfolders[0]}*"` instead.'
  118. )
  119. if exclude is not None and len(exclude) > 0:
  120. raise CLIError(
  121. f"Cannot combine subfolder argument ('{subfolders[0]}') with `--exclude`. "
  122. f'Please use `--include "{subfolders[0]}*"` with `--exclude` instead.'
  123. )
  124. # Warn user if patterns are ignored (only if regular filenames are provided)
  125. if len(regular_filenames) > 0:
  126. if include is not None and len(include) > 0:
  127. warnings.warn("Ignoring `--include` since filenames have being explicitly set.")
  128. if exclude is not None and len(exclude) > 0:
  129. warnings.warn("Ignoring `--exclude` since filenames have being explicitly set.")
  130. # Single file to download (not a subfolder): use `hf_hub_download`
  131. if len(regular_filenames) == 1 and len(subfolder_patterns) == 0:
  132. return hf_hub_download(
  133. repo_id=repo_id,
  134. repo_type=repo_type.value,
  135. revision=revision,
  136. filename=regular_filenames[0],
  137. cache_dir=cache_dir,
  138. force_download=force_download,
  139. token=token,
  140. local_dir=local_dir,
  141. library_name="huggingface-cli",
  142. dry_run=dry_run,
  143. )
  144. # Otherwise: use `snapshot_download` to ensure all files comes from same revision
  145. if len(regular_filenames) == 0 and len(subfolder_patterns) == 0:
  146. # No filenames provided: use include/exclude patterns
  147. allow_patterns = include
  148. ignore_patterns = exclude
  149. else:
  150. # Combine regular filenames and subfolder patterns as allow_patterns
  151. allow_patterns = regular_filenames + subfolder_patterns
  152. ignore_patterns = None
  153. return snapshot_download(
  154. repo_id=repo_id,
  155. repo_type=repo_type.value,
  156. revision=revision,
  157. allow_patterns=allow_patterns,
  158. ignore_patterns=ignore_patterns,
  159. force_download=force_download,
  160. cache_dir=cache_dir,
  161. token=token,
  162. local_dir=local_dir,
  163. library_name="huggingface-cli",
  164. max_workers=max_workers,
  165. dry_run=dry_run,
  166. )
  167. def _print_result(result: str | DryRunFileInfo | list[DryRunFileInfo]) -> None:
  168. if isinstance(result, str):
  169. out.result("Downloaded", path=result)
  170. return
  171. # Print dry run info
  172. if isinstance(result, DryRunFileInfo):
  173. result = [result]
  174. will_download = [r for r in result if r.will_download]
  175. out.text(
  176. f"[dry-run] Will download {len(will_download)} files"
  177. f" (out of {len(result)})"
  178. f" totalling {_format_size(sum(r.file_size for r in will_download))}."
  179. )
  180. items = [
  181. {
  182. "file": info.filename,
  183. "size": _format_size(info.file_size) if info.will_download else "-",
  184. }
  185. for info in sorted(result, key=lambda x: x.filename)
  186. ]
  187. out.table(items)
  188. _print_result(run_download())