upload.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. # Copyright 2023-present, the HuggingFace Inc. team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Contains command to upload a repo or file with the CLI.
  15. Usage:
  16. # Upload file (implicit)
  17. hf upload my-cool-model ./my-cool-model.safetensors
  18. # Upload file (explicit)
  19. hf upload my-cool-model ./my-cool-model.safetensors model.safetensors
  20. # Upload directory (implicit). If `my-cool-model/` is a directory it will be uploaded, otherwise an exception is raised.
  21. hf upload my-cool-model
  22. # Upload directory (explicit)
  23. hf upload my-cool-model ./models/my-cool-model .
  24. # Upload filtered directory (example: tensorboard logs except for the last run)
  25. hf upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"
  26. # Upload with wildcard
  27. hf upload my-cool-model "./model/training/*.safetensors"
  28. # Upload private dataset
  29. hf upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
  30. # Upload with token
  31. hf upload Wauplin/my-cool-model --token=hf_****
  32. # Sync local Space with Hub (upload new files, delete removed files)
  33. hf upload Wauplin/space-example --repo-type=space --exclude="/logs/*" --delete="*" --commit-message="Sync local Space with Hub"
  34. # Schedule commits every 30 minutes
  35. hf upload Wauplin/my-cool-model --every=30
  36. """
  37. import os
  38. import time
  39. import warnings
  40. from typing import Annotated
  41. import typer
  42. from huggingface_hub import logging
  43. from huggingface_hub._commit_scheduler import CommitScheduler
  44. from huggingface_hub.errors import RevisionNotFoundError
  45. from ._cli_utils import (
  46. FormatWithAutoOpt,
  47. PrivateOpt,
  48. RepoIdArg,
  49. RepoType,
  50. RepoTypeOpt,
  51. RevisionOpt,
  52. TokenOpt,
  53. get_hf_api,
  54. )
  55. from ._output import OutputFormatWithAuto, out
# Module-level logger; used by `upload` to report when a missing branch is being created.
logger = logging.get_logger(__name__)

# Sample `hf upload` command lines covering: uploading the current directory, a single file,
# a dataset folder to a sub-path, a custom commit message/description, and opening a PR.
# NOTE(review): not referenced elsewhere in the visible part of this file — presumably consumed
# by the CLI registration/help code; confirm against the caller.
UPLOAD_EXAMPLES = [
    "hf upload my-cool-model . .",
    "hf upload Wauplin/my-cool-model ./models/model.safetensors",
    "hf upload Wauplin/my-cool-dataset ./data /train --repo-type=dataset",
    'hf upload Wauplin/my-cool-model ./models . --commit-message="Epoch 34/50" --commit-description="Val accuracy: 68%"',
    "hf upload bigcode/the-stack . . --repo-type dataset --create-pr",
]
  64. def upload(
  65. repo_id: RepoIdArg,
  66. local_path: Annotated[
  67. str | None,
  68. typer.Argument(
  69. help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
  70. ),
  71. ] = None,
  72. path_in_repo: Annotated[
  73. str | None,
  74. typer.Argument(
  75. help="Path of the file or folder in the repo. Defaults to the relative path of the file or folder.",
  76. ),
  77. ] = None,
  78. repo_type: RepoTypeOpt = RepoType.model,
  79. revision: RevisionOpt = None,
  80. private: PrivateOpt = None,
  81. include: Annotated[
  82. list[str] | None,
  83. typer.Option(
  84. help="Glob patterns to match files to upload.",
  85. ),
  86. ] = None,
  87. exclude: Annotated[
  88. list[str] | None,
  89. typer.Option(
  90. help="Glob patterns to exclude from files to upload.",
  91. ),
  92. ] = None,
  93. delete: Annotated[
  94. list[str] | None,
  95. typer.Option(
  96. help="Glob patterns for file to be deleted from the repo while committing.",
  97. ),
  98. ] = None,
  99. commit_message: Annotated[
  100. str | None,
  101. typer.Option(
  102. help="The summary / title / first line of the generated commit.",
  103. ),
  104. ] = None,
  105. commit_description: Annotated[
  106. str | None,
  107. typer.Option(
  108. help="The description of the generated commit.",
  109. ),
  110. ] = None,
  111. create_pr: Annotated[
  112. bool,
  113. typer.Option(
  114. help="Whether to upload content as a new Pull Request.",
  115. ),
  116. ] = False,
  117. every: Annotated[
  118. float | None,
  119. typer.Option(
  120. help="If set, a background job is scheduled to create commits every `every` minutes.",
  121. ),
  122. ] = None,
  123. token: TokenOpt = None,
  124. format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
  125. ) -> None:
  126. """Upload a file or a folder to the Hub. Recommended for single-commit uploads."""
  127. if every is not None and every <= 0:
  128. raise typer.BadParameter("--every must be a positive value", param_hint="every")
  129. repo_type_str = repo_type.value
  130. api = get_hf_api(token=token)
  131. # Resolve local_path and path_in_repo based on implicit/explicit rules
  132. resolved_local_path, resolved_path_in_repo, resolved_include = _resolve_upload_paths(
  133. repo_id=repo_id, local_path=local_path, path_in_repo=path_in_repo, include=include
  134. )
  135. def run_upload() -> str:
  136. if os.path.isfile(resolved_local_path):
  137. if resolved_include is not None and len(resolved_include) > 0 and isinstance(resolved_include, list):
  138. warnings.warn("Ignoring --include since a single file is uploaded.")
  139. if exclude is not None and len(exclude) > 0:
  140. warnings.warn("Ignoring --exclude since a single file is uploaded.")
  141. if delete is not None and len(delete) > 0:
  142. warnings.warn("Ignoring --delete since a single file is uploaded.")
  143. # Schedule commits if `every` is set
  144. if every is not None:
  145. allow_patterns: list[str] | None
  146. ignore_patterns: list[str] | None
  147. if os.path.isfile(resolved_local_path):
  148. # If file => watch entire folder + use allow_patterns
  149. folder_path = os.path.dirname(resolved_local_path)
  150. pi = (
  151. resolved_path_in_repo[: -len(resolved_local_path)]
  152. if resolved_path_in_repo.endswith(resolved_local_path)
  153. else resolved_path_in_repo
  154. )
  155. allow_patterns = [resolved_local_path]
  156. ignore_patterns = []
  157. else:
  158. folder_path = resolved_local_path
  159. pi = resolved_path_in_repo
  160. allow_patterns = resolved_include
  161. ignore_patterns = exclude
  162. if delete is not None and len(delete) > 0:
  163. warnings.warn("Ignoring --delete when uploading with scheduled commits.")
  164. scheduler = CommitScheduler(
  165. folder_path=folder_path,
  166. repo_id=repo_id,
  167. repo_type=repo_type_str,
  168. revision=revision,
  169. allow_patterns=allow_patterns,
  170. ignore_patterns=ignore_patterns,
  171. path_in_repo=pi,
  172. private=private,
  173. every=every,
  174. hf_api=api,
  175. )
  176. out.text(f"Scheduling commits every {every} minutes to {scheduler.repo_id}.")
  177. try:
  178. while True:
  179. time.sleep(100)
  180. except KeyboardInterrupt:
  181. scheduler.stop()
  182. return "Stopped scheduled commits."
  183. # Otherwise, create repo and proceed with the upload
  184. if not os.path.isfile(resolved_local_path) and not os.path.isdir(resolved_local_path):
  185. raise FileNotFoundError(f"No such file or directory: '{resolved_local_path}'.")
  186. created = api.create_repo(
  187. repo_id=repo_id,
  188. repo_type=repo_type_str,
  189. exist_ok=True,
  190. private=private,
  191. space_sdk="gradio" if repo_type_str == "space" else None,
  192. # ^ We don't want it to fail when uploading to a Space => let's set Gradio by default.
  193. # ^ I'd rather not add CLI args to set it explicitly as we already have `hf repos create` for that.
  194. ).repo_id
  195. # Check if branch already exists and if not, create it
  196. if revision is not None and not create_pr:
  197. try:
  198. api.repo_info(repo_id=created, repo_type=repo_type_str, revision=revision)
  199. except RevisionNotFoundError:
  200. logger.info(f"Branch '{revision}' not found. Creating it...")
  201. api.create_branch(repo_id=created, repo_type=repo_type_str, branch=revision, exist_ok=True)
  202. # ^ `exist_ok=True` to avoid race concurrency issues
  203. # File-based upload
  204. if os.path.isfile(resolved_local_path):
  205. return api.upload_file(
  206. path_or_fileobj=resolved_local_path,
  207. path_in_repo=resolved_path_in_repo,
  208. repo_id=created,
  209. repo_type=repo_type_str,
  210. revision=revision,
  211. commit_message=commit_message,
  212. commit_description=commit_description,
  213. create_pr=create_pr,
  214. )
  215. # Folder-based upload
  216. return api.upload_folder(
  217. folder_path=resolved_local_path,
  218. path_in_repo=resolved_path_in_repo,
  219. repo_id=created,
  220. repo_type=repo_type_str,
  221. revision=revision,
  222. commit_message=commit_message,
  223. commit_description=commit_description,
  224. create_pr=create_pr,
  225. allow_patterns=resolved_include,
  226. ignore_patterns=exclude,
  227. delete_patterns=delete,
  228. )
  229. result = run_upload()
  230. out.result("Uploaded", url=result)
  231. def _resolve_upload_paths(
  232. *, repo_id: str, local_path: str | None, path_in_repo: str | None, include: list[str] | None
  233. ) -> tuple[str, str, list[str] | None]:
  234. repo_name = repo_id.split("/")[-1]
  235. resolved_include = include
  236. if local_path is not None and any(c in local_path for c in ["*", "?", "["]):
  237. if include is not None:
  238. raise ValueError("Cannot set --include when local_path contains a wildcard.")
  239. if path_in_repo is not None and path_in_repo != ".":
  240. raise ValueError("Cannot set path_in_repo when local_path contains a wildcard.")
  241. return ".", local_path, ["."] # will be adjusted below; placeholder for type
  242. if local_path is None and os.path.isfile(repo_name):
  243. return repo_name, repo_name, resolved_include
  244. if local_path is None and os.path.isdir(repo_name):
  245. return repo_name, ".", resolved_include
  246. if local_path is None:
  247. raise ValueError(f"'{repo_name}' is not a local file or folder. Please set local_path explicitly.")
  248. if path_in_repo is None and os.path.isfile(local_path):
  249. return local_path, os.path.basename(local_path), resolved_include
  250. if path_in_repo is None:
  251. return local_path, ".", resolved_include
  252. return local_path, path_in_repo, resolved_include