_jobs_api.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. # Copyright 2025-present, the HuggingFace Inc. team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from dataclasses import dataclass
  15. from datetime import datetime
  16. from enum import Enum
  17. from typing import Any
  18. from huggingface_hub import constants
  19. from huggingface_hub._space_api import SpaceHardware, Volume
  20. from huggingface_hub.utils._datetime import parse_datetime
  21. class JobStage(str, Enum):
  22. """
  23. Enumeration of possible stage of a Job on the Hub.
  24. Value can be compared to a string:
  25. ```py
  26. assert JobStage.COMPLETED == "COMPLETED"
  27. ```
  28. Possible values are: `COMPLETED`, `CANCELED`, `ERROR`, `DELETED`, `RUNNING`.
  29. Taken from https://github.com/huggingface/moon-landing/blob/main/server/job_types/JobInfo.ts#L61 (private url).
  30. """
  31. # Copied from moon-landing > server > lib > Job.ts
  32. COMPLETED = "COMPLETED"
  33. CANCELED = "CANCELED"
  34. ERROR = "ERROR"
  35. DELETED = "DELETED"
  36. RUNNING = "RUNNING"
  37. @dataclass
  38. class JobStatus:
  39. stage: JobStage
  40. message: str | None
  41. @dataclass
  42. class JobOwner:
  43. id: str
  44. name: str
  45. type: str
  46. @dataclass
  47. class JobInfo:
  48. """
  49. Contains information about a Job.
  50. Args:
  51. id (`str`):
  52. Job ID.
  53. created_at (`datetime` or `None`):
  54. When the Job was created.
  55. docker_image (`str` or `None`):
  56. The Docker image from Docker Hub used for the Job.
  57. Can be None if space_id is present instead.
  58. space_id (`str` or `None`):
  59. The Docker image from Hugging Face Spaces used for the Job.
  60. Can be None if docker_image is present instead.
  61. command (`list[str]` or `None`):
  62. Command of the Job, e.g. `["python", "-c", "print('hello world')"]`
  63. arguments (`list[str]` or `None`):
  64. Arguments passed to the command
  65. environment (`dict[str]` or `None`):
  66. Environment variables of the Job as a dictionary.
  67. secrets (`dict[str]` or `None`):
  68. Secret environment variables of the Job (encrypted).
  69. flavor (`str` or `None`):
  70. Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
  71. E.g. `"cpu-basic"`.
  72. labels (`dict[str, str]` or `None`):
  73. Labels to attach to the job (key-value pairs).
  74. volumes (`list[Volume]` or `None`):
  75. Volumes mounted in the job container (buckets, models, datasets, spaces).
  76. status: (`JobStatus` or `None`):
  77. Status of the Job, e.g. `JobStatus(stage="RUNNING", message=None)`
  78. See [`JobStage`] for possible stage values.
  79. owner: (`JobOwner` or `None`):
  80. Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")`
  81. Example:
  82. ```python
  83. >>> from huggingface_hub import run_job
  84. >>> job = run_job(
  85. ... image="python:3.12",
  86. ... command=["python", "-c", "print('Hello from the cloud!')"]
  87. ... )
  88. >>> job
  89. JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', labels=None, status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq', type='user'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
  90. >>> job.id
  91. '687fb701029421ae5549d998'
  92. >>> job.url
  93. 'https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998'
  94. >>> job.status.stage
  95. 'RUNNING'
  96. ```
  97. """
  98. id: str
  99. created_at: datetime | None
  100. docker_image: str | None
  101. space_id: str | None
  102. command: list[str] | None
  103. arguments: list[str] | None
  104. environment: dict[str, Any] | None
  105. secrets: dict[str, Any] | None
  106. flavor: SpaceHardware | None
  107. labels: dict[str, str] | None
  108. volumes: list[Volume] | None
  109. status: JobStatus
  110. owner: JobOwner
  111. # Inferred fields
  112. endpoint: str
  113. url: str
  114. def __init__(self, **kwargs) -> None:
  115. self.id = kwargs["id"]
  116. created_at = kwargs.get("createdAt") or kwargs.get("created_at")
  117. self.created_at = parse_datetime(created_at) if created_at else None
  118. self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
  119. self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
  120. owner = kwargs.get("owner", {})
  121. self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"])
  122. self.command = kwargs.get("command")
  123. self.arguments = kwargs.get("arguments")
  124. self.environment = kwargs.get("environment")
  125. self.secrets = kwargs.get("secrets")
  126. self.flavor = kwargs.get("flavor")
  127. self.labels = kwargs.get("labels")
  128. volumes = kwargs.get("volumes")
  129. self.volumes = [Volume(**v) for v in volumes] if volumes else None
  130. status = kwargs.get("status", {})
  131. self.status = JobStatus(stage=status["stage"], message=status.get("message"))
  132. # Inferred fields
  133. self.endpoint = kwargs.get("endpoint", constants.ENDPOINT)
  134. self.url = f"{self.endpoint}/jobs/{self.owner.name}/{self.id}"
  135. @dataclass
  136. class JobSpec:
  137. docker_image: str | None
  138. space_id: str | None
  139. command: list[str] | None
  140. arguments: list[str] | None
  141. environment: dict[str, Any] | None
  142. secrets: dict[str, Any] | None
  143. flavor: SpaceHardware | None
  144. timeout: int | None
  145. tags: list[str] | None
  146. arch: str | None
  147. labels: dict[str, str] | None
  148. volumes: list[Volume] | None
  149. def __init__(self, **kwargs) -> None:
  150. self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
  151. self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
  152. self.command = kwargs.get("command")
  153. self.arguments = kwargs.get("arguments")
  154. self.environment = kwargs.get("environment")
  155. self.secrets = kwargs.get("secrets")
  156. self.flavor = kwargs.get("flavor")
  157. self.timeout = kwargs.get("timeout")
  158. self.tags = kwargs.get("tags")
  159. self.arch = kwargs.get("arch")
  160. self.labels = kwargs.get("labels")
  161. volumes = kwargs.get("volumes")
  162. self.volumes = [Volume(**v) for v in volumes] if volumes else None
  163. @dataclass
  164. class LastJobInfo:
  165. id: str
  166. at: datetime
  167. def __init__(self, **kwargs) -> None:
  168. self.id = kwargs["id"]
  169. self.at = parse_datetime(kwargs["at"])
  170. @dataclass
  171. class ScheduledJobStatus:
  172. last_job: LastJobInfo | None
  173. next_job_run_at: datetime | None
  174. def __init__(self, **kwargs) -> None:
  175. last_job = kwargs.get("lastJob") or kwargs.get("last_job")
  176. self.last_job = LastJobInfo(**last_job) if last_job else None
  177. next_job_run_at = kwargs.get("nextJobRunAt") or kwargs.get("next_job_run_at")
  178. self.next_job_run_at = parse_datetime(str(next_job_run_at)) if next_job_run_at else None
  179. @dataclass
  180. class ScheduledJobInfo:
  181. """
  182. Contains information about a Job.
  183. Args:
  184. id (`str`):
  185. Scheduled Job ID.
  186. created_at (`datetime` or `None`):
  187. When the scheduled Job was created.
  188. tags (`list[str]` or `None`):
  189. The tags of the scheduled Job.
  190. schedule (`str` or `None`):
  191. One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
  192. CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).
  193. suspend (`bool` or `None`):
  194. Whether the scheduled job is suspended (paused).
  195. concurrency (`bool` or `None`):
  196. Whether multiple instances of this Job can run concurrently.
  197. status (`ScheduledJobStatus` or `None`):
  198. Status of the scheduled Job.
  199. owner: (`JobOwner` or `None`):
  200. Owner of the scheduled Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")`
  201. job_spec: (`JobSpec` or `None`):
  202. Specifications of the Job.
  203. Example:
  204. ```python
  205. >>> from huggingface_hub import run_job
  206. >>> scheduled_job = create_scheduled_job(
  207. ... image="python:3.12",
  208. ... command=["python", "-c", "print('Hello from the cloud!')"],
  209. ... schedule="@hourly",
  210. ... )
  211. >>> scheduled_job.id
  212. '687fb701029421ae5549d999'
  213. >>> scheduled_job.status.next_job_run_at
  214. datetime.datetime(2025, 7, 22, 17, 6, 25, 79000, tzinfo=datetime.timezone.utc)
  215. ```
  216. """
  217. id: str
  218. created_at: datetime | None
  219. job_spec: JobSpec
  220. schedule: str | None
  221. suspend: bool | None
  222. concurrency: bool | None
  223. status: ScheduledJobStatus
  224. owner: JobOwner
  225. def __init__(self, **kwargs) -> None:
  226. self.id = kwargs["id"]
  227. created_at = kwargs.get("createdAt") or kwargs.get("created_at")
  228. self.created_at = parse_datetime(created_at) if created_at else None
  229. self.job_spec = JobSpec(**(kwargs.get("job_spec") or kwargs.get("jobSpec", {})))
  230. self.schedule = kwargs.get("schedule")
  231. self.suspend = kwargs.get("suspend")
  232. self.concurrency = kwargs.get("concurrency")
  233. status = kwargs.get("status", {})
  234. self.status = ScheduledJobStatus(
  235. last_job=status.get("last_job") or status.get("lastJob"),
  236. next_job_run_at=status.get("next_job_run_at") or status.get("nextJobRunAt"),
  237. )
  238. owner = kwargs.get("owner", {})
  239. self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"])
  240. @dataclass
  241. class JobAccelerator:
  242. """
  243. Contains information about a Job accelerator (GPU).
  244. Args:
  245. type (`str`):
  246. Type of accelerator, e.g. `"gpu"`.
  247. model (`str`):
  248. Model of accelerator, e.g. `"T4"`, `"A10G"`, `"A100"`, `"L4"`, `"L40S"`.
  249. quantity (`str`):
  250. Number of accelerators, e.g. `"1"`, `"2"`, `"4"`, `"8"`.
  251. vram (`str`):
  252. Total VRAM, e.g. `"16 GB"`, `"24 GB"`.
  253. manufacturer (`str`):
  254. Manufacturer of the accelerator, e.g. `"Nvidia"`.
  255. """
  256. type: str
  257. model: str
  258. quantity: str
  259. vram: str
  260. manufacturer: str
  261. def __init__(self, **kwargs) -> None:
  262. self.type = kwargs["type"]
  263. self.model = kwargs["model"]
  264. self.quantity = kwargs["quantity"]
  265. self.vram = kwargs["vram"]
  266. self.manufacturer = kwargs["manufacturer"]
  267. @dataclass
  268. class JobHardware:
  269. """
  270. Contains information about available Job hardware.
  271. Args:
  272. name (`str`):
  273. Machine identifier, e.g. `"cpu-basic"`, `"a10g-large"`.
  274. pretty_name (`str`):
  275. Human-readable name, e.g. `"CPU Basic"`, `"Nvidia A10G - large"`.
  276. cpu (`str`):
  277. CPU specification, e.g. `"2 vCPU"`, `"12 vCPU"`.
  278. ram (`str`):
  279. RAM specification, e.g. `"16 GB"`, `"46 GB"`.
  280. accelerator (`JobAccelerator` or `None`):
  281. GPU/accelerator details if available.
  282. unit_cost_micro_usd (`int`):
  283. Cost in micro-dollars per unit, e.g. `167` (= $0.000167).
  284. unit_cost_usd (`float`):
  285. Cost in USD per unit, e.g. `0.000167`.
  286. unit_label (`str`):
  287. Cost unit period, e.g. `"minute"`.
  288. Example:
  289. ```python
  290. >>> from huggingface_hub import list_jobs_hardware
  291. >>> hardware_list = list_jobs_hardware()
  292. >>> hardware_list[0]
  293. JobHardware(name='cpu-basic', pretty_name='CPU Basic', cpu='2 vCPU', ram='16 GB', accelerator=None, unit_cost_micro_usd=167, unit_cost_usd=0.000167, unit_label='minute')
  294. >>> hardware_list[0].name
  295. 'cpu-basic'
  296. ```
  297. """
  298. name: str
  299. pretty_name: str
  300. cpu: str
  301. ram: str
  302. accelerator: JobAccelerator | None
  303. unit_cost_micro_usd: int
  304. unit_cost_usd: float
  305. unit_label: str
  306. def __init__(self, **kwargs) -> None:
  307. self.name = kwargs["name"]
  308. self.pretty_name = kwargs["prettyName"]
  309. self.cpu = kwargs["cpu"]
  310. self.ram = kwargs["ram"]
  311. accelerator = kwargs.get("accelerator")
  312. self.accelerator = JobAccelerator(**accelerator) if accelerator else None
  313. self.unit_cost_micro_usd = kwargs["unitCostMicroUSD"]
  314. self.unit_cost_usd = kwargs["unitCostUSD"]
  315. self.unit_label = kwargs["unitLabel"]
  316. def _create_job_spec(
  317. *,
  318. image: str,
  319. command: list[str],
  320. env: dict[str, Any] | None,
  321. secrets: dict[str, Any] | None,
  322. flavor: SpaceHardware | None,
  323. timeout: int | float | str | None,
  324. labels: dict[str, str] | None = None,
  325. volumes: list[Volume] | None = None,
  326. ) -> dict[str, Any]:
  327. # prepare job spec to send to HF Jobs API
  328. job_spec: dict[str, Any] = {
  329. "command": command,
  330. "arguments": [],
  331. "environment": env or {},
  332. "flavor": flavor or SpaceHardware.CPU_BASIC,
  333. }
  334. # secrets are optional
  335. if secrets:
  336. job_spec["secrets"] = secrets
  337. # timeout is optional
  338. if timeout:
  339. time_units_factors = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24}
  340. if isinstance(timeout, str) and timeout[-1] in time_units_factors:
  341. job_spec["timeoutSeconds"] = int(float(timeout[:-1]) * time_units_factors[timeout[-1]])
  342. else:
  343. job_spec["timeoutSeconds"] = int(timeout)
  344. # labels are optional
  345. if labels:
  346. job_spec["labels"] = labels
  347. # volumes are optional
  348. if volumes:
  349. job_spec["volumes"] = [vol.to_dict() for vol in volumes]
  350. # input is either from docker hub or from HF spaces
  351. for prefix in (
  352. "https://huggingface.co/spaces/",
  353. "https://hf.co/spaces/",
  354. "huggingface.co/spaces/",
  355. "hf.co/spaces/",
  356. ):
  357. if image.startswith(prefix):
  358. job_spec["spaceId"] = image[len(prefix) :]
  359. break
  360. else:
  361. job_spec["dockerImage"] = image
  362. return job_spec