papers.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. # Copyright 2025 The HuggingFace Team. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Contains commands to interact with papers on the Hugging Face Hub.
  15. Usage:
  16. # list daily papers (most recently submitted)
  17. hf papers ls
  18. # list trending papers
  19. hf papers ls --sort=trending
  20. # list papers from a specific date, ordered by upvotes
  21. hf papers ls --date=2025-01-23
  22. # list today's papers, ordered by upvotes
  23. hf papers ls --date=today
  24. # list papers from a specific week
  25. hf papers ls --week=2025-W09
  26. # list papers by a specific submitter
  27. hf papers ls --submitter=someuser
  28. # search papers
  29. hf papers search "vision language"
  30. # get info about a paper
  31. hf papers info 2502.08025
  32. # read a paper as markdown
  33. hf papers read 2502.08025
  34. """
  35. import datetime
  36. import enum
  37. from typing import Annotated, get_args
  38. import typer
  39. from huggingface_hub.errors import CLIError, HfHubHTTPError
  40. from huggingface_hub.hf_api import DailyPapersSort_T
  41. from ._cli_utils import (
  42. FormatWithAutoOpt,
  43. LimitOpt,
  44. TokenOpt,
  45. api_object_to_dict,
  46. get_hf_api,
  47. typer_factory,
  48. )
  49. from ._output import OutputFormatWithAuto, out
  50. _SORT_OPTIONS = get_args(DailyPapersSort_T)
  51. PaperSortEnum = enum.Enum("PaperSortEnum", {s: s for s in _SORT_OPTIONS}, type=str) # type: ignore[misc]
  52. def _parse_date(value: str | None) -> str | None:
  53. """Parse date option, converting 'today' to current date."""
  54. if value is None:
  55. return None
  56. if value.lower() == "today":
  57. return datetime.date.today().isoformat()
  58. return value
  59. papers_cli = typer_factory(help="Interact with papers on the Hub.")
  60. @papers_cli.command(
  61. "list | ls",
  62. examples=[
  63. "hf papers ls",
  64. "hf papers ls --sort trending",
  65. "hf papers ls --date 2025-01-23",
  66. "hf papers ls --week 2025-W09",
  67. "hf papers ls --submitter akhaliq",
  68. "hf papers ls --format json",
  69. ],
  70. )
  71. def papers_ls(
  72. date: Annotated[
  73. str | None,
  74. typer.Option(
  75. help="Date in ISO format (YYYY-MM-DD) or 'today'.",
  76. callback=_parse_date,
  77. ),
  78. ] = None,
  79. week: Annotated[
  80. str | None,
  81. typer.Option(help="ISO week to filter by, e.g. '2025-W09'."),
  82. ] = None,
  83. month: Annotated[
  84. str | None,
  85. typer.Option(help="Month to filter by in ISO format (YYYY-MM), e.g. '2025-02'."),
  86. ] = None,
  87. submitter: Annotated[
  88. str | None,
  89. typer.Option(help="Filter by username of the submitter."),
  90. ] = None,
  91. sort: Annotated[
  92. PaperSortEnum | None,
  93. typer.Option(help="Sort results."),
  94. ] = None,
  95. limit: LimitOpt = 50,
  96. format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
  97. token: TokenOpt = None,
  98. ) -> None:
  99. """List daily papers on the Hub."""
  100. api = get_hf_api(token=token)
  101. sort_key = sort.value if sort else None
  102. results = []
  103. for paper_info in api.list_daily_papers(
  104. date=date,
  105. week=week,
  106. month=month,
  107. submitter=submitter,
  108. sort=sort_key,
  109. limit=limit,
  110. ):
  111. item = api_object_to_dict(paper_info)
  112. submitted_by = item.get("submitted_by") or {}
  113. item["submitted_by_name"] = submitted_by.get("fullname") or submitted_by.get("username") or ""
  114. results.append(item)
  115. out.table(
  116. results,
  117. headers=["id", "title", "upvotes", "comments", "published_at", "submitted_by_name"],
  118. alignments={"upvotes": "right", "comments": "right"},
  119. )
  120. @papers_cli.command(
  121. "search",
  122. examples=[
  123. 'hf papers search "vision language"',
  124. 'hf papers search "attention mechanism" --limit 10',
  125. 'hf papers search "diffusion" --format json',
  126. ],
  127. )
  128. def papers_search(
  129. query: Annotated[str, typer.Argument(help="Search query string.")],
  130. limit: LimitOpt = 20,
  131. format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
  132. token: TokenOpt = None,
  133. ) -> None:
  134. """Search papers on the Hub."""
  135. api = get_hf_api(token=token)
  136. results = [api_object_to_dict(paper_info) for paper_info in api.list_papers(query=query, limit=limit)]
  137. out.table(results, headers=["id", "title", "summary", "upvotes", "published_at"], alignments={"upvotes": "right"})
  138. @papers_cli.command(
  139. "info",
  140. examples=[
  141. "hf papers info 2601.15621",
  142. ],
  143. )
  144. def papers_info(
  145. paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")],
  146. format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
  147. token: TokenOpt = None,
  148. ) -> None:
  149. """Get info about a paper on the Hub."""
  150. api = get_hf_api(token=token)
  151. try:
  152. info = api.paper_info(id=paper_id)
  153. except HfHubHTTPError as e:
  154. if e.response.status_code == 404:
  155. raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from e
  156. raise
  157. out.dict(info)
  158. @papers_cli.command(
  159. "read",
  160. examples=[
  161. "hf papers read 2601.15621",
  162. ],
  163. )
  164. def papers_read(
  165. paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")],
  166. token: TokenOpt = None,
  167. ) -> None:
  168. """Read a paper as markdown."""
  169. api = get_hf_api(token=token)
  170. try:
  171. content = api.read_paper(id=paper_id)
  172. except HfHubHTTPError as e:
  173. if e.response.status_code == 404:
  174. raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from e
  175. raise
  176. out.text(content)