# reports.py
  1. """W&B Public API for Report objects.
  2. This module provides classes for interacting with W&B reports and
  3. managing report-related data.
  4. """
  5. from __future__ import annotations
  6. import ast
  7. import json
  8. import re
  9. import urllib
  10. from typing import TYPE_CHECKING, Any
  11. from wandb_gql import gql
  12. import wandb
  13. from wandb._strutils import nameof
  14. from wandb.apis import public
  15. from wandb.apis.attrs import Attrs
  16. from wandb.apis.paginator import SizedPaginator
  17. from wandb.sdk.lib import ipython
  18. if TYPE_CHECKING:
  19. from .api import RetryingClient
  20. from .projects import Project
  21. class Reports(SizedPaginator["BetaReport"]):
  22. """Reports is a lazy iterator of `BetaReport` objects.
  23. Args:
  24. client (`wandb.apis.internal.Api`): The API client instance to use.
  25. project (`wandb.sdk.internal.Project`): The project to fetch reports from.
  26. name (str, optional): The name of the report to filter by. If `None`,
  27. fetches all reports.
  28. entity (str, optional): The entity name for the project. Defaults to
  29. the project entity.
  30. per_page (int): Number of reports to fetch per page (default is 50).
  31. """
  32. QUERY = gql(
  33. """
  34. query ProjectViews($project: String!, $entity: String!, $reportCursor: String,
  35. $reportLimit: Int!, $viewType: String = "runs", $viewName: String) {
  36. project(name: $project, entityName: $entity) {
  37. allViews(viewType: $viewType, viewName: $viewName, first:
  38. $reportLimit, after: $reportCursor) {
  39. edges {
  40. node {
  41. id
  42. name
  43. displayName
  44. description
  45. user {
  46. username
  47. photoUrl
  48. email
  49. }
  50. spec
  51. updatedAt
  52. createdAt
  53. }
  54. cursor
  55. }
  56. pageInfo {
  57. endCursor
  58. hasNextPage
  59. }
  60. }
  61. }
  62. }
  63. """
  64. )
  65. def __init__(
  66. self,
  67. client: RetryingClient,
  68. project: Project,
  69. name: str | None = None,
  70. entity: str | None = None,
  71. per_page: int = 50,
  72. ):
  73. self.project = project
  74. self.name = name
  75. variables = {
  76. "project": project.name,
  77. "entity": project.entity,
  78. "viewName": self.name,
  79. }
  80. super().__init__(client, variables, per_page)
  81. @property
  82. def _length(self) -> int | None:
  83. """The number of reports in the project.
  84. <!-- lazydoc-ignore: internal -->
  85. """
  86. # TODO: Add the count the backend
  87. if self.last_response:
  88. return len(self.objects)
  89. return None
  90. @property
  91. def more(self) -> bool:
  92. """Returns whether there are more files to fetch.
  93. <!-- lazydoc-ignore: internal -->
  94. """
  95. if self.last_response:
  96. return bool(
  97. self.last_response["project"]["allViews"]["pageInfo"]["hasNextPage"]
  98. )
  99. return True
  100. @property
  101. def cursor(self) -> str | None:
  102. """Returns the cursor position for pagination of file results.
  103. <!-- lazydoc-ignore: internal -->
  104. """
  105. if self.last_response:
  106. return self.last_response["project"]["allViews"]["edges"][-1]["cursor"]
  107. return None
  108. def update_variables(self) -> None:
  109. """Updates the GraphQL query variables for pagination."""
  110. self.variables.update(
  111. {"reportCursor": self.cursor, "reportLimit": self.per_page}
  112. )
  113. def convert_objects(self) -> list[BetaReport]:
  114. """Converts GraphQL edges to File objects."""
  115. if self.last_response["project"] is None:
  116. raise ValueError(
  117. f"Project {self.variables['project']} does not exist under entity {self.variables['entity']}"
  118. )
  119. return [
  120. BetaReport(
  121. self.client,
  122. r["node"],
  123. entity=self.project.entity,
  124. project=self.project.name,
  125. )
  126. for r in self.last_response["project"]["allViews"]["edges"]
  127. ]
  128. def __repr__(self) -> str:
  129. return f"<{nameof(type(self))} {'/'.join(self.project.path)}>"
  130. class BetaReport(Attrs):
  131. """BetaReport is a class associated with reports created in W&B.
  132. Provides access to report attributes (name, description, user, spec,
  133. timestamps) and methods for retrieving associated runs,
  134. sections, and for rendering the report as HTML.
  135. Attributes:
  136. id (string): Unique identifier of the report.
  137. display_name (string): Human-readable display name of the report.
  138. name (string): The name of the report. Use `display_name` for a more user-friendly name.
  139. description (string): Description of the report.
  140. user (User): Dictionary containing user info (username, email) who
  141. created the report.
  142. spec (dict): The spec of the report.
  143. url (string): The URL of the report.
  144. updated_at (string): Timestamp of last update.
  145. created_at (string): Timestamp when the report was created.
  146. """
  147. def __init__(
  148. self,
  149. client: RetryingClient,
  150. attrs: dict,
  151. entity: str | None = None,
  152. project: str | None = None,
  153. ):
  154. self.client = client
  155. self.project = project
  156. self.entity = entity
  157. self.query_generator = public.QueryGenerator()
  158. super().__init__(dict(attrs))
  159. if "spec" in self._attrs:
  160. if isinstance(self._attrs["spec"], str):
  161. self._attrs["spec"] = json.loads(self._attrs["spec"])
  162. else:
  163. self._attrs["spec"] = {}
  164. @property
  165. def spec(self) -> dict[str, Any]:
  166. return self._attrs["spec"]
  167. @property
  168. def sections(self):
  169. """Get the panel sections (groups) from the report."""
  170. return self.spec["panelGroups"]
  171. def runs(
  172. self,
  173. section: dict[str, Any],
  174. per_page: int = 50,
  175. only_selected: bool = True,
  176. ) -> public.Runs:
  177. """Get runs associated with a section of the report."""
  178. run_set_idx = section.get("openRunSet", 0)
  179. run_set = section["runSets"][run_set_idx]
  180. order = self.query_generator.key_to_server_path(run_set["sort"]["key"])
  181. if run_set["sort"].get("ascending"):
  182. order = "+" + order
  183. else:
  184. order = "-" + order
  185. filters = self.query_generator.filter_to_mongo(run_set["filters"])
  186. if only_selected:
  187. # TODO: handle this not always existing
  188. filters["$or"][0]["$and"].append(
  189. {"name": {"$in": run_set["selections"]["tree"]}}
  190. )
  191. return public.Runs(
  192. self.client,
  193. self.entity,
  194. self.project,
  195. filters=filters,
  196. order=order,
  197. per_page=per_page,
  198. )
  199. @property
  200. def id(self) -> str:
  201. return self._attrs.get("id")
  202. @property
  203. def name(self) -> str | None:
  204. return self._attrs.get("name")
  205. @property
  206. def display_name(self) -> str | None:
  207. return self._attrs.get("displayName")
  208. @property
  209. def description(self) -> str | None:
  210. return self._attrs.get("description")
  211. @property
  212. def user(self):
  213. return self._attrs.get("user")
  214. @property
  215. def updated_at(self):
  216. return self._attrs.get("updatedAt")
  217. @property
  218. def created_at(self):
  219. return self._attrs.get("createdAt")
  220. @property
  221. def url(self) -> str | None:
  222. if (
  223. not self.client
  224. or not self.entity
  225. or not self.project
  226. or not self.display_name
  227. or not self.id
  228. ):
  229. return None
  230. return self.client.app_url + "/".join(
  231. [
  232. self.entity,
  233. self.project,
  234. "reports",
  235. "--".join(
  236. [
  237. # made this more closely match the url creation in the frontend (https://github.com/wandb/core/blob/76943979c8e967f7a62dae8bef0a001a2672584c/frontends/app/src/util/report/urls.ts#L19)
  238. urllib.parse.quote(
  239. re.sub(
  240. r"-+", "-", re.sub(r"\W", "-", self.display_name)
  241. ).strip("-")
  242. ),
  243. self.id.replace("=", ""),
  244. ]
  245. ),
  246. ]
  247. )
  248. def to_html(self, height: int = 1024, hidden: bool = False) -> str:
  249. """Generate HTML containing an iframe displaying this report."""
  250. url = self.url
  251. if url is None:
  252. return "<div>Report URL not available</div>"
  253. url = url + "?jupyter=true"
  254. style = f"border:none;width:100%;height:{height}px;"
  255. prefix = ""
  256. if hidden:
  257. style += "display:none;"
  258. prefix = ipython.toggle_button("report")
  259. return prefix + f"<iframe src={url!r} style={style!r}></iframe>"
  260. def _repr_html_(self) -> str:
  261. return self.to_html()
  262. class PythonMongoishQueryGenerator:
  263. """Converts Python-style query expressions to MongoDB-style queries for W&B reports.
  264. <!-- lazydoc-ignore-class: internal -->
  265. """
  266. SPACER = "----------"
  267. DECIMAL_SPACER = ";;;"
  268. FRONTEND_NAME_MAPPING = {
  269. "ID": "name",
  270. "Name": "displayName",
  271. "Tags": "tags",
  272. "State": "state",
  273. "CreatedTimestamp": "createdAt",
  274. "Runtime": "duration",
  275. "User": "username",
  276. "Sweep": "sweep",
  277. "Group": "group",
  278. "JobType": "jobType",
  279. "Hostname": "host",
  280. "UsingArtifact": "inputArtifacts",
  281. "OutputtingArtifact": "outputArtifacts",
  282. "Step": "_step",
  283. "Relative Time (Wall)": "_absolute_runtime",
  284. "Relative Time (Process)": "_runtime",
  285. "Wall Time": "_timestamp",
  286. # "GroupedRuns": "__wb_group_by_all"
  287. }
  288. FRONTEND_NAME_MAPPING_REVERSED = {v: k for k, v in FRONTEND_NAME_MAPPING.items()}
  289. AST_OPERATORS = {
  290. ast.Lt: "$lt",
  291. ast.LtE: "$lte",
  292. ast.Gt: "$gt",
  293. ast.GtE: "$gte",
  294. ast.Eq: "=",
  295. ast.Is: "=",
  296. ast.NotEq: "$ne",
  297. ast.IsNot: "$ne",
  298. ast.In: "$in",
  299. ast.NotIn: "$nin",
  300. ast.And: "$and",
  301. ast.Or: "$or",
  302. ast.Not: "$not",
  303. }
  304. AST_FIELDS = {
  305. ast.Constant: "value",
  306. ast.Name: "id",
  307. ast.List: "elts",
  308. ast.Tuple: "elts",
  309. }
  310. def __init__(self, run_set):
  311. self.run_set = run_set
  312. self.panel_metrics_helper = PanelMetricsHelper()
  313. def _handle_compare(self, node):
  314. # only left side can be a col
  315. left = self.front_to_back(self._handle_fields(node.left))
  316. op = self._handle_ops(node.ops[0])
  317. right = self._handle_fields(node.comparators[0])
  318. # Eq has no op for some reason
  319. if op == "=":
  320. return {left: right}
  321. else:
  322. return {left: {op: right}}
  323. def _handle_fields(self, node):
  324. result = getattr(node, self.AST_FIELDS.get(type(node)))
  325. if isinstance(result, list):
  326. return [self._handle_fields(node) for node in result]
  327. elif isinstance(result, str):
  328. return self._unconvert(result)
  329. return result
  330. def _handle_ops(self, node):
  331. return self.AST_OPERATORS.get(type(node))
  332. def _replace_numeric_dots(self, s):
  333. numeric_dots = []
  334. for i, (left, mid, right) in enumerate(zip(s, s[1:], s[2:]), 1):
  335. if mid == "." and (
  336. left.isdigit()
  337. and right.isdigit() # 1.2
  338. or left.isdigit()
  339. and right == " " # 1.
  340. or left == " "
  341. and right.isdigit() # .2
  342. ):
  343. numeric_dots.append(i)
  344. # Edge: Catch number ending in dot at end of string
  345. if s[-2].isdigit() and s[-1] == ".":
  346. numeric_dots.append(len(s) - 1)
  347. numeric_dots = [-1] + numeric_dots + [len(s)]
  348. substrs = []
  349. for start, stop in zip(numeric_dots, numeric_dots[1:]):
  350. substrs.append(s[start + 1 : stop])
  351. substrs.append(self.DECIMAL_SPACER)
  352. substrs = substrs[:-1]
  353. return "".join(substrs)
  354. def _convert(self, filterstr):
  355. _conversion = (
  356. self._replace_numeric_dots(filterstr) # temporarily sub numeric dots
  357. .replace(".", self.SPACER) # Allow dotted fields
  358. .replace(self.DECIMAL_SPACER, ".") # add them back
  359. )
  360. return "(" + _conversion + ")"
  361. def _unconvert(self, field_name):
  362. return field_name.replace(self.SPACER, ".") # Allow dotted fields
  363. def python_to_mongo(self, filterstr):
  364. """Convert Python expresion to MongoDB filter.
  365. <!-- lazydoc-ignore: internal -->
  366. """
  367. try:
  368. tree = ast.parse(self._convert(filterstr), mode="eval")
  369. except SyntaxError as e:
  370. raise ValueError(
  371. "Invalid python comparison expression; form something like `my_col == 123`"
  372. ) from e
  373. multiple_filters = hasattr(tree.body, "op")
  374. if multiple_filters:
  375. op = self.AST_OPERATORS.get(type(tree.body.op))
  376. values = [self._handle_compare(v) for v in tree.body.values]
  377. else:
  378. op = "$and"
  379. values = [self._handle_compare(tree.body)]
  380. return {"$or": [{op: values}]}
  381. def front_to_back(self, name):
  382. """Convert frontend metric names to backend field names.
  383. <!-- lazydoc-ignore: internal -->
  384. """
  385. name, *rest = name.split(".")
  386. rest = "." + ".".join(rest) if rest else ""
  387. if name in self.FRONTEND_NAME_MAPPING:
  388. return self.FRONTEND_NAME_MAPPING[name]
  389. elif name in self.FRONTEND_NAME_MAPPING_REVERSED:
  390. return name
  391. elif name in self.run_set._runs_config:
  392. return f"config.{name}.value{rest}"
  393. else: # assume summary metrics
  394. return f"summary_metrics.{name}{rest}"
  395. def back_to_front(self, name):
  396. """Convert backend field names to frontend metric names.
  397. <!-- lazydoc-ignore: internal -->
  398. """
  399. if name in self.FRONTEND_NAME_MAPPING_REVERSED:
  400. return self.FRONTEND_NAME_MAPPING_REVERSED[name]
  401. elif name in self.FRONTEND_NAME_MAPPING:
  402. return name
  403. elif (
  404. name.startswith("config.") and ".value" in name
  405. ): # may be brittle: originally "endswith", but that doesn't work with nested keys...
  406. # strip is weird sometimes (??)
  407. return name.replace("config.", "").replace(".value", "")
  408. elif name.startswith("summary_metrics."):
  409. return name.replace("summary_metrics.", "")
  410. wandb.termerror(f"Unknown token: {name}")
  411. return name
  412. # These are only used for ParallelCoordinatesPlot because it has weird backend names...
  413. def pc_front_to_back(self, name):
  414. """Convert ParallelCoordinatesPlot to backend field names.
  415. <!-- lazydoc-ignore: internal -->
  416. """
  417. name, *rest = name.split(".")
  418. rest = "." + ".".join(rest) if rest else ""
  419. if name is None:
  420. return None
  421. elif name in self.panel_metrics_helper.FRONTEND_NAME_MAPPING:
  422. return "summary:" + self.panel_metrics_helper.FRONTEND_NAME_MAPPING[name]
  423. elif name in self.FRONTEND_NAME_MAPPING:
  424. return self.FRONTEND_NAME_MAPPING[name]
  425. elif name in self.FRONTEND_NAME_MAPPING_REVERSED:
  426. return name
  427. elif name in self.run_set._runs_config:
  428. return f"config:{name}.value{rest}"
  429. else: # assume summary metrics
  430. return f"summary:{name}{rest}"
  431. def pc_back_to_front(self, name):
  432. """Convert backend backend field names to ParallelCoordinatesPlot names.
  433. <!-- lazydoc-ignore: internal -->
  434. """
  435. if name is None:
  436. return None
  437. elif "summary:" in name:
  438. name = name.replace("summary:", "")
  439. return self.panel_metrics_helper.FRONTEND_NAME_MAPPING_REVERSED.get(
  440. name, name
  441. )
  442. elif name in self.FRONTEND_NAME_MAPPING_REVERSED:
  443. return self.FRONTEND_NAME_MAPPING_REVERSED[name]
  444. elif name in self.FRONTEND_NAME_MAPPING:
  445. return name
  446. elif name.startswith("config:") and ".value" in name:
  447. return name.replace("config:", "").replace(".value", "")
  448. elif name.startswith("summary_metrics."):
  449. return name.replace("summary_metrics.", "")
  450. return name
  451. class PanelMetricsHelper:
  452. """Converts Python-style query expressions to MongoDB-style queries for W&B reports.
  453. <!-- lazydoc-ignore-class: internal -->
  454. """
  455. FRONTEND_NAME_MAPPING = {
  456. "Step": "_step",
  457. "Relative Time (Wall)": "_absolute_runtime",
  458. "Relative Time (Process)": "_runtime",
  459. "Wall Time": "_timestamp",
  460. }
  461. FRONTEND_NAME_MAPPING_REVERSED = {v: k for k, v in FRONTEND_NAME_MAPPING.items()}
  462. RUN_MAPPING = {"Created Timestamp": "createdAt", "Latest Timestamp": "heartbeatAt"}
  463. RUN_MAPPING_REVERSED = {v: k for k, v in RUN_MAPPING.items()}
  464. def front_to_back(self, name):
  465. """Convert frontend metric names to backend field names.
  466. <!-- lazydoc-ignore: internal -->
  467. """
  468. if name in self.FRONTEND_NAME_MAPPING:
  469. return self.FRONTEND_NAME_MAPPING[name]
  470. return name
  471. def back_to_front(self, name):
  472. """Convert backend field names to frontend metric names.
  473. <!-- lazydoc-ignore: internal -->
  474. """
  475. if name in self.FRONTEND_NAME_MAPPING_REVERSED:
  476. return self.FRONTEND_NAME_MAPPING_REVERSED[name]
  477. return name
  478. # ScatterPlot and ParallelCoords have weird conventions
  479. def special_front_to_back(self, name):
  480. """Convert frontend metric names to backend field names.
  481. <!-- lazydoc-ignore: internal -->
  482. """
  483. if name is None:
  484. return name
  485. name, *rest = name.split(".")
  486. rest = "." + ".".join(rest) if rest else ""
  487. # special case for config
  488. if name.startswith("c::"):
  489. name = name[3:]
  490. return f"config:{name}.value{rest}"
  491. # special case for summary
  492. if name.startswith("s::"):
  493. name = name[3:] + rest
  494. return f"summary:{name}"
  495. name = name + rest
  496. if name in self.RUN_MAPPING:
  497. return "run:" + self.RUN_MAPPING[name]
  498. if name in self.FRONTEND_NAME_MAPPING:
  499. return "summary:" + self.FRONTEND_NAME_MAPPING[name]
  500. if name == "Index":
  501. return name
  502. return "summary:" + name
  503. def special_back_to_front(self, name):
  504. """Convert backend field names to frontend metric names.
  505. <!-- lazydoc-ignore: internal -->
  506. """
  507. if name is not None:
  508. kind, rest = name.split(":", 1)
  509. if kind == "config":
  510. pieces = rest.split(".")
  511. if len(pieces) <= 1:
  512. raise ValueError(f"Invalid name: {name}")
  513. elif len(pieces) == 2:
  514. name = pieces[0]
  515. elif len(pieces) >= 3:
  516. name = pieces[:1] + pieces[2:]
  517. name = ".".join(name)
  518. return f"c::{name}"
  519. elif kind == "summary":
  520. name = rest
  521. return f"s::{name}"
  522. if name is None:
  523. return name
  524. elif "summary:" in name:
  525. name = name.replace("summary:", "")
  526. return self.FRONTEND_NAME_MAPPING_REVERSED.get(name, name)
  527. elif "run:" in name:
  528. name = name.replace("run:", "")
  529. return self.RUN_MAPPING_REVERSED[name]
  530. return name