# wandb_reference.py

"""Support for parsing W&B URLs (which might be user provided) into constituent parts."""
from __future__ import annotations

from dataclasses import dataclass
from enum import IntEnum
from urllib.parse import urlparse
  6. PREFIX_HTTP = "http://"
  7. PREFIX_HTTPS = "https://"
  8. class ReferenceType(IntEnum):
  9. RUN = 1
  10. JOB = 2
  11. # Ideally we would not overload the URL paths as we do.
  12. # TODO: Not sure these are exhaustive, and even if so more special paths might get added.
  13. # Would be good to have restrictions that we could check.
  14. RESERVED_NON_ENTITIES = (
  15. "create-team",
  16. "fully-connected",
  17. "registry",
  18. "settings",
  19. "subscriptions",
  20. )
  21. RESERVED_NON_PROJECTS = (
  22. "likes",
  23. "projects",
  24. )
  25. RESERVED_JOB_PATHS = ("_view",)
  26. @dataclass
  27. class WandbReference:
  28. # TODO: This will include port, should we separate that out?
  29. host: str | None = None
  30. entity: str | None = None
  31. project: str | None = None
  32. # Set when we don't know how to parse yet
  33. path: str | None = None
  34. # Reference type will determine what other fields are set
  35. ref_type: ReferenceType | None = None
  36. run_id: str | None = None
  37. job_name: str | None = None
  38. job_alias: str = "latest" # In addition to an alias can be a version specifier
  39. def is_bare(self) -> bool:
  40. return self.host is None
  41. def is_job(self) -> bool:
  42. return self.ref_type == ReferenceType.JOB
  43. def is_run(self) -> bool:
  44. return self.ref_type == ReferenceType.RUN
  45. def is_job_or_run(self) -> bool:
  46. return self.is_job() or self.is_run()
  47. def job_reference(self) -> str:
  48. assert self.is_job()
  49. return f"{self.job_name}:{self.job_alias}"
  50. def job_reference_scoped(self) -> str:
  51. assert self.entity
  52. assert self.project
  53. unscoped = self.job_reference()
  54. return f"{self.entity}/{self.project}/{unscoped}"
  55. def url_host(self) -> str:
  56. return f"{PREFIX_HTTPS}{self.host}" if self.host else ""
  57. def url_entity(self) -> str:
  58. assert self.entity
  59. return f"{self.url_host()}/{self.entity}"
  60. def url_project(self) -> str:
  61. assert self.project
  62. return f"{self.url_entity()}/{self.project}"
  63. @staticmethod
  64. def parse(uri: str) -> WandbReference | None:
  65. """Attempt to parse a string as a W&B URL."""
  66. # TODO: Error if HTTP and host is not localhost?
  67. if (
  68. not uri.startswith("/")
  69. and not uri.startswith(PREFIX_HTTP)
  70. and not uri.startswith(PREFIX_HTTPS)
  71. ):
  72. return None
  73. ref = WandbReference()
  74. # This takes care of things like query and fragment
  75. parsed = urlparse(uri)
  76. if parsed.netloc:
  77. ref.host = parsed.netloc
  78. if not parsed.path.startswith("/"):
  79. return ref
  80. ref.path = parsed.path[1:]
  81. parts = ref.path.split("/")
  82. if len(parts) > 0 and parts[0] not in RESERVED_NON_ENTITIES:
  83. ref.path = None
  84. ref.entity = parts[0]
  85. if len(parts) > 1 and parts[1] not in RESERVED_NON_PROJECTS:
  86. ref.project = parts[1]
  87. if len(parts) > 3 and parts[2] == "runs":
  88. ref.ref_type = ReferenceType.RUN
  89. ref.run_id = parts[3]
  90. elif len(parts) > 4 and parts[2] == "artifacts" and parts[3] == "job":
  91. ref.ref_type = ReferenceType.JOB
  92. ref.job_name = parts[4]
  93. if len(parts) > 5 and parts[5] not in RESERVED_JOB_PATHS:
  94. ref.job_alias = parts[5]
  95. # TODO: Right now we are not tracking selection as part of URL state in the Jobs tab.
  96. # If that changes we'll want to update this.
  97. return ref
  98. @staticmethod
  99. def is_uri_job_or_run(uri: str) -> bool:
  100. ref = WandbReference.parse(uri)
  101. return bool(ref and ref.is_job_or_run())