uri_utils.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. import os
  2. import urllib.parse
  3. from pathlib import Path
  4. from typing import Union
  class URI:
      """Represents a URI, supporting path appending and retrieving parent URIs.

      Strings without a scheme are treated as plain filesystem paths. For
      strings with a scheme, the network location and path are joined and
      normalized into a single internal path, while the remaining parsed
      components (e.g. the query string) are kept and re-emitted by ``str()``.

      Example Usage:

          >>> s3_uri = URI("s3://bucket/a?scheme=http&param=1")
          >>> s3_uri
          URI<s3://bucket/a?scheme=http&param=1>
          >>> str(s3_uri / "b" / "c")
          's3://bucket/a/b/c?scheme=http&param=1'
          >>> str(s3_uri.parent)
          's3://bucket?scheme=http&param=1'
          >>> str(s3_uri)
          's3://bucket/a?scheme=http&param=1'
          >>> s3_uri.parent.name, s3_uri.name
          ('bucket', 'a')
          >>> local_path = URI("/tmp/local")
          >>> str(local_path)
          '/tmp/local'
          >>> str(local_path.parent)
          '/tmp'
          >>> str(local_path / "b" / "c")
          '/tmp/local/b/c'

      Args:
          uri: The URI to represent.
              Ex: s3://bucket?scheme=http&endpoint_override=localhost%3A900
              Ex: file:///a/b/c/d
      """

      def __init__(self, uri: str):
          self._parsed = urllib.parse.urlparse(uri)
          if not self._parsed.scheme:
              # Just treat this as a regular path
              self._path = Path(uri)
          else:
              # Fold netloc and path into one normalized path so that the
              # bucket/host is handled like any other path component.
              self._path = Path(
                  os.path.normpath(self._parsed.netloc + self._parsed.path)
              )

      def rstrip_subpath(self, subpath: Path) -> "URI":
          """Returns a new URI that strips the given subpath from the end of this URI.

          Only the trailing occurrence of ``subpath`` is removed; identical
          segments that appear earlier in the path are left untouched.

          Example:
              >>> uri = URI("s3://bucket/a/b/c/?param=1")
              >>> str(uri.rstrip_subpath(Path("b/c")))
              's3://bucket/a?param=1'
              >>> uri = URI("/tmp/a/b/c/")
              >>> str(uri.rstrip_subpath(Path("/b/c/.//")))
              '/tmp/a'
              >>> str(URI("/tmp/b/c/x/b/c").rstrip_subpath(Path("b/c")))
              '/tmp/b/c/x'

          Args:
              subpath: The trailing path to remove.

          Returns:
              A new URI without the trailing ``subpath``.

          Raises:
              AssertionError: If this URI's path does not end with ``subpath``.
          """
          full_path = str(self._path)
          suffix = str(subpath)
          assert full_path.endswith(suffix), (self._path, subpath)
          # Slice the suffix off instead of using str.replace, which would
          # also delete every earlier occurrence of the same text.
          stripped_path = full_path[: len(full_path) - len(suffix)]
          return URI(self._get_str_representation(self._parsed, stripped_path))

      @property
      def name(self) -> str:
          """The final component of the internal path."""
          return self._path.name

      @property
      def parent(self) -> "URI":
          """A new URI pointing at the parent of this URI's path.

          Raises:
              AssertionError: If this URI has a scheme and its path has no
                  parent component left (e.g. ``s3://bucket``).
          """
          parent_path = self._path.parent
          if self._parsed.scheme:
              # Compare the string form: a Path never compares equal to the
              # str ".", so checking the Path object directly would never
              # fire and would let a malformed "scheme://." URI through.
              # Scheme-less relative paths keep "." as their parent.
              assert str(parent_path) != ".", f"{str(self)} has no valid parent URI"
          return URI(self._get_str_representation(self._parsed, parent_path))

      @property
      def scheme(self) -> str:
          """The scheme of the parsed URI (empty for plain paths)."""
          return self._parsed.scheme

      @property
      def path(self) -> str:
          """The internal path (netloc + path for URIs with a scheme) as a string."""
          return str(self._path)

      def __truediv__(self, path_to_append) -> "URI":
          """Returns a new URI with ``path_to_append`` joined onto the path.

          Raises:
              AssertionError: If ``path_to_append`` is not a ``str``.
          """
          assert isinstance(path_to_append, str)
          return URI(
              self._get_str_representation(self._parsed, self._path / path_to_append)
          )

      @classmethod
      def _get_str_representation(
          cls, parsed_uri: urllib.parse.ParseResult, path: Union[str, Path]
      ) -> str:
          """Builds the string form of ``path`` using the components of ``parsed_uri``.

          Without a scheme the path is returned as-is. Otherwise the path is
          placed in the netloc slot (with the path slot cleared) so that the
          remaining components of ``parsed_uri`` are preserved in the output.
          """
          if not parsed_uri.scheme:
              return str(path)
          return parsed_uri._replace(netloc=str(path), path="").geturl()

      def __repr__(self):
          return f"URI<{str(self)}>"

      def __str__(self):
          return self._get_str_representation(self._parsed, self._path)
  def is_uri(path: str) -> bool:
      """Return True when ``path`` parses with a non-empty URI scheme."""
      detected_scheme = urllib.parse.urlparse(path).scheme
      return True if detected_scheme else False