git.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. import os
  2. import pygit2
  3. from fsspec.spec import AbstractFileSystem
  4. from .memory import MemoryFile
  5. class GitFileSystem(AbstractFileSystem):
  6. """Browse the files of a local git repo at any hash/tag/branch
  7. (experimental backend)
  8. """
  9. root_marker = ""
  10. cachable = True
  11. def __init__(self, path=None, fo=None, ref=None, **kwargs):
  12. """
  13. Parameters
  14. ----------
  15. path: str (optional)
  16. Local location of the repo (uses current directory if not given).
  17. May be deprecated in favour of ``fo``. When used with a higher
  18. level function such as fsspec.open(), may be of the form
  19. "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
  20. file path should not contain "@" or ":").
  21. fo: str (optional)
  22. Same as ``path``, but passed as part of a chained URL. This one
  23. takes precedence if both are given.
  24. ref: str (optional)
  25. Reference to work with, could be a hash, tag or branch name. Defaults
  26. to current working tree. Note that ``ls`` and ``open`` also take hash,
  27. so this becomes the default for those operations
  28. kwargs
  29. """
  30. super().__init__(**kwargs)
  31. self.repo = pygit2.Repository(fo or path or os.getcwd())
  32. self.ref = ref or "master"
  33. @classmethod
  34. def _strip_protocol(cls, path):
  35. path = super()._strip_protocol(path).lstrip("/")
  36. if ":" in path:
  37. path = path.split(":", 1)[1]
  38. if "@" in path:
  39. path = path.split("@", 1)[1]
  40. return path.lstrip("/")
  41. def _path_to_object(self, path, ref):
  42. comm, ref = self.repo.resolve_refish(ref or self.ref)
  43. parts = path.split("/")
  44. tree = comm.tree
  45. for part in parts:
  46. if part and isinstance(tree, pygit2.Tree):
  47. if part not in tree:
  48. raise FileNotFoundError(path)
  49. tree = tree[part]
  50. return tree
  51. @staticmethod
  52. def _get_kwargs_from_urls(path):
  53. path = path.removeprefix("git://")
  54. out = {}
  55. if ":" in path:
  56. out["path"], path = path.split(":", 1)
  57. if "@" in path:
  58. out["ref"], path = path.split("@", 1)
  59. return out
  60. @staticmethod
  61. def _object_to_info(obj, path=None):
  62. # obj.name and obj.filemode are None for the root tree!
  63. is_dir = isinstance(obj, pygit2.Tree)
  64. return {
  65. "type": "directory" if is_dir else "file",
  66. "name": (
  67. "/".join([path, obj.name or ""]).lstrip("/") if path else obj.name
  68. ),
  69. "hex": str(obj.id),
  70. "mode": "100644" if obj.filemode is None else f"{obj.filemode:o}",
  71. "size": 0 if is_dir else obj.size,
  72. }
  73. def ls(self, path, detail=True, ref=None, **kwargs):
  74. tree = self._path_to_object(self._strip_protocol(path), ref)
  75. return [
  76. GitFileSystem._object_to_info(obj, path)
  77. if detail
  78. else GitFileSystem._object_to_info(obj, path)["name"]
  79. for obj in (tree if isinstance(tree, pygit2.Tree) else [tree])
  80. ]
  81. def info(self, path, ref=None, **kwargs):
  82. tree = self._path_to_object(self._strip_protocol(path), ref)
  83. return GitFileSystem._object_to_info(tree, path)
  84. def ukey(self, path, ref=None):
  85. return self.info(path, ref=ref)["hex"]
  86. def _open(
  87. self,
  88. path,
  89. mode="rb",
  90. block_size=None,
  91. autocommit=True,
  92. cache_options=None,
  93. ref=None,
  94. **kwargs,
  95. ):
  96. obj = self._path_to_object(path, ref or self.ref)
  97. return MemoryFile(data=obj.data)