fun.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. # This module is part of GitPython and is released under the
  2. # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
  3. """Standalone functions to accompany the index implementation and make it more
  4. versatile."""
# Names exported by ``from <this module> import *``. Helpers defined further down
# in this file that are not listed here (for example ``read_header`` and
# ``aggressive_tree_merge``) remain importable by their explicit name.
__all__ = [
    "write_cache",
    "read_cache",
    "write_tree_from_cache",
    "entry_key",
    "stat_mode_to_index_mode",
    "S_IFGITLINK",
    "run_commit_hook",
    "hook_path",
]
  15. from io import BytesIO
  16. import os
  17. import os.path as osp
  18. from pathlib import Path
  19. from stat import S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, S_ISDIR, S_ISLNK, S_IXUSR
  20. import subprocess
  21. import sys
  22. from gitdb.base import IStream
  23. from gitdb.typ import str_tree_type
  24. from git.cmd import handle_process_output, safer_popen
  25. from git.compat import defenc, force_bytes, force_text, safe_decode
  26. from git.exc import HookExecutionError, UnmergedEntriesError
  27. from git.objects.fun import (
  28. traverse_tree_recursive,
  29. traverse_trees_recursive,
  30. tree_to_stream,
  31. )
  32. from git.util import IndexFileSHA1Writer, finalize_process
  33. from .typ import CE_EXTENDED, BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT
  34. from .util import pack, unpack
  35. # typing -----------------------------------------------------------------------------
  36. from typing import Dict, IO, List, Sequence, TYPE_CHECKING, Tuple, Type, Union, cast
  37. from git.types import PathLike
  38. if TYPE_CHECKING:
  39. from git.db import GitCmdObjectDB
  40. from git.objects.tree import TreeCacheTup
  41. from .base import IndexFile
  42. # ------------------------------------------------------------------------------------
# File-type bits used for submodule entries: the symlink and directory type bits
# combined.
S_IFGITLINK = S_IFLNK | S_IFDIR
"""Flags for a submodule."""

# Bitwise complement of CE_NAMEMASK: keeps every bit of an entry's flags field
# except the ones that store the path length.
CE_NAMEMASK_INV = ~CE_NAMEMASK
  46. def hook_path(name: str, git_dir: PathLike) -> str:
  47. """:return: path to the given named hook in the given git repository directory"""
  48. return osp.join(git_dir, "hooks", name)
  49. def _has_file_extension(path: str) -> str:
  50. return osp.splitext(path)[1]
  51. def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None:
  52. """Run the commit hook of the given name. Silently ignore hooks that do not exist.
  53. :param name:
  54. Name of hook, like ``pre-commit``.
  55. :param index:
  56. :class:`~git.index.base.IndexFile` instance.
  57. :param args:
  58. Arguments passed to hook file.
  59. :raise git.exc.HookExecutionError:
  60. """
  61. hp = hook_path(name, index.repo.git_dir)
  62. if not os.access(hp, os.X_OK):
  63. return
  64. env = os.environ.copy()
  65. env["GIT_INDEX_FILE"] = safe_decode(os.fspath(index.path))
  66. env["GIT_EDITOR"] = ":"
  67. cmd = [hp]
  68. try:
  69. if sys.platform == "win32" and not _has_file_extension(hp):
  70. # Windows only uses extensions to determine how to open files
  71. # (doesn't understand shebangs). Try using bash to run the hook.
  72. relative_hp = Path(hp).relative_to(index.repo.working_dir).as_posix()
  73. cmd = ["bash.exe", relative_hp]
  74. process = safer_popen(
  75. cmd + list(args),
  76. env=env,
  77. stdout=subprocess.PIPE,
  78. stderr=subprocess.PIPE,
  79. cwd=index.repo.working_dir,
  80. )
  81. except Exception as ex:
  82. raise HookExecutionError(hp, ex) from ex
  83. else:
  84. stdout_list: List[str] = []
  85. stderr_list: List[str] = []
  86. handle_process_output(process, stdout_list.append, stderr_list.append, finalize_process)
  87. stdout = "".join(stdout_list)
  88. stderr = "".join(stderr_list)
  89. if process.returncode != 0:
  90. stdout = force_text(stdout, defenc)
  91. stderr = force_text(stderr, defenc)
  92. raise HookExecutionError(hp, process.returncode, stderr, stdout)
  93. # END handle return code
  94. def stat_mode_to_index_mode(mode: int) -> int:
  95. """Convert the given mode from a stat call to the corresponding index mode and
  96. return it."""
  97. if S_ISLNK(mode): # symlinks
  98. return S_IFLNK
  99. if S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK: # submodules
  100. return S_IFGITLINK
  101. return S_IFREG | (mode & S_IXUSR and 0o755 or 0o644) # blobs with or without executable bit
  102. def write_cache(
  103. entries: Sequence[Union[BaseIndexEntry, "IndexEntry"]],
  104. stream: IO[bytes],
  105. extension_data: Union[None, bytes] = None,
  106. ShaStreamCls: Type[IndexFileSHA1Writer] = IndexFileSHA1Writer,
  107. ) -> None:
  108. """Write the cache represented by entries to a stream.
  109. :param entries:
  110. **Sorted** list of entries.
  111. :param stream:
  112. Stream to wrap into the AdapterStreamCls - it is used for final output.
  113. :param ShaStreamCls:
  114. Type to use when writing to the stream. It produces a sha while writing to it,
  115. before the data is passed on to the wrapped stream.
  116. :param extension_data:
  117. Any kind of data to write as a trailer, it must begin a 4 byte identifier,
  118. followed by its size (4 bytes).
  119. """
  120. # Wrap the stream into a compatible writer.
  121. stream_sha = ShaStreamCls(stream)
  122. tell = stream_sha.tell
  123. write = stream_sha.write
  124. # Header
  125. version = 3 if any(entry.extended_flags for entry in entries) else 2
  126. write(b"DIRC")
  127. write(pack(">LL", version, len(entries)))
  128. # Body
  129. for entry in entries:
  130. beginoffset = tell()
  131. write(entry.ctime_bytes) # ctime
  132. write(entry.mtime_bytes) # mtime
  133. path_str = str(entry.path)
  134. path: bytes = force_bytes(path_str, encoding=defenc)
  135. plen = len(path) & CE_NAMEMASK # Path length
  136. assert plen == len(path), "Path %s too long to fit into index" % entry.path
  137. flags = plen | (entry.flags & CE_NAMEMASK_INV) # Clear possible previous values.
  138. if entry.extended_flags:
  139. flags |= CE_EXTENDED
  140. write(
  141. pack(
  142. ">LLLLLL20sH",
  143. entry.dev,
  144. entry.inode,
  145. entry.mode,
  146. entry.uid,
  147. entry.gid,
  148. entry.size,
  149. entry.binsha,
  150. flags,
  151. )
  152. )
  153. if entry.extended_flags:
  154. write(pack(">H", entry.extended_flags))
  155. write(path)
  156. real_size = (tell() - beginoffset + 8) & ~7
  157. write(b"\0" * ((beginoffset + real_size) - tell()))
  158. # END for each entry
  159. # Write previously cached extensions data.
  160. if extension_data is not None:
  161. stream_sha.write(extension_data)
  162. # Write the sha over the content.
  163. stream_sha.write_sha()
  164. def read_header(stream: IO[bytes]) -> Tuple[int, int]:
  165. """Return tuple(version_long, num_entries) from the given stream."""
  166. type_id = stream.read(4)
  167. if type_id != b"DIRC":
  168. raise AssertionError("Invalid index file header: %r" % type_id)
  169. unpacked = cast(Tuple[int, int], unpack(">LL", stream.read(4 * 2)))
  170. version, num_entries = unpacked
  171. assert version in (1, 2, 3), "Unsupported git index version %i, only 1, 2, and 3 are supported" % version
  172. return version, num_entries
  173. def entry_key(*entry: Union[BaseIndexEntry, PathLike, int]) -> Tuple[PathLike, int]:
  174. """
  175. :return:
  176. Key suitable to be used for the
  177. :attr:`index.entries <git.index.base.IndexFile.entries>` dictionary.
  178. :param entry:
  179. One instance of type BaseIndexEntry or the path and the stage.
  180. """
  181. # def is_entry_key_tup(entry_key: Tuple) -> TypeGuard[Tuple[PathLike, int]]:
  182. # return isinstance(entry_key, tuple) and len(entry_key) == 2
  183. if len(entry) == 1:
  184. entry_first = entry[0]
  185. assert isinstance(entry_first, BaseIndexEntry)
  186. return (entry_first.path, entry_first.stage)
  187. else:
  188. # assert is_entry_key_tup(entry)
  189. entry = cast(Tuple[PathLike, int], entry)
  190. return entry
  191. # END handle entry
  192. def read_cache(
  193. stream: IO[bytes],
  194. ) -> Tuple[int, Dict[Tuple[PathLike, int], "IndexEntry"], bytes, bytes]:
  195. """Read a cache file from the given stream.
  196. :return:
  197. tuple(version, entries_dict, extension_data, content_sha)
  198. * *version* is the integer version number.
  199. * *entries_dict* is a dictionary which maps IndexEntry instances to a path at a
  200. stage.
  201. * *extension_data* is ``""`` or 4 bytes of type + 4 bytes of size + size bytes.
  202. * *content_sha* is a 20 byte sha on all cache file contents.
  203. """
  204. version, num_entries = read_header(stream)
  205. count = 0
  206. entries: Dict[Tuple[PathLike, int], "IndexEntry"] = {}
  207. read = stream.read
  208. tell = stream.tell
  209. while count < num_entries:
  210. beginoffset = tell()
  211. ctime = unpack(">8s", read(8))[0]
  212. mtime = unpack(">8s", read(8))[0]
  213. (dev, ino, mode, uid, gid, size, sha, flags) = unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2))
  214. extended_flags = 0
  215. if flags & CE_EXTENDED:
  216. extended_flags = unpack(">H", read(2))[0]
  217. path_size = flags & CE_NAMEMASK
  218. path = read(path_size).decode(defenc)
  219. real_size = (tell() - beginoffset + 8) & ~7
  220. read((beginoffset + real_size) - tell())
  221. entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size, extended_flags))
  222. # entry_key would be the method to use, but we save the effort.
  223. entries[(path, entry.stage)] = entry
  224. count += 1
  225. # END for each entry
  226. # The footer contains extension data and a sha on the content so far.
  227. # Keep the extension footer,and verify we have a sha in the end.
  228. # Extension data format is:
  229. # 4 bytes ID
  230. # 4 bytes length of chunk
  231. # Repeated 0 - N times
  232. extension_data = stream.read(~0)
  233. assert len(extension_data) > 19, (
  234. "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data)
  235. )
  236. content_sha = extension_data[-20:]
  237. # Truncate the sha in the end as we will dynamically create it anyway.
  238. extension_data = extension_data[:-20]
  239. return (version, entries, extension_data, content_sha)
  240. def write_tree_from_cache(
  241. entries: List[IndexEntry], odb: "GitCmdObjectDB", sl: slice, si: int = 0
  242. ) -> Tuple[bytes, List["TreeCacheTup"]]:
  243. R"""Create a tree from the given sorted list of entries and put the respective
  244. trees into the given object database.
  245. :param entries:
  246. **Sorted** list of :class:`~git.index.typ.IndexEntry`\s.
  247. :param odb:
  248. Object database to store the trees in.
  249. :param si:
  250. Start index at which we should start creating subtrees.
  251. :param sl:
  252. Slice indicating the range we should process on the entries list.
  253. :return:
  254. tuple(binsha, list(tree_entry, ...))
  255. A tuple of a sha and a list of tree entries being a tuple of hexsha, mode, name.
  256. """
  257. tree_items: List["TreeCacheTup"] = []
  258. ci = sl.start
  259. end = sl.stop
  260. while ci < end:
  261. entry = entries[ci]
  262. if entry.stage != 0:
  263. raise UnmergedEntriesError(entry)
  264. # END abort on unmerged
  265. ci += 1
  266. rbound = entry.path.find("/", si)
  267. if rbound == -1:
  268. # It's not a tree.
  269. tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
  270. else:
  271. # Find common base range.
  272. base = entry.path[si:rbound]
  273. xi = ci
  274. while xi < end:
  275. oentry = entries[xi]
  276. orbound = oentry.path.find("/", si)
  277. if orbound == -1 or oentry.path[si:orbound] != base:
  278. break
  279. # END abort on base mismatch
  280. xi += 1
  281. # END find common base
  282. # Enter recursion.
  283. # ci - 1 as we want to count our current item as well.
  284. sha, _tree_entry_list = write_tree_from_cache(entries, odb, slice(ci - 1, xi), rbound + 1)
  285. tree_items.append((sha, S_IFDIR, base))
  286. # Skip ahead.
  287. ci = xi
  288. # END handle bounds
  289. # END for each entry
  290. # Finally create the tree.
  291. sio = BytesIO()
  292. tree_to_stream(tree_items, sio.write) # Writes to stream as bytes, but doesn't change tree_items.
  293. sio.seek(0)
  294. istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
  295. return (istream.binsha, tree_items)
  296. def _tree_entry_to_baseindexentry(tree_entry: "TreeCacheTup", stage: int) -> BaseIndexEntry:
  297. return BaseIndexEntry((tree_entry[1], tree_entry[0], stage << CE_STAGESHIFT, tree_entry[2]))
def aggressive_tree_merge(odb: "GitCmdObjectDB", tree_shas: Sequence[bytes]) -> List[BaseIndexEntry]:
    R"""Merge up to three trees into a flat list of index entries.

    :return:
        List of :class:`~git.index.typ.BaseIndexEntry`\s representing the aggressive
        merge of the given trees. All valid entries are on stage 0, whereas the
        conflicting ones are left on stage 1, 2 or 3, whereas stage 1 corresponds to the
        common ancestor tree, 2 to our tree and 3 to 'their' tree.

    :param odb:
        Object database that is passed on to the tree traversal functions.

    :param tree_shas:
        1, 2 or 3 trees as identified by their binary 20 byte shas. If 1 or two, the
        entries will effectively correspond to the last given tree. If 3 are given, a 3
        way merge is performed.

    :raise ValueError:
        If more than three trees are given.
    """
    out: List[BaseIndexEntry] = []

    # One and two way is the same for us, as we don't have to handle an existing
    # index; instead, we just take all entries of the last given tree.
    if len(tree_shas) in (1, 2):
        for entry in traverse_tree_recursive(odb, tree_shas[-1], ""):
            out.append(_tree_entry_to_baseindexentry(entry, 0))
        # END for each entry
        return out
    # END handle single tree

    if len(tree_shas) > 3:
        raise ValueError("Cannot handle %i trees at once" % len(tree_shas))

    # Three trees.
    # Each iteration yields the entries of one path as found in the base tree, our
    # tree and their tree; a missing entry is None. Items 0 and 1 of an entry are
    # compared to detect changes (presumably the sha and the mode - see
    # _tree_entry_to_baseindexentry).
    for base, ours, theirs in traverse_trees_recursive(odb, tree_shas, ""):
        if base is not None:
            # Base version exists.
            if ours is not None:
                # Ours exists.
                if theirs is not None:
                    # It exists in all branches. If it was changed in both
                    # it's a conflict. Otherwise, we take the changed version.
                    # This should be the most common branch, so it comes first.
                    if (base[0] != ours[0] and base[0] != theirs[0] and ours[0] != theirs[0]) or (
                        base[1] != ours[1] and base[1] != theirs[1] and ours[1] != theirs[1]
                    ):
                        # Changed by both, and differently: keep all three versions.
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(ours, 2))
                        out.append(_tree_entry_to_baseindexentry(theirs, 3))
                    elif base[0] != ours[0] or base[1] != ours[1]:
                        # Only we changed it.
                        out.append(_tree_entry_to_baseindexentry(ours, 0))
                    else:
                        # Either nobody changed it, or they did. In either
                        # case, use theirs.
                        out.append(_tree_entry_to_baseindexentry(theirs, 0))
                    # END handle modification
                else:
                    if ours[0] != base[0] or ours[1] != base[1]:
                        # They deleted it, we changed it, conflict.
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(ours, 2))
                    # Otherwise we didn't change it, so their deletion wins and
                    # nothing is emitted.
                    # END handle our change
                # END handle theirs
            else:
                if theirs is None:
                    # Deleted in both, it's fine - it's out.
                    pass
                else:
                    if theirs[0] != base[0] or theirs[1] != base[1]:
                        # Deleted in ours, changed theirs, conflict.
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(theirs, 3))
                    # END theirs changed
                    # Otherwise theirs didn't change, so our deletion wins and
                    # nothing is emitted.
                # END handle theirs
            # END handle ours
        else:
            # All three can't be None.
            if ours is None:
                # Added in their branch.
                assert theirs is not None
                out.append(_tree_entry_to_baseindexentry(theirs, 0))
            elif theirs is None:
                # Added in our branch.
                out.append(_tree_entry_to_baseindexentry(ours, 0))
            else:
                # Both have it, except for the base, see whether it changed.
                if ours[0] != theirs[0] or ours[1] != theirs[1]:
                    # Added differently in both: conflict without a base version.
                    out.append(_tree_entry_to_baseindexentry(ours, 2))
                    out.append(_tree_entry_to_baseindexentry(theirs, 3))
                else:
                    # It was added the same in both.
                    out.append(_tree_entry_to_baseindexentry(ours, 0))
                # END handle two items
            # END handle heads
        # END handle base exists
    # END for each entries tuple

    return out