| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741 |
- # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
- #
- # This module is part of GitPython and is released under the
- # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
- from __future__ import annotations
- __all__ = ["GitMeta", "Git"]
- import contextlib
- import io
- import itertools
- import logging
- import os
- import re
- import signal
- import subprocess
- from subprocess import DEVNULL, PIPE, Popen
- import sys
- from textwrap import dedent
- import threading
- import warnings
- from git.compat import defenc, force_bytes, safe_decode
- from git.exc import (
- CommandError,
- GitCommandError,
- GitCommandNotFound,
- UnsafeOptionError,
- UnsafeProtocolError,
- )
- from git.util import (
- cygpath,
- expand_path,
- is_cygwin_git,
- patch_env,
- remove_password_if_present,
- stream_copy,
- )
- # typing ---------------------------------------------------------------------------
- from typing import (
- Any,
- AnyStr,
- BinaryIO,
- Callable,
- Dict,
- IO,
- Iterator,
- List,
- Mapping,
- Optional,
- Sequence,
- TYPE_CHECKING,
- TextIO,
- Tuple,
- Union,
- cast,
- overload,
- )
- if sys.version_info >= (3, 10):
- from typing import TypeAlias
- else:
- from typing_extensions import TypeAlias
- from git.types import Literal, PathLike, TBD
- if TYPE_CHECKING:
- from git.diff import DiffIndex
- from git.repo.base import Repo
- # ---------------------------------------------------------------------------------
# Keyword arguments that are consumed by ``Git.execute`` itself rather than being
# turned into command-line options for the git subcommand.
execute_kwargs = {
    # Input / output plumbing.
    "istream",
    "with_extended_output",
    "with_exceptions",
    "as_process",
    "output_stream",
    "stdout_as_string",
    # Process control.
    "kill_after_timeout",
    "with_stdout",
    "universal_newlines",
    "shell",
    "env",
    # Output post-processing.
    "max_chunk_size",
    "strip_newline_in_stdout",
}

# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
- # ==============================================================================
- ## @name Utilities
- # ------------------------------------------------------------------------------
- # Documentation
- ## @{
def handle_process_output(
    process: Union["Git.AutoInterrupt", Popen],
    stdout_handler: Union[
        None,
        Callable[[AnyStr], None],
        Callable[[List[AnyStr]], None],
        Callable[[bytes, "Repo", "DiffIndex"], None],
    ],
    stderr_handler: Union[None, Callable[[AnyStr], None], Callable[[List[AnyStr]], None]],
    finalizer: Union[None, Callable[[Union[Popen, "Git.AutoInterrupt"]], None]] = None,
    decode_streams: bool = True,
    kill_after_timeout: Union[None, float] = None,
) -> None:
    R"""Register for notifications to learn that process output is ready to read, and
    dispatch lines to the respective line handlers.

    This function returns once the finalizer returns.

    :param process:
        :class:`subprocess.Popen` instance, or a wrapper exposing it as ``proc``.

    :param stdout_handler:
        f(stdout_line_string), or ``None``.

    :param stderr_handler:
        f(stderr_line_string), or ``None``.

    :param finalizer:
        f(proc) - wait for proc to finish.

    :param decode_streams:
        Assume stdout/stderr streams are binary and decode them before pushing their
        contents to handlers.

        This defaults to ``True``. Set it to ``False`` if:

        - ``universal_newlines == True``, as then streams are in text mode, or
        - decoding must happen later, such as for :class:`~git.diff.Diff`\s.

    :param kill_after_timeout:
        :class:`float` or ``None``, Default = ``None``

        To specify a timeout in seconds for the git command, after which the process
        should be killed.

    :raise RuntimeError:
        If a pump thread is still alive after `kill_after_timeout` seconds and
        `process` is a plain :class:`subprocess.Popen` rather than an
        ``AutoInterrupt`` wrapper.
    """

    # Use 2 "pump" threads and wait for both to finish.
    def pump_stream(
        cmdline: List[str],
        name: str,
        stream: Union[BinaryIO, TextIO],
        is_decode: bool,
        handler: Union[None, Callable[[Union[bytes, str]], None]],
    ) -> None:
        """Feed every line of `stream` to `handler`, then close the stream."""
        try:
            for line in stream:
                if handler:
                    if is_decode:
                        assert isinstance(line, bytes)
                        line_str = line.decode(defenc)
                        handler(line_str)
                    else:
                        handler(line)
        except Exception as ex:
            _logger.error(f"Pumping {name!r} of cmd({remove_password_if_present(cmdline)}) failed due to: {ex!r}")
            if "I/O operation on closed file" not in str(ex):
                # Only reraise if the error was not due to the stream closing.
                raise CommandError([f"<{name}-pump>"] + remove_password_if_present(cmdline), ex) from ex
        finally:
            stream.close()

    # Accept either the wrapper (which exposes the real process as ``proc``) or a
    # bare Popen object.
    if hasattr(process, "proc"):
        process = cast("Git.AutoInterrupt", process)
        cmdline: str | Tuple[str, ...] | List[str] = getattr(process.proc, "args", "")
        p_stdout = process.proc.stdout if process.proc else None
        p_stderr = process.proc.stderr if process.proc else None
    else:
        process = cast(Popen, process)  # type: ignore[redundant-cast]
        cmdline = getattr(process, "args", "")
        p_stdout = process.stdout
        p_stderr = process.stderr

    if not isinstance(cmdline, (tuple, list)):
        cmdline = cmdline.split()

    pumps: List[Tuple[str, IO, Callable[..., None] | None]] = []
    if p_stdout:
        pumps.append(("stdout", p_stdout, stdout_handler))
    if p_stderr:
        pumps.append(("stderr", p_stderr, stderr_handler))

    threads: List[threading.Thread] = []
    for name, stream, handler in pumps:
        t = threading.Thread(target=pump_stream, args=(cmdline, name, stream, decode_streams, handler))
        t.daemon = True
        t.start()
        threads.append(t)

    # FIXME: Why join? Will block if stdin needs feeding...
    for t in threads:
        t.join(timeout=kill_after_timeout)
        if t.is_alive():
            if isinstance(process, Git.AutoInterrupt):
                process._terminate()
            else:  # Don't want to deal with the other case.
                raise RuntimeError(
                    "Thread join() timed out in cmd.handle_process_output()."
                    f" kill_after_timeout={kill_after_timeout} seconds"
                )
            if stderr_handler:
                error_str: Union[str, bytes] = (
                    f"error: process killed because it timed out. kill_after_timeout={kill_after_timeout} seconds"
                )
                # typing.BinaryIO is only a static-typing construct: real pipe objects
                # are never instances of it, so test against the concrete binary
                # stream base classes from the io module instead.
                if not decode_streams and isinstance(p_stderr, (io.RawIOBase, io.BufferedIOBase)):
                    # The stream is binary and undecoded, so stderr_handler needs
                    # binary input.
                    error_str = cast(str, error_str).encode()
                # We ignore typing on the next line because mypy does not like the way
                # we inferred that stderr takes str or bytes.
                stderr_handler(error_str)  # type: ignore[arg-type]

    if finalizer:
        finalizer(process)
safer_popen: Callable[..., Popen]

if sys.platform == "win32":

    def _safer_popen_windows(
        command: Union[str, Sequence[Any]],
        *,
        shell: bool = False,
        env: Optional[Mapping[str, str]] = None,
        **kwargs: Any,
    ) -> Popen:
        """Start a subprocess on Windows without searching the current directory.

        A plain :class:`subprocess.Popen` call on Windows may resolve the executable
        from the CWD. If the CWD is (or temporarily becomes) the working tree of an
        untrusted repository, a planted file such as ``git.exe`` would then be run.
        The same applies when a shell is used, since the shell performs its own
        search with the subprocess CWD. This wrapper closes both paths.

        :param command:
            The command to run, as accepted by :class:`subprocess.Popen`.

        :param shell:
            Whether to run the command through a shell.

        :param env:
            Environment for the subprocess, or ``None``. It is never mutated; a copy
            is modified when a shell is used.

        :param kwargs:
            Passed through to :class:`subprocess.Popen` unchanged.

        :note:
            The protection is implemented by setting the
            :envvar:`NoDefaultCurrentDirectoryInExePath` environment variable while
            the subprocess is created. Windows-specific process creation flags are
            also passed, but those are unrelated to the path search.

        :note:
            The current implementation contains a race condition on
            :attr:`os.environ`. GitPython isn't thread-safe, but a program using it
            on one thread should ideally be able to mutate :attr:`os.environ` on
            another, without unpredictable results. See comments in:
            https://github.com/gitpython-developers/GitPython/pull/1650
        """
        # CREATE_NEW_PROCESS_GROUP is needed for some ways of killing it afterwards.
        # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal
        # https://docs.python.org/3/library/subprocess.html#subprocess.CREATE_NEW_PROCESS_GROUP
        flags = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP

        # With a shell, the shell is the direct child and does the lookup, so the
        # variable has to be present in the child's environment.
        if shell:
            # Work on a copy: the caller's mapping may be immutable or reused.
            if env is None:
                env = {}
            else:
                env = dict(env)
            env["NoDefaultCurrentDirectoryInExePath"] = "1"  # The "1" can be any value.

        # Without a shell, this process does the lookup inside CreateProcessW, so the
        # variable has to be set in our own environment. With a shell, that's
        # unnecessary if https://github.com/python/cpython/issues/101283 is patched.
        # Where it is unpatched and ComSpec happens to be unset, the search for the
        # shell itself is unsafe, so the variable is set in every case: simpler, and
        # it covers that situation too. (As above, the "1" can be any value.)
        with patch_env("NoDefaultCurrentDirectoryInExePath", "1"):
            child = Popen(
                command,
                shell=shell,
                env=env,
                creationflags=flags,
                **kwargs,
            )
            return child

    safer_popen = _safer_popen_windows
else:
    safer_popen = Popen
def dashify(string: str) -> str:
    """Return `string` with every underscore turned into a dash."""
    pieces = string.split("_")
    return "-".join(pieces)
def slots_to_dict(self: "Git", exclude: Sequence[str] = ()) -> Dict[str, Any]:
    """Snapshot the slot attributes of `self` into a dictionary.

    :param exclude:
        Slot names to leave out of the result.

    :return:
        Mapping of each remaining slot name to its current value.
    """
    state: Dict[str, Any] = {}
    for slot_name in self.__slots__:
        if slot_name in exclude:
            continue
        state[slot_name] = getattr(self, slot_name)
    return state
def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], excluded: Sequence[str] = ()) -> None:
    """Restore attributes on `self` from `d`, then blank out the excluded ones.

    :param d:
        Attribute names and the values to assign to them.

    :param excluded:
        Attribute names that are set to ``None`` afterwards. Since this happens
        last, it wins over any value for the same name in `d`.
    """
    for attr_name, attr_value in d.items():
        setattr(self, attr_name, attr_value)
    for attr_name in excluded:
        setattr(self, attr_name, None)
- ## -- End Utilities -- @}
class _AutoInterrupt:
    """Process wrapper that terminates the wrapped process on finalization.

    This kills/interrupts the stored process instance once this instance goes out of
    scope. It is used to prevent processes piling up in case iterators stop reading.

    All attributes are wired through to the contained process object.

    The wait method is overridden to perform automatic status code checking and possibly
    raise.
    """

    __slots__ = ("proc", "args", "status")

    # If this is non-zero it will override any status code during _terminate, used
    # to prevent race conditions in testing.
    _status_code_if_terminate: int = 0

    def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None:
        """Wrap `proc`, remembering the `args` it was started with for error reports."""
        self.proc = proc
        self.args = args
        self.status: Union[int, None] = None

    def _terminate(self) -> None:
        """Terminate the underlying process."""
        if self.proc is None:
            return

        # Detach the process first so a repeated call is a no-op.
        proc = self.proc
        self.proc = None
        if proc.stdin:
            proc.stdin.close()
        if proc.stdout:
            proc.stdout.close()
        if proc.stderr:
            proc.stderr.close()
        # Did the process finish already so we have a return code?
        try:
            if proc.poll() is not None:
                self.status = self._status_code_if_terminate or proc.poll()
                return
        except OSError as ex:
            _logger.info("Ignored error after process had died: %r", ex)

        # It can be that nothing really exists anymore...
        if os is None or getattr(os, "kill", None) is None:
            return

        # Try to kill it.
        try:
            proc.terminate()
            status = proc.wait()  # Ensure the process goes away.

            self.status = self._status_code_if_terminate or status
        except OSError as ex:
            _logger.info("Ignored error after process had died: %r", ex)
        # END exception handling

    def __del__(self) -> None:
        self._terminate()

    def __getattr__(self, attr: str) -> Any:
        # Anything not found on the wrapper is looked up on the wrapped process.
        return getattr(self.proc, attr)

    # TODO: Bad choice to mimic `proc.wait()` but with different args.
    def wait(self, stderr: Union[None, str, bytes] = b"") -> int:
        """Wait for the process and return its status code.

        :param stderr:
            Previously read value of stderr, in case stderr is already closed.
            ``None`` is treated as empty.

        :warn:
            May deadlock if output or error pipes are used and not handled separately.

        :raise git.exc.GitCommandError:
            If the return status is not 0.
        """
        # Only convert when there is something to convert. Previously the empty
        # default assigned for None was immediately overwritten by the conversion.
        if stderr is None:
            stderr_b = b""
        else:
            stderr_b = force_bytes(data=stderr, encoding="utf-8")

        status: Union[int, None]
        if self.proc is not None:
            status = self.proc.wait()
            p_stderr = self.proc.stderr
        else:  # Assume the underlying proc was killed earlier or never existed.
            status = self.status
            p_stderr = None

        def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> bytes:
            """Return the earlier stderr plus whatever can still be read from `stream`."""
            if stream:
                try:
                    return stderr_b + force_bytes(stream.read())
                except (OSError, ValueError):
                    return stderr_b or b""
            else:
                return stderr_b or b""

        # END status handling

        if status != 0:
            errstr = read_all_from_possibly_closed_stream(p_stderr)
            _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,))
            raise GitCommandError(remove_password_if_present(self.args), status, errstr)
        return status


# Present the class under the name it is exposed by on the Git class.
_AutoInterrupt.__name__ = "AutoInterrupt"
_AutoInterrupt.__qualname__ = "Git.AutoInterrupt"
class _CatFileContentStream:
    """Read-only, size-limited view onto one object's contents in a shared stream.

    The wrapper tracks how many bytes have been handed out and acts like an exhausted
    stream once the declared size has been reached. Whenever the content region is
    used up, the single trailing byte that follows it is consumed as well, so the
    underlying stream is positioned correctly for whoever reads from it next.

    If the object is dropped before all data were read, the remainder is read and
    discarded for the same reason.
    """

    __slots__ = ("_stream", "_nbr", "_size")

    def __init__(self, size: int, stream: IO[bytes]) -> None:
        self._stream = stream
        self._size = size
        self._nbr = 0  # Number of bytes read.

        # An empty object is followed directly by the final newline, so take it
        # right away.
        if size == 0:
            stream.read(1)

    def _clamp(self, requested: int) -> int:
        """Limit a requested byte count to what is left of the content region."""
        left = self._size - self._nbr
        return min(left, requested) if requested > -1 else left

    def _account(self, chunk: bytes) -> bytes:
        """Record `chunk` as consumed; swallow the final newline once depleted."""
        self._nbr += len(chunk)
        if self._nbr == self._size:
            self._stream.read(1)  # final newline
        return chunk

    def read(self, size: int = -1) -> bytes:
        """Read up to `size` bytes, or everything that remains if `size` is negative."""
        if self._nbr == self._size:
            return b""
        return self._account(self._stream.read(self._clamp(size)))

    def readline(self, size: int = -1) -> bytes:
        """Read one line, never going past `size` bytes or the end of the content."""
        if self._nbr == self._size:
            return b""
        return self._account(self._stream.readline(self._clamp(size)))

    def readlines(self, size: int = -1) -> List[bytes]:
        """Read lines until depletion, or until more than `size` bytes were collected."""
        if self._nbr == self._size:
            return []

        # All clamping is done by readline; only the size hint is tracked here.
        collected: List[bytes] = []
        total = 0
        for line in iter(self.readline, b""):
            collected.append(line)
            if size > -1:
                total += len(line)
                if total > size:
                    break
        return collected

    # skipcq: PYL-E0301
    def __iter__(self) -> "Git.CatFileContentStream":
        return self

    def __next__(self) -> bytes:
        line = self.readline()
        if line:
            return line
        raise StopIteration

    next = __next__

    def __del__(self) -> None:
        unread = self._size - self._nbr
        if unread:
            # Seeking is impossible within a stream, so read and discard the rest,
            # including the terminating newline.
            self._stream.read(unread + 1)


# Present the class under the name it is exposed by on the Git class.
_CatFileContentStream.__name__ = "CatFileContentStream"
_CatFileContentStream.__qualname__ = "Git.CatFileContentStream"
# Warning text for any access to Git.USE_SHELL that does not enable it.
_USE_SHELL_DEFAULT_MESSAGE = (
    "Git.USE_SHELL is deprecated, because only its default value of False is safe. "
    "It will be removed in a future release."
)

# Stronger warning text for when Git.USE_SHELL is being switched on.
_USE_SHELL_DANGER_MESSAGE = (
    "Setting Git.USE_SHELL to True is unsafe and insecure, as the effect of special "
    "shell syntax cannot usually be accounted for. This can result in a command "
    "injection vulnerability and arbitrary code execution. Git.USE_SHELL is deprecated "
    "and will be removed in a future release."
)


def _warn_use_shell(*, extra_danger: bool) -> None:
    """Issue a :class:`DeprecationWarning` about ``Git.USE_SHELL``.

    :param extra_danger:
        If true, the stronger "unsafe and insecure" message is used instead of the
        plain deprecation notice.
    """
    if extra_danger:
        message = _USE_SHELL_DANGER_MESSAGE
    else:
        message = _USE_SHELL_DEFAULT_MESSAGE
    # stacklevel=3 attributes the warning two frames above this helper.
    warnings.warn(message, DeprecationWarning, stacklevel=3)
class _GitMeta(type):
    """Metaclass for :class:`Git`.

    Its only job is to emit a :class:`DeprecationWarning` whenever the
    :attr:`Git.USE_SHELL` class attribute is read or assigned on the class.
    """

    def __getattribute(cls, name: str) -> Any:
        # Reading USE_SHELL on the class gets the plain deprecation notice.
        if name == "USE_SHELL":
            _warn_use_shell(extra_danger=False)
        return super().__getattribute__(name)

    def __setattr(cls, name: str, value: Any) -> Any:
        # Assigning USE_SHELL escalates to the danger message for a truthy value.
        if name == "USE_SHELL":
            _warn_use_shell(extra_danger=value)
        super().__setattr__(name, value)

    if not TYPE_CHECKING:
        # The hooks above are written as ordinary (name-mangled) methods so their
        # bodies are type-checked, and are only bound to the special names here,
        # where static type checkers do not look. That way checkers still report
        # undefined or misspelled attributes on the class.
        __getattribute__ = __getattribute
        __setattr__ = __setattr


GitMeta = _GitMeta
"""Alias of :class:`Git`'s metaclass, whether it is :class:`type` or a custom metaclass.

Whether the :class:`Git` class has the default :class:`type` as its metaclass or uses a
custom metaclass is not documented and may change at any time. This statically checkable
metaclass alias is equivalent at runtime to ``type(Git)``. This should almost never be
used. Code that benefits from it is likely to remain brittle even if it is used.

In view of the :class:`Git` class's intended use and :class:`Git` objects' dynamic
callable attributes representing git subcommands, it rarely makes sense to inherit from
:class:`Git` at all. Using :class:`Git` in multiple inheritance can be especially tricky
to do correctly. Attempting uses of :class:`Git` where its metaclass is relevant, such
as when a sibling class has an unrelated metaclass and a shared lower bound metaclass
might have to be introduced to solve a metaclass conflict, is not recommended.

:note:
    The correct static type of the :class:`Git` class itself, and any subclasses, is
    ``Type[Git]``. (This can be written as ``type[Git]`` in Python 3.9 and later.)

    :class:`GitMeta` should never be used in any annotation where ``Type[Git]`` is
    intended or otherwise possible to use. This alias is truly only for very rare and
    inherently precarious situations where it is necessary to deal with the metaclass
    explicitly.
"""
- class Git(metaclass=_GitMeta):
- """The Git class manages communication with the Git binary.
- It provides a convenient interface to calling the Git binary, such as in::
- g = Git( git_dir )
- g.init() # calls 'git init' program
- rval = g.ls_files() # calls 'git ls-files' program
- Debugging:
- * Set the :envvar:`GIT_PYTHON_TRACE` environment variable to print each invocation
- of the command to stdout.
- * Set its value to ``full`` to see details about the returned values.
- """
- __slots__ = (
- "_working_dir",
- "cat_file_all",
- "cat_file_header",
- "_version_info",
- "_version_info_token",
- "_git_options",
- "_persistent_git_options",
- "_environment",
- )
- _excluded_ = (
- "cat_file_all",
- "cat_file_header",
- "_version_info",
- "_version_info_token",
- )
- re_unsafe_protocol = re.compile(r"(.+)::.+")
- def __getstate__(self) -> Dict[str, Any]:
- return slots_to_dict(self, exclude=self._excluded_)
- def __setstate__(self, d: Dict[str, Any]) -> None:
- dict_to_slots_and__excluded_are_none(self, d, excluded=self._excluded_)
- # CONFIGURATION
- git_exec_name = "git"
- """Default git command that should work on Linux, Windows, and other systems."""
- GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)
- """Enables debugging of GitPython's git commands."""
- USE_SHELL: bool = False
- """Deprecated. If set to ``True``, a shell will be used when executing git commands.
- Code that uses ``USE_SHELL = True`` or that passes ``shell=True`` to any GitPython
- functions should be updated to use the default value of ``False`` instead. ``True``
- is unsafe unless the effect of syntax treated specially by the shell is fully
- considered and accounted for, which is not possible under most circumstances. As
- detailed below, it is also no longer needed, even where it had been in the past.
- It is in many if not most cases a command injection vulnerability for an application
- to set :attr:`USE_SHELL` to ``True``. Any attacker who can cause a specially crafted
- fragment of text to make its way into any part of any argument to any git command
- (including paths, branch names, etc.) can cause the shell to read and write
- arbitrary files and execute arbitrary commands. Innocent input may also accidentally
- contain special shell syntax, leading to inadvertent malfunctions.
- In addition, how a value of ``True`` interacts with some aspects of GitPython's
- operation is not precisely specified and may change without warning, even before
- GitPython 4.0.0 when :attr:`USE_SHELL` may be removed. This includes:
- * Whether or how GitPython automatically customizes the shell environment.
- * Whether, outside of Windows (where :class:`subprocess.Popen` supports lists of
- separate arguments even when ``shell=True``), this can be used with any GitPython
- functionality other than direct calls to the :meth:`execute` method.
- * Whether any GitPython feature that runs git commands ever attempts to partially
- sanitize data a shell may treat specially. Currently this is not done.
- Prior to GitPython 2.0.8, this had a narrow purpose in suppressing console windows
- in graphical Windows applications. In 2.0.8 and higher, it provides no benefit, as
- GitPython solves that problem more robustly and safely by using the
- ``CREATE_NO_WINDOW`` process creation flag on Windows.
- Because Windows path search differs subtly based on whether a shell is used, in rare
- cases changing this from ``True`` to ``False`` may keep an unusual git "executable",
- such as a batch file, from being found. To fix this, set the command name or full
- path in the :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable or pass the
- full path to :func:`git.refresh` (or invoke the script using a ``.exe`` shim).
- Further reading:
- * :meth:`Git.execute` (on the ``shell`` parameter).
- * https://github.com/gitpython-developers/GitPython/commit/0d9390866f9ce42870d3116094cd49e0019a970a
- * https://learn.microsoft.com/en-us/windows/win32/procthread/process-creation-flags
- * https://github.com/python/cpython/issues/91558#issuecomment-1100942950
- * https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessw
- """
- _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE"
- _refresh_env_var = "GIT_PYTHON_REFRESH"
- GIT_PYTHON_GIT_EXECUTABLE = None
- """Provide the full path to the git executable. Otherwise it assumes git is in the
- executable search path.
- :note:
- The git executable is actually found during the refresh step in the top level
- ``__init__``. It can also be changed by explicitly calling :func:`git.refresh`.
- """
- _refresh_token = object() # Since None would match an initial _version_info_token.
- @classmethod
- def refresh(cls, path: Union[None, PathLike] = None) -> bool:
- """Update information about the git executable :class:`Git` objects will use.
- Called by the :func:`git.refresh` function in the top level ``__init__``.
- :param path:
- Optional path to the git executable. If not absolute, it is resolved
- immediately, relative to the current directory. (See note below.)
- :note:
- The top-level :func:`git.refresh` should be preferred because it calls this
- method and may also update other state accordingly.
- :note:
- There are three different ways to specify the command that refreshing causes
- to be used for git:
- 1. Pass no `path` argument and do not set the
- :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable. The command
- name ``git`` is used. It is looked up in a path search by the system, in
- each command run (roughly similar to how git is found when running
- ``git`` commands manually). This is usually the desired behavior.
- 2. Pass no `path` argument but set the :envvar:`GIT_PYTHON_GIT_EXECUTABLE`
- environment variable. The command given as the value of that variable is
- used. This may be a simple command or an arbitrary path. It is looked up
- in each command run. Setting :envvar:`GIT_PYTHON_GIT_EXECUTABLE` to
- ``git`` has the same effect as not setting it.
- 3. Pass a `path` argument. This path, if not absolute, is immediately
- resolved, relative to the current directory. This resolution occurs at
- the time of the refresh. When git commands are run, they are run using
- that previously resolved path. If a `path` argument is passed, the
- :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable is not
- consulted.
- :note:
- Refreshing always sets the :attr:`Git.GIT_PYTHON_GIT_EXECUTABLE` class
- attribute, which can be read on the :class:`Git` class or any of its
- instances to check what command is used to run git. This attribute should
- not be confused with the related :envvar:`GIT_PYTHON_GIT_EXECUTABLE`
- environment variable. The class attribute is set no matter how refreshing is
- performed.
- """
- # Discern which path to refresh with.
- if path is not None:
- new_git = os.path.expanduser(path)
- new_git = os.path.abspath(new_git)
- else:
- new_git = os.environ.get(cls._git_exec_env_var, cls.git_exec_name)
- # Keep track of the old and new git executable path.
- old_git = cls.GIT_PYTHON_GIT_EXECUTABLE
- old_refresh_token = cls._refresh_token
- cls.GIT_PYTHON_GIT_EXECUTABLE = new_git
- cls._refresh_token = object()
- # Test if the new git executable path is valid. A GitCommandNotFound error is
- # raised by us. A PermissionError is raised if the git executable cannot be
- # executed for whatever reason.
- has_git = False
- try:
- cls().version()
- has_git = True
- except (GitCommandNotFound, PermissionError):
- pass
- # Warn or raise exception if test failed.
- if not has_git:
- err = (
- dedent(
- """\
- Bad git executable.
- The git executable must be specified in one of the following ways:
- - be included in your $PATH
- - be set via $%s
- - explicitly set via git.refresh(<full-path-to-git-executable>)
- """
- )
- % cls._git_exec_env_var
- )
- # Revert to whatever the old_git was.
- cls.GIT_PYTHON_GIT_EXECUTABLE = old_git
- cls._refresh_token = old_refresh_token
- if old_git is None:
- # On the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is None) we only
- # are quiet, warn, or error depending on the GIT_PYTHON_REFRESH value.
- # Determine what the user wants to happen during the initial refresh. We
- # expect GIT_PYTHON_REFRESH to either be unset or be one of the
- # following values:
- #
- # 0|q|quiet|s|silence|silent|n|none
- # 1|w|warn|warning|l|log
- # 2|r|raise|e|error|exception
- mode = os.environ.get(cls._refresh_env_var, "raise").lower()
- quiet = ["quiet", "q", "silence", "s", "silent", "none", "n", "0"]
- warn = ["warn", "w", "warning", "log", "l", "1"]
- error = ["error", "e", "exception", "raise", "r", "2"]
- if mode in quiet:
- pass
- elif mode in warn or mode in error:
- err = dedent(
- """\
- %s
- All git commands will error until this is rectified.
- This initial message can be silenced or aggravated in the future by setting the
- $%s environment variable. Use one of the following values:
- - %s: for no message or exception
- - %s: for a warning message (logging level CRITICAL, displayed by default)
- - %s: for a raised exception
- Example:
- export %s=%s
- """
- ) % (
- err,
- cls._refresh_env_var,
- "|".join(quiet),
- "|".join(warn),
- "|".join(error),
- cls._refresh_env_var,
- quiet[0],
- )
- if mode in warn:
- _logger.critical(err)
- else:
- raise ImportError(err)
- else:
- err = dedent(
- """\
- %s environment variable has been set but it has been set with an invalid value.
- Use only the following values:
- - %s: for no message or exception
- - %s: for a warning message (logging level CRITICAL, displayed by default)
- - %s: for a raised exception
- """
- ) % (
- cls._refresh_env_var,
- "|".join(quiet),
- "|".join(warn),
- "|".join(error),
- )
- raise ImportError(err)
- # We get here if this was the initial refresh and the refresh mode was
- # not error. Go ahead and set the GIT_PYTHON_GIT_EXECUTABLE such that we
- # discern the difference between the first refresh at import time
- # and subsequent calls to git.refresh or this refresh method.
- cls.GIT_PYTHON_GIT_EXECUTABLE = cls.git_exec_name
- else:
- # After the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is no longer
- # None) we raise an exception.
- raise GitCommandNotFound(new_git, err)
- return has_git
@classmethod
def is_cygwin(cls) -> bool:
    """Return what :func:`is_cygwin_git` reports for the configured git command.

    :return:
        The result of calling :func:`is_cygwin_git` with the current value of the
        :attr:`GIT_PYTHON_GIT_EXECUTABLE` class attribute.
    """
    git_executable = cls.GIT_PYTHON_GIT_EXECUTABLE
    return is_cygwin_git(git_executable)
@overload
@classmethod
def polish_url(cls, url: str, is_cygwin: Literal[False] = ...) -> str: ...

@overload
@classmethod
def polish_url(cls, url: str, is_cygwin: Union[None, bool] = None) -> str: ...

@classmethod
def polish_url(cls, url: str, is_cygwin: Union[None, bool] = None) -> PathLike:
    """Normalize a URL so that it can be written to a git config file.

    Config files created on Windows may contain paths with backslashes, which git
    ends up escaping. To be safe, the escaping is undone and every backslash is
    turned into a forward slash.

    :param url:
        The URL or path to normalize.

    :param is_cygwin:
        If ``True``, the URL is converted with :func:`cygpath` and nothing else is
        done. If ``False``, environment variables and a leading ``~`` are expanded
        and backslashes are replaced. If ``None``, :meth:`is_cygwin` decides.

    :return:
        The normalized URL.
    """
    use_cygpath = cls.is_cygwin() if is_cygwin is None else is_cygwin

    if use_cygpath:
        return cygpath(url)

    polished = os.path.expandvars(url)
    if polished.startswith("~"):
        polished = os.path.expanduser(polished)
    # Collapse doubled (escaped) backslashes first, then switch to forward slashes.
    polished = polished.replace("\\\\", "\\")
    polished = polished.replace("\\", "/")
    return polished
@classmethod
def check_unsafe_protocols(cls, url: str) -> None:
    """Reject URLs that use a remote-helper protocol.

    Besides the usual protocols (http, git, ssh), Git supports "remote helpers" of
    the form ``<transport>::<address>``. One of them (``ext::``) can be used to run
    an arbitrary command.

    See:

    - https://git-scm.com/docs/gitremote-helpers
    - https://git-scm.com/docs/git-remote-ext

    :param url:
        The URL to check against :attr:`re_unsafe_protocol`.

    :raise UnsafeProtocolError:
        If the URL matches :attr:`re_unsafe_protocol`.
    """
    found = cls.re_unsafe_protocol.match(url)
    if not found:
        return
    protocol = found.group(1)
    raise UnsafeProtocolError(
        f"The `{protocol}::` protocol looks suspicious, use `allow_unsafe_protocols=True` to allow it."
    )
@classmethod
def check_unsafe_options(cls, options: List[str], unsafe_options: List[str]) -> None:
    """Reject options that could be used to execute arbitrary commands.

    Some options accepted by ``git <command>`` can run arbitrary commands, so they
    are blocked by default.

    :param options:
        The options that are about to be passed to git.

    :param unsafe_options:
        The options that must not appear, written with their leading dashes.

    :raise UnsafeOptionError:
        If any of `options` starts with an unsafe option, or is equal to an unsafe
        option with its leading dashes removed.
    """
    # An option may be given as `foo`, `--foo bar`, or `--foo=bar`, so each unsafe
    # option is paired with its dash-less spelling.
    candidates = [(flag, flag.lstrip("-")) for flag in unsafe_options]
    for given in options:
        for flag, bare in candidates:
            if given == bare or given.startswith(flag):
                raise UnsafeOptionError(
                    f"{flag} is not allowed, use `allow_unsafe_options=True` to allow it."
                )
# Class-level aliases for _AutoInterrupt and _CatFileContentStream (both defined
# outside this section). Methods of this class reach them as self.AutoInterrupt
# and self.CatFileContentStream.
AutoInterrupt: TypeAlias = _AutoInterrupt
CatFileContentStream: TypeAlias = _CatFileContentStream
def __init__(self, working_dir: Union[None, PathLike] = None) -> None:
    """Set up a new instance with empty option, environment and cache state.

    :param working_dir:
        Git directory we should work in. If ``None``, we always work in the current
        directory as returned by :func:`os.getcwd`.
        This is meant to be the working tree directory if available, or the
        ``.git`` directory in case of bare repositories.
    """
    super().__init__()

    self._working_dir = expand_path(working_dir)

    # Options for the git executable itself: one-shot ones (see __call__) and
    # persistent ones (see set_persistent_git_options).
    self._git_options: Union[List[str], Tuple[str, ...]] = ()
    self._persistent_git_options: List[str] = []

    # Additional environment variables handed to every git command.
    self._environment: Dict[str, str] = {}

    # Cache for version_info, together with the refresh token it belongs to.
    self._version_info: Union[Tuple[int, ...], None] = None
    self._version_info_token: object = None

    # Persistent commands, started on demand by _get_persistent_cmd.
    self.cat_file_header: Union[None, TBD] = None
    self.cat_file_all: Union[None, TBD] = None
def __getattribute__(self, name: str) -> Any:
    """Look up an attribute, warning first when ``USE_SHELL`` is being read.

    :param name:
        Name of the attribute to look up.

    :return:
        Whatever the default attribute lookup returns for `name`.
    """
    reading_use_shell = name == "USE_SHELL"
    if reading_use_shell:
        _warn_use_shell(extra_danger=False)
    value = super().__getattribute__(name)
    return value
- def __getattr__(self, name: str) -> Any:
- """A convenience method as it allows to call the command as if it was an object.
- :return:
- Callable object that will execute call :meth:`_call_process` with your
- arguments.
- """
- if name.startswith("_"):
- return super().__getattribute__(name)
- return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
def set_persistent_git_options(self, **kwargs: Any) -> None:
    """Set the command line options given to the git executable on every
    subsequent subcommand call.

    Any previously stored persistent options are replaced.

    :param kwargs:
        A dict of keyword arguments.
        These arguments are passed as in :meth:`_call_process`, but will be passed
        to the git command rather than the subcommand.
    """
    persistent = self.transform_kwargs(split_single_char_options=True, **kwargs)
    self._persistent_git_options = persistent
@property
def working_dir(self) -> Union[None, PathLike]:
    """:return: Git directory we are working on, as stored at construction time"""
    current_dir = self._working_dir
    return current_dir
@property
def version_info(self) -> Tuple[int, ...]:
    """
    :return: Tuple with integers representing the major, minor and additional
        version numbers as parsed from :manpage:`git-version(1)`. Up to four fields
        are used.

    The value is computed on first use and cached on this instance. The cache is
    tied to the class-wide refresh token, so it is recomputed once that token has
    changed.
    """
    # Take one snapshot of the token so a concurrent refresh cannot change it
    # between the comparison and the store below.
    token = self._refresh_token

    if self._version_info_token is not token:
        # Cache miss: ask git and keep only the leading numeric fields.
        raw_output = self._call_process("version")
        fields = raw_output.split(" ")[2].split(".")[:4]
        numeric_prefix = itertools.takewhile(str.isdigit, fields)
        self._version_info = tuple(int(field) for field in numeric_prefix)
        # Valid until the token changes again.
        self._version_info_token = token
        return self._version_info

    assert self._version_info is not None, "Bug: corrupted token-check state"
    return self._version_info
@overload
def execute(
    self,
    command: Union[str, Sequence[Any]],
    *,
    as_process: Literal[True],
) -> "AutoInterrupt": ...

@overload
def execute(
    self,
    command: Union[str, Sequence[Any]],
    *,
    as_process: Literal[False] = False,
    stdout_as_string: Literal[True],
) -> Union[str, Tuple[int, str, str]]: ...

@overload
def execute(
    self,
    command: Union[str, Sequence[Any]],
    *,
    as_process: Literal[False] = False,
    stdout_as_string: Literal[False] = False,
) -> Union[bytes, Tuple[int, bytes, str]]: ...

@overload
def execute(
    self,
    command: Union[str, Sequence[Any]],
    *,
    with_extended_output: Literal[False],
    as_process: Literal[False],
    stdout_as_string: Literal[True],
) -> str: ...

@overload
def execute(
    self,
    command: Union[str, Sequence[Any]],
    *,
    with_extended_output: Literal[False],
    as_process: Literal[False],
    stdout_as_string: Literal[False],
) -> bytes: ...

def execute(
    self,
    command: Union[str, Sequence[Any]],
    istream: Union[None, BinaryIO] = None,
    with_extended_output: bool = False,
    with_exceptions: bool = True,
    as_process: bool = False,
    output_stream: Union[None, BinaryIO] = None,
    stdout_as_string: bool = True,
    kill_after_timeout: Union[None, float] = None,
    with_stdout: bool = True,
    universal_newlines: bool = False,
    shell: Union[None, bool] = None,
    env: Union[None, Mapping[str, str]] = None,
    max_chunk_size: int = io.DEFAULT_BUFFER_SIZE,
    strip_newline_in_stdout: bool = True,
    **subprocess_kwargs: Any,
) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], AutoInterrupt]:
    R"""Handle executing the command, and consume and return the returned
    information (stdout).

    :param command:
        The command argument list to execute.
        It should be a sequence of program arguments, or a string. The
        program to execute is the first item in the args sequence or string.

    :param istream:
        Standard input filehandle passed to :class:`subprocess.Popen`.

    :param with_extended_output:
        Whether to return a (status, stdout, stderr) tuple.

    :param with_exceptions:
        Whether to raise an exception when git returns a non-zero status.

    :param as_process:
        Whether to return the created process instance directly from which
        streams can be read on demand. This will render `with_extended_output`
        and `with_exceptions` ineffective - the caller will have to deal with
        the details. It is important to note that the process will be placed
        into an :class:`AutoInterrupt` wrapper that will interrupt the process
        once it goes out of scope. If you use the command in iterators, you
        should pass the whole process instance instead of a single stream.

    :param output_stream:
        If set to a file-like object, data produced by the git command will be
        copied to the given stream instead of being returned as a string.
        This feature only has any effect if `as_process` is ``False``.

    :param stdout_as_string:
        If ``False``, the command's standard output will be bytes. Otherwise, it
        will be decoded into a string using the default encoding (usually UTF-8).
        The latter can fail, if the output contains binary data.

    :param kill_after_timeout:
        Specifies a timeout in seconds for the git command, after which the process
        should be killed. This will have no effect if `as_process` is set to
        ``True``. It is ``None`` by default, in which case no timeout is applied
        and the process is left to run until it finishes. Uses of this feature
        should be carefully considered, due to the following limitations:

        1. This feature is not supported at all on Windows.
        2. Effectiveness may vary by operating system. ``ps --ppid`` is used to
           enumerate child processes, which is available on most GNU/Linux systems
           but not most others. If ``ps`` cannot be run at all, only the git
           process itself is killed.
        3. Deeper descendants do not receive signals, though they may sometimes
           terminate as a consequence of their parent processes being killed.
        4. `kill_after_timeout` uses ``SIGKILL``, which can have negative side
           effects on a repository. For example, stale locks in case of
           :manpage:`git-gc(1)` could render the repository incapable of accepting
           changes until the lock is manually removed.

    :param with_stdout:
        If ``True``, default ``True``, we open stdout on the created process.
        If ``False``, stdout is discarded and the returned stdout value is empty.

    :param universal_newlines:
        If ``True``, pipes will be opened as text, and lines are split at all known
        line endings.

    :param shell:
        Whether to invoke commands through a shell
        (see :class:`Popen(..., shell=True) <subprocess.Popen>`).
        If this is not ``None``, it overrides :attr:`USE_SHELL`.

        Passing ``shell=True`` to this or any other GitPython function should be
        avoided, as it is unsafe under most circumstances. This is because it is
        typically not feasible to fully consider and account for the effect of shell
        expansions, especially when passing ``shell=True`` to other methods that
        forward it to :meth:`Git.execute`. Passing ``shell=True`` is also no longer
        needed (nor useful) to work around any known operating system specific
        issues.

    :param env:
        A dictionary of environment variables to be passed to
        :class:`subprocess.Popen`. These take precedence over the variables set
        with :meth:`update_environment`.

    :param max_chunk_size:
        Maximum number of bytes in one chunk of data passed to the `output_stream`
        in one invocation of its ``write()`` method. If the given number is not
        positive then the default value is used.

    :param strip_newline_in_stdout:
        Whether to strip the trailing ``\n`` of the command stdout.

    :param subprocess_kwargs:
        Keyword arguments to be passed to :class:`subprocess.Popen`. Please note
        that some of the valid kwargs are already set by this method; the ones you
        specify may not be the same ones.

    :return:
        * str(output), if `with_extended_output` is ``False`` (Default)
        * tuple(int(status), str(stdout), str(stderr)),
          if `with_extended_output` is ``True``

        If `output_stream` is given, the command's output is written to that
        stream and the stdout value in the results above is whatever is left to
        read afterwards (normally empty).

        If `as_process` is ``True``, the :class:`AutoInterrupt` wrapper around the
        started process is returned instead.

        Note that git is executed with ``LANGUAGE="C"`` and ``LC_ALL="C"`` to
        ensure consistent output regardless of system language.

    :raise git.exc.GitCommandError:
        If `with_exceptions` is ``True`` and the command exits with a non-zero
        status, or if `kill_after_timeout` is given on Windows.

    :raise git.exc.GitCommandNotFound:
        If the program to execute cannot be found.

    :note:
        If you add additional keyword arguments to the signature of this method, you
        must update the ``execute_kwargs`` variable housed in this module.
    """
    # Remove password for the command if present.
    redacted_command = remove_password_if_present(command)
    if self.GIT_PYTHON_TRACE and (self.GIT_PYTHON_TRACE != "full" or as_process):
        _logger.info(" ".join(redacted_command))

    # Allow the user to have the command executed in their working dir.
    try:
        cwd = self._working_dir or os.getcwd()  # type: Union[None, str]
        if not os.access(str(cwd), os.X_OK):
            cwd = None
    except FileNotFoundError:
        cwd = None

    # Start the process.
    inline_env = env
    env = os.environ.copy()
    # Attempt to force all output to plain ASCII English, which is what some parsing
    # code may expect.
    # According to https://askubuntu.com/a/311796, we are setting LANGUAGE as well
    # just to be sure.
    env["LANGUAGE"] = "C"
    env["LC_ALL"] = "C"
    env.update(self._environment)
    if inline_env is not None:
        env.update(inline_env)

    if sys.platform == "win32":
        if kill_after_timeout is not None:
            raise GitCommandError(
                redacted_command,
                '"kill_after_timeout" feature is not supported on Windows.',
            )
        cmd_not_found_exception = OSError
    else:
        cmd_not_found_exception = FileNotFoundError
    # END handle

    # When stdout is not wanted it is discarded by the OS; nothing is opened here
    # that would have to be closed again.
    stdout_sink = PIPE if with_stdout else DEVNULL
    if shell is None:
        # Get the value of USE_SHELL with no deprecation warning. Do this without
        # warnings.catch_warnings, to avoid a race condition with application code
        # configuring warnings. The value could be looked up in type(self).__dict__
        # or Git.__dict__, but those can break under some circumstances. This works
        # the same as self.USE_SHELL in more situations; see Git.__getattribute__.
        shell = super().__getattribute__("USE_SHELL")
    _logger.debug(
        "Popen(%s, cwd=%s, stdin=%s, shell=%s, universal_newlines=%s)",
        redacted_command,
        cwd,
        "<valid stream>" if istream else "None",
        shell,
        universal_newlines,
    )
    try:
        proc = safer_popen(
            command,
            env=env,
            cwd=cwd,
            bufsize=-1,
            stdin=(istream or DEVNULL),
            stderr=PIPE,
            stdout=stdout_sink,
            shell=shell,
            universal_newlines=universal_newlines,
            encoding=defenc if universal_newlines else None,
            **subprocess_kwargs,
        )
    except cmd_not_found_exception as err:
        raise GitCommandNotFound(redacted_command, err) from err
    else:
        # Replace with a typeguard for Popen[bytes]?
        # NOTE: proc.stdout is None at runtime when with_stdout is False.
        proc.stdout = cast(BinaryIO, proc.stdout)
        proc.stderr = cast(BinaryIO, proc.stderr)

    if as_process:
        return self.AutoInterrupt(proc, command)

    if sys.platform != "win32" and kill_after_timeout is not None:
        # Help mypy figure out this is not None even when used inside communicate().
        timeout = kill_after_timeout

        def kill_process(pid: int) -> None:
            """Callback to kill a process.

            This callback implementation would be ineffective and unsafe on Windows.
            """
            child_pids = []
            try:
                # The context manager closes the pipe and reaps the helper process.
                with Popen(["ps", "--ppid", str(pid)], stdout=PIPE) as ps_proc:
                    if ps_proc.stdout is not None:
                        for line in ps_proc.stdout:
                            fields = line.split()
                            if fields and fields[0].isdigit():
                                child_pids.append(int(fields[0]))
            except OSError:
                # ``ps`` could not be run. The children cannot be enumerated, but
                # the process itself must still be killed below.
                child_pids = []
            try:
                os.kill(pid, signal.SIGKILL)
                for child_pid in child_pids:
                    try:
                        os.kill(child_pid, signal.SIGKILL)
                    except OSError:
                        pass
                # Tell the main routine that the process was killed.
                kill_check.set()
            except OSError:
                # It is possible that the process gets completed in the duration
                # after timeout happens and before we try to kill the process.
                pass
            return

        def communicate() -> Tuple[AnyStr, AnyStr]:
            watchdog.start()
            try:
                out, err = proc.communicate()
            finally:
                # Never leave the timer armed, even if communicate() raised.
                watchdog.cancel()
            if kill_check.is_set():
                err = 'Timeout: the command "%s" did not complete in %d secs.' % (
                    " ".join(redacted_command),
                    timeout,
                )
                if not universal_newlines:
                    err = err.encode(defenc)
            return out, err

        # END helpers

        kill_check = threading.Event()
        watchdog = threading.Timer(timeout, kill_process, args=(proc.pid,))
    else:
        communicate = proc.communicate

    # Wait for the process to return.
    status = 0
    stdout_value: Union[str, bytes] = b""
    stderr_value: Union[str, bytes] = b""
    newline = "\n" if universal_newlines else b"\n"
    try:
        if output_stream is None:
            stdout_value, stderr_value = communicate()
            if stdout_value is None:
                # stdout was not piped (with_stdout=False), so there is no data.
                stdout_value = "" if universal_newlines else b""
            # Strip trailing "\n".
            if stdout_value.endswith(newline) and strip_newline_in_stdout:  # type: ignore[arg-type]
                stdout_value = stdout_value[:-1]
            if stderr_value.endswith(newline):  # type: ignore[arg-type]
                stderr_value = stderr_value[:-1]

            status = proc.returncode
        else:
            # NOTE: the kill_after_timeout watchdog is only started by
            # communicate(), which is not used in this branch.
            max_chunk_size = max_chunk_size if max_chunk_size and max_chunk_size > 0 else io.DEFAULT_BUFFER_SIZE
            if proc.stdout is not None:
                stream_copy(proc.stdout, output_stream, max_chunk_size)
                stdout_value = proc.stdout.read()
            stderr_value = proc.stderr.read()
            # Strip trailing "\n".
            if stderr_value.endswith(newline):  # type: ignore[arg-type]
                stderr_value = stderr_value[:-1]
            status = proc.wait()
        # END stdout handling
    finally:
        if proc.stdout is not None:
            proc.stdout.close()
        proc.stderr.close()

    if self.GIT_PYTHON_TRACE == "full":
        cmdstr = " ".join(redacted_command)

        def as_text(stdout_value: Union[bytes, str]) -> str:
            # Output that went to the caller's stream is not repeated in the log.
            if output_stream is not None:
                return "<OUTPUT_STREAM>"
            return safe_decode(stdout_value)

        # END as_text

        if stderr_value:
            _logger.info(
                "%s -> %d; stdout: '%s'; stderr: '%s'",
                cmdstr,
                status,
                as_text(stdout_value),
                safe_decode(stderr_value),
            )
        elif stdout_value:
            _logger.info("%s -> %d; stdout: '%s'", cmdstr, status, as_text(stdout_value))
        else:
            _logger.info("%s -> %d", cmdstr, status)
    # END handle debug printing

    if with_exceptions and status != 0:
        raise GitCommandError(redacted_command, status, stderr_value, stdout_value)

    if isinstance(stdout_value, bytes) and stdout_as_string:  # Could also be output_stream.
        stdout_value = safe_decode(stdout_value)

    # Allow access to the command's status code.
    if with_extended_output:
        return (status, stdout_value, safe_decode(stderr_value))
    else:
        return stdout_value
def environment(self) -> Dict[str, str]:
    """Return the extra environment variables used for git invocations.

    :return:
        The dict maintained by :meth:`update_environment`. It is the stored dict
        itself, not a copy.
    """
    extra_env = self._environment
    return extra_env
def update_environment(self, **kwargs: Any) -> Dict[str, Union[str, None]]:
    """Set environment variables for future git invocations. Return all changed
    values in a format that can be passed back into this function to revert the
    changes.

    Examples::

        old_env = self.update_environment(PWD='/tmp')
        self.update_environment(**old_env)

    :param kwargs:
        Environment variables to use for git processes. A value of ``None``
        removes the variable from the stored environment, if it is present.

    :return:
        Dict that maps environment variables to their old values. A variable that
        was not set before maps to ``None``.
    """
    previous = {}
    for name, new_value in kwargs.items():
        if new_value is None:
            # Removal request: only recorded when there is something to remove.
            if name in self._environment:
                previous[name] = self._environment.pop(name)
            continue
        # Remember the old value (None if unset) before overwriting it.
        previous[name] = self._environment.get(name)
        self._environment[name] = new_value
    return previous
@contextlib.contextmanager
def custom_environment(self, **kwargs: Any) -> Iterator[None]:
    """Context manager that applies :meth:`update_environment` on entry and
    restores the previous values on exit, even if the body raises.

    Examples::

        with self.custom_environment(GIT_SSH='/bin/ssh_wrapper'):
            repo.remotes.origin.fetch()

    :param kwargs:
        See :meth:`update_environment`.
    """
    saved_values = self.update_environment(**kwargs)
    try:
        yield
    finally:
        # Feeding the returned old values back in undoes the change.
        self.update_environment(**saved_values)
def transform_kwarg(self, name: str, value: Any, split_single_char_options: bool) -> List[str]:
    """Transform one Python-style keyword argument into git command line options.

    :param name:
        The option name. A single character becomes a short option (``-n``); a
        longer name is passed through :func:`dashify` and becomes a long option
        (``--max-count``).

    :param value:
        ``True`` produces the bare flag. ``False`` and ``None`` produce nothing.
        Any other value, including ``0``, is formatted with ``%s`` and attached to
        the option.

    :param split_single_char_options:
        For short options with a value, whether to emit the value as a separate
        argument (``["-n", "5"]``) or joined to the flag (``["-n5"]``). It has no
        effect on long options, which are always emitted as ``--name=value``.

    :return:
        The list of command line arguments for this option, possibly empty.
    """
    # Only the False and None objects switch an option off. Identity checks are
    # used so that 0, which compares equal to False, is still passed to git; this
    # makes short options behave the same as long ones.
    if value is False or value is None:
        return []

    if len(name) == 1:
        if value is True:
            return ["-%s" % name]
        if split_single_char_options:
            return ["-%s" % name, "%s" % value]
        return ["-%s%s" % (name, value)]

    if value is True:
        return ["--%s" % dashify(name)]
    return ["--%s=%s" % (dashify(name), value)]
def transform_kwargs(self, split_single_char_options: bool = True, **kwargs: Any) -> List[str]:
    """Transform Python-style kwargs into git command line options.

    :param split_single_char_options:
        Passed on to :meth:`transform_kwarg` for every option.

    :param kwargs:
        The options to convert. A list or tuple value repeats the option once per
        element.

    :return:
        All options produced by :meth:`transform_kwarg`, in keyword order.
    """
    options: List[str] = []
    for key, raw in kwargs.items():
        # Treat a scalar as a one-element sequence so both cases share one loop.
        values = raw if isinstance(raw, (list, tuple)) else (raw,)
        for single in values:
            options.extend(self.transform_kwarg(key, single, split_single_char_options))
    return options
@classmethod
def _unpack_args(cls, arg_list: Sequence[str]) -> List[str]:
    """Flatten arbitrarily nested lists and tuples into a flat list of strings.

    :param arg_list:
        A list or tuple (possibly nested), or a single value. Anything that is not
        a list or tuple is converted with :class:`str`.

    :return:
        The flattened arguments, in their original order.
    """
    if not isinstance(arg_list, (list, tuple)):
        return [str(arg_list)]

    flattened: List[str] = []
    for item in arg_list:
        flattened += cls._unpack_args(item)
    return flattened
def __call__(self, **kwargs: Any) -> "Git":
    """Set command line options for the git executable that apply to the next
    subcommand call only.

    :param kwargs:
        A dict of keyword arguments.
        These arguments are passed as in :meth:`_call_process`, but will be passed
        to the git command rather than the subcommand.

    :return:
        This instance, so that the subcommand call can be chained.

    Examples::

        git(work_tree='/tmp').difftool()
    """
    one_shot_options = self.transform_kwargs(split_single_char_options=True, **kwargs)
    self._git_options = one_shot_options
    return self
@overload
def _call_process(
    self, method: str, *args: None, **kwargs: None
) -> str: ...  # If no args were given, execute the call with all defaults.

@overload
def _call_process(
    self,
    method: str,
    istream: int,
    as_process: Literal[True],
    *args: Any,
    **kwargs: Any,
) -> "Git.AutoInterrupt": ...

@overload
def _call_process(
    self, method: str, *args: Any, **kwargs: Any
) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], "Git.AutoInterrupt"]: ...

def _call_process(
    self, method: str, *args: Any, **kwargs: Any
) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], "Git.AutoInterrupt"]:
    """Build the command line for a git subcommand and run it with :meth:`execute`.

    :param method:
        The command. Contained ``_`` characters will be converted to hyphens, such
        as in ``ls_files`` to call ``ls-files``.

    :param args:
        The list of arguments. If ``None`` is included, it will be pruned.
        This allows your commands to call git more conveniently, as ``None`` is
        realized as non-existent. Nested lists and tuples are flattened.

    :param kwargs:
        Contains key-values for the following:

        - The :meth:`execute()` kwds, as listed in ``execute_kwargs``.
        - "Command options" to be converted by :meth:`transform_kwargs`.
        - The ``insert_kwargs_after`` key, whose value must match one of ``*args``.
          The command options are then placed directly after that argument instead
          of before all arguments.

    Examples::

        git.rev_list('master', max_count=10, header=True)

    turns into::

        git rev-list --max-count=10 --header master

    :return:
        Same as :meth:`execute`. If no args are given, used :meth:`execute`'s
        default (especially ``as_process = False``, ``stdout_as_string = True``) and
        return :class:`str`.

    :raise ValueError:
        If ``insert_kwargs_after`` names an argument that is not in ``*args``.
    """
    # Separate what is meant for execute() from what becomes a command option.
    # Otherwise the execute() keywords would end up on the command line.
    exec_kwargs = {}
    opts_kwargs = {}
    for key, val in kwargs.items():
        if key in execute_kwargs:
            exec_kwargs[key] = val
        else:
            opts_kwargs[key] = val

    anchor = opts_kwargs.pop("insert_kwargs_after", None)

    # Prepare the argument list.
    opt_args = self.transform_kwargs(**opts_kwargs)
    ext_args = self._unpack_args([a for a in args if a is not None])

    if anchor is not None:
        try:
            index = ext_args.index(anchor)
        except ValueError as err:
            raise ValueError(
                "Couldn't find argument '%s' in args %s to insert cmd options after"
                % (anchor, str(ext_args))
            ) from err
        split_at = index + 1
        args_list = ext_args[:split_at] + opt_args + ext_args[split_at:]
    else:
        args_list = opt_args + ext_args

    # Executable first, then the persistent and the one-shot git options.
    call = [self.GIT_PYTHON_GIT_EXECUTABLE, *self._persistent_git_options, *self._git_options]
    # The one-shot options are used up now; reset them to avoid side effects.
    self._git_options = ()

    call.append(dashify(method))
    call.extend(args_list)

    return self.execute(call, **exec_kwargs)
def _parse_object_header(self, header_line: str) -> Tuple[str, str, int]:
    """Split an object header line into its three fields.

    :param header_line:
        A line of the form::

            <hex_sha> type_string size_as_int

    :return:
        (hex_sha, type_string, size_as_int)

    :raise ValueError:
        If the line is empty, does not consist of exactly three fields, or its
        first field is not 40 characters long.
    """
    tokens = header_line.split()

    if not tokens:
        err_msg = (
            f"SHA is empty, possible dubious ownership in the repository "
            f"""at {self._working_dir}.\n If this is unintended run:\n\n """
            f""" "git config --global --add safe.directory {self._working_dir}" """
        )
        raise ValueError(err_msg)

    if len(tokens) != 3:
        raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()))

    hexsha, type_string, size = tokens
    if len(hexsha) != 40:
        raise ValueError("Failed to parse header: %r" % header_line)

    return (hexsha, type_string, int(size))
def _prepare_ref(self, ref: AnyStr) -> bytes:
    """Turn a ref into one newline-terminated line of bytes.

    The command reading the result separates refs on stdin by newlines.

    :param ref:
        A :class:`str`, ASCII :class:`bytes` (assumed to be a 40 byte hexsha), or
        any other object, which is converted with :class:`str`.

    :return:
        The ref encoded with the default encoding, ending in exactly the newline
        it already had or a single appended one.
    """
    if isinstance(ref, str):
        text = ref
    elif isinstance(ref, bytes):
        # bin-to-ascii for some reason returns bytes, not text.
        text = ref.decode("ascii")
    else:
        text = str(ref)  # Could be ref-object.

    if not text.endswith("\n"):
        text += "\n"
    return text.encode(defenc)
def _get_persistent_cmd(self, attr_name: str, cmd_name: str, *args: Any, **kwargs: Any) -> "Git.AutoInterrupt":
    """Return the command cached under `attr_name`, starting it if necessary.

    :param attr_name:
        Name of the attribute on this instance that caches the command.

    :param cmd_name:
        The git command to start via :meth:`_call_process` when nothing is cached.

    :param args:
        Positional arguments for the command.

    :param kwargs:
        Keyword arguments for the command. They are applied on top of the defaults
        ``istream=PIPE`` and ``as_process=True``.

    :return:
        The cached or newly started command.
    """
    cached = getattr(self, attr_name)
    if cached is None:
        options = dict(istream=PIPE, as_process=True)
        options.update(kwargs)
        cached = self._call_process(cmd_name, *args, **options)
        setattr(self, attr_name, cached)
        cached = cast("Git.AutoInterrupt", cached)
    return cached
def __get_object_header(self, cmd: "Git.AutoInterrupt", ref: AnyStr) -> Tuple[str, str, int]:
    """Send `ref` to a running command and parse the header line it answers with.

    :param cmd:
        The running command. Both its stdin and its stdout must be available.

    :param ref:
        The ref to look up; it is prepared with :meth:`_prepare_ref`.

    :return:
        (hexsha, type_string, size_as_int), as parsed by
        :meth:`_parse_object_header`.

    :raise ValueError:
        If the command's stdin or stdout is missing.
    """
    if not (cmd.stdin and cmd.stdout):
        raise ValueError("cmd stdin was empty")

    cmd.stdin.write(self._prepare_ref(ref))
    cmd.stdin.flush()
    header_line = cmd.stdout.readline()
    return self._parse_object_header(header_line)
def get_object_header(self, ref: str) -> Tuple[str, str, int]:
    """Quickly look up the type and size of the object behind the given ref.

    :note:
        The command behind this method is started on the first call only; later
        calls reuse it.

    :param ref:
        The ref to examine.

    :return:
        (hexsha, type_string, size_as_int)
    """
    batch_check_cmd = self._get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
    header = self.__get_object_header(batch_check_cmd, ref)
    return header
def get_object_data(self, ref: str) -> Tuple[str, str, int, bytes]:
    """Similar to :meth:`get_object_header`, but returns object data as well.

    The data is read in full from the stream that :meth:`stream_object_data`
    provides.

    :param ref:
        The ref whose object should be read.

    :return:
        (hexsha, type_string, size_as_int, data_string)

    :note:
        Not threadsafe.
    """
    hexsha, typename, size, stream = self.stream_object_data(ref)
    payload = stream.read(size)
    # Drop our reference to the stream before returning.
    del stream
    return (hexsha, typename, size, payload)
def stream_object_data(self, ref: str) -> Tuple[str, str, int, "Git.CatFileContentStream"]:
    """Similar to :meth:`get_object_data`, but returns the data as a stream.

    :param ref:
        The ref whose object should be streamed.

    :return:
        (hexsha, type_string, size_as_int, stream)

    :note:
        This method is not threadsafe. You need one independent :class:`Git`
        instance per thread to be safe!
    """
    batch_cmd = self._get_persistent_cmd("cat_file_all", "cat_file", batch=True)
    hexsha, typename, size = self.__get_object_header(batch_cmd, ref)

    # Fall back to an empty in-memory stream if the command has no stdout.
    source = batch_cmd.stdout
    if source is None:
        source = io.BytesIO()

    content = self.CatFileContentStream(size, source)
    return (hexsha, typename, size, content)
def clear_cache(self) -> "Git":
    """Clear all kinds of internal caches to release resources.

    Currently this calls ``__del__()`` on the persistent commands stored in
    :attr:`cat_file_all` and :attr:`cat_file_header`, and then forgets them.

    :return:
        self
    """
    persistent_cmds = (self.cat_file_all, self.cat_file_header)
    for running in persistent_cmds:
        if running:
            running.__del__()

    self.cat_file_all = self.cat_file_header = None
    return self
|