loose.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
  2. #
  3. # This module is part of GitDB and is released under
  4. # the New BSD License: https://opensource.org/license/bsd-3-clause/
  5. from contextlib import suppress
  6. from gitdb.db.base import (
  7. FileDBBase,
  8. ObjectDBR,
  9. ObjectDBW
  10. )
  11. from gitdb.exc import (
  12. BadObject,
  13. AmbiguousObjectName
  14. )
  15. from gitdb.stream import (
  16. DecompressMemMapReader,
  17. FDCompressedSha1Writer,
  18. FDStream,
  19. Sha1Writer
  20. )
  21. from gitdb.base import (
  22. OStream,
  23. OInfo
  24. )
  25. from gitdb.util import (
  26. file_contents_ro_filepath,
  27. ENOENT,
  28. hex_to_bin,
  29. bin_to_hex,
  30. exists,
  31. chmod,
  32. isfile,
  33. remove,
  34. rename,
  35. dirname,
  36. basename,
  37. join
  38. )
  39. from gitdb.fun import (
  40. chunk_size,
  41. loose_object_header_info,
  42. write_object,
  43. stream_copy
  44. )
  45. from gitdb.utils.encoding import force_bytes
  46. import tempfile
  47. import os
  48. import sys
  49. import time
  __all__ = ('LooseObjectDB', )


  class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
      """A database which operates on loose object files.

      Every object is addressed by a path derived from its hex sha: the first
      two characters name a directory, the remaining ones the file within it
      (see :meth:`object_path`)."""

      # CONFIGURATION
      # chunks in which data will be copied between streams
      stream_chunk_size = chunk_size

      # Mode applied to newly written object files.
      # On windows we need to keep it writable, otherwise it cannot be removed
      # either
      new_objects_mode = int("444", 8)
      if os.name == 'nt':
          new_objects_mode = int("644", 8)

      def __init__(self, root_path):
          """Initialize the database to operate on the given root path.

          :param root_path: passed on unchanged to the base class initializer"""
          super().__init__(root_path)
          # cache: hexsha -> path of an object file known to exist
          self._hexsha_to_file = {}
          # Additional Flags - set to 0 after the first failure they caused.
          # Depending on the root, this might work for some mounts, for others
          # not, which is why it is per instance
          self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

      #{ Interface
      def object_path(self, hexsha):
          """
          :return: path at which the object with the given hexsha would be stored,
              relative to the database root"""
          return join(hexsha[:2], hexsha[2:])

      def readable_db_object_path(self, hexsha):
          """
          :return: readable object path to the object identified by hexsha.
              Paths found on the filesystem are remembered for later calls.
          :raise BadObject: If the object file does not exist"""
          with suppress(KeyError):
              return self._hexsha_to_file[hexsha]
          # END ignore cache misses

          # try filesystem
          path = self.db_path(self.object_path(hexsha))
          if exists(path):
              self._hexsha_to_file[hexsha] = path
              return path
          # END handle cache
          raise BadObject(hexsha)

      def partial_to_complete_sha_hex(self, partial_hexsha):
          """:return: 20 byte binary sha1 string which matches the given name uniquely
          :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
          :raise AmbiguousObjectName: if more than one object matches the prefix
          :raise BadObject: if no object matches the prefix"""
          # the prefix does not change while iterating, convert it only once
          prefix = force_bytes(partial_hexsha)
          candidate = None
          for binsha in self.sha_iter():
              if bin_to_hex(binsha).startswith(prefix):
                  # it can't ever find the same object twice
                  if candidate is not None:
                      raise AmbiguousObjectName(partial_hexsha)
                  candidate = binsha
          # END for each object
          if candidate is None:
              raise BadObject(partial_hexsha)
          return candidate

      #} END interface

      def _map_loose_object(self, sha):
          """
          :return: memory map of that file to allow random read access
          :param sha: binary sha of the object to map
          :raise BadObject: if object could not be located or opened"""
          db_path = self.db_path(self.object_path(bin_to_hex(sha)))
          try:
              return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
          except OSError as e:
              if e.errno == ENOENT:
                  raise BadObject(sha) from e
              # END handle missing file

              # The failure might be caused by our additional flags - try again
              # without them
              try:
                  mapped = file_contents_ro_filepath(db_path)
              except OSError as new_e:
                  raise BadObject(sha) from new_e
              # END handle retry failure

              # It didn't work because of our flag, don't try it again.
              # This has to happen before returning, otherwise every future call
              # would fail once and retry.
              self._fd_open_flags = 0
              return mapped
          # END exception handling

      def set_ostream(self, stream):
          """Set the stream to which stored objects are written.

          :param stream: ``None`` or an object supporting the Sha1Writer interface
          :return: whatever the base class implementation returns
          :raise TypeError: if the stream does not support the Sha1Writer interface"""
          if stream is not None and not isinstance(stream, Sha1Writer):
              raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
          return super().set_ostream(stream)

      def info(self, sha):
          """
          :return: OInfo instance carrying the type and size read from the header
              of the object identified by the given binary sha
          :raise BadObject: if the object could not be mapped"""
          m = self._map_loose_object(sha)
          try:
              typ, size = loose_object_header_info(m)
              return OInfo(sha, typ, size)
          finally:
              if hasattr(m, 'close'):
                  m.close()
          # END assure release of system resources

      def stream(self, sha):
          """
          :return: OStream instance providing the type, the size and a
              decompressing reader for the object identified by the given binary sha
          :raise BadObject: if the object could not be mapped"""
          m = self._map_loose_object(sha)
          # the map is handed to the reader, which is asked to close it on deletion
          typ, size, reader = DecompressMemMapReader.new(m, close_on_deletion=True)
          return OStream(sha, typ, size, reader)

      def has_object(self, sha):
          """:return: True if an object file for the given binary sha exists"""
          try:
              self.readable_db_object_path(bin_to_hex(sha))
          except BadObject:
              return False
          # END check existence
          return True

      def store(self, istream):
          """Write the object provided by istream into the database.

          If no output stream was set, the data goes into a temporary file below
          the database root which is moved to its final object path afterwards.
          Otherwise everything is written to the configured output stream only.

          note: The sha we produce will be hex by nature

          :param istream: stream to read the object from. If its ``binsha`` is
              set, its data is copied as is, otherwise an object with a new
              header is written and the sha is computed while writing.
          :return: the given istream, with its ``binsha`` set
          :raise PermissionError: if the mode of the new object file could not
              be adjusted within the retry period"""
          tmp_path = None
          writer = self.ostream()
          if writer is None:
              # open a tmp file to write the data to
              fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

              if istream.binsha is None:
                  writer = FDCompressedSha1Writer(fd)
              else:
                  writer = FDStream(fd)
              # END handle direct stream copies
          # END handle custom writer

          try:
              try:
                  if istream.binsha is not None:
                      # copy as much as possible, the actual uncompressed item size might
                      # be smaller than the compressed version
                      stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
                  else:
                      # write object with header, we have to make a new one
                      write_object(istream.type, istream.size, istream.read, writer.write,
                                   chunk_size=self.stream_chunk_size)
                  # END handle direct stream copies
              finally:
                  # only writers we created ourselves are closed here
                  if tmp_path:
                      writer.close()
              # END assure target stream is closed
          except BaseException:
              # Deliberately catches everything, including KeyboardInterrupt, as
              # the error is re-raised once the tmp file is gone
              if tmp_path:
                  remove(tmp_path)
              raise
          # END assure tmpfile removal on error

          if istream.binsha:
              hexsha = istream.hexsha
          else:
              hexsha = writer.sha(as_hex=True)
          # END handle sha

          if tmp_path:
              obj_path = self.db_path(self.object_path(hexsha))
              os.makedirs(dirname(obj_path), exist_ok=True)
              # END handle destination directory

              # rename onto existing doesn't work on NTFS
              if isfile(obj_path):
                  remove(tmp_path)
              else:
                  rename(tmp_path, obj_path)
              # end rename only if needed

              # Ensure rename is actually done and file is stable
              # Try up to 14 times, waiting a growing amount of ms (mostly squares)
              # after each failed attempt.
              # The total maximum wait time is 1000ms, which should be vastly enough
              # for the OS to return and commit the file to disk.
              for backoff_ms in (1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 181):
                  try:
                      # make sure its readable for all ! It started out as rw-- tmp file
                      # but needs to be rwrr
                      chmod(obj_path, self.new_objects_mode)
                  except PermissionError:
                      time.sleep(backoff_ms / 1000.0)
                  else:
                      break
              else:
                  raise PermissionError(
                      "Impossible to apply `chmod` to file {}".format(obj_path)
                  )
              # END retry chmod
          # END handle tmp file

          istream.binsha = hex_to_bin(hexsha)
          return istream

      def sha_iter(self):
          """:return: iterator yielding the binary sha of each file below the
              database root which looks like an object, i.e. a file with a 38
              character name within a directory with a 2 character name"""
          # find all files which look like an object, extract sha from there
          for root, _dirs, files in os.walk(self.root_path()):
              root_base = basename(root)
              if len(root_base) != 2:
                  continue

              for f in files:
                  if len(f) != 38:
                      continue
                  yield hex_to_bin(root_base + f)
              # END for each file
          # END for each walk iteration

      def size(self):
          """:return: amount of objects found by :meth:`sha_iter`"""
          # count lazily instead of keeping all shas in memory at once
          return sum(1 for _ in self.sha_iter())