# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: https://opensource.org/license/bsd-3-clause/
from gitdb.db.base import ( | |
FileDBBase, | |
ObjectDBR, | |
ObjectDBW | |
) | |
from gitdb.exc import ( | |
BadObject, | |
AmbiguousObjectName | |
) | |
from gitdb.stream import ( | |
DecompressMemMapReader, | |
FDCompressedSha1Writer, | |
FDStream, | |
Sha1Writer | |
) | |
from gitdb.base import ( | |
OStream, | |
OInfo | |
) | |
from gitdb.util import ( | |
file_contents_ro_filepath, | |
ENOENT, | |
hex_to_bin, | |
bin_to_hex, | |
exists, | |
chmod, | |
isfile, | |
remove, | |
rename, | |
dirname, | |
basename, | |
join | |
) | |
from gitdb.fun import ( | |
chunk_size, | |
loose_object_header_info, | |
write_object, | |
stream_copy | |
) | |
from gitdb.utils.encoding import force_bytes | |
import tempfile | |
import os | |
import sys | |
__all__ = ('LooseObjectDB', ) | |
class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): | |
"""A database which operates on loose object files""" | |
# CONFIGURATION | |
# chunks in which data will be copied between streams | |
stream_chunk_size = chunk_size | |
# On windows we need to keep it writable, otherwise it cannot be removed | |
# either | |
new_objects_mode = int("444", 8) | |
if os.name == 'nt': | |
new_objects_mode = int("644", 8) | |
def __init__(self, root_path): | |
super().__init__(root_path) | |
self._hexsha_to_file = dict() | |
# Additional Flags - might be set to 0 after the first failure | |
# Depending on the root, this might work for some mounts, for others not, which | |
# is why it is per instance | |
self._fd_open_flags = getattr(os, 'O_NOATIME', 0) | |
#{ Interface | |
def object_path(self, hexsha): | |
""" | |
:return: path at which the object with the given hexsha would be stored, | |
relative to the database root""" | |
return join(hexsha[:2], hexsha[2:]) | |
def readable_db_object_path(self, hexsha): | |
""" | |
:return: readable object path to the object identified by hexsha | |
:raise BadObject: If the object file does not exist""" | |
try: | |
return self._hexsha_to_file[hexsha] | |
except KeyError: | |
pass | |
# END ignore cache misses | |
# try filesystem | |
path = self.db_path(self.object_path(hexsha)) | |
if exists(path): | |
self._hexsha_to_file[hexsha] = path | |
return path | |
# END handle cache | |
raise BadObject(hexsha) | |
def partial_to_complete_sha_hex(self, partial_hexsha): | |
""":return: 20 byte binary sha1 string which matches the given name uniquely | |
:param name: hexadecimal partial name (bytes or ascii string) | |
:raise AmbiguousObjectName: | |
:raise BadObject: """ | |
candidate = None | |
for binsha in self.sha_iter(): | |
if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)): | |
# it can't ever find the same object twice | |
if candidate is not None: | |
raise AmbiguousObjectName(partial_hexsha) | |
candidate = binsha | |
# END for each object | |
if candidate is None: | |
raise BadObject(partial_hexsha) | |
return candidate | |
#} END interface | |
def _map_loose_object(self, sha): | |
""" | |
:return: memory map of that file to allow random read access | |
:raise BadObject: if object could not be located""" | |
db_path = self.db_path(self.object_path(bin_to_hex(sha))) | |
try: | |
return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) | |
except OSError as e: | |
if e.errno != ENOENT: | |
# try again without noatime | |
try: | |
return file_contents_ro_filepath(db_path) | |
except OSError as new_e: | |
raise BadObject(sha) from new_e | |
# didn't work because of our flag, don't try it again | |
self._fd_open_flags = 0 | |
else: | |
raise BadObject(sha) from e | |
# END handle error | |
# END exception handling | |
def set_ostream(self, stream): | |
""":raise TypeError: if the stream does not support the Sha1Writer interface""" | |
if stream is not None and not isinstance(stream, Sha1Writer): | |
raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) | |
return super().set_ostream(stream) | |
def info(self, sha): | |
m = self._map_loose_object(sha) | |
try: | |
typ, size = loose_object_header_info(m) | |
return OInfo(sha, typ, size) | |
finally: | |
if hasattr(m, 'close'): | |
m.close() | |
# END assure release of system resources | |
def stream(self, sha): | |
m = self._map_loose_object(sha) | |
type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True) | |
return OStream(sha, type, size, stream) | |
def has_object(self, sha): | |
try: | |
self.readable_db_object_path(bin_to_hex(sha)) | |
return True | |
except BadObject: | |
return False | |
# END check existence | |
def store(self, istream): | |
"""note: The sha we produce will be hex by nature""" | |
tmp_path = None | |
writer = self.ostream() | |
if writer is None: | |
# open a tmp file to write the data to | |
fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) | |
if istream.binsha is None: | |
writer = FDCompressedSha1Writer(fd) | |
else: | |
writer = FDStream(fd) | |
# END handle direct stream copies | |
# END handle custom writer | |
try: | |
try: | |
if istream.binsha is not None: | |
# copy as much as possible, the actual uncompressed item size might | |
# be smaller than the compressed version | |
stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size) | |
else: | |
# write object with header, we have to make a new one | |
write_object(istream.type, istream.size, istream.read, writer.write, | |
chunk_size=self.stream_chunk_size) | |
# END handle direct stream copies | |
finally: | |
if tmp_path: | |
writer.close() | |
# END assure target stream is closed | |
except: | |
if tmp_path: | |
os.remove(tmp_path) | |
raise | |
# END assure tmpfile removal on error | |
hexsha = None | |
if istream.binsha: | |
hexsha = istream.hexsha | |
else: | |
hexsha = writer.sha(as_hex=True) | |
# END handle sha | |
if tmp_path: | |
obj_path = self.db_path(self.object_path(hexsha)) | |
obj_dir = dirname(obj_path) | |
os.makedirs(obj_dir, exist_ok=True) | |
# END handle destination directory | |
# rename onto existing doesn't work on NTFS | |
if isfile(obj_path): | |
remove(tmp_path) | |
else: | |
rename(tmp_path, obj_path) | |
# end rename only if needed | |
# make sure its readable for all ! It started out as rw-- tmp file | |
# but needs to be rwrr | |
chmod(obj_path, self.new_objects_mode) | |
# END handle dry_run | |
istream.binsha = hex_to_bin(hexsha) | |
return istream | |
def sha_iter(self): | |
# find all files which look like an object, extract sha from there | |
for root, dirs, files in os.walk(self.root_path()): | |
root_base = basename(root) | |
if len(root_base) != 2: | |
continue | |
for f in files: | |
if len(f) != 38: | |
continue | |
yield hex_to_bin(root_base + f) | |
# END for each file | |
# END for each walk iteration | |
def size(self): | |
return len(tuple(self.sha_iter())) | |