Spaces:
Running
Running
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors | |
# | |
# This module is part of GitDB and is released under | |
# the New BSD License: https://opensource.org/license/bsd-3-clause/ | |
"""Module containing a database to deal with packs""" | |
from gitdb.db.base import ( | |
FileDBBase, | |
ObjectDBR, | |
CachingDB | |
) | |
from gitdb.util import LazyMixin | |
from gitdb.exc import ( | |
BadObject, | |
UnsupportedOperation, | |
AmbiguousObjectName | |
) | |
from gitdb.pack import PackEntity | |
from functools import reduce | |
import os | |
import glob | |
__all__ = ('PackedDB', ) | |
#{ Utilities | |
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): | |
"""A database operating on a set of object packs""" | |
# sort the priority list every N queries | |
# Higher values are better, performance tests don't show this has | |
# any effect, but it should have one | |
_sort_interval = 500 | |
def __init__(self, root_path): | |
super().__init__(root_path) | |
# list of lists with three items: | |
# * hits - number of times the pack was hit with a request | |
# * entity - Pack entity instance | |
# * sha_to_index - PackIndexFile.sha_to_index method for direct cache query | |
# self._entities = list() # lazy loaded list | |
self._hit_count = 0 # amount of hits | |
self._st_mtime = 0 # last modification data of our root path | |
def _set_cache_(self, attr): | |
if attr == '_entities': | |
self._entities = list() | |
self.update_cache(force=True) | |
# END handle entities initialization | |
def _sort_entities(self): | |
self._entities.sort(key=lambda l: l[0], reverse=True) | |
def _pack_info(self, sha): | |
""":return: tuple(entity, index) for an item at the given sha | |
:param sha: 20 or 40 byte sha | |
:raise BadObject: | |
**Note:** This method is not thread-safe, but may be hit in multi-threaded | |
operation. The worst thing that can happen though is a counter that | |
was not incremented, or the list being in wrong order. So we safe | |
the time for locking here, lets see how that goes""" | |
# presort ? | |
if self._hit_count % self._sort_interval == 0: | |
self._sort_entities() | |
# END update sorting | |
for item in self._entities: | |
index = item[2](sha) | |
if index is not None: | |
item[0] += 1 # one hit for you | |
self._hit_count += 1 # general hit count | |
return (item[1], index) | |
# END index found in pack | |
# END for each item | |
# no hit, see whether we have to update packs | |
# NOTE: considering packs don't change very often, we safe this call | |
# and leave it to the super-caller to trigger that | |
raise BadObject(sha) | |
#{ Object DB Read | |
def has_object(self, sha): | |
try: | |
self._pack_info(sha) | |
return True | |
except BadObject: | |
return False | |
# END exception handling | |
def info(self, sha): | |
entity, index = self._pack_info(sha) | |
return entity.info_at_index(index) | |
def stream(self, sha): | |
entity, index = self._pack_info(sha) | |
return entity.stream_at_index(index) | |
def sha_iter(self): | |
for entity in self.entities(): | |
index = entity.index() | |
sha_by_index = index.sha | |
for index in range(index.size()): | |
yield sha_by_index(index) | |
# END for each index | |
# END for each entity | |
def size(self): | |
sizes = [item[1].index().size() for item in self._entities] | |
return reduce(lambda x, y: x + y, sizes, 0) | |
#} END object db read | |
#{ object db write | |
def store(self, istream): | |
"""Storing individual objects is not feasible as a pack is designed to | |
hold multiple objects. Writing or rewriting packs for single objects is | |
inefficient""" | |
raise UnsupportedOperation() | |
#} END object db write | |
#{ Interface | |
def update_cache(self, force=False): | |
""" | |
Update our cache with the actually existing packs on disk. Add new ones, | |
and remove deleted ones. We keep the unchanged ones | |
:param force: If True, the cache will be updated even though the directory | |
does not appear to have changed according to its modification timestamp. | |
:return: True if the packs have been updated so there is new information, | |
False if there was no change to the pack database""" | |
stat = os.stat(self.root_path()) | |
if not force and stat.st_mtime <= self._st_mtime: | |
return False | |
# END abort early on no change | |
self._st_mtime = stat.st_mtime | |
# packs are supposed to be prefixed with pack- by git-convention | |
# get all pack files, figure out what changed | |
pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack"))) | |
our_pack_files = {item[1].pack().path() for item in self._entities} | |
# new packs | |
for pack_file in (pack_files - our_pack_files): | |
# init the hit-counter/priority with the size, a good measure for hit- | |
# probability. Its implemented so that only 12 bytes will be read | |
entity = PackEntity(pack_file) | |
self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) | |
# END for each new packfile | |
# removed packs | |
for pack_file in (our_pack_files - pack_files): | |
del_index = -1 | |
for i, item in enumerate(self._entities): | |
if item[1].pack().path() == pack_file: | |
del_index = i | |
break | |
# END found index | |
# END for each entity | |
assert del_index != -1 | |
del(self._entities[del_index]) | |
# END for each removed pack | |
# reinitialize prioritiess | |
self._sort_entities() | |
return True | |
def entities(self): | |
""":return: list of pack entities operated upon by this database""" | |
return [item[1] for item in self._entities] | |
def partial_to_complete_sha(self, partial_binsha, canonical_length): | |
""":return: 20 byte sha as inferred by the given partial binary sha | |
:param partial_binsha: binary sha with less than 20 bytes | |
:param canonical_length: length of the corresponding canonical representation. | |
It is required as binary sha's cannot display whether the original hex sha | |
had an odd or even number of characters | |
:raise AmbiguousObjectName: | |
:raise BadObject: """ | |
candidate = None | |
for item in self._entities: | |
item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length) | |
if item_index is not None: | |
sha = item[1].index().sha(item_index) | |
if candidate and candidate != sha: | |
raise AmbiguousObjectName(partial_binsha) | |
candidate = sha | |
# END handle full sha could be found | |
# END for each entity | |
if candidate: | |
return candidate | |
# still not found ? | |
raise BadObject(partial_binsha) | |
#} END interface | |