|
import os |
|
|
|
import pygit2 |
|
|
|
from fsspec.spec import AbstractFileSystem |
|
|
|
from .memory import MemoryFile |
|
|
|
|
|
class GitFileSystem(AbstractFileSystem):
    """Browse the files of a local git repo at any hash/tag/branch

    (experimental backend)
    """

    root_marker = ""
    cachable = True

    def __init__(self, path=None, fo=None, ref=None, **kwargs):
        """

        Parameters
        ----------
        path: str (optional)
            Local location of the repo (uses current directory if not given).
            May be deprecated in favour of ``fo``. When used with a higher
            level function such as fsspec.open(), may be of the form
            "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
            file path should not contain "@" or ":").
        fo: str (optional)
            Same as ``path``, but passed as part of a chained URL. This one
            takes precedence if both are given.
        ref: str (optional)
            Reference to work with, could be a hash, tag or branch name.
            Defaults to ``"master"``. Note that ``ls`` and ``open`` also take
            a ``ref``; the value given here is the default for those
            operations.
        kwargs
            Passed on to the ``AbstractFileSystem`` constructor.
        """
        super().__init__(**kwargs)
        # Precedence: chained-URL ``fo``, then explicit ``path``, then the CWD.
        self.repo = pygit2.Repository(fo or path or os.getcwd())
        self.ref = ref or "master"

    @classmethod
    def _strip_protocol(cls, path):
        """Reduce a URL-like path to the file path inside the repository.

        After the parent class has stripped the protocol, everything up to
        the first ":" (the repo location) and then everything up to the
        first "@" (the ref) is discarded, along with leading slashes.
        """
        path = super()._strip_protocol(path).lstrip("/")
        if ":" in path:
            path = path.split(":", 1)[1]
        if "@" in path:
            path = path.split("@", 1)[1]
        return path.lstrip("/")

    def _path_to_object(self, path, ref):
        """Resolve ``path`` at ``ref`` to the pygit2 object stored there.

        Parameters
        ----------
        path: str
            Slash-separated path relative to the repository root; empty
            components (leading, trailing or doubled slashes) are skipped,
            so ``""`` yields the root tree.
        ref: str or None
            Hash, tag or branch name; falls back to ``self.ref`` when falsy.

        Returns
        -------
        The object found at ``path`` (the commit's root tree, or whatever
        the final path component indexes to).

        Raises
        ------
        FileNotFoundError
            If a component does not exist, or if the path tries to descend
            into something that is not a tree.
        """
        # resolve_refish returns (commit, reference); only the commit is used.
        commit, _ = self.repo.resolve_refish(ref or self.ref)
        obj = commit.tree
        for part in path.split("/"):
            if not part:
                continue
            # Previously a missing entry surfaced as a bare KeyError and a
            # path continuing past a non-tree silently returned that object.
            if not isinstance(obj, pygit2.Tree) or part not in obj:
                raise FileNotFoundError(path)
            obj = obj[part]
        return obj

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Extract constructor kwargs from a "git://[repo:][ref@]file" URL.

        Returns a dict that may contain ``"path"`` (text before the first
        ":") and ``"ref"`` (text before the first "@" in the remainder).
        """
        if path.startswith("git://"):
            path = path[6:]
        out = {}
        if ":" in path:
            out["path"], path = path.split(":", 1)
        if "@" in path:
            out["ref"], path = path.split("@", 1)
        return out

    @staticmethod
    def _entry_info(obj, name):
        """Build the info dict for one pygit2 object listed under ``name``."""
        is_dir = isinstance(obj, pygit2.Tree)
        return {
            "type": "directory" if is_dir else "file",
            "name": name,
            # NOTE(review): str(obj.id) is the same hex digest that
            # ``obj.hex`` gave; ``.hex`` appears to be deprecated/removed in
            # recent pygit2 releases - confirm against the pinned version.
            "hex": str(obj.id),
            "mode": f"{obj.filemode:o}",
            "size": 0 if is_dir else obj.size,
        }

    def ls(self, path, detail=True, ref=None, **kwargs):
        """List the entries at ``path`` as of ``ref``.

        Parameters
        ----------
        path: str
            Directory or file path; protocol/repo/ref prefixes are stripped.
        detail: bool
            If True return a list of info dicts (keys ``type``, ``name``,
            ``hex``, ``mode``, ``size``); otherwise only the names.
        ref: str (optional)
            Hash, tag or branch name; defaults to ``self.ref``.

        Returns
        -------
        For a tree, one entry per child; for anything else, a one-element
        list describing that object, named by its full path.
        """
        path = self._strip_protocol(path)
        obj = self._path_to_object(path, ref)
        if isinstance(obj, pygit2.Tree):
            out = [
                self._entry_info(child, "/".join([path, child.name]).lstrip("/"))
                for child in obj
            ]
        else:
            # Use the full path, consistent with the names produced for
            # directory listings (previously only the basename was given).
            out = [self._entry_info(obj, path)]
        if detail:
            return out
        return [o["name"] for o in out]

    def ukey(self, path, ref=None):
        """Return the ``"hex"`` field of ``info(path, ref=ref)`` as a key."""
        return self.info(path, ref=ref)["hex"]

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        ref=None,
        **kwargs,
    ):
        """Return an in-memory file holding the data of ``path`` at ``ref``.

        ``mode``, ``block_size``, ``autocommit`` and ``cache_options`` are
        accepted for signature compatibility but are not used here.
        """
        # _path_to_object already falls back to self.ref when ref is falsy.
        obj = self._path_to_object(path, ref)
        return MemoryFile(data=obj.data)
|
|