|
|
|
|
|
|
|
|
|
|
|
"""Utility for reading some info from inside a zip file. |
|
""" |
|
|
|
import typing |
|
import zipfile |
|
|
|
from dataclasses import dataclass |
|
from functools import lru_cache |
|
from typing_extensions import Literal |
|
|
|
|
|
# Default number of entries in the LRU cache of opened zip files.
DEFAULT_SIZE = 32
# Open modes used to annotate the ``mode`` argument of ``_open_zip``.
MODE = Literal['r', 'w', 'x', 'a']
|
|
|
|
|
@dataclass(order=True)
class PathInZip:
    """Hold the location of a file that lives inside a zip archive.

    The location is given as a single string of the form
    ``<path_to_zip>:<relative_path_inside_zip>``. For a zip file
    ``/some/location/foo.zip`` that contains ``/data/file1.json``, the
    expected value is ``"/some/location/foo.zip:/data/file1.json"``.

    The string is split on every ``INFO_PATH_SEP``, so neither component may
    itself contain the separator (for example a Windows drive letter).

    Args:
        path (str): The combined path, in the form described above.

    Raises:
        AssertionError: If ``path`` does not contain exactly one separator.
    """

    INFO_PATH_SEP = ':'
    zip_path: str
    file_path: str

    def __init__(self, path: str) -> None:
        parts = path.split(self.INFO_PATH_SEP)
        if len(parts) != 2:
            # Raised explicitly instead of through ``assert`` so the check is
            # not stripped under ``python -O``. The exception type is kept so
            # callers that already catch AssertionError keep working.
            raise AssertionError(
                f"Expected '<zip_path>{self.INFO_PATH_SEP}<file_path>' with "
                f"exactly one {self.INFO_PATH_SEP!r}, got {path!r}"
            )
        self.zip_path, self.file_path = parts

    @classmethod
    def from_paths(cls, zip_path: str, file_path: str) -> 'PathInZip':
        """Build an instance from the two components given separately."""
        return cls(cls.INFO_PATH_SEP.join((zip_path, file_path)))

    def __str__(self) -> str:
        """Return the combined ``<zip_path>:<file_path>`` form."""
        return self.zip_path + self.INFO_PATH_SEP + self.file_path
|
|
|
|
|
def _open_zip(path: str, mode: MODE = 'r'):
    """Open the zip archive at ``path`` and return its ``ZipFile`` handle.

    Args:
        path (str): Filesystem path of the zip archive.
        mode (MODE): Archive open mode forwarded to ``zipfile.ZipFile``.
    Returns:
        The opened ``zipfile.ZipFile``; this function does not close it.
    """
    archive = zipfile.ZipFile(file=path, mode=mode)
    return archive
|
|
|
|
|
# Memoized variant of ``_open_zip``: calls with the same arguments reuse the
# already opened ``ZipFile`` (LRU cache holding up to DEFAULT_SIZE entries).
_cached_open_zip = lru_cache(maxsize=DEFAULT_SIZE)(_open_zip)
|
|
|
|
|
def set_zip_cache_size(max_size: int):
    """Replace the zip-opening cache with a fresh LRU cache of a new size.

    The module-level cache is rebound to a new object, so archives cached so
    far are not carried over to the new cache.

    Args:
        max_size (int): Maximum number of entries the new LRU cache holds.
    """
    global _cached_open_zip
    resized_cache = lru_cache(maxsize=max_size)
    _cached_open_zip = resized_cache(_open_zip)
|
|
|
|
|
def open_file_in_zip(path_in_zip: PathInZip, mode: str = 'r') -> typing.IO:
    """Return a file-like object for a member of a zip archive.

    The enclosing archive is obtained through the module's LRU cache of
    opened zip files, so an archive that is already cached is not reopened.

    Args:
        path_in_zip (PathInZip): Identifies the archive and the member in it.
        mode (str): NOTE(review): currently not used -- the member is always
            opened with the default mode of ``ZipFile.open``. Confirm whether
            it should be forwarded.
    Returns:
        A file-like object for the member named by ``path_in_zip``.
    """
    archive = _cached_open_zip(path_in_zip.zip_path)
    member = archive.open(path_in_zip.file_path)
    return member
|
|