Gael Le Lan
Initial commit
9d0d223
raw
history blame
2.2 kB
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""Utility for reading some info from inside a zip file.
"""
import typing
import zipfile
from dataclasses import dataclass
from functools import lru_cache
from typing_extensions import Literal
# Default `maxsize` of the LRU cache that wraps `_open_zip`.
DEFAULT_SIZE = 32
# Mode strings accepted by `_open_zip`, which forwards them to `zipfile.ZipFile`.
MODE = Literal['r', 'w', 'x', 'a']
@dataclass(order=True)
class PathInZip:
    """Hold a path of file within a zip file.

    Instances compare and order by ``(zip_path, file_path)``, as generated
    by ``@dataclass(order=True)``.

    Args:
        path (str): The convention is <path_to_zip>:<relative_path_inside_zip>.
            Let's assume there is a zip file /some/location/foo.zip
            and inside of it is a json file located at /data/file1.json,
            Then we expect path = "/some/location/foo.zip:/data/file1.json".

    Raises:
        ValueError: If `path` does not contain exactly one separator.
    """

    # Unannotated, so the dataclass decorator treats it as a plain class
    # attribute rather than a field.
    INFO_PATH_SEP = ':'
    zip_path: str
    file_path: str

    def __init__(self, path: str) -> None:
        split_path = path.split(self.INFO_PATH_SEP)
        # Explicit check instead of `assert`: assertions are removed under
        # `python -O`, which would let malformed paths reach the unpacking
        # below and fail with an unhelpful message.
        if len(split_path) != 2:
            raise ValueError(
                f"Expected a path of the form '<path_to_zip>{self.INFO_PATH_SEP}"
                f"<relative_path_inside_zip>' with exactly one "
                f"{self.INFO_PATH_SEP!r}, got {path!r}."
            )
        self.zip_path, self.file_path = split_path

    @classmethod
    def from_paths(cls, zip_path: str, file_path: str) -> 'PathInZip':
        """Build a PathInZip from the zip location and the path inside the zip.

        Args:
            zip_path (str): Path to the zip file.
            file_path (str): Path of the file inside the zip.
        Returns:
            PathInZip: The corresponding instance.
        Raises:
            ValueError: If joining both parts yields more than one separator.
        """
        # Goes through the regular constructor so the same validation applies.
        return cls(zip_path + cls.INFO_PATH_SEP + file_path)

    def __str__(self) -> str:
        """Return the `<path_to_zip>:<relative_path_inside_zip>` representation."""
        return self.zip_path + self.INFO_PATH_SEP + self.file_path
def _open_zip(path: str, mode: MODE = 'r') -> zipfile.ZipFile:
    """Open the zip archive located at `path`.

    Args:
        path (str): Location of the zip file.
        mode (MODE): Mode forwarded to `zipfile.ZipFile`.
    Returns:
        zipfile.ZipFile: The opened archive.
    """
    archive = zipfile.ZipFile(path, mode=mode)
    return archive
# LRU-cached variant of `_open_zip`; calls with the same arguments reuse
# the same `ZipFile` object while it remains in the cache.
_cached_open_zip = lru_cache(maxsize=DEFAULT_SIZE)(_open_zip)
def set_zip_cache_size(max_size: int):
    """Sets the maximal LRU caching for zip file opening.

    The module-level cached opener is rebound to a brand new `lru_cache`
    wrapper, so the new cache starts out empty.

    Args:
        max_size (int): the maximal LRU cache.
    """
    global _cached_open_zip
    make_cached = lru_cache(maxsize=max_size)
    _cached_open_zip = make_cached(_open_zip)
def open_file_in_zip(path_in_zip: PathInZip, mode: str = 'r') -> typing.IO:
    """Opens a file stored inside a zip and returns a file-like object.

    The enclosing archive is obtained through the module-level LRU cache and
    is opened with the default (read) mode.

    Args:
        path_in_zip (PathInZip): A PathInZip object representing the file to return a file-like object of.
        mode (str): The mode in which to open the file with. It is forwarded to
            `zipfile.ZipFile.open`.
    Returns:
        A file-like object for PathInZip.
    """
    zf = _cached_open_zip(path_in_zip.zip_path)
    # Forward `mode` instead of silently dropping it, so an unsupported request
    # surfaces as an error from zipfile rather than yielding a reader.
    return zf.open(path_in_zip.file_path, mode)