iiif_downloader / app.py
Gabriel's picture
Update app.py
741a077 verified
raw
history blame
5.79 kB
from concurrent.futures import ThreadPoolExecutor
import os
import requests
import shutil
import gradio as gr
from zipfile import ZipFile
import logging
from typing import IO
# Configure the root logger once at import time: INFO level, with a
# timestamp and the level name in front of every message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Base URL of the IIIF server; every manifest and image URL in this module
# is built from it. The trailing comment holds a second URL left commented
# out by the author (presumably an internal host -- confirm before using).
IIIF_URL = "https://lbiiif.riksarkivet.se" # "https://iiifintern.ra.se"
def get_image_ids(batch_id: str) -> list[str]:
    """Return the image IDs listed in the IIIF manifest of the given batch.

    The manifest is fetched from ``{IIIF_URL}/arkis!{batch_id}/manifest``.
    For every entry of its ``items`` list, the ``id`` string is split on
    ``!`` and the first 14 characters of the second segment are taken as
    the image ID. Entries without a usable ``id`` are logged and skipped.

    Arguments:
        batch_id: Batch identifier used to build the manifest URL.

    Returns:
        The extracted image IDs, in manifest order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        ValueError: If no image ID could be extracted from the manifest.
    """
    logging.info(f"Fetching image IDs for batch {batch_id}")
    # (connect, read) timeout: without one, a stalled server would block
    # the calling worker indefinitely.
    response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest", timeout=(10, 60))
    response.raise_for_status()
    manifest = response.json()

    image_ids = []
    for item in manifest.get("items", []):
        # A malformed item must not abort the whole batch, so a missing
        # "id" is treated like any other unexpected format.
        item_id = str(item.get("id", "")) if isinstance(item, dict) else ""
        id_parts = item_id.split("!")
        if len(id_parts) > 1 and id_parts[1]:
            image_ids.append(id_parts[1][:14])
        else:
            logging.warning(f"Unexpected id format: {item_id}")

    if not image_ids:
        raise ValueError("No images found in the manifest.")
    logging.info(f"Found {len(image_ids)} images in batch {batch_id}")
    return image_ids
def download_image(url: str, dest: str) -> None:
    """
    Download an image

    The response body is streamed to ``dest`` in chunks, so the whole image
    is never held in memory at once.

    Arguments:
        url: Image url
        dest: Destination file name

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.RequestException: If the connection fails or times out.
    """
    logging.info(f"Downloading image from {url} to {dest}")
    # Using the response as a context manager releases the connection on
    # every path; a streamed response otherwise stays open until it has
    # been fully consumed, which never happens on the error branch.
    # The (connect, read) timeout keeps a stalled server from hanging us.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        if response.status_code != 200:
            logging.error(f"Failed to download image from {url}. Status code: {response.status_code}")
            raise Exception(f"Failed to download image from {url}. Status code: {response.status_code}")
        with open(dest, "wb") as out_file:
            # iter_content undoes any gzip/deflate content encoding, whereas
            # copying response.raw would write the still-compressed bytes.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                out_file.write(chunk)
    logging.info(f"Successfully downloaded image: {dest}")
def download_image_by_image_id(image_id: str):
    """
    Fetch the full-size JPEG for one image ID and store it on disk

    The first eight characters of the image ID are used as the batch ID.
    A directory with that name is created if needed, and the image is
    saved inside it as ``<image_id>.jpg``.

    Arguments:
        image_id: ID of the image to download
    """
    target_dir = image_id[:8]
    os.makedirs(target_dir, exist_ok=True)
    download_image(
        f"{IIIF_URL}/arkis!{image_id}/full/max/0/default.jpg",
        os.path.join(target_dir, image_id + ".jpg"),
    )
def rest_download_batch_images(batch_id: str) -> str:
    """
    Download all images of a batch concurrently and zip them

    The image IDs are read from the batch manifest, each image is
    downloaded on a thread pool, and every image file that exists afterwards
    is added to ``<batch_id>.zip`` in the current working directory.
    Individual download failures do not abort the batch; they are logged
    and the affected images are left out of the archive.

    Arguments:
        batch_id: Batch identifier

    Returns:
        The file name of the created zip archive.
    """
    image_ids = get_image_ids(batch_id)

    # Leaving the with-block waits for every submitted download to finish.
    with ThreadPoolExecutor() as executor:
        downloads = [
            (image_id, executor.submit(download_image_by_image_id, image_id))
            for image_id in image_ids
        ]

    # An exception raised in a worker is stored on its future; unless it is
    # inspected here, the failure would vanish without a trace.
    for image_id, future in downloads:
        error = future.exception()
        if error is not None:
            logging.error(f"Failed to download image {image_id}: {error}")

    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, 'w') as zipf:
        for image_id in image_ids:
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            if os.path.exists(img_path):
                zipf.write(img_path, arcname=os.path.basename(img_path))
            else:
                logging.warning(f"Image {img_path} does not exist and will not be zipped.")
    return zip_filename
def download_batch_images(batch_id: str, progress=None):
    """
    Download all images of a batch one after another and zip them

    Arguments:
        batch_id: Batch identifier
        progress: Optional callable invoked as ``progress(fraction, desc=...)``
            at the start, after each downloaded image, and at the end.
            Pass ``None`` (the default) to disable progress reporting.

    Returns:
        The file name of the created zip archive, ``<batch_id>.zip``.
    """

    def report(fraction, message):
        # Forward a progress update only when a callback was supplied.
        if progress is not None:
            progress(fraction, desc=message)

    logging.info(f"Starting download for batch {batch_id}")
    report(0, f"Starting download for {batch_id}...")

    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)

    for done, image_id in enumerate(image_ids, start=1):
        download_image_by_image_id(image_id)
        logging.info(f"Downloaded image {image_id}")
        report(done / total_images, f"Downloading {image_id}...")

    logging.info(f"Zipping downloaded images for batch {batch_id}")
    archive_name = f"{batch_id}.zip"
    with ZipFile(archive_name, 'w') as archive:
        for image_id in image_ids:
            image_path = os.path.join(batch_id, f"{image_id}.jpg")
            if not os.path.exists(image_path):
                logging.warning(f"Image {image_path} does not exist and will not be zipped.")
                continue
            # Store the file flat in the archive, without the batch directory.
            archive.write(image_path, arcname=os.path.basename(image_path))

    report(1, f"Completed {batch_id}")
    logging.info(f"Completed download and zip for batch {batch_id}")
    return archive_name
def gradio_interface(batch_id_input, progress=gr.Progress()):
    """
    Gradio handler for the sequential download with progress reporting

    Arguments:
        batch_id_input: Batch ID entered by the user
        progress: Progress tracker handed on to ``download_batch_images``

    Returns:
        The file name of the zip archive created for the batch.

    Raises:
        gr.Error: If anything goes wrong; the message carries the text of
            the original exception.
    """
    try:
        return download_batch_images(batch_id_input, progress=progress)
    except Exception as exc:
        logging.error(f"Error processing batch: {exc}")
        message = f"Error: {str(exc)}"
        raise gr.Error(message)
def rest_gradio_interface(batch_id_input: str) -> str:
    """
    Gradio handler for the threaded download without progress reporting

    Arguments:
        batch_id_input: Batch ID to download

    Returns:
        The file name of the zip archive created for the batch. (It is a
        path string, not an open binary stream.)

    Raises:
        gr.Error: If anything goes wrong; the message carries the text of
            the original exception, which is also attached as the cause.
    """
    try:
        return rest_download_batch_images(batch_id_input)
    except Exception as e:
        logging.error(f"Error processing batch: {e}")
        # Chain the original exception so its traceback is not lost.
        raise gr.Error(f"Error: {str(e)}") from e
# Build the Gradio UI: one working tab for downloading a single batch and
# two placeholder tabs that are still work in progress.
with gr.Blocks() as app:
    gr.Markdown("# IIIF Downloader")

    with gr.Tab("Download Batch"):
        with gr.Row():
            with gr.Column():
                batch_id_input = gr.Textbox(label="Batch ID", placeholder="Enter batch ID.")
                download_button = gr.Button("Download Images")
            with gr.Column():
                output_file = gr.File(label="Download Zip File")

        # Interactive path: sequential download with a progress bar.
        download_button.click(
            gradio_interface,
            inputs=[batch_id_input],
            outputs=[output_file],
        )

        # API path: the threaded download is bound to a hidden button so the
        # "iiif_rest_download" endpoint stays available. Attaching it to
        # download_button as well made every click run BOTH handlers at
        # once, downloading each image twice into the same files and zip.
        rest_download_trigger = gr.Button("REST download", visible=False)
        rest_download_trigger.click(
            rest_gradio_interface,
            api_name="iiif_rest_download",
            inputs=[batch_id_input],
            outputs=[output_file],
        )

    with gr.Tab("Multiple Batches"):
        gr.Markdown("WIP")
        gr.Markdown("Make it possible to download batches to a huggingface account so it can be used through fastapi")
        gr.Markdown("Will uses threading")

    with gr.Tab("How to use"):
        gr.Markdown("WIP, instructional video")

# Enable the request queue, then start the server.
app.queue()
app.launch()