import os

import streamlit as st  # Include `streamlit` in `requirements.txt` so the app runs on Hugging Face Spaces.
import torch  # Pin a `torch` version in `requirements.txt` that is compatible with the model.
from PIL import Image  # Provided by the `pillow` package; add it to `requirements.txt` as well.
from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration  # PaliGemma 2 needs a recent `transformers` release (4.47+); pin it in `requirements.txt`.

# PaliGemma 2 is published as separate checkpoints per model size and input
# resolution rather than under a single "google/paligemma2" repo; the 3B,
# 224px pretrained variant is the smallest.
MODEL_ID = "google/paligemma2-3b-pt-224"


@st.cache_resource  # Cache across Streamlit reruns so the model is loaded only once.
def load_model():
    """Load the PaliGemma 2 model and processor with a Hugging Face token."""
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Retrieve the token from the environment.
    if not token:
        raise ValueError("Hugging Face API token not found. Please set it in the environment variables.")
    processor = PaliGemmaProcessor.from_pretrained(MODEL_ID, token=token)
    model = PaliGemmaForConditionalGeneration.from_pretrained(MODEL_ID, token=token)
    model.eval()
    return processor, model


def process_image(image, processor, model):
    """Extract text from an image using PaliGemma 2."""
    # PaliGemma is prompt-driven: "ocr" is one of its training task prefixes
    # and asks the model to transcribe text visible in the image.
    inputs = processor(text="ocr", images=image, return_tensors="pt")
    input_len = inputs["input_ids"].shape[-1]
    # The default generation length is short, so allow enough new tokens for
    # a full transcription.
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=200)
    # Decode only the newly generated tokens, not the echoed prompt.
    text = processor.batch_decode(generated_ids[:, input_len:], skip_special_tokens=True)[0]
    return text
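
# --- Streamlit UI ---
# A minimal sketch of the page body that wires the two helpers above into the
# app; the original file only defined the helpers, so the widget labels and
# layout below are assumptions rather than part of the original code.
st.title("PaliGemma 2 OCR")
uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
if uploaded_file is not None:
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded image")
    with st.spinner("Extracting text..."):
        processor, model = load_model()
        extracted_text = process_image(image, processor, model)
    st.subheader("Extracted text")
    st.write(extracted_text)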