SnapText / app.py
hruday96's picture
Update app.py
8509e3e verified
raw
history blame
1.56 kB
import streamlit as st # Don't forget to include `streamlit` in your `requirements.txt` file to ensure the app runs properly on Hugging Face Spaces.
from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration # Make sure that the Hugging Face `transformers` library version supports the `PaliGemma2` model. You may need to specify the version in `requirements.txt`.
from PIL import Image # Ensure the `pillow` library is included in your `requirements.txt`.
import torch # Since PyTorch is required for this app, specify the appropriate version of `torch` in `requirements.txt` based on compatibility with the model.
import os
def load_model(model_id: str = "google/paligemma2"):
    """Load the PaliGemma processor and model from the Hugging Face Hub.

    The access token is read from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable and forwarded to both ``from_pretrained`` calls.

    Args:
        model_id: Hub repository id used for both the processor and the
            model. Defaults to the id this app has always used, so existing
            ``load_model()`` callers are unaffected.

    Returns:
        A ``(processor, model)`` tuple.

    Raises:
        ValueError: If the token environment variable is unset or empty.
    """
    # NOTE(review): the default id carries no size/variant suffix; published
    # PaliGemma 2 checkpoints appear to be named like
    # "google/paligemma2-3b-mix-448" -- TODO confirm the default resolves on
    # the Hub, or pass an explicit ``model_id``.
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        raise ValueError(
            "Hugging Face API token not found. Please set it in the environment variables."
        )

    # Use one id for both objects so processor and weights cannot drift apart.
    processor = PaliGemmaProcessor.from_pretrained(model_id, token=token)
    model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, token=token)
    return processor, model
def process_image(image, processor, model, *, prompt="ocr", max_new_tokens=128):
    """Extract text from an image using a PaliGemma processor/model pair.

    Args:
        image: Image object handed to the processor as ``images=``.
        processor: Callable processor that returns a mapping containing
            ``"input_ids"`` and that provides ``batch_decode``.
        model: Model exposing ``generate(**inputs, ...)``.
        prompt: Task prompt passed to the processor as ``text=``. Defaults to
            ``"ocr"`` so the model is asked to read text rather than caption
            the image. Pass ``None`` to call the processor without any text.
        max_new_tokens: Upper bound on the number of tokens generated; without
            it, generation stops at the library's short default length.

    Returns:
        The decoded string for the first (only) item in the batch, containing
        only the newly generated tokens.
    """
    # Build the processor call so ``text`` is omitted entirely when no prompt
    # is requested.
    processor_kwargs = {"images": image, "return_tensors": "pt"}
    if prompt is not None:
        processor_kwargs["text"] = prompt
    inputs = processor(**processor_kwargs)

    # Remember how many tokens belong to the input so they can be dropped from
    # the output below.
    prompt_len = inputs["input_ids"].shape[-1]

    # Inference only: no gradients needed.
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # ``generate`` returns the input tokens followed by the continuation;
    # decode only the continuation so the prompt is not echoed in the result.
    new_token_ids = generated_ids[:, prompt_len:]
    return processor.batch_decode(new_token_ids, skip_special_tokens=True)[0]