File size: 1,558 Bytes
9036c2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8509e3e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import streamlit as st  # Don't forget to include `streamlit` in your `requirements.txt` file to ensure the app runs properly on Hugging Face Spaces.
from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration  # Make sure that the Hugging Face `transformers` library version supports the `PaliGemma2` model. You may need to specify the version in `requirements.txt`.
from PIL import Image  # Ensure the `pillow` library is included in your `requirements.txt`.
import torch  # Since PyTorch is required for this app, specify the appropriate version of `torch` in `requirements.txt` based on compatibility with the model.
import os

def load_model():
    """Load the PaliGemma2 model and its processor.

    Reads the Hugging Face access token from the HUGGINGFACEHUB_API_TOKEN
    environment variable and fails fast with a ValueError when it is absent,
    since both downloads below require authentication.

    Returns:
        A (processor, model) tuple ready for inference.

    Raises:
        ValueError: If no Hugging Face API token is set in the environment.
    """
    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not hf_token:
        raise ValueError("Hugging Face API token not found. Please set it in the environment variables.")
    model_id = "google/paligemma2"
    return (
        PaliGemmaProcessor.from_pretrained(model_id, token=hf_token),
        PaliGemmaForConditionalGeneration.from_pretrained(model_id, token=hf_token),
    )

def process_image(image, processor, model, prompt="ocr", max_new_tokens=100):
    """Extract text from an image using PaliGemma2.

    Args:
        image: The input image (e.g. a PIL.Image) to read text from.
        processor: PaliGemmaProcessor returned by load_model().
        model: PaliGemmaForConditionalGeneration returned by load_model().
        prompt: Task prompt passed alongside the image. PaliGemma is a
            conditional-generation model and its processor requires a text
            prompt together with the image; "ocr" requests text extraction.
        max_new_tokens: Cap on generated tokens so longer passages are not
            cut off by the library's short generation default.

    Returns:
        The decoded text string, with the echoed prompt stripped.
    """
    # PaliGemma processors need both `text` and `images`; passing images
    # alone raises inside the processor, so supply the task prompt here.
    inputs = processor(text=prompt, images=image, return_tensors="pt")

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # The generated sequence echoes the input prompt; drop it from the result.
    if decoded.startswith(prompt):
        decoded = decoded[len(prompt):].strip()
    return decoded