Spaces:

wjbmattingly
/

medieval-htr-page

Running on Zero

wjbmattingly committed on Aug 9

Commit

546d56f

•

1 Parent(s): 0456d74

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import json
 from PIL import Image, ImageDraw
 import os
 import tempfile
 # Dictionary of model names and their corresponding HuggingFace model IDs
 MODEL_OPTIONS = {
     "Microsoft Handwritten": "microsoft/trocr-base-handwritten",
@@ -37,7 +37,7 @@ def load_model(model_name):
         current_model_name = model_name
         # Move model to GPU
-        # current_model = current_model.to('cuda')
     return current_processor, current_model
@@ -69,9 +69,17 @@ def process_image(image, model_name):
         # Crop the line from the original image
         line_image = image.crop((x1, y1, x2, y2))
         # Prepare image for TrOCR
-        pixel_values = processor(line_image, return_tensors="pt").pixel_values
-        # pixel_values = pixel_values.to('cuda')
         # Generate (no beam search)
         with torch.no_grad():

 from PIL import Image, ImageDraw
 import os
 import tempfile
+import numpy as np
 # Dictionary of model names and their corresponding HuggingFace model IDs
 MODEL_OPTIONS = {
     "Microsoft Handwritten": "microsoft/trocr-base-handwritten",
         current_model_name = model_name
         # Move model to GPU
+        current_model = current_model.to('cuda')
     return current_processor, current_model
         # Crop the line from the original image
         line_image = image.crop((x1, y1, x2, y2))
+        # Convert to grayscale if it's not already
+        if line_image.mode != 'L':
+            line_image = line_image.convert('L')
+        # Convert to numpy array and normalize
+        line_image_np = np.array(line_image).astype(np.float32) / 255.0
+        line_image_np = np.expand_dims(line_image_np, axis=0)  # Add channel dimension
         # Prepare image for TrOCR
+        pixel_values = processor(images=line_image_np, return_tensors="pt").pixel_values
+        pixel_values = pixel_values.to('cuda')
         # Generate (no beam search)
         with torch.no_grad():